diff --git a/api/src/main/java/com/cloud/event/EventTypes.java b/api/src/main/java/com/cloud/event/EventTypes.java index a30518aaf17..d9a70b98e89 100644 --- a/api/src/main/java/com/cloud/event/EventTypes.java +++ b/api/src/main/java/com/cloud/event/EventTypes.java @@ -19,6 +19,13 @@ package com.cloud.event; import java.util.HashMap; import java.util.Map; +import org.apache.cloudstack.acl.Role; +import org.apache.cloudstack.acl.RolePermission; +import org.apache.cloudstack.annotation.Annotation; +import org.apache.cloudstack.config.Configuration; +import org.apache.cloudstack.ha.HAConfig; +import org.apache.cloudstack.usage.Usage; + import com.cloud.dc.DataCenter; import com.cloud.dc.Pod; import com.cloud.dc.StorageNetworkIpRange; @@ -69,12 +76,6 @@ import com.cloud.user.User; import com.cloud.vm.Nic; import com.cloud.vm.NicSecondaryIp; import com.cloud.vm.VirtualMachine; -import org.apache.cloudstack.acl.Role; -import org.apache.cloudstack.acl.RolePermission; -import org.apache.cloudstack.annotation.Annotation; -import org.apache.cloudstack.config.Configuration; -import org.apache.cloudstack.ha.HAConfig; -import org.apache.cloudstack.usage.Usage; public class EventTypes { @@ -106,6 +107,7 @@ public class EventTypes { public static final String EVENT_ROUTER_HA = "ROUTER.HA"; public static final String EVENT_ROUTER_UPGRADE = "ROUTER.UPGRADE"; public static final String EVENT_ROUTER_DIAGNOSTICS = "ROUTER.DIAGNOSTICS"; + public static final String EVENT_ROUTER_HEALTH_CHECKS = "ROUTER.HEALTH.CHECKS"; // Console proxy public static final String EVENT_PROXY_CREATE = "PROXY.CREATE"; @@ -603,6 +605,7 @@ public class EventTypes { entityEventDetails.put(EVENT_ROUTER_HA, VirtualRouter.class); entityEventDetails.put(EVENT_ROUTER_UPGRADE, VirtualRouter.class); entityEventDetails.put(EVENT_ROUTER_DIAGNOSTICS, VirtualRouter.class); + entityEventDetails.put(EVENT_ROUTER_HEALTH_CHECKS, VirtualRouter.class); entityEventDetails.put(EVENT_PROXY_CREATE, VirtualMachine.class); 
entityEventDetails.put(EVENT_PROXY_DESTROY, VirtualMachine.class); diff --git a/api/src/main/java/com/cloud/network/NetworkService.java b/api/src/main/java/com/cloud/network/NetworkService.java index 04f240b5e13..f90c3d423b2 100644 --- a/api/src/main/java/com/cloud/network/NetworkService.java +++ b/api/src/main/java/com/cloud/network/NetworkService.java @@ -72,7 +72,9 @@ public interface NetworkService { boolean deleteNetwork(long networkId, boolean forced); - boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; + boolean restartNetwork(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; + + boolean restartNetwork(RestartNetworkCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; int getActiveNicsInNetwork(long networkId); diff --git a/api/src/main/java/com/cloud/network/RouterHealthCheckResult.java b/api/src/main/java/com/cloud/network/RouterHealthCheckResult.java new file mode 100644 index 00000000000..eb65ae9088e --- /dev/null +++ b/api/src/main/java/com/cloud/network/RouterHealthCheckResult.java @@ -0,0 +1,34 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.network; + +import java.util.Date; + +public interface RouterHealthCheckResult { + long getRouterId(); + + String getCheckName(); + + String getCheckType(); + + boolean getCheckResult(); + + Date getLastUpdateTime(); + + String getParsedCheckDetails(); +} diff --git a/api/src/main/java/com/cloud/network/VirtualNetworkApplianceService.java b/api/src/main/java/com/cloud/network/VirtualNetworkApplianceService.java index 815ae4d6ae3..98fb8be7c7a 100644 --- a/api/src/main/java/com/cloud/network/VirtualNetworkApplianceService.java +++ b/api/src/main/java/com/cloud/network/VirtualNetworkApplianceService.java @@ -31,8 +31,7 @@ public interface VirtualNetworkApplianceService { /** * Starts domain router * - * @param cmd - * the command specifying router's id + * @param cmd the command specifying router's id * @return DomainRouter object */ VirtualRouter startRouter(long routerId, boolean reprogramNetwork) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; @@ -51,10 +50,8 @@ public interface VirtualNetworkApplianceService { /** * Stops domain router * - * @param id - * of the router - * @param forced - * just do it. caller knows best. + * @param id of the router + * @param forced just do it. caller knows best. 
* @return router if successful, null otherwise * @throws ResourceUnavailableException * @throws ConcurrentOperationException @@ -68,4 +65,13 @@ public interface VirtualNetworkApplianceService { VirtualRouter findRouter(long routerId); List upgradeRouterTemplate(UpgradeRouterTemplateCmd cmd); + + /** + * Updates router with latest health check data, runs health checks and persists health checks on virtual router if feasible. + * Throws relevant exception if feature is disabled or failures occur. + * + * @param routerId id of the router + * @return + */ + boolean performRouterHealthChecks(long routerId); } diff --git a/api/src/main/java/com/cloud/network/vpc/VpcService.java b/api/src/main/java/com/cloud/network/vpc/VpcService.java index 241e27b1552..5fc339e64cb 100644 --- a/api/src/main/java/com/cloud/network/vpc/VpcService.java +++ b/api/src/main/java/com/cloud/network/vpc/VpcService.java @@ -21,6 +21,7 @@ import java.util.Map; import org.apache.cloudstack.api.command.user.vpc.ListPrivateGatewaysCmd; import org.apache.cloudstack.api.command.user.vpc.ListStaticRoutesCmd; +import org.apache.cloudstack.api.command.user.vpc.RestartVPCCmd; import com.cloud.exception.ConcurrentOperationException; import com.cloud.exception.InsufficientAddressCapacityException; @@ -29,6 +30,7 @@ import com.cloud.exception.NetworkRuleConflictException; import com.cloud.exception.ResourceAllocationException; import com.cloud.exception.ResourceUnavailableException; import com.cloud.network.IpAddress; +import com.cloud.user.User; import com.cloud.utils.Pair; public interface VpcService { @@ -132,7 +134,9 @@ public interface VpcService { * @return * @throws InsufficientCapacityException */ - boolean restartVpc(long id, boolean cleanUp, boolean makeredundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; + boolean restartVpc(RestartVPCCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; + +
boolean restartVpc(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException; /** * Returns a Private gateway found in the VPC by id diff --git a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java index b1ea142f7a6..33e3867a68d 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java +++ b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java @@ -727,6 +727,7 @@ public class ApiConstants { public static final String VIRTUAL_SIZE = "virtualsize"; public static final String NETSCALER_CONTROLCENTER_ID = "netscalercontrolcenterid"; public static final String NETSCALER_SERVICEPACKAGE_ID = "netscalerservicepackageid"; + public static final String FETCH_ROUTER_HEALTH_CHECK_RESULTS = "fetchhealthcheckresults"; public static final String ZONE_ID_LIST = "zoneids"; public static final String DESTINATION_ZONE_ID_LIST = "destzoneids"; @@ -748,6 +749,13 @@ public class ApiConstants { public static final String FILES = "files"; public static final String VOLUME_IDS = "volumeids"; + public static final String ROUTER_ID = "routerid"; + public static final String ROUTER_HEALTH_CHECKS = "healthchecks"; + public static final String ROUTER_CHECK_NAME = "checkname"; + public static final String ROUTER_CHECK_TYPE = "checktype"; + public static final String LAST_UPDATED = "lastupdated"; + public static final String PERFORM_FRESH_CHECKS = "performfreshchecks"; + public enum HostDetails { all, capacity, events, stats, min; } diff --git a/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java b/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java index 740ee468702..510562bf54a 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java +++ b/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java @@ -22,6 +22,7 @@ import 
java.util.List; import java.util.Map; import java.util.Set; +import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; import org.apache.cloudstack.management.ManagementServerHost; import org.apache.cloudstack.affinity.AffinityGroup; import org.apache.cloudstack.affinity.AffinityGroupResponse; @@ -146,6 +147,7 @@ import com.cloud.network.PhysicalNetwork; import com.cloud.network.PhysicalNetworkServiceProvider; import com.cloud.network.PhysicalNetworkTrafficType; import com.cloud.network.RemoteAccessVpn; +import com.cloud.network.RouterHealthCheckResult; import com.cloud.network.Site2SiteCustomerGateway; import com.cloud.network.Site2SiteVpnConnection; import com.cloud.network.Site2SiteVpnGateway; @@ -466,4 +468,6 @@ public interface ResponseGenerator { SSHKeyPairResponse createSSHKeyPairResponse(SSHKeyPair sshkeyPair, boolean privatekey); ManagementServerResponse createManagementResponse(ManagementServerHost mgmt); + + List createHealthCheckResponse(VirtualMachine router, List healthCheckResults); } diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/internallb/ListInternalLBVMsCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/internallb/ListInternalLBVMsCmd.java index ba2054c3c24..fd415858f56 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/internallb/ListInternalLBVMsCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/internallb/ListInternalLBVMsCmd.java @@ -16,6 +16,7 @@ // under the License. 
package org.apache.cloudstack.api.command.admin.internallb; +import org.apache.commons.lang.BooleanUtils; import org.apache.log4j.Logger; import org.apache.cloudstack.api.APICommand; @@ -73,6 +74,11 @@ public class ListInternalLBVMsCmd extends BaseListProjectAndAccountResourcesCmd @Parameter(name = ApiConstants.FOR_VPC, type = CommandType.BOOLEAN, description = "if true is passed for this parameter, list only VPC Internal LB VMs") private Boolean forVpc; + + @Parameter(name = ApiConstants.FETCH_ROUTER_HEALTH_CHECK_RESULTS, type = CommandType.BOOLEAN, since = "4.14", + description = "if true is passed for this parameter, also fetch last executed health check results for the VM. Default is false") + private Boolean fetchHealthCheckResults; + ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// @@ -117,6 +123,10 @@ public class ListInternalLBVMsCmd extends BaseListProjectAndAccountResourcesCmd return Role.INTERNAL_LB_VM.toString(); } + public boolean shouldFetchHealthCheckResults() { + return BooleanUtils.isTrue(fetchHealthCheckResults); + } + ///////////////////////////////////////////////////// /////////////// API Implementation/////////////////// ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/router/GetRouterHealthCheckResultsCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/router/GetRouterHealthCheckResultsCmd.java new file mode 100644 index 00000000000..5efc6de9e94 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/router/GetRouterHealthCheckResultsCmd.java @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.command.admin.router; + +import java.util.List; + +import org.apache.cloudstack.api.APICommand; +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.ApiErrorCode; +import org.apache.cloudstack.api.BaseCmd; +import org.apache.cloudstack.api.Parameter; +import org.apache.cloudstack.api.ServerApiException; +import org.apache.cloudstack.api.response.DomainRouterResponse; +import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; +import org.apache.cloudstack.api.response.RouterHealthCheckResultsListResponse; +import org.apache.cloudstack.context.CallContext; +import org.apache.commons.lang.BooleanUtils; +import org.apache.log4j.Logger; + +import com.cloud.exception.InvalidParameterValueException; +import com.cloud.exception.ResourceUnavailableException; +import com.cloud.network.router.VirtualRouter; +import com.cloud.user.Account; +import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.vm.VirtualMachine; + +@APICommand(name = GetRouterHealthCheckResultsCmd.APINAME, + responseObject = RouterHealthCheckResultsListResponse.class, + description = "Gets the health check results of a router.", + entityType = {VirtualMachine.class}, + requestHasSensitiveInfo = false, + responseHasSensitiveInfo = false, + since = "4.14.0") +public class GetRouterHealthCheckResultsCmd extends BaseCmd { + public static final Logger s_logger
= Logger.getLogger(GetRouterHealthCheckResultsCmd.class.getName()); + public static final String APINAME = "getRouterHealthCheckResults"; + + ///////////////////////////////////////////////////// + //////////////// API parameters ///////////////////// + ///////////////////////////////////////////////////// + + @Parameter(name = ApiConstants.ROUTER_ID, type = CommandType.UUID, entityType = DomainRouterResponse.class, + required = true, description = "the ID of the router") + private Long routerId; + + @Parameter(name = ApiConstants.PERFORM_FRESH_CHECKS, type = CommandType.BOOLEAN, description = "if true is passed for this parameter, " + + "health checks are performed on the fly. Else last performed checks data is fetched") + private Boolean performFreshChecks; + + ///////////////////////////////////////////////////// + /////////////////// Accessors /////////////////////// + ///////////////////////////////////////////////////// + + public Long getRouterId() { + return routerId; + } + + public boolean shouldPerformFreshChecks() { + return BooleanUtils.isTrue(performFreshChecks); + } + + ///////////////////////////////////////////////////// + /////////////// API Implementation/////////////////// + ///////////////////////////////////////////////////// + + @Override + public String getCommandName() { + return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX; + } + + @Override + public long getEntityOwnerId() { + VirtualRouter router = _entityMgr.findById(VirtualRouter.class, getRouterId()); + if (router != null) { + return router.getAccountId(); + } + + return Account.ACCOUNT_ID_SYSTEM; + } + + @Override + public void execute() throws ResourceUnavailableException, InvalidParameterValueException, ServerApiException { + CallContext.current().setEventDetails("Router Id: " + this._uuidMgr.getUuid(VirtualMachine.class, getRouterId())); + VirtualRouter router = _routerService.findRouter(getRouterId()); + if (router == null || router.getRole() != 
VirtualRouter.Role.VIRTUAL_ROUTER) { + throw new InvalidParameterValueException("Can't find router by routerId"); + } + + try { + List healthChecks = _queryService.listRouterHealthChecks(this); + RouterHealthCheckResultsListResponse routerResponse = new RouterHealthCheckResultsListResponse(); + routerResponse.setRouterId(router.getUuid()); + routerResponse.setHealthChecks(healthChecks); + routerResponse.setObjectName("routerhealthchecks"); + routerResponse.setResponseName(getCommandName()); + setResponseObject(routerResponse); + } catch (CloudRuntimeException ex){ + ex.printStackTrace(); + throw new ServerApiException(ApiErrorCode.INTERNAL_ERROR, "Failed to execute command due to exception: " + ex.getLocalizedMessage()); + } + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/router/ListRoutersCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/router/ListRoutersCmd.java index 121fc5bc14d..4fabcf5df76 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/router/ListRoutersCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/router/ListRoutersCmd.java @@ -16,6 +16,7 @@ // under the License. package org.apache.cloudstack.api.command.admin.router; +import org.apache.commons.lang.BooleanUtils; import org.apache.log4j.Logger; import org.apache.cloudstack.api.APICommand; @@ -80,6 +81,10 @@ public class ListRoutersCmd extends BaseListProjectAndAccountResourcesCmd { @Parameter(name = ApiConstants.VERSION, type = CommandType.STRING, description = "list virtual router elements by version") private String version; + @Parameter(name = ApiConstants.FETCH_ROUTER_HEALTH_CHECK_RESULTS, type = CommandType.BOOLEAN, since = "4.14", + description = "if true is passed for this parameter, also fetch last executed health check results for the router. 
Default is false") + private Boolean fetchHealthCheckResults; + ///////////////////////////////////////////////////// /////////////////// Accessors /////////////////////// ///////////////////////////////////////////////////// @@ -132,6 +137,11 @@ public class ListRoutersCmd extends BaseListProjectAndAccountResourcesCmd { return Role.VIRTUAL_ROUTER.toString(); } + public boolean shouldFetchHealthCheckResults() { + return BooleanUtils.isTrue(fetchHealthCheckResults); + } + + ///////////////////////////////////////////////////// /////////////// API Implementation/////////////////// ///////////////////////////////////////////////////// diff --git a/api/src/main/java/org/apache/cloudstack/api/command/user/network/RestartNetworkCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/user/network/RestartNetworkCmd.java index 645ae5aff8e..d422966388c 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/user/network/RestartNetworkCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/user/network/RestartNetworkCmd.java @@ -96,7 +96,7 @@ public class RestartNetworkCmd extends BaseAsyncCmd { @Override public void execute() throws ResourceUnavailableException, ResourceAllocationException, ConcurrentOperationException, InsufficientCapacityException { - boolean result = _networkService.restartNetwork(this, getCleanup(), getMakeRedundant()); + boolean result = _networkService.restartNetwork(this); if (result) { SuccessResponse response = new SuccessResponse(getCommandName()); setResponseObject(response); diff --git a/api/src/main/java/org/apache/cloudstack/api/command/user/vpc/RestartVPCCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/user/vpc/RestartVPCCmd.java index edfd93e785e..8ed2ab26a22 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/user/vpc/RestartVPCCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/user/vpc/RestartVPCCmd.java @@ -91,7 +91,7 @@ public class RestartVPCCmd extends BaseAsyncCmd { 
@Override public void execute() { try { - final boolean result = _vpcService.restartVpc(getId(), getCleanup(), getMakeredundant()); + final boolean result = _vpcService.restartVpc(this); if (result) { final SuccessResponse response = new SuccessResponse(getCommandName()); setResponseObject(response); diff --git a/api/src/main/java/org/apache/cloudstack/api/response/DomainRouterResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/DomainRouterResponse.java index 131e3e1de7e..97e3b0160d9 100644 --- a/api/src/main/java/org/apache/cloudstack/api/response/DomainRouterResponse.java +++ b/api/src/main/java/org/apache/cloudstack/api/response/DomainRouterResponse.java @@ -18,10 +18,9 @@ package org.apache.cloudstack.api.response; import java.util.Date; import java.util.LinkedHashSet; +import java.util.List; import java.util.Set; -import com.google.gson.annotations.SerializedName; - import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseResponse; import org.apache.cloudstack.api.EntityReference; @@ -29,6 +28,7 @@ import org.apache.cloudstack.api.EntityReference; import com.cloud.serializer.Param; import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachine.State; +import com.google.gson.annotations.SerializedName; @EntityReference(value = VirtualMachine.class) @SuppressWarnings("unused") @@ -217,6 +217,14 @@ public class DomainRouterResponse extends BaseResponse implements ControlledView @Param(description = "true if the router template requires upgrader") private boolean requiresUpgrade; + @SerializedName("healthchecksfailed") + @Param(description = "true if any health checks had failed") + private boolean healthChecksFailed; + + @SerializedName("healthcheckresults") + @Param(description = "Last executed health check result for the router", responseObject = RouterHealthCheckResultResponse.class, since = "4.14") + List healthCheckResults; + public DomainRouterResponse() { nics = new LinkedHashSet(); } @@ -278,6 +286,14 @@ 
public class DomainRouterResponse extends BaseResponse implements ControlledView return hypervisor; } + public List getHealthCheckResults() { + return healthCheckResults; + } + + public boolean getHealthChecksFailed() { + return healthChecksFailed; + } + public void setHypervisor(String hypervisor) { this.hypervisor = hypervisor; } @@ -446,4 +462,12 @@ public class DomainRouterResponse extends BaseResponse implements ControlledView public void setRequiresUpgrade(boolean requiresUpgrade) { this.requiresUpgrade = requiresUpgrade; } + + public void setHealthChecksFailed(boolean healthChecksFailed) { + this.healthChecksFailed = healthChecksFailed; + } + + public void setHealthCheckResults(List healthCheckResults) { + this.healthCheckResults = healthCheckResults; + } } diff --git a/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultResponse.java new file mode 100644 index 00000000000..f98cf0acd5d --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultResponse.java @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package org.apache.cloudstack.api.response; + +import java.util.Date; + +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseResponse; + +import com.cloud.serializer.Param; +import com.google.gson.annotations.SerializedName; + +public class RouterHealthCheckResultResponse extends BaseResponse { + @SerializedName(ApiConstants.ROUTER_CHECK_NAME) + @Param(description = "the name of the health check on the router") + private String checkName; + + @SerializedName(ApiConstants.ROUTER_CHECK_TYPE) + @Param(description = "the type of the health check - basic or advanced") + private String checkType; + + @SerializedName(ApiConstants.RESULT) + @Param(description = "result of the health check") + private boolean result; + + @SerializedName(ApiConstants.LAST_UPDATED) + @Param(description = "the date this health check result was last updated") + private Date lastUpdated; + + @SerializedName(ApiConstants.DETAILS) + @Param(description = "detailed response generated on running health check") + private String details; + + public String getCheckName() { + return checkName; + } + + public String getCheckType() { + return checkType; + } + + public boolean getResult() { + return result; + } + + public Date getLastUpdated() { + return lastUpdated; + } + + public String getDetails() { + return details; + } + + public void setCheckName(String checkName) { + this.checkName = checkName; + } + + public void setCheckType(String checkType) { + this.checkType = checkType; + } + + public void setResult(boolean result) { + this.result = result; + } + + public void setLastUpdated(Date lastUpdated) { + this.lastUpdated = lastUpdated; + } + + public void setDetails(String details) { + this.details = details; + } +} diff --git a/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultsListResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultsListResponse.java new file mode 100644 index 00000000000..e56f70d2c59 --- /dev/null +++
b/api/src/main/java/org/apache/cloudstack/api/response/RouterHealthCheckResultsListResponse.java @@ -0,0 +1,52 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.cloudstack.api.response; + +import java.util.List; + +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.api.BaseResponse; + +import com.cloud.serializer.Param; +import com.google.gson.annotations.SerializedName; + +public class RouterHealthCheckResultsListResponse extends BaseResponse { + @SerializedName(ApiConstants.ROUTER_ID) + @Param(description = "the id of the router") + private String routerId; + + @SerializedName(ApiConstants.ROUTER_HEALTH_CHECKS) + @Param(description = "the health check results of the router") + private List healthChecks; + + public String getRouterId() { + return routerId; + } + + public List getHealthChecks() { + return healthChecks; + } + + public void setRouterId(String routerId) { + this.routerId = routerId; + } + + public void setHealthChecks(List healthChecks) { + this.healthChecks = healthChecks; + } +} diff --git a/api/src/main/java/org/apache/cloudstack/query/QueryService.java b/api/src/main/java/org/apache/cloudstack/query/QueryService.java index c4dfe64c739..0a400ed9ae3 100644 ---
a/api/src/main/java/org/apache/cloudstack/query/QueryService.java +++ b/api/src/main/java/org/apache/cloudstack/query/QueryService.java @@ -24,6 +24,7 @@ import org.apache.cloudstack.api.command.admin.host.ListHostTagsCmd; import org.apache.cloudstack.api.command.admin.host.ListHostsCmd; import org.apache.cloudstack.api.command.admin.internallb.ListInternalLBVMsCmd; import org.apache.cloudstack.api.command.admin.management.ListMgmtsCmd; +import org.apache.cloudstack.api.command.admin.router.GetRouterHealthCheckResultsCmd; import org.apache.cloudstack.api.command.admin.router.ListRoutersCmd; import org.apache.cloudstack.api.command.admin.storage.ListImageStoresCmd; import org.apache.cloudstack.api.command.admin.storage.ListSecondaryStagingStoresCmd; @@ -68,6 +69,7 @@ import org.apache.cloudstack.api.response.ProjectInvitationResponse; import org.apache.cloudstack.api.response.ProjectResponse; import org.apache.cloudstack.api.response.ResourceDetailResponse; import org.apache.cloudstack.api.response.ResourceTagResponse; +import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; import org.apache.cloudstack.api.response.SecurityGroupResponse; import org.apache.cloudstack.api.response.ServiceOfferingResponse; import org.apache.cloudstack.api.response.StoragePoolResponse; @@ -172,4 +174,6 @@ public interface QueryService { ListResponse listManagementServers(ListMgmtsCmd cmd); ListResponse listTemplateOVFProperties(ListTemplateOVFProperties cmd); + + List listRouterHealthChecks(GetRouterHealthCheckResultsCmd cmd); } diff --git a/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsAnswer.java b/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsAnswer.java new file mode 100644 index 00000000000..4db59dfac33 --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsAnswer.java @@ -0,0 +1,46 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license 
agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.agent.api.routing; + +import java.util.List; + +import com.cloud.agent.api.Answer; +import com.cloud.agent.api.Command; + +public class GetRouterMonitorResultsAnswer extends Answer { + private List failingChecks; + private String monitoringResults; + + protected GetRouterMonitorResultsAnswer() { + super(); + } + + public GetRouterMonitorResultsAnswer(Command cmd, boolean success, List failingChecks, String monitoringResults) { + super(cmd, success, monitoringResults); + this.failingChecks = failingChecks; + this.monitoringResults = monitoringResults; + } + + public List getFailingChecks() { + return failingChecks; + } + + public String getMonitoringResults() { + return monitoringResults; + } +} diff --git a/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsCommand.java b/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsCommand.java new file mode 100644 index 00000000000..779a0f45a57 --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/routing/GetRouterMonitorResultsCommand.java @@ -0,0 +1,38 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. 
See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.agent.api.routing; + +public class GetRouterMonitorResultsCommand extends NetworkElementCommand { + private boolean performFreshChecks; + + protected GetRouterMonitorResultsCommand() { + } + + public GetRouterMonitorResultsCommand(boolean performFreshChecks) { + this.performFreshChecks = performFreshChecks; + } + + @Override + public boolean isQuery() { + return true; + } + + public boolean shouldPerformFreshChecks() { + return performFreshChecks; + } +} \ No newline at end of file diff --git a/core/src/main/java/com/cloud/agent/api/routing/LoadRouterHealthChecksConfigCommand.java b/core/src/main/java/com/cloud/agent/api/routing/LoadRouterHealthChecksConfigCommand.java new file mode 100644 index 00000000000..b705a469e81 --- /dev/null +++ b/core/src/main/java/com/cloud/agent/api/routing/LoadRouterHealthChecksConfigCommand.java @@ -0,0 +1,41 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.agent.api.routing; + +import java.util.HashMap; +import java.util.Map; + +/** + * Loads new and updates old configuration details on VR for health checks. + */ +public class LoadRouterHealthChecksConfigCommand extends NetworkElementCommand { + + private Map details; + + protected LoadRouterHealthChecksConfigCommand() { + details = new HashMap<>(); + } + + public void addDetail(String key, String value) { + this.details.put(key, value); + } + + public Map getDetails() { + return details; + } +} diff --git a/core/src/main/java/com/cloud/agent/api/routing/NetworkElementCommand.java b/core/src/main/java/com/cloud/agent/api/routing/NetworkElementCommand.java index ae482ac71ec..de3843e2b83 100644 --- a/core/src/main/java/com/cloud/agent/api/routing/NetworkElementCommand.java +++ b/core/src/main/java/com/cloud/agent/api/routing/NetworkElementCommand.java @@ -38,7 +38,6 @@ public abstract class NetworkElementCommand extends Command { public static final String GUEST_BRIDGE = "guest.bridge"; public static final String VPC_PRIVATE_GATEWAY = "vpc.gateway.private"; public static final String FIREWALL_EGRESS_DEFAULT = "firewall.egress.default"; - public static final String ROUTER_MONITORING_ENABLE = "router.monitor.enable"; public static final String NETWORK_PUB_LAST_IP = "network.public.last.ip"; private String routerAccessIp; diff --git a/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java b/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java index a5377039dd6..86fc14c88b7 100644 --- 
a/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java +++ b/core/src/main/java/com/cloud/agent/api/routing/SetMonitorServiceCommand.java @@ -20,6 +20,9 @@ package com.cloud.agent.api.routing; import java.util.List; +import java.util.Map; + +import org.apache.commons.collections.CollectionUtils; import com.cloud.agent.api.to.MonitorServiceTO; @@ -29,13 +32,24 @@ import com.cloud.agent.api.to.MonitorServiceTO; * how to access the components inside the command. */ public class SetMonitorServiceCommand extends NetworkElementCommand { - MonitorServiceTO[] services; + public static final String ROUTER_MONITORING_ENABLED = "router.monitor.enabled"; + public static final String ROUTER_HEALTH_CHECKS_ENABLED = "router.health.checks.enabled"; + public static final String ROUTER_HEALTH_CHECKS_BASIC_INTERVAL = "router.health.checks.basic.interval"; + public static final String ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL = "router.health.checks.advanced.interval"; + public static final String ROUTER_HEALTH_CHECKS_EXCLUDED = "router.health.checks.excluded"; + + private MonitorServiceTO[] services; + private Map healthChecksConfig; + private boolean reconfigureAfterUpdate; + private boolean deleteFromProcessedCache; protected SetMonitorServiceCommand() { } public SetMonitorServiceCommand(List services) { - this.services = services.toArray(new MonitorServiceTO[services.size()]); + if (CollectionUtils.isNotEmpty(services)) { + this.services = services.toArray(new MonitorServiceTO[services.size()]); + } } public MonitorServiceTO[] getRules() { @@ -43,7 +57,9 @@ public class SetMonitorServiceCommand extends NetworkElementCommand { } public String getConfiguration() { - + if (services == null) { + return null; + } StringBuilder sb = new StringBuilder(); for (MonitorServiceTO service : services) { sb.append("[").append(service.getService()).append("]").append(":"); @@ -55,4 +71,28 @@ public class SetMonitorServiceCommand extends NetworkElementCommand { return 
sb.toString(); } + + public Map getHealthChecksConfig() { + return healthChecksConfig; + } + + public void setHealthChecksConfig(Map healthChecksConfig) { + this.healthChecksConfig = healthChecksConfig; + } + + public boolean shouldReconfigureAfterUpdate() { + return reconfigureAfterUpdate; + } + + public void setReconfigureAfterUpdate(boolean reconfigureAfterUpdate) { + this.reconfigureAfterUpdate = reconfigureAfterUpdate; + } + + public boolean shouldDeleteFromProcessedCache() { + return deleteFromProcessedCache; + } + + public void setDeleteFromProcessedCache(boolean deleteFromProcessedCache) { + this.deleteFromProcessedCache = deleteFromProcessedCache; + } } diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VRScripts.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VRScripts.java index b9d6487de56..f8cf6d451b8 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VRScripts.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VRScripts.java @@ -47,6 +47,8 @@ public class VRScripts { // New scripts for use with chef public static final String UPDATE_CONFIG = "update_config.py"; + public static final String CONFIGURE = "configure.py"; + // Script still in use - mostly by HyperV public static final String S2SVPN_CHECK = "checkbatchs2svpn.sh"; @@ -66,6 +68,7 @@ public class VRScripts { public static final String VPC_STATIC_ROUTE = "vpc_staticroute.sh"; public static final String VPN_L2TP = "vpn_l2tp.sh"; public static final String UPDATE_HOST_PASSWD = "update_host_passwd.sh"; + public static final String ROUTER_MONITOR_RESULTS = "getRouterMonitorResults.sh"; public static final String VR_CFG = "vr_cfg.sh"; diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java index 191a62263f3..f07b1b26bde 100644 --- 
a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/VirtualRoutingResource.java @@ -22,13 +22,6 @@ package com.cloud.agent.resource.virtualnetwork; import java.io.IOException; import java.net.InetSocketAddress; import java.nio.channels.SocketChannel; - -import org.apache.cloudstack.diagnostics.DeleteFileInVrCommand; -import org.apache.cloudstack.diagnostics.DiagnosticsAnswer; -import org.apache.cloudstack.diagnostics.DiagnosticsCommand; -import org.apache.cloudstack.diagnostics.PrepareFilesAnswer; -import org.apache.cloudstack.diagnostics.PrepareFilesCommand; -import org.joda.time.Duration; import java.util.ArrayList; import java.util.HashMap; import java.util.List; @@ -45,8 +38,14 @@ import org.apache.cloudstack.ca.SetupCertificateAnswer; import org.apache.cloudstack.ca.SetupCertificateCommand; import org.apache.cloudstack.ca.SetupKeyStoreCommand; import org.apache.cloudstack.ca.SetupKeystoreAnswer; +import org.apache.cloudstack.diagnostics.DeleteFileInVrCommand; +import org.apache.cloudstack.diagnostics.DiagnosticsAnswer; +import org.apache.cloudstack.diagnostics.DiagnosticsCommand; +import org.apache.cloudstack.diagnostics.PrepareFilesAnswer; +import org.apache.cloudstack.diagnostics.PrepareFilesCommand; import org.apache.cloudstack.utils.security.KeyStoreUtils; import org.apache.log4j.Logger; +import org.joda.time.Duration; import com.cloud.agent.api.Answer; import com.cloud.agent.api.CheckRouterAnswer; @@ -59,6 +58,8 @@ import com.cloud.agent.api.GetRouterAlertsAnswer; import com.cloud.agent.api.routing.AggregationControlCommand; import com.cloud.agent.api.routing.AggregationControlCommand.Action; import com.cloud.agent.api.routing.GetRouterAlertsCommand; +import com.cloud.agent.api.routing.GetRouterMonitorResultsAnswer; +import com.cloud.agent.api.routing.GetRouterMonitorResultsCommand; import com.cloud.agent.api.routing.GroupAnswer; import 
com.cloud.agent.api.routing.NetworkElementCommand; import com.cloud.agent.resource.virtualnetwork.facade.AbstractConfigItemFacade; @@ -204,6 +205,8 @@ public class VirtualRoutingResource { return execute((PrepareFilesCommand) cmd); } else if (cmd instanceof DeleteFileInVrCommand) { return execute((DeleteFileInVrCommand)cmd); + } else if (cmd instanceof GetRouterMonitorResultsCommand) { + return execute((GetRouterMonitorResultsCommand)cmd); } else { s_logger.error("Unknown query command in VirtualRoutingResource!"); return Answer.createUnsupportedCommandAnswer(cmd); @@ -225,10 +228,7 @@ public class VirtualRoutingResource { throw new CloudRuntimeException("Unable to apply unknown configitem of type " + c.getClass().getSimpleName()); } - private Answer applyConfig(NetworkElementCommand cmd, List cfg) { - - if (cfg.isEmpty()) { return new Answer(cmd, true, "Nothing to do"); } @@ -256,7 +256,6 @@ public class VirtualRoutingResource { s_logger.warn("Expected " + cmd.getAnswersCount() + " answers while executing " + cmd.getClass().getSimpleName() + " but received " + results.size()); } - if (results.size() == 1) { return new Answer(cmd, finalResult, results.get(0).getDetails()); } else { @@ -275,6 +274,60 @@ public class VirtualRoutingResource { return new CheckS2SVpnConnectionsAnswer(cmd, result.isSuccess(), result.getDetails()); } + private List getFailingChecks(String line) { + List failingChecks = new ArrayList<>(); + for (String w : line.split(",")) { + if (!w.trim().isEmpty()) { + failingChecks.add(w.trim()); + } + } + return failingChecks; + } + + private GetRouterMonitorResultsAnswer parseLinesForHealthChecks(GetRouterMonitorResultsCommand cmd, String executionResult) { + List failingChecks = new ArrayList<>(); + StringBuilder monitorResults = new StringBuilder(); + String[] lines = executionResult.trim().split("\n"); + boolean readingFailedChecks = false, readingMonitorResults = false; + for (String line : lines) { + line = line.trim(); + if 
(line.contains("FAILING CHECKS")) { // Toggle to reading failing checks from next line + readingFailedChecks = true; + readingMonitorResults = false; + } else if (line.contains("MONITOR RESULTS")) { // Toggle to reading monitor results from next line + readingFailedChecks = false; + readingMonitorResults = true; + } else if (readingFailedChecks && !readingMonitorResults) { // Reading failing checks section + failingChecks.addAll(getFailingChecks(line)); + } else if (!readingFailedChecks && readingMonitorResults) { // Reading monitor checks result + monitorResults.append(line); + } else { + s_logger.error("Unexpected lines reached while parsing health check response. Skipping line:- " + line); + } + } + + return new GetRouterMonitorResultsAnswer(cmd, true, failingChecks, monitorResults.toString()); + } + + private GetRouterMonitorResultsAnswer execute(GetRouterMonitorResultsCommand cmd) { + String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP); + String args = cmd.shouldPerformFreshChecks() ? 
"true" : "false"; + s_logger.info("Fetching health check result for " + routerIp + " and executing fresh checks: " + args); + ExecutionResult result = _vrDeployer.executeInVR(routerIp, VRScripts.ROUTER_MONITOR_RESULTS, args); + + if (!result.isSuccess()) { + s_logger.warn("Result of " + cmd + " failed with details: " + result.getDetails()); + return new GetRouterMonitorResultsAnswer(cmd, false, null, result.getDetails()); + } + + if (result.getDetails().isEmpty()) { + s_logger.warn("Result of " + cmd + " received no details."); + return new GetRouterMonitorResultsAnswer(cmd, false, null, "No results available."); + } + + return parseLinesForHealthChecks(cmd, result.getDetails()); + } + private GetRouterAlertsAnswer execute(GetRouterAlertsCommand cmd) { String routerIp = cmd.getAccessDetail(NetworkElementCommand.ROUTER_IP); diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/AbstractConfigItemFacade.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/AbstractConfigItemFacade.java index a083012021f..1042d23e7b0 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/AbstractConfigItemFacade.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/AbstractConfigItemFacade.java @@ -127,7 +127,10 @@ public abstract class AbstractConfigItemFacade { final ConfigItem configFile = new FileConfigItem(VRScripts.CONFIG_PERSIST_LOCATION, remoteFilename, gson.toJson(configuration)); cfg.add(configFile); - final ConfigItem updateCommand = new ScriptConfigItem(VRScripts.UPDATE_CONFIG, remoteFilename); + // By default keep files in processed cache on VR + final String args = configuration.shouldDeleteFromProcessedCache() ? 
remoteFilename + " false" : remoteFilename; + + final ConfigItem updateCommand = new ScriptConfigItem(VRScripts.UPDATE_CONFIG, args); cfg.add(updateCommand); return cfg; diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java index 2cf03e445fc..8ddf17b15b9 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/facade/SetMonitorServiceConfigItem.java @@ -21,21 +21,56 @@ package com.cloud.agent.resource.virtualnetwork.facade; import java.util.List; +import org.apache.log4j.Logger; + import com.cloud.agent.api.routing.NetworkElementCommand; import com.cloud.agent.api.routing.SetMonitorServiceCommand; import com.cloud.agent.resource.virtualnetwork.ConfigItem; +import com.cloud.agent.resource.virtualnetwork.ScriptConfigItem; import com.cloud.agent.resource.virtualnetwork.VRScripts; import com.cloud.agent.resource.virtualnetwork.model.ConfigBase; import com.cloud.agent.resource.virtualnetwork.model.MonitorService; public class SetMonitorServiceConfigItem extends AbstractConfigItemFacade { + private static final Logger s_logger = Logger.getLogger(SetMonitorServiceConfigItem.class); @Override public List generateConfig(final NetworkElementCommand cmd) { final SetMonitorServiceCommand command = (SetMonitorServiceCommand) cmd; - final MonitorService monitorService = new MonitorService(command.getConfiguration(), cmd.getAccessDetail(NetworkElementCommand.ROUTER_MONITORING_ENABLE)); - return generateConfigItems(monitorService); + final MonitorService monitorService = new MonitorService( + command.getConfiguration(), + cmd.getAccessDetail(SetMonitorServiceCommand.ROUTER_MONITORING_ENABLED), + cmd.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ENABLED)); + + 
setupHealthChecksRelatedInfo(monitorService, command); + + monitorService.setDeleteFromProcessedCache(command.shouldDeleteFromProcessedCache()); + + List configItems = generateConfigItems(monitorService); + if (configItems != null && command.shouldReconfigureAfterUpdate()) { + configItems.add(new ScriptConfigItem(VRScripts.CONFIGURE, "monitor_service.json")); + } + return configItems; + } + + private void setupHealthChecksRelatedInfo(MonitorService monitorService, SetMonitorServiceCommand command) { + try { + monitorService.setHealthChecksBasicRunInterval(Integer.parseInt(command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL))); + } catch (NumberFormatException exception) { + s_logger.error("Unexpected health check basic interval set" + command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL) + + ". Exception: " + exception + "Will use default value"); + } + + try { + monitorService.setHealthChecksAdvancedRunInterval(Integer.parseInt(command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL))); + } catch (NumberFormatException exception) { + s_logger.error("Unexpected health check advanced interval set" + command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL) + + ". 
Exception: " + exception + "Will use default value"); + } + + monitorService.setExcludedHealthChecks(command.getAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED)); + monitorService.setHealthChecksConfig(command.getHealthChecksConfig()); } @Override diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/ConfigBase.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/ConfigBase.java index edc721178cb..51424ea3115 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/ConfigBase.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/ConfigBase.java @@ -41,6 +41,10 @@ public abstract class ConfigBase { private String type = UNKNOWN; + // For use in update_config.py which by default persists files in /var/cache/cloud/processed + // If true we don't keep the file in cache. Useful for monitor service command to avoid space waste + protected boolean deleteFromProcessedCache; + private ConfigBase() { // Empty constructor for (de)serialization } @@ -57,4 +61,7 @@ public abstract class ConfigBase { this.type = type; } + public boolean shouldDeleteFromProcessedCache() { + return deleteFromProcessedCache; + } } diff --git a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java index fdf9e473f35..fe20476f076 100644 --- a/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java +++ b/core/src/main/java/com/cloud/agent/resource/virtualnetwork/model/MonitorService.java @@ -19,34 +19,84 @@ package com.cloud.agent.resource.virtualnetwork.model; +import java.util.Map; + public class MonitorService extends ConfigBase { public String config, disableMonitoring; + public Boolean healthChecksEnabled; + public Integer healthChecksBasicRunInterval; + public Integer healthChecksAdvancedRunInterval; + public String excludedHealthChecks; + public 
Map healthChecksConfig; public MonitorService() { super(ConfigBase.MONITORSERVICE); } - public MonitorService(String config, String disableMonitoring) { + public MonitorService(String config, String disableMonitoring, String healthChecksEnabled) { super(ConfigBase.MONITORSERVICE); this.config = config; this.disableMonitoring = disableMonitoring; + this.healthChecksEnabled = Boolean.parseBoolean(healthChecksEnabled); } public String getConfig() { return config; } - public void setConfig(String config) { - this.config = config; - } - public String getDisableMonitoring() { return disableMonitoring; } + public Boolean getHealthChecksEnabled() { + return healthChecksEnabled; + } + + public Integer getHealthChecksBasicRunInterval() { + return healthChecksBasicRunInterval; + } + + public Integer getHealthChecksAdvancedRunInterval() { + return healthChecksAdvancedRunInterval; + } + + public String getExcludedHealthChecks() { + return excludedHealthChecks; + } + + public Map getHealthChecksConfig() { + return healthChecksConfig; + } + + public void setConfig(String config) { + this.config = config; + } + public void setDisableMonitoring(String disableMonitoring) { this.disableMonitoring = disableMonitoring; } + public void setHealthChecksEnabled(Boolean healthChecksEnabled) { + this.healthChecksEnabled = healthChecksEnabled; + } + public void setHealthChecksBasicRunInterval(Integer healthChecksBasicRunInterval) { + this.healthChecksBasicRunInterval = healthChecksBasicRunInterval; + } + + public void setHealthChecksAdvancedRunInterval(Integer healthChecksAdvancedRunInterval) { + this.healthChecksAdvancedRunInterval = healthChecksAdvancedRunInterval; + } + + public void setExcludedHealthChecks(String excludedHealthChecks) { + this.excludedHealthChecks = excludedHealthChecks; + } + + public void setHealthChecksConfig(Map healthChecksConfig) { + this.healthChecksConfig = healthChecksConfig; + } + + public void setDeleteFromProcessedCache(boolean deleteFromProcessedCache) { + 
this.deleteFromProcessedCache = deleteFromProcessedCache; + } } diff --git a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java index 6e71864c447..9c4167ff47e 100644 --- a/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java +++ b/engine/orchestration/src/main/java/org/apache/cloudstack/engine/orchestration/VolumeOrchestrator.java @@ -30,7 +30,6 @@ import java.util.concurrent.ExecutionException; import javax.inject.Inject; import javax.naming.ConfigurationException; -import com.cloud.storage.VolumeApiService; import org.apache.cloudstack.api.command.admin.vm.MigrateVMCmd; import org.apache.cloudstack.api.command.admin.volume.MigrateVolumeCmdByAdmin; import org.apache.cloudstack.api.command.user.volume.MigrateVolumeCmd; @@ -106,6 +105,7 @@ import com.cloud.storage.StoragePool; import com.cloud.storage.VMTemplateStorageResourceAssoc; import com.cloud.storage.Volume; import com.cloud.storage.Volume.Type; +import com.cloud.storage.VolumeApiService; import com.cloud.storage.VolumeVO; import com.cloud.storage.dao.SnapshotDao; import com.cloud.storage.dao.VolumeDao; diff --git a/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDao.java b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDao.java new file mode 100644 index 00000000000..66dcf68f81f --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDao.java @@ -0,0 +1,49 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.network.dao; + +import java.util.List; + +import com.cloud.utils.db.GenericDao; + +public interface RouterHealthCheckResultDao extends GenericDao { + /** + * @param routerId + * @return Returns all the health checks in the database for the given router id + */ + List getHealthCheckResults(long routerId); + + boolean expungeHealthChecks(long routerId); + + /** + * @param routerId + * @return true if there are checks that have been marked failed in the database + */ + boolean hasFailingChecks(long routerId); + + /** + * For a router, we have only one (check name, check type) possible as we keep the most + * recent check result. This method finds that last check result. + * + * @param routerId + * @param checkName + * @param checkType + * @return returns the check result for the routerId, check type and the check name. 
+ */ + RouterHealthCheckResultVO getRouterHealthCheckResult(long routerId, String checkName, String checkType); +} diff --git a/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDaoImpl.java b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDaoImpl.java new file mode 100644 index 00000000000..991365b5f54 --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultDaoImpl.java @@ -0,0 +1,84 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package com.cloud.network.dao; + +import java.util.List; + +import org.apache.log4j.Logger; +import org.springframework.stereotype.Component; + +import com.cloud.utils.db.GenericDaoBase; +import com.cloud.utils.db.SearchBuilder; +import com.cloud.utils.db.SearchCriteria; + +@Component +public class RouterHealthCheckResultDaoImpl extends GenericDaoBase implements RouterHealthCheckResultDao { + private final static Logger s_logger = Logger.getLogger(RouterHealthCheckResultDaoImpl.class); + + private SearchBuilder RouterChecksSearchBuilder; + private SearchBuilder IsRouterFailingSearchBuilder; + + protected RouterHealthCheckResultDaoImpl() { + super(); + RouterChecksSearchBuilder = createSearchBuilder(); + RouterChecksSearchBuilder.and("routerId", RouterChecksSearchBuilder.entity().getRouterId(), SearchCriteria.Op.EQ); + RouterChecksSearchBuilder.and("checkName", RouterChecksSearchBuilder.entity().getCheckName(), SearchCriteria.Op.EQ); + RouterChecksSearchBuilder.and("checkType", RouterChecksSearchBuilder.entity().getCheckType(), SearchCriteria.Op.EQ); + RouterChecksSearchBuilder.done(); + + IsRouterFailingSearchBuilder = createSearchBuilder(); + IsRouterFailingSearchBuilder.and("routerId", IsRouterFailingSearchBuilder.entity().getRouterId(), SearchCriteria.Op.EQ); + IsRouterFailingSearchBuilder.and("checkResult", IsRouterFailingSearchBuilder.entity().getCheckResult(), SearchCriteria.Op.EQ); + IsRouterFailingSearchBuilder.done(); + } + + @Override + public List getHealthCheckResults(long routerId) { + SearchCriteria sc = RouterChecksSearchBuilder.create(); + sc.setParameters("routerId", routerId); + return listBy(sc); + } + + @Override + public boolean expungeHealthChecks(long routerId) { + SearchCriteria sc = RouterChecksSearchBuilder.create(); + sc.setParameters("routerId", routerId); + return expunge(sc) > 0; + } + + @Override + public RouterHealthCheckResultVO getRouterHealthCheckResult(long routerId, String checkName, String checkType) { + SearchCriteria sc = 
RouterChecksSearchBuilder.create(); + sc.setParameters("routerId", routerId); + sc.setParameters("checkName", checkName); + sc.setParameters("checkType", checkType); + List checks = listBy(sc); + if (checks.size() > 1) { + s_logger.error("Found multiple entries for router Id: " + routerId + ", check name: " + checkName); + } + return checks.isEmpty() ? null : checks.get(0); + } + + @Override + public boolean hasFailingChecks(long routerId) { + SearchCriteria sc = IsRouterFailingSearchBuilder.create(); + sc.setParameters("routerId", routerId); + sc.setParameters("checkResult", false); + return !listBy(sc).isEmpty(); + } +} diff --git a/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultVO.java b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultVO.java new file mode 100644 index 00000000000..9803ccb6a4b --- /dev/null +++ b/engine/schema/src/main/java/com/cloud/network/dao/RouterHealthCheckResultVO.java @@ -0,0 +1,129 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. 
+ +package com.cloud.network.dao; + +import java.util.Date; + +import javax.persistence.Column; +import javax.persistence.Entity; +import javax.persistence.GeneratedValue; +import javax.persistence.GenerationType; +import javax.persistence.Id; +import javax.persistence.Table; +import javax.persistence.Temporal; +import javax.persistence.TemporalType; + +import com.cloud.network.RouterHealthCheckResult; +import com.cloud.utils.StringUtils; + +@Entity +@Table(name = "router_health_check") +public class RouterHealthCheckResultVO implements RouterHealthCheckResult { + @Id + @GeneratedValue(strategy = GenerationType.IDENTITY) + @Column(name = "id", updatable = false, nullable = false) + private long id; + + @Column(name = "router_id", updatable = false, nullable = false) + private long routerId; + + @Column(name = "check_name", updatable = false, nullable = false) + private String checkName; + + @Column(name = "check_type", updatable = false, nullable = false) + private String checkType; + + @Column(name = "check_result") + private boolean checkResult; + + @Temporal(TemporalType.TIMESTAMP) + @Column(name = "last_update", updatable = true, nullable = true) + private Date lastUpdateTime; + + @Column(name = "check_details", updatable = true, nullable = true) + private byte[] checkDetails; + + protected RouterHealthCheckResultVO() { + } + + public RouterHealthCheckResultVO(long routerId, String checkName, String checkType) { + this.routerId = routerId; + this.checkName = checkName; + this.checkType = checkType; + } + + public long getId() { + return id; + } + + @Override + public long getRouterId() { + return routerId; + } + + @Override + public String getCheckName() { + return checkName; + } + + @Override + public String getCheckType() { + return checkType; + } + + @Override + public boolean getCheckResult() { + return checkResult; + } + + @Override + public Date getLastUpdateTime() { + return lastUpdateTime; + } + + @Override + public String getParsedCheckDetails() { + 
return checkDetails != null ? new String(checkDetails, StringUtils.getPreferredCharset()) : ""; + } + + public byte[] getCheckDetails() { + return checkDetails; + } + + public void setCheckResult(boolean checkResult) { + this.checkResult = checkResult; + } + + public void setLastUpdateTime(Date lastUpdateTime) { + this.lastUpdateTime = lastUpdateTime; + } + + public void setCheckDetails(byte[] checkDetails) { + this.checkDetails = checkDetails; + } + + @Override + public String toString() { + return super.toString() + + "- check type: " + checkType + + ",check name: " + checkName + + ", check result: " + checkResult + + ", check last update: " + lastUpdateTime + + ", details: " + getParsedCheckDetails(); + } +} diff --git a/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml b/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml index 3e0d67b61a4..34c356dab34 100644 --- a/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml +++ b/engine/schema/src/main/resources/META-INF/cloudstack/core/spring-engine-schema-core-daos-context.xml @@ -288,4 +288,5 @@ + diff --git a/engine/schema/src/main/resources/META-INF/db/schema-41300to41400.sql b/engine/schema/src/main/resources/META-INF/db/schema-41300to41400.sql index 63fdbab3799..2dceb8a0213 100644 --- a/engine/schema/src/main/resources/META-INF/db/schema-41300to41400.sql +++ b/engine/schema/src/main/resources/META-INF/db/schema-41300to41400.sql @@ -36,3 +36,18 @@ UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=283 AND display_name= UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=284 AND display_name="Red Hat Enterprise Linux 7.5"; UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=285 AND display_name="Red Hat Enterprise Linux 7.6"; UPDATE `cloud`.`guest_os` SET `category_id`='4' WHERE `id`=286 AND display_name="Red Hat Enterprise Linux 8.0"; + +-- 
Create table for router health checks. We only save last check result for each. +CREATE TABLE `cloud`.`router_health_check` ( + `id` bigint unsigned NOT NULL auto_increment, + `router_id` bigint unsigned NOT NULL COMMENT 'router id', + `check_name` varchar(255) NOT NULL COMMENT 'name of the health check', + `check_type` varchar(255) NOT NULL COMMENT 'type of the health check', + `last_update` DATETIME NULL COMMENT 'last check update time', + `check_result` boolean NOT NULL COMMENT 'check executions success or failure', + `check_details` BLOB NULL COMMENT 'check result detailed message', + PRIMARY KEY (`id`), + CONSTRAINT `fk_router_health_checks__router_id` FOREIGN KEY (`router_id`) REFERENCES `domain_router`(`id`) ON DELETE CASCADE, + UNIQUE `i_router_health_checks__router_id__check_name__check_type`(`router_id`, `check_name`, `check_type`), + INDEX `i_router_health_checks__router_id`(`router_id`) +) ENGINE=InnoDB AUTO_INCREMENT=1 DEFAULT CHARSET=utf8; diff --git a/plugins/hypervisors/baremetal/src/main/java/com/cloud/baremetal/networkservice/BareMetalResourceBase.java b/plugins/hypervisors/baremetal/src/main/java/com/cloud/baremetal/networkservice/BareMetalResourceBase.java index 65fea092893..74360fe9cb5 100644 --- a/plugins/hypervisors/baremetal/src/main/java/com/cloud/baremetal/networkservice/BareMetalResourceBase.java +++ b/plugins/hypervisors/baremetal/src/main/java/com/cloud/baremetal/networkservice/BareMetalResourceBase.java @@ -22,6 +22,17 @@ // Automatically generated by addcopyright.py at 04/03/2012 package com.cloud.baremetal.networkservice; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.concurrent.TimeUnit; + +import javax.naming.ConfigurationException; + +import org.apache.cloudstack.api.ApiConstants; +import org.apache.cloudstack.framework.config.dao.ConfigurationDao; +import org.apache.log4j.Logger; + import com.cloud.agent.IAgentControl; import com.cloud.agent.api.Answer; import 
com.cloud.agent.api.CheckNetworkAnswer; @@ -70,15 +81,6 @@ import com.cloud.vm.VMInstanceVO; import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachine.PowerState; import com.cloud.vm.dao.VMInstanceDao; -import org.apache.cloudstack.api.ApiConstants; -import org.apache.cloudstack.framework.config.dao.ConfigurationDao; -import org.apache.log4j.Logger; - -import javax.naming.ConfigurationException; -import java.util.HashMap; -import java.util.List; -import java.util.Map; -import java.util.concurrent.TimeUnit; public class BareMetalResourceBase extends ManagerBase implements ServerResource { private static final Logger s_logger = Logger.getLogger(BareMetalResourceBase.class); diff --git a/plugins/hypervisors/hyperv/src/main/java/com/cloud/hypervisor/hyperv/resource/HypervDirectConnectResource.java b/plugins/hypervisors/hyperv/src/main/java/com/cloud/hypervisor/hyperv/resource/HypervDirectConnectResource.java index 979be732f2b..038661b58ad 100644 --- a/plugins/hypervisors/hyperv/src/main/java/com/cloud/hypervisor/hyperv/resource/HypervDirectConnectResource.java +++ b/plugins/hypervisors/hyperv/src/main/java/com/cloud/hypervisor/hyperv/resource/HypervDirectConnectResource.java @@ -2085,6 +2085,11 @@ public class HypervDirectConnectResource extends ServerResourceBase implements S final String controlIp = getRouterSshControlIp(cmd); final String config = cmd.getConfiguration(); + if (org.apache.commons.lang.StringUtils.isBlank(config)) { + s_logger.error("SetMonitorServiceCommand should have config for this case"); + return new Answer(cmd, false, "SetMonitorServiceCommand failed due to missing config"); + } + final String args = String.format(" %s %s", "-c", config); final String command = String.format("%s%s %s", "/opt/cloud/bin/", VRScripts.MONITOR_SERVICE, args); diff --git a/plugins/network-elements/juniper-contrail/src/test/java/org/apache/cloudstack/network/contrail/management/NetworkProviderTest.java 
b/plugins/network-elements/juniper-contrail/src/test/java/org/apache/cloudstack/network/contrail/management/NetworkProviderTest.java index 95204900f72..f9a478713c7 100644 --- a/plugins/network-elements/juniper-contrail/src/test/java/org/apache/cloudstack/network/contrail/management/NetworkProviderTest.java +++ b/plugins/network-elements/juniper-contrail/src/test/java/org/apache/cloudstack/network/contrail/management/NetworkProviderTest.java @@ -24,30 +24,6 @@ import java.util.UUID; import javax.inject.Inject; -import junit.framework.TestCase; -import net.juniper.contrail.api.ApiConnector; -import net.juniper.contrail.api.ApiConnectorFactory; -import net.juniper.contrail.api.ApiConnectorMock; -import net.juniper.contrail.api.types.InstanceIp; -import net.juniper.contrail.api.types.NetworkIpam; -import net.juniper.contrail.api.types.Project; -import net.juniper.contrail.api.types.SubnetType; -import net.juniper.contrail.api.types.VirtualMachine; -import net.juniper.contrail.api.types.VirtualMachineInterface; -import net.juniper.contrail.api.types.VirtualNetwork; -import net.juniper.contrail.api.types.VnSubnetsType; - -import org.apache.log4j.Logger; -import org.junit.After; -import org.junit.AfterClass; -import org.junit.Before; -import org.junit.BeforeClass; -import org.junit.Test; -import org.junit.runner.RunWith; -import org.springframework.context.support.AbstractApplicationContext; -import org.springframework.test.context.ContextConfiguration; -import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; - import org.apache.cloudstack.api.ApiConstants; import org.apache.cloudstack.api.BaseCmd; import org.apache.cloudstack.api.command.user.address.AssociateIPAddrCmd; @@ -58,6 +34,16 @@ import org.apache.cloudstack.api.command.user.project.CreateProjectCmd; import org.apache.cloudstack.api.command.user.project.DeleteProjectCmd; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.utils.identity.ManagementServerNode; +import 
org.apache.log4j.Logger; +import org.junit.After; +import org.junit.AfterClass; +import org.junit.Before; +import org.junit.BeforeClass; +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.context.support.AbstractApplicationContext; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; import com.cloud.agent.AgentManager; import com.cloud.dc.DataCenter; @@ -84,6 +70,19 @@ import com.cloud.utils.db.SearchCriteria.Op; import com.cloud.utils.mgmt.JmxUtil; import com.cloud.vm.VirtualMachineManager; +import junit.framework.TestCase; +import net.juniper.contrail.api.ApiConnector; +import net.juniper.contrail.api.ApiConnectorFactory; +import net.juniper.contrail.api.ApiConnectorMock; +import net.juniper.contrail.api.types.InstanceIp; +import net.juniper.contrail.api.types.NetworkIpam; +import net.juniper.contrail.api.types.Project; +import net.juniper.contrail.api.types.SubnetType; +import net.juniper.contrail.api.types.VirtualMachine; +import net.juniper.contrail.api.types.VirtualMachineInterface; +import net.juniper.contrail.api.types.VirtualNetwork; +import net.juniper.contrail.api.types.VnSubnetsType; + @RunWith(SpringJUnit4ClassRunner.class) @ContextConfiguration(locations = "classpath:/providerContext.xml") /** diff --git a/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/provider/DateraHostListener.java b/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/provider/DateraHostListener.java index 2cb4e8cc672..8639db6cfca 100644 --- a/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/provider/DateraHostListener.java +++ b/plugins/storage/volume/datera/src/main/java/org/apache/cloudstack/storage/datastore/provider/DateraHostListener.java @@ -18,6 +18,23 @@ */ package org.apache.cloudstack.storage.datastore.provider; +import 
java.io.UnsupportedEncodingException; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; + +import javax.inject.Inject; + +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; +import org.apache.cloudstack.engine.subsystem.api.storage.HypervisorHostListener; +import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; +import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao; +import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; +import org.apache.cloudstack.storage.datastore.util.DateraObject; +import org.apache.cloudstack.storage.datastore.util.DateraUtil; +import org.apache.log4j.Logger; + import com.cloud.agent.AgentManager; import com.cloud.agent.api.Answer; import com.cloud.agent.api.ModifyStoragePoolAnswer; @@ -41,21 +58,6 @@ import com.cloud.utils.db.GlobalLock; import com.cloud.utils.exception.CloudRuntimeException; import com.cloud.vm.VMInstanceVO; import com.cloud.vm.dao.VMInstanceDao; -import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; -import org.apache.cloudstack.engine.subsystem.api.storage.HypervisorHostListener; -import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; -import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao; -import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; -import org.apache.cloudstack.storage.datastore.util.DateraObject; -import org.apache.cloudstack.storage.datastore.util.DateraUtil; -import org.apache.log4j.Logger; - -import javax.inject.Inject; -import java.io.UnsupportedEncodingException; -import java.util.ArrayList; -import java.util.HashMap; -import java.util.List; -import java.util.Map; public class DateraHostListener implements HypervisorHostListener { private static final Logger s_logger = Logger.getLogger(DateraHostListener.class); diff --git 
a/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java b/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java index 19b678e4aeb..aa277cd4dab 100644 --- a/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java +++ b/plugins/storage/volume/solidfire/src/main/java/org/apache/cloudstack/storage/datastore/driver/SolidFirePrimaryDataStoreDriver.java @@ -24,6 +24,33 @@ import java.util.Map; import javax.inject.Inject; +import org.apache.cloudstack.engine.subsystem.api.storage.ChapInfo; +import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult; +import org.apache.cloudstack.engine.subsystem.api.storage.CreateCmdResult; +import org.apache.cloudstack.engine.subsystem.api.storage.DataObject; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStore; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreCapabilities; +import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; +import org.apache.cloudstack.engine.subsystem.api.storage.ObjectInDataStoreStateMachine; +import org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreDriver; +import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotInfo; +import org.apache.cloudstack.engine.subsystem.api.storage.TemplateInfo; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeDataFactory; +import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo; +import org.apache.cloudstack.framework.async.AsyncCompletionCallback; +import org.apache.cloudstack.storage.command.CommandResult; +import org.apache.cloudstack.storage.command.CreateObjectAnswer; +import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; +import org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreDao; +import 
org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreVO; +import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailVO; +import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao; +import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; +import org.apache.cloudstack.storage.datastore.util.SolidFireUtil; +import org.apache.cloudstack.storage.to.SnapshotObjectTO; +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; + import com.cloud.agent.api.Answer; import com.cloud.agent.api.to.DataObjectType; import com.cloud.agent.api.to.DataStoreTO; @@ -39,12 +66,12 @@ import com.cloud.storage.DataStoreRole; import com.cloud.storage.ResizeVolumePayload; import com.cloud.storage.Snapshot.State; import com.cloud.storage.SnapshotVO; +import com.cloud.storage.Storage.StoragePoolType; import com.cloud.storage.StoragePool; import com.cloud.storage.VMTemplateStoragePoolVO; import com.cloud.storage.Volume; import com.cloud.storage.VolumeDetailVO; import com.cloud.storage.VolumeVO; -import com.cloud.storage.Storage.StoragePoolType; import com.cloud.storage.dao.SnapshotDao; import com.cloud.storage.dao.SnapshotDetailsDao; import com.cloud.storage.dao.SnapshotDetailsVO; @@ -57,36 +84,8 @@ import com.cloud.user.AccountVO; import com.cloud.user.dao.AccountDao; import com.cloud.utils.db.GlobalLock; import com.cloud.utils.exception.CloudRuntimeException; - import com.google.common.base.Preconditions; -import org.apache.cloudstack.engine.subsystem.api.storage.ChapInfo; -import org.apache.cloudstack.engine.subsystem.api.storage.CopyCommandResult; -import org.apache.cloudstack.engine.subsystem.api.storage.CreateCmdResult; -import org.apache.cloudstack.engine.subsystem.api.storage.DataObject; -import org.apache.cloudstack.engine.subsystem.api.storage.DataStore; -import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreCapabilities; -import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager; -import 
org.apache.cloudstack.engine.subsystem.api.storage.PrimaryDataStoreDriver; -import org.apache.cloudstack.engine.subsystem.api.storage.SnapshotInfo; -import org.apache.cloudstack.engine.subsystem.api.storage.TemplateInfo; -import org.apache.cloudstack.engine.subsystem.api.storage.VolumeDataFactory; -import org.apache.cloudstack.engine.subsystem.api.storage.VolumeInfo; -import org.apache.cloudstack.engine.subsystem.api.storage.ObjectInDataStoreStateMachine; -import org.apache.cloudstack.framework.async.AsyncCompletionCallback; -import org.apache.cloudstack.storage.command.CommandResult; -import org.apache.cloudstack.storage.command.CreateObjectAnswer; -import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao; -import org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreDao; -import org.apache.cloudstack.storage.datastore.db.SnapshotDataStoreVO; -import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailVO; -import org.apache.cloudstack.storage.datastore.db.StoragePoolDetailsDao; -import org.apache.cloudstack.storage.datastore.db.StoragePoolVO; -import org.apache.cloudstack.storage.datastore.util.SolidFireUtil; -import org.apache.cloudstack.storage.to.SnapshotObjectTO; -import org.apache.commons.lang.StringUtils; -import org.apache.log4j.Logger; - public class SolidFirePrimaryDataStoreDriver implements PrimaryDataStoreDriver { private static final Logger LOGGER = Logger.getLogger(SolidFirePrimaryDataStoreDriver.class); private static final int LOWEST_HYPERVISOR_SNAPSHOT_RESERVE = 10; diff --git a/server/src/main/java/com/cloud/api/ApiResponseHelper.java b/server/src/main/java/com/cloud/api/ApiResponseHelper.java index b8e60325ea2..c121bcb034f 100644 --- a/server/src/main/java/com/cloud/api/ApiResponseHelper.java +++ b/server/src/main/java/com/cloud/api/ApiResponseHelper.java @@ -31,8 +31,6 @@ import java.util.stream.Collectors; import javax.inject.Inject; -import com.cloud.vm.snapshot.VMSnapshotVO; -import 
com.cloud.vm.snapshot.dao.VMSnapshotDao; import org.apache.cloudstack.acl.ControlledEntity; import org.apache.cloudstack.acl.ControlledEntity.ACLType; import org.apache.cloudstack.affinity.AffinityGroup; @@ -63,6 +61,7 @@ import org.apache.cloudstack.api.response.CreateCmdResponse; import org.apache.cloudstack.api.response.CreateSSHKeyPairResponse; import org.apache.cloudstack.api.response.DiskOfferingResponse; import org.apache.cloudstack.api.response.DomainResponse; +import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; import org.apache.cloudstack.api.response.DomainRouterResponse; import org.apache.cloudstack.api.response.EventResponse; import org.apache.cloudstack.api.response.ExtractResponse; @@ -235,6 +234,7 @@ import com.cloud.network.PhysicalNetwork; import com.cloud.network.PhysicalNetworkServiceProvider; import com.cloud.network.PhysicalNetworkTrafficType; import com.cloud.network.RemoteAccessVpn; +import com.cloud.network.RouterHealthCheckResult; import com.cloud.network.Site2SiteCustomerGateway; import com.cloud.network.Site2SiteVpnConnection; import com.cloud.network.Site2SiteVpnGateway; @@ -336,6 +336,8 @@ import com.cloud.vm.VirtualMachine.Type; import com.cloud.vm.dao.NicExtraDhcpOptionDao; import com.cloud.vm.dao.NicSecondaryIpVO; import com.cloud.vm.snapshot.VMSnapshot; +import com.cloud.vm.snapshot.VMSnapshotVO; +import com.cloud.vm.snapshot.dao.VMSnapshotDao; public class ApiResponseHelper implements ResponseGenerator { @@ -1349,6 +1351,7 @@ public class ApiResponseHelper implements ResponseGenerator { return listVrs.get(0); } + @Override public SystemVmResponse createSystemVmResponse(VirtualMachine vm) { SystemVmResponse vmResponse = new SystemVmResponse(); @@ -4205,4 +4208,20 @@ public class ApiResponseHelper implements ResponseGenerator { response.setState(mgmt.getState()); return response; } + + @Override + public List createHealthCheckResponse(VirtualMachine router, List healthCheckResults) { + List responses = new 
ArrayList<>(healthCheckResults.size()); + for (RouterHealthCheckResult hcResult : healthCheckResults) { + RouterHealthCheckResultResponse healthCheckResponse = new RouterHealthCheckResultResponse(); + healthCheckResponse.setObjectName("routerhealthchecks"); + healthCheckResponse.setCheckName(hcResult.getCheckName()); + healthCheckResponse.setCheckType(hcResult.getCheckType()); + healthCheckResponse.setResult(hcResult.getCheckResult()); + healthCheckResponse.setLastUpdated(hcResult.getLastUpdateTime()); + healthCheckResponse.setDetails(hcResult.getParsedCheckDetails()); + responses.add(healthCheckResponse); + } + return responses; + } } diff --git a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java index 73d742b77b2..6f90d0825da 100644 --- a/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java +++ b/server/src/main/java/com/cloud/api/query/QueryManagerImpl.java @@ -31,9 +31,6 @@ import java.util.stream.Stream; import javax.inject.Inject; -import com.cloud.agent.api.storage.OVFProperty; -import com.cloud.storage.TemplateOVFPropertyVO; -import com.cloud.storage.dao.TemplateOVFPropertiesDao; import org.apache.cloudstack.acl.ControlledEntity.ACLType; import org.apache.cloudstack.affinity.AffinityGroupDomainMapVO; import org.apache.cloudstack.affinity.AffinityGroupResponse; @@ -42,6 +39,7 @@ import org.apache.cloudstack.affinity.dao.AffinityGroupDomainMapDao; import org.apache.cloudstack.affinity.dao.AffinityGroupVMMapDao; import org.apache.cloudstack.api.BaseListProjectAndAccountResourcesCmd; import org.apache.cloudstack.api.ResourceDetail; +import org.apache.cloudstack.api.ResponseGenerator; import org.apache.cloudstack.api.ResponseObject.ResponseView; import org.apache.cloudstack.api.command.admin.account.ListAccountsCmdByAdmin; import org.apache.cloudstack.api.command.admin.domain.ListDomainsCmd; @@ -51,6 +49,7 @@ import org.apache.cloudstack.api.command.admin.host.ListHostsCmd; 
import org.apache.cloudstack.api.command.admin.internallb.ListInternalLBVMsCmd; import org.apache.cloudstack.api.command.admin.iso.ListIsosCmdByAdmin; import org.apache.cloudstack.api.command.admin.management.ListMgmtsCmd; +import org.apache.cloudstack.api.command.admin.router.GetRouterHealthCheckResultsCmd; import org.apache.cloudstack.api.command.admin.router.ListRoutersCmd; import org.apache.cloudstack.api.command.admin.storage.ListImageStoresCmd; import org.apache.cloudstack.api.command.admin.storage.ListSecondaryStagingStoresCmd; @@ -98,6 +97,7 @@ import org.apache.cloudstack.api.response.ProjectInvitationResponse; import org.apache.cloudstack.api.response.ProjectResponse; import org.apache.cloudstack.api.response.ResourceDetailResponse; import org.apache.cloudstack.api.response.ResourceTagResponse; +import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; import org.apache.cloudstack.api.response.SecurityGroupResponse; import org.apache.cloudstack.api.response.ServiceOfferingResponse; import org.apache.cloudstack.api.response.StoragePoolResponse; @@ -123,6 +123,7 @@ import org.apache.commons.collections.CollectionUtils; import org.apache.log4j.Logger; import org.springframework.stereotype.Component; +import com.cloud.agent.api.storage.OVFProperty; import com.cloud.api.query.dao.AccountJoinDao; import com.cloud.api.query.dao.AffinityGroupJoinDao; import com.cloud.api.query.dao.AsyncJobJoinDao; @@ -182,6 +183,10 @@ import com.cloud.exception.PermissionDeniedException; import com.cloud.ha.HighAvailabilityManager; import com.cloud.hypervisor.Hypervisor; import com.cloud.hypervisor.Hypervisor.HypervisorType; +import com.cloud.network.RouterHealthCheckResult; +import com.cloud.network.VpcVirtualNetworkApplianceService; +import com.cloud.network.dao.RouterHealthCheckResultDao; +import com.cloud.network.router.VirtualNetworkApplianceManager; import com.cloud.network.security.SecurityGroupVMMapVO; import 
com.cloud.network.security.dao.SecurityGroupVMMapDao; import com.cloud.org.Grouping; @@ -206,9 +211,11 @@ import com.cloud.storage.Storage; import com.cloud.storage.Storage.ImageFormat; import com.cloud.storage.Storage.TemplateType; import com.cloud.storage.StoragePoolTagVO; +import com.cloud.storage.TemplateOVFPropertyVO; import com.cloud.storage.VMTemplateVO; import com.cloud.storage.Volume; import com.cloud.storage.dao.StoragePoolTagsDao; +import com.cloud.storage.dao.TemplateOVFPropertiesDao; import com.cloud.storage.dao.VMTemplateDao; import com.cloud.tags.ResourceTagVO; import com.cloud.tags.dao.ResourceTagDao; @@ -395,6 +402,15 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q @Inject TemplateOVFPropertiesDao templateOVFPropertiesDao; + @Inject + public VpcVirtualNetworkApplianceService routerService; + + @Inject + private ResponseGenerator responseGenerator; + + @Inject + private RouterHealthCheckResultDao routerHealthCheckResultDao; + /* * (non-Javadoc) * @@ -1198,8 +1214,17 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q Pair, Integer> result = searchForRoutersInternal(cmd, cmd.getId(), cmd.getRouterName(), cmd.getState(), cmd.getZoneId(), cmd.getPodId(), cmd.getClusterId(), cmd.getHostId(), cmd.getKeyword(), cmd.getNetworkId(), cmd.getVpcId(), cmd.getForVpc(), cmd.getRole(), cmd.getVersion()); ListResponse response = new ListResponse(); - List routerResponses = ViewResponseHelper.createDomainRouterResponse(result.first().toArray(new DomainRouterJoinVO[result.first().size()])); + if (VirtualNetworkApplianceManager.RouterHealthChecksEnabled.value()) { + for (DomainRouterResponse res : routerResponses) { + DomainRouterVO resRouter = _routerDao.findByUuid(res.getId()); + res.setHealthChecksFailed(routerHealthCheckResultDao.hasFailingChecks(resRouter.getId())); + if (cmd.shouldFetchHealthCheckResults()) { + res.setHealthCheckResults(responseGenerator.createHealthCheckResponse(resRouter, + 
new ArrayList<>(routerHealthCheckResultDao.getHealthCheckResults(resRouter.getId())))); + } + } + } response.setResponses(routerResponses, result.second()); return response; } @@ -1209,8 +1234,18 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q Pair, Integer> result = searchForRoutersInternal(cmd, cmd.getId(), cmd.getRouterName(), cmd.getState(), cmd.getZoneId(), cmd.getPodId(), null, cmd.getHostId(), cmd.getKeyword(), cmd.getNetworkId(), cmd.getVpcId(), cmd.getForVpc(), cmd.getRole(), null); ListResponse response = new ListResponse(); - List routerResponses = ViewResponseHelper.createDomainRouterResponse(result.first().toArray(new DomainRouterJoinVO[result.first().size()])); + if (VirtualNetworkApplianceManager.RouterHealthChecksEnabled.value()) { + for (DomainRouterResponse res : routerResponses) { + DomainRouterVO resRouter = _routerDao.findByUuid(res.getId()); + res.setHealthChecksFailed(routerHealthCheckResultDao.hasFailingChecks(resRouter.getId())); + if (cmd.shouldFetchHealthCheckResults()) { + res.setHealthCheckResults(responseGenerator.createHealthCheckResponse(resRouter, + new ArrayList<>(routerHealthCheckResultDao.getHealthCheckResults(resRouter.getId())))); + } + } + } + response.setResponses(routerResponses, result.second()); return response; } @@ -3928,6 +3963,27 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q return response; } + @Override + public List listRouterHealthChecks(GetRouterHealthCheckResultsCmd cmd) { + s_logger.info("Executing health check command " + cmd); + long routerId = cmd.getRouterId(); + if (!VirtualNetworkApplianceManager.RouterHealthChecksEnabled.value()) { + throw new CloudRuntimeException("Router health checks are not enabled for router " + routerId); + } + + if (cmd.shouldPerformFreshChecks() && !routerService.performRouterHealthChecks(routerId)) { + throw new CloudRuntimeException("Unable to perform fresh checks on router."); + } + + List result = new 
ArrayList<>(routerHealthCheckResultDao.getHealthCheckResults(routerId)); + if (result == null || result.size() == 0) { + throw new CloudRuntimeException("Database had no entries for health checks for router. This could happen for " + + "a newly created router. Please wait for periodic results to populate or manually call for checks to execute."); + } + + return responseGenerator.createHealthCheckResponse(_routerDao.findById(routerId), result); + } + @Override public String getConfigComponentName() { return QueryService.class.getSimpleName(); diff --git a/server/src/main/java/com/cloud/network/NetworkServiceImpl.java b/server/src/main/java/com/cloud/network/NetworkServiceImpl.java index 1e9eb2d325f..7761ce69ef3 100644 --- a/server/src/main/java/com/cloud/network/NetworkServiceImpl.java +++ b/server/src/main/java/com/cloud/network/NetworkServiceImpl.java @@ -1861,14 +1861,7 @@ public class NetworkServiceImpl extends ManagerBase implements NetworkService { @Override @ActionEvent(eventType = EventTypes.EVENT_NETWORK_RESTART, eventDescription = "restarting network", async = true) - public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { - // This method restarts all network elements belonging to the network and re-applies all the rules - Long networkId = cmd.getNetworkId(); - - User callerUser = _accountMgr.getActiveUser(CallContext.current().getCallingUserId()); - Account callerAccount = _accountMgr.getActiveAccountById(callerUser.getAccountId()); - - // Check if network exists + public boolean restartNetwork(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { NetworkVO network = _networksDao.findById(networkId); if (network == null) { throwInvalidIdException("Network with specified id doesn't exist", networkId.toString(), 
"networkId"); @@ -1888,8 +1881,8 @@ public class NetworkServiceImpl extends ManagerBase implements NetworkService { throw new InvalidParameterException("Unable to restart a running SDN network."); } + Account callerAccount = _accountMgr.getActiveAccountById(user.getAccountId()); _accountMgr.checkAccess(callerAccount, null, true, network); - if (!network.isRedundant() && makeRedundant) { network.setRedundant(true); if (!_networksDao.update(network.getId(), network)) { @@ -1898,8 +1891,7 @@ public class NetworkServiceImpl extends ManagerBase implements NetworkService { cleanup = true; } - boolean success = _networkMgr.restartNetwork(networkId, callerAccount, callerUser, cleanup); - + boolean success = _networkMgr.restartNetwork(networkId, callerAccount, user, cleanup); if (success) { s_logger.debug("Network id=" + networkId + " is restarted successfully."); } else { @@ -1909,6 +1901,17 @@ public class NetworkServiceImpl extends ManagerBase implements NetworkService { return success; } + @Override + @ActionEvent(eventType = EventTypes.EVENT_NETWORK_RESTART, eventDescription = "restarting network", async = true) + public boolean restartNetwork(RestartNetworkCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { + // This method restarts all network elements belonging to the network and re-applies all the rules + Long networkId = cmd.getNetworkId(); + boolean cleanup = cmd.getCleanup(); + boolean makeRedundant = cmd.getMakeRedundant(); + User callerUser = _accountMgr.getActiveUser(CallContext.current().getCallingUserId()); + return restartNetwork(networkId, cleanup, makeRedundant, callerUser); + } + @Override public int getActiveNicsInNetwork(long networkId) { return _networksDao.getActiveNicsIn(networkId); diff --git a/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java b/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java index efab0e25568..56814daeef2 100644 --- 
a/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java +++ b/server/src/main/java/com/cloud/network/firewall/FirewallManagerImpl.java @@ -18,23 +18,21 @@ package com.cloud.network.firewall; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.HashSet; import java.util.List; import java.util.Map; import java.util.Set; -import java.util.Collections; import javax.inject.Inject; import javax.naming.ConfigurationException; -import com.cloud.network.dao.FirewallRulesDcidrsDao; -import org.apache.log4j.Logger; -import org.springframework.stereotype.Component; - import org.apache.cloudstack.api.command.user.firewall.IListFirewallRulesCmd; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; +import org.apache.log4j.Logger; +import org.springframework.stereotype.Component; import com.cloud.configuration.Config; import com.cloud.domain.dao.DomainDao; @@ -55,6 +53,7 @@ import com.cloud.network.NetworkModel; import com.cloud.network.NetworkRuleApplier; import com.cloud.network.dao.FirewallRulesCidrsDao; import com.cloud.network.dao.FirewallRulesDao; +import com.cloud.network.dao.FirewallRulesDcidrsDao; import com.cloud.network.dao.IPAddressDao; import com.cloud.network.dao.IPAddressVO; import com.cloud.network.dao.NetworkDao; diff --git a/server/src/main/java/com/cloud/network/router/NetworkHelperImpl.java b/server/src/main/java/com/cloud/network/router/NetworkHelperImpl.java index da07bb5dff5..18f4a45994e 100644 --- a/server/src/main/java/com/cloud/network/router/NetworkHelperImpl.java +++ b/server/src/main/java/com/cloud/network/router/NetworkHelperImpl.java @@ -258,7 +258,7 @@ public class NetworkHelperImpl implements NetworkHelper { @Override public boolean checkRouterVersion(final VirtualRouter router) { - if 
(!VirtualNetworkApplianceManagerImpl.routerVersionCheckEnabled.value()) { + if (!VirtualNetworkApplianceManager.RouterVersionCheckEnabled.value()) { // Router version check is disabled. return true; } diff --git a/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManager.java b/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManager.java index a291b3590b2..eba23f3fe3d 100644 --- a/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManager.java +++ b/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManager.java @@ -45,6 +45,10 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA static final String SetServiceMonitorCK = "network.router.EnableServiceMonitoring"; static final String RouterAlertsCheckIntervalCK = "router.alerts.check.interval"; + static final String RouterHealthChecksConfigRefreshIntervalCK = "router.health.checks.config.refresh.interval"; + static final String RouterHealthChecksResultFetchIntervalCK = "router.health.checks.results.fetch.interval"; + static final String RouterHealthChecksFailuresToRecreateVrCK = "router.health.checks.failures.to.recreate.vr"; + static final ConfigKey RouterTemplateXen = new ConfigKey(String.class, RouterTemplateXenCK, "Advanced", "SystemVM Template (XenServer)", "Name of the default router template on Xenserver.", true, ConfigKey.Scope.Zone, null); static final ConfigKey RouterTemplateKvm = new ConfigKey(String.class, RouterTemplateKvmCK, "Advanced", "SystemVM Template (KVM)", @@ -63,11 +67,48 @@ public interface VirtualNetworkApplianceManager extends Manager, VirtualNetworkA static final ConfigKey RouterAlertsCheckInterval = new ConfigKey(Integer.class, RouterAlertsCheckIntervalCK, "Advanced", "1800", "Interval (in seconds) to check for alerts in Virtual Router.", false, ConfigKey.Scope.Global, null); - static final ConfigKey routerVersionCheckEnabled = new ConfigKey("Advanced", Boolean.class, 
"router.version.check", "true", + static final ConfigKey RouterVersionCheckEnabled = new ConfigKey("Advanced", Boolean.class, "router.version.check", "true", "If true, router minimum required version is checked before sending command", false); static final ConfigKey UseExternalDnsServers = new ConfigKey(Boolean.class, "use.external.dns", "Advanced", "false", "Bypass internal dns, use external dns1 and dns2", true, ConfigKey.Scope.Zone, null); + // Health checks + static final ConfigKey RouterHealthChecksEnabled = new ConfigKey(Boolean.class, "router.health.checks.enabled", "Advanced", "true", + "If true, router health checks are allowed to be executed and read. If false, all scheduled checks and API calls for on demand checks are disabled.", + true, ConfigKey.Scope.Global, null); + static final ConfigKey RouterHealthChecksBasicInterval = new ConfigKey(Integer.class, "router.health.checks.basic.interval", "Advanced", "3", + "Interval in minutes at which basic router health checks are performed. If set to 0, no tests are scheduled.", + true, ConfigKey.Scope.Global, null); + static final ConfigKey RouterHealthChecksAdvancedInterval = new ConfigKey(Integer.class, "router.health.checks.advanced.interval", "Advanced", "10", + "Interval in minutes at which advanced router health checks are performed. If set to 0, no tests are scheduled.", + true, ConfigKey.Scope.Global, null); + static final ConfigKey RouterHealthChecksConfigRefreshInterval = new ConfigKey(Integer.class, RouterHealthChecksConfigRefreshIntervalCK, "Advanced", "10", + "Interval in minutes at which router health checks config - such as scheduling intervals, excluded checks, etc is updated on virtual routers by the management server. 
This value should" + + " be sufficiently high (like 2x) from the router.health.checks.basic.interval and router.health.checks.advanced.interval so that there is time between new results generation and results generation for passed data.", + false, ConfigKey.Scope.Global, null);
+    // How often (in minutes) the management server fetches health check results from routers.
+    // The description embeds the recreate-VR config key name, so that sentence has to be closed
+    // with ". " before the sizing advice starts; otherwise the two run together in the rendered text.
+    static final ConfigKey RouterHealthChecksResultFetchInterval = new ConfigKey(Integer.class, RouterHealthChecksResultFetchIntervalCK, "Advanced", "10",
+            "Interval in minutes at which router health checks results are fetched by management server. On each result fetch, management server evaluates need to recreate VR as per configuration of " + RouterHealthChecksFailuresToRecreateVrCK +
+            ". This value should be sufficiently high (like 2x) from the router.health.checks.basic.interval and router.health.checks.advanced.interval so that there is time between new results generation and fetch.",
+            false, ConfigKey.Scope.Global, null);
+ static final ConfigKey RouterHealthChecksFailuresToRecreateVr = new ConfigKey(String.class, RouterHealthChecksFailuresToRecreateVrCK, "Advanced", "", + "Health checks failures defined by this config are the checks that should cause router recreation. If empty the recreate is not attempted for any health check failure. Possible values are comma separated script names " + + "from systemvm’s /root/health_scripts/ (namely - cpu_usage_check.py, dhcp_check.py, disk_space_check.py, dns_check.py, gateways_check.py, haproxy_check.py, iptables_check.py, memory_usage_check.py, router_version_check.py), connectivity.test " + + " or services (namely - loadbalancing.service, webserver.service, dhcp.service) ", + true, ConfigKey.Scope.Zone, null); + static final ConfigKey RouterHealthChecksToExclude = new ConfigKey(String.class, "router.health.checks.to.exclude", "Advanced", "", + "Health checks that should be excluded when executing scheduled checks on the router. This can be a comma separated list of script names placed in the '/root/health_checks/' folder.
Currently the following scripts are " + + "placed in default systemvm template - cpu_usage_check.py, disk_space_check.py, gateways_check.py, iptables_check.py, router_version_check.py, dhcp_check.py, dns_check.py, haproxy_check.py, memory_usage_check.py.", + true, ConfigKey.Scope.Zone, null); + static final ConfigKey RouterHealthChecksFreeDiskSpaceThreshold = new ConfigKey(Double.class, "router.health.checks.free.disk.space.threshold", + "Advanced", "100", "Free disk space threshold (in MB) on VR below which the check is considered a failure.", + true, ConfigKey.Scope.Zone, null); + static final ConfigKey RouterHealthChecksMaxCpuUsageThreshold = new ConfigKey(Double.class, "router.health.checks.max.cpu.usage.threshold", + "Advanced", "100", " Max CPU Usage threshold as % above which check is considered a failure.", + true, ConfigKey.Scope.Zone, null); + static final ConfigKey RouterHealthChecksMaxMemoryUsageThreshold = new ConfigKey(Double.class, "router.health.checks.max.memory.usage.threshold", + "Advanced", "100", "Max Memory Usage threshold as % above which check is considered a failure.", + true, ConfigKey.Scope.Zone, null); + public static final int DEFAULT_ROUTER_VM_RAMSIZE = 256; // 256M public static final int DEFAULT_ROUTER_CPU_MHZ = 500; // 500 MHz public static final boolean USE_POD_VLAN = false; diff --git a/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java b/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java index c8ea47b31db..87933456de9 100644 --- a/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java +++ b/server/src/main/java/com/cloud/network/router/VirtualNetworkApplianceManagerImpl.java @@ -17,6 +17,7 @@ package com.cloud.network.router; +import java.lang.reflect.Type; import java.math.BigInteger; import java.nio.charset.Charset; import java.security.MessageDigest; @@ -24,7 +25,9 @@ import java.security.NoSuchAlgorithmException; import 
java.text.ParseException; import java.text.SimpleDateFormat; import java.util.ArrayList; +import java.util.Arrays; import java.util.Calendar; +import java.util.Collections; import java.util.Date; import java.util.HashMap; import java.util.HashSet; @@ -42,11 +45,6 @@ import java.util.concurrent.TimeUnit; import javax.inject.Inject; import javax.naming.ConfigurationException; -import org.apache.log4j.Logger; -import org.cloud.network.router.deployment.RouterDeploymentDefinitionBuilder; -import org.springframework.beans.factory.annotation.Autowired; -import org.springframework.beans.factory.annotation.Qualifier; - import org.apache.cloudstack.alert.AlertService; import org.apache.cloudstack.alert.AlertService.AlertType; import org.apache.cloudstack.api.command.admin.router.RebootRouterCmd; @@ -61,11 +59,18 @@ import org.apache.cloudstack.framework.config.Configurable; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; import org.apache.cloudstack.framework.jobs.AsyncJobManager; import org.apache.cloudstack.framework.jobs.impl.AsyncJobVO; +import org.apache.cloudstack.lb.ApplicationLoadBalancerRuleVO; +import org.apache.cloudstack.lb.dao.ApplicationLoadBalancerRuleDao; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.cloudstack.network.topology.NetworkTopology; import org.apache.cloudstack.network.topology.NetworkTopologyContext; import org.apache.cloudstack.utils.identity.ManagementServerNode; import org.apache.cloudstack.utils.usage.UsageUtils; +import org.apache.commons.lang.StringUtils; +import org.apache.log4j.Logger; +import org.cloud.network.router.deployment.RouterDeploymentDefinitionBuilder; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.beans.factory.annotation.Qualifier; import com.cloud.agent.AgentManager; import com.cloud.agent.Listener; @@ -87,6 +92,9 @@ import com.cloud.agent.api.check.CheckSshCommand; import 
com.cloud.agent.api.routing.AggregationControlCommand; import com.cloud.agent.api.routing.AggregationControlCommand.Action; import com.cloud.agent.api.routing.GetRouterAlertsCommand; +import com.cloud.agent.api.routing.GetRouterMonitorResultsAnswer; +import com.cloud.agent.api.routing.GetRouterMonitorResultsCommand; +import com.cloud.agent.api.routing.GroupAnswer; import com.cloud.agent.api.routing.IpAliasTO; import com.cloud.agent.api.routing.NetworkElementCommand; import com.cloud.agent.api.routing.SetMonitorServiceCommand; @@ -95,6 +103,10 @@ import com.cloud.agent.manager.Commands; import com.cloud.alert.AlertManager; import com.cloud.api.ApiAsyncJobDispatcher; import com.cloud.api.ApiGsonHelper; +import com.cloud.api.query.dao.DomainRouterJoinDao; +import com.cloud.api.query.dao.UserVmJoinDao; +import com.cloud.api.query.vo.DomainRouterJoinVO; +import com.cloud.api.query.vo.UserVmJoinVO; import com.cloud.cluster.ManagementServerHostVO; import com.cloud.cluster.dao.ManagementServerHostDao; import com.cloud.configuration.Config; @@ -109,7 +121,9 @@ import com.cloud.dc.dao.DataCenterDao; import com.cloud.dc.dao.HostPodDao; import com.cloud.dc.dao.VlanDao; import com.cloud.deploy.DeployDestination; +import com.cloud.domain.Domain; import com.cloud.event.ActionEvent; +import com.cloud.event.ActionEventUtils; import com.cloud.event.EventTypes; import com.cloud.exception.AgentUnavailableException; import com.cloud.exception.ConcurrentOperationException; @@ -135,6 +149,7 @@ import com.cloud.network.NetworkService; import com.cloud.network.Networks.TrafficType; import com.cloud.network.PublicIpAddress; import com.cloud.network.RemoteAccessVpn; +import com.cloud.network.RouterHealthCheckResult; import com.cloud.network.Site2SiteCustomerGateway; import com.cloud.network.Site2SiteVpnConnection; import com.cloud.network.SshKeysDistriMonitor; @@ -144,8 +159,11 @@ import com.cloud.network.addr.PublicIp; import com.cloud.network.dao.FirewallRulesDao; import 
com.cloud.network.dao.IPAddressDao; import com.cloud.network.dao.IPAddressVO; +import com.cloud.network.dao.LBStickinessPolicyDao; +import com.cloud.network.dao.LBStickinessPolicyVO; import com.cloud.network.dao.LoadBalancerDao; import com.cloud.network.dao.LoadBalancerVMMapDao; +import com.cloud.network.dao.LoadBalancerVMMapVO; import com.cloud.network.dao.LoadBalancerVO; import com.cloud.network.dao.MonitoringServiceDao; import com.cloud.network.dao.MonitoringServiceVO; @@ -155,6 +173,8 @@ import com.cloud.network.dao.OpRouterMonitorServiceDao; import com.cloud.network.dao.OpRouterMonitorServiceVO; import com.cloud.network.dao.PhysicalNetworkServiceProviderDao; import com.cloud.network.dao.RemoteAccessVpnDao; +import com.cloud.network.dao.RouterHealthCheckResultDao; +import com.cloud.network.dao.RouterHealthCheckResultVO; import com.cloud.network.dao.Site2SiteCustomerGatewayDao; import com.cloud.network.dao.Site2SiteVpnConnectionDao; import com.cloud.network.dao.Site2SiteVpnConnectionVO; @@ -175,12 +195,14 @@ import com.cloud.network.rules.FirewallRule.Purpose; import com.cloud.network.rules.FirewallRuleVO; import com.cloud.network.rules.LoadBalancerContainer.Scheme; import com.cloud.network.rules.PortForwardingRule; +import com.cloud.network.rules.PortForwardingRuleVO; import com.cloud.network.rules.RulesManager; import com.cloud.network.rules.StaticNat; import com.cloud.network.rules.StaticNatImpl; import com.cloud.network.rules.StaticNatRule; import com.cloud.network.rules.dao.PortForwardingRulesDao; import com.cloud.network.vpc.Vpc; +import com.cloud.network.vpc.VpcService; import com.cloud.network.vpc.dao.VpcDao; import com.cloud.network.vpn.Site2SiteVpnManager; import com.cloud.offering.NetworkOffering; @@ -188,6 +210,7 @@ import com.cloud.offering.ServiceOffering; import com.cloud.offerings.NetworkOfferingVO; import com.cloud.offerings.dao.NetworkOfferingDao; import com.cloud.resource.ResourceManager; +import com.cloud.serializer.GsonHelper; import 
com.cloud.server.ConfigurationServer; import com.cloud.service.ServiceOfferingVO; import com.cloud.service.dao.ServiceOfferingDao; @@ -214,6 +237,7 @@ import com.cloud.utils.db.EntityManager; import com.cloud.utils.db.Filter; import com.cloud.utils.db.GlobalLock; import com.cloud.utils.db.QueryBuilder; +import com.cloud.utils.db.SearchBuilder; import com.cloud.utils.db.SearchCriteria; import com.cloud.utils.db.Transaction; import com.cloud.utils.db.TransactionCallbackNoReturn; @@ -243,6 +267,8 @@ import com.cloud.vm.dao.NicIpAliasVO; import com.cloud.vm.dao.UserVmDao; import com.cloud.vm.dao.UserVmDetailsDao; import com.cloud.vm.dao.VMInstanceDao; +import com.google.gson.JsonSyntaxException; +import com.google.gson.reflect.TypeToken; /** * VirtualNetworkApplianceManagerImpl manages the different types of virtual @@ -251,6 +277,7 @@ import com.cloud.vm.dao.VMInstanceDao; public class VirtualNetworkApplianceManagerImpl extends ManagerBase implements VirtualNetworkApplianceManager, VirtualNetworkApplianceService, VirtualMachineGuru, Listener, Configurable, StateListener { private static final Logger s_logger = Logger.getLogger(VirtualNetworkApplianceManagerImpl.class); + private static final String CONNECTIVITY_TEST = "connectivity.test"; @Inject private EntityManager _entityMgr; @Inject private DataCenterDao _dcDao; @@ -272,12 +299,12 @@ Configurable, StateListener UseExternalDnsServers = new ConfigKey(Boolean.class, "use.external.dns", "Advanced", "false", - "Bypass internal dns, use external dns1 and dns2", true, ConfigKey.Scope.Zone, null); - - static final ConfigKey routerVersionCheckEnabled = new ConfigKey("Advanced", Boolean.class, "router.version.check", "true", - "If true, router minimum required version is checked before sending command", false); - @Override public boolean configure(final String name, final Map params) throws ConfigurationException { @@ -658,7 +689,21 @@ Configurable, StateListener 0) { _checkExecutor.scheduleAtFixedRate(new 
CheckRouterAlertsTask(), routerAlertsCheckInterval, routerAlertsCheckInterval, TimeUnit.SECONDS); } else { - s_logger.debug("router.alerts.check.interval - " + routerAlertsCheckInterval + " so not scheduling the router alerts checking thread"); + s_logger.debug(RouterAlertsCheckIntervalCK + "=" + routerAlertsCheckInterval + " so not scheduling the router alerts checking thread"); + } + + final int routerHealthCheckConfigRefreshInterval = RouterHealthChecksConfigRefreshInterval.value(); + if (routerHealthCheckConfigRefreshInterval > 0) { + _checkExecutor.scheduleAtFixedRate(new UpdateRouterHealthChecksConfigTask(), routerHealthCheckConfigRefreshInterval, routerHealthCheckConfigRefreshInterval, TimeUnit.MINUTES); + } else { + s_logger.debug(RouterHealthChecksConfigRefreshIntervalCK + "=" + routerHealthCheckConfigRefreshInterval + " so not scheduling the router health check data thread"); + } + + final int routerHealthChecksFetchInterval = RouterHealthChecksResultFetchInterval.value(); + if (routerHealthChecksFetchInterval > 0) { + _checkExecutor.scheduleAtFixedRate(new FetchRouterHealthChecksResultTask(), routerHealthChecksFetchInterval, routerHealthChecksFetchInterval, TimeUnit.MINUTES); + } else { + s_logger.debug(RouterHealthChecksResultFetchIntervalCK + "=" + routerHealthChecksFetchInterval + " so not scheduling the router checks fetching thread"); } return true; @@ -1186,6 +1231,599 @@ Configurable, StateListener routers = _routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, mgmtSrvrId); + s_logger.info("Found " + routers.size() + " running routers. 
Fetching, analysing and updating DB for the health checks."); + if (!RouterHealthChecksEnabled.value()) { + s_logger.debug("Skipping fetching of router health check results as router.health.checks.enabled is disabled"); + return; + } + + for (final DomainRouterVO router : routers) { + GetRouterMonitorResultsAnswer answer = fetchAndUpdateRouterHealthChecks(router, false); + List failingChecks = getFailingChecks(router, answer); + handleFailingChecks(router, failingChecks); + }
+            } catch (final Exception ex) {
+                // Passing the exception to the logger already records its stack trace;
+                // it is deliberately not printed to stderr as well.
+                s_logger.error("Fail to complete the FetchRouterHealthChecksResultTask! ", ex);
+            }
+        }
+ + private List getFailingChecks(DomainRouterVO router, GetRouterMonitorResultsAnswer answer) { + + if (answer == null) { + s_logger.warn("Unable to fetch monitor results for router " + router); + resetRouterHealthChecksAndConnectivity(router.getId(), false, "Communication failed"); + return Arrays.asList(CONNECTIVITY_TEST); + } else if (!answer.getResult()) { + s_logger.warn("Failed to fetch monitor results from router " + router + " with details: " + answer.getDetails()); + resetRouterHealthChecksAndConnectivity(router.getId(), false, "Failed to fetch results with details: " + answer.getDetails()); + return Arrays.asList(CONNECTIVITY_TEST); + } else { + resetRouterHealthChecksAndConnectivity(router.getId(), true, "Successfully fetched data"); + updateDbHealthChecksFromRouterResponse(router.getId(), answer.getMonitoringResults()); + return answer.getFailingChecks(); + } + } + + private void handleFailingChecks(DomainRouterVO router, List failingChecks) { + if (failingChecks == null || failingChecks.size() == 0) { + return; + } + + String alertMessage = "Health checks failed: " + failingChecks.size() + " failing checks on router " + router.getUuid(); + _alertMgr.sendAlert(AlertType.ALERT_TYPE_DOMAIN_ROUTER, router.getDataCenterId(), router.getPodIdToDeployIn(), + alertMessage, alertMessage); + s_logger.warn(alertMessage + ".
Checking failed health checks to see if router needs recreate"); + + String checkFailsToRecreateVr = RouterHealthChecksFailuresToRecreateVr.valueIn(router.getDataCenterId()); + StringBuilder failingChecksEvent = new StringBuilder(); + boolean recreateRouter = false; + for (int i = 0; i < failingChecks.size(); i++) { + String failedCheck = failingChecks.get(i); + if (i == 0) { + failingChecksEvent.append("Router ") + .append(router.getUuid()) + .append(" has failing checks: "); + } + + failingChecksEvent.append(failedCheck); + if (i < failingChecks.size() - 1) { + failingChecksEvent.append(", "); + } + + if (StringUtils.isNotBlank(checkFailsToRecreateVr) && checkFailsToRecreateVr.contains(failedCheck)) { + recreateRouter = true; + } + } + + ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM, + Domain.ROOT_DOMAIN, EventTypes.EVENT_ROUTER_HEALTH_CHECKS, failingChecksEvent.toString()); + + if (recreateRouter) { + s_logger.warn("Health Check Alert: Found failing checks in " + + RouterHealthChecksFailuresToRecreateVrCK + ", attempting recreating router."); + recreateRouter(router.getId()); + } + } + } + + private DomainRouterJoinVO getAnyRouterJoinWithVpc(long routerId) { + List routerJoinVOs = domainRouterJoinDao.searchByIds(routerId); + for (DomainRouterJoinVO router : routerJoinVOs) { + if (router.getRemoved() == null && router.getVpcId() != 0) { + return router; + } + } + return null; + } + + private boolean restartVpcInDomainRouter(DomainRouterJoinVO router, User user) { + try { + s_logger.debug("Attempting restart VPC " + router.getVpcName() + " for router recreation " + router.getUuid()); + ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM, + Domain.ROOT_DOMAIN, EventTypes.EVENT_ROUTER_HEALTH_CHECKS, + "Recreating router " + router.getUuid() + " by restarting VPC " + router.getVpcUuid()); + return vpcService.restartVpc(router.getVpcId(), true, false, user); + } catch (Exception e) { + s_logger.error("Failed to restart 
VPC for router recreation " + + router.getVpcName() + " ,router " + router.getUuid(), e); + return false; + } + } + + private DomainRouterJoinVO getAnyRouterJoinWithGuestTraffic(long routerId) { + List routerJoinVOs = domainRouterJoinDao.searchByIds(routerId); + for (DomainRouterJoinVO router : routerJoinVOs) { + if (router.getRemoved() == null && router.getTrafficType() == TrafficType.Guest) { + return router; + } + } + return null; + } +
+    /**
+     * Restarts the guest network referenced by the given router join row, with cleanup enabled and
+     * without making the network redundant, and records a router health checks event for it.
+     *
+     * @param router router join row supplying the network id, name and uuid and the router uuid
+     * @param user the user on whose behalf the network restart is requested
+     * @return the result of the network restart, or false if any exception was thrown
+     */
+    private boolean restartGuestNetworkInDomainRouter(DomainRouterJoinVO router, User user) {
+        try {
+            s_logger.info("Attempting restart network " + router.getNetworkName() + " for router recreation " + router.getUuid());
+            ActionEventUtils.onActionEvent(User.UID_SYSTEM, Account.ACCOUNT_ID_SYSTEM,
+                    Domain.ROOT_DOMAIN, EventTypes.EVENT_ROUTER_HEALTH_CHECKS,
+                    "Recreating router " + router.getUuid() + " by restarting network " + router.getNetworkUuid());
+            return networkService.restartNetwork(router.getNetworkId(), true, false, user);
+        } catch (Exception e) {
+            // Identify the router by uuid (as the info log above does) rather than repeating the network name.
+            s_logger.error("Failed to restart network " + router.getNetworkName() +
+                    " for router recreation " + router.getUuid(), e);
+            return false;
+        }
+    }
+ + /** + * Attempts recreation of router by restarting with cleanup a VPC if any or a guest network associated in case no VPC. + * @param routerId - the id of the router to be recreated. + * @return true if successfully restart is attempted else false. + */ + private boolean recreateRouter(long routerId) { + User systemUser = _userDao.getUser(User.UID_SYSTEM); + + // Find any VPC containing router join VO, restart it and return + DomainRouterJoinVO routerJoinToRestart = getAnyRouterJoinWithVpc(routerId); + if (routerJoinToRestart != null) { + return restartVpcInDomainRouter(routerJoinToRestart, systemUser); + } + + // If no VPC containing router join VO was found we look for a guest network traffic containing join VO and restart that.
+ routerJoinToRestart = getAnyRouterJoinWithGuestTraffic(routerId); + if (routerJoinToRestart != null) { + return restartGuestNetworkInDomainRouter(routerJoinToRestart, systemUser); + } + + s_logger.warn("Unable to find a valid guest network or VPC to restart for recreating router id " + routerId); + return false; + } + + private Map> getHealthChecksFromDb(long routerId) { + List healthChecksList = routerHealthCheckResultDao.getHealthCheckResults(routerId); + Map> healthCheckResults = new HashMap<>(); + if (healthChecksList.isEmpty()) { + return healthCheckResults; + } + + for (RouterHealthCheckResultVO healthCheck : healthChecksList) { + if (!healthCheckResults.containsKey(healthCheck.getCheckType())) { + healthCheckResults.put(healthCheck.getCheckType(), new HashMap<>()); + } + healthCheckResults.get(healthCheck.getCheckType()).put(healthCheck.getCheckName(), healthCheck); + } + + return healthCheckResults; + } + + private RouterHealthCheckResultVO resetRouterHealthChecksAndConnectivity(final long routerId, boolean connected, String message) { + routerHealthCheckResultDao.expungeHealthChecks(routerId); + boolean newEntry = false; + RouterHealthCheckResultVO connectivityVO = routerHealthCheckResultDao.getRouterHealthCheckResult(routerId, CONNECTIVITY_TEST, "basic"); + if (connectivityVO == null) { + connectivityVO = new RouterHealthCheckResultVO(routerId, CONNECTIVITY_TEST, "basic"); + newEntry = true; + } + + connectivityVO.setCheckResult(connected); + connectivityVO.setLastUpdateTime(new Date()); + if (StringUtils.isNotEmpty(message)) { + connectivityVO.setCheckDetails(message.getBytes(com.cloud.utils.StringUtils.getPreferredCharset())); + } + + if (newEntry) { + routerHealthCheckResultDao.persist(connectivityVO); + } else { + routerHealthCheckResultDao.update(connectivityVO.getId(), connectivityVO); + } + + return routerHealthCheckResultDao.getRouterHealthCheckResult(routerId, CONNECTIVITY_TEST, "basic"); + } + + private RouterHealthCheckResultVO 
parseHealthCheckVOFromJson(final long routerId, + final String checkName, final String checkType, final Map checkData, + final Map> checksInDb) { + boolean success = Boolean.parseBoolean(checkData.get("success")); + Date lastUpdate = new Date(Long.parseLong(checkData.get("lastUpdate"))); + double lastRunDuration = Double.parseDouble(checkData.get("lastRunDuration")); + String message = checkData.get("message"); + final RouterHealthCheckResultVO hcVo; + boolean newEntry = false; + if (checksInDb.containsKey(checkType) && checksInDb.get(checkType).containsKey(checkName)) { + hcVo = checksInDb.get(checkType).get(checkName); + } else { + hcVo = new RouterHealthCheckResultVO(routerId, checkName, checkType); + newEntry = true; + } + + hcVo.setCheckResult(success); + hcVo.setLastUpdateTime(lastUpdate); + if (StringUtils.isNotEmpty(message)) { + hcVo.setCheckDetails(message.getBytes(com.cloud.utils.StringUtils.getPreferredCharset())); + } + + if (newEntry) { + routerHealthCheckResultDao.persist(hcVo); + } else { + routerHealthCheckResultDao.update(hcVo.getId(), hcVo); + } + s_logger.info("Found health check " + hcVo + " which took running duration (ms) " + lastRunDuration); + return hcVo; + } + + /** + * + * @param checksJson JSON expected is + * { + * checkType1: { + * checkName1: { + * success: true/false, + * lastUpdate: date string, + * lastRunDuration: ms spent on test, + * message: detailed message from check execution + * }, + * checkType2: ..... + * }, + * checkType2: ...... + * } + * @return converts the above JSON into list of RouterHealthCheckResult. 
+ */ + private List parseHealthCheckResults( + final Map>> checksJson, final long routerId) { + final Map> checksInDb = getHealthChecksFromDb(routerId); + List healthChecks = new ArrayList<>(); + final String lastRunKey = "lastRun"; + for (String checkType : checksJson.keySet()) { + if (checksJson.get(checkType).containsKey(lastRunKey)) { // Log last run of this check type run info + Map lastRun = checksJson.get(checkType).get(lastRunKey); + s_logger.info("Found check types executed on VR " + checkType + ", start: " + lastRun.get("start") + + ", end: " + lastRun.get("end") + ", duration: " + lastRun.get("duration")); + } + + for (String checkName : checksJson.get(checkType).keySet()) { + if (lastRunKey.equals(checkName)) { + continue; + } + + try { + final RouterHealthCheckResultVO hcVo = parseHealthCheckVOFromJson( + routerId, checkName, checkType, checksJson.get(checkType).get(checkName), checksInDb); + healthChecks.add(hcVo); + } catch (Exception ex) { + s_logger.error("Skipping health check: Exception while parsing check result data for router id " + routerId + + ", check type: " + checkType + ", check name: " + checkName + ":" + ex.getLocalizedMessage(), ex); + } + } + } + return healthChecks; + } + + private List updateDbHealthChecksFromRouterResponse(final long routerId, final String monitoringResult) { + if (StringUtils.isBlank(monitoringResult)) { + s_logger.warn("Attempted parsing empty monitoring results string for router " + routerId); + return Collections.emptyList(); + } + + try { + s_logger.debug("Parsing and updating DB health check data for router: " + routerId + " with data: " + monitoringResult) ; + final Type t = new TypeToken>>>() {}.getType(); + final Map>> checks = GsonHelper.getGson().fromJson(monitoringResult, t); + return parseHealthCheckResults(checks, routerId); + } catch (JsonSyntaxException ex) { + s_logger.error("Unable to parse the result of health checks due to " + ex.getLocalizedMessage(), ex); + } + + return 
Collections.emptyList(); + } +
+    /**
+     * Sends a {@link GetRouterMonitorResultsCommand} to the agent on the router's host and returns
+     * what comes back. This method only fetches; it does not write anything to the database itself.
+     *
+     * @param router the router whose monitoring results are requested
+     * @param performFreshChecks forwarded to the {@link GetRouterMonitorResultsCommand} constructor
+     * @return the router's monitor results answer; an answer built with {@code false} and the agent's
+     *         details when the agent replied with some other answer type; or null when health checks
+     *         are disabled, the control IP is blank or 0.0.0.0, the agent returned nothing, or
+     *         sending the command threw an exception
+     */
+    private GetRouterMonitorResultsAnswer fetchAndUpdateRouterHealthChecks(DomainRouterVO router, boolean performFreshChecks) {
+        if (!RouterHealthChecksEnabled.value()) {
+            return null;
+        }
+
+        String controlIP = getRouterControlIP(router);
+        if (StringUtils.isBlank(controlIP) || controlIP.equals("0.0.0.0")) {
+            // Callers treat null as a communication failure; leave a trace of why nothing was sent.
+            s_logger.debug("Skipping fetching health check results from router " + router.getUuid() + " because controlIp is not correct.");
+            return null;
+        }
+
+        final GetRouterMonitorResultsCommand command = new GetRouterMonitorResultsCommand(performFreshChecks);
+        command.setAccessDetail(NetworkElementCommand.ROUTER_IP, controlIP);
+        command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName());
+        try {
+            final Answer answer = _agentMgr.easySend(router.getHostId(), command);
+            if (answer == null) {
+                s_logger.warn("Unable to fetch monitoring results data from router " + router.getHostName());
+                return null;
+            }
+            if (answer instanceof GetRouterMonitorResultsAnswer) {
+                return (GetRouterMonitorResultsAnswer) answer;
+            }
+            // Any other answer type is wrapped as a failed result so callers can surface its details.
+            s_logger.warn("Unable to fetch health checks results from router " + router.getHostName() + " Received answer " + answer.getDetails());
+            return new GetRouterMonitorResultsAnswer(command, false, null, answer.getDetails());
+        } catch (final Exception e) {
+            s_logger.warn("Error while fetching health check results from router: " + router.getInstanceName(), e);
+            return null;
+        }
+    }
+ + @Override + public boolean performRouterHealthChecks(long routerId) { + DomainRouterVO router = _routerDao.findById(routerId); + + if (router == null) { + throw new CloudRuntimeException("Unable to find router with id " + routerId); + } + + if (!RouterHealthChecksEnabled.value()) { + throw new CloudRuntimeException("Router health checks are not enabled for router: " + router); + } + + s_logger.info("Running health check results for router " + router.getUuid()); + + final GetRouterMonitorResultsAnswer answer; + boolean success = true; + // Step 1: Update health check data on router and perform and retrieve health checks on router + if
(!updateRouterHealthChecksConfig(router)) { + s_logger.warn("Unable to update health check config for fresh run successfully for router: " + router + ", so trying to fetch last result."); + success = false; + answer = fetchAndUpdateRouterHealthChecks(router, false); + } else { + s_logger.info("Successfully updated health check config for fresh run successfully for router: " + router); + answer = fetchAndUpdateRouterHealthChecks(router, true); + } + + // Step 2: Update health checks values in database. We do this irrespective of new health check config. + if (answer == null || !answer.getResult()) { + success = false; + resetRouterHealthChecksAndConnectivity(routerId, false, + answer == null ? "Communication failed " : "Failed to fetch results with details: " + answer.getDetails()); + } else { + resetRouterHealthChecksAndConnectivity(routerId, true, "Successfully fetched data"); + updateDbHealthChecksFromRouterResponse(routerId, answer.getMonitoringResults()); + } + + return success; + } + + protected class UpdateRouterHealthChecksConfigTask extends ManagedContextRunnable { + public UpdateRouterHealthChecksConfigTask() { + } + + @Override + protected void runInContext() { + try { + final List routers = _routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, mgmtSrvrId); + s_logger.debug("Found " + routers.size() + " running routers. "); + + for (final DomainRouterVO router : routers) { + updateRouterHealthChecksConfig(router); + } + } catch (final Exception ex) { + s_logger.error("Fail to complete the UpdateRouterHealthChecksConfigTask! 
", ex); + } + } + } + + private SetMonitorServiceCommand createMonitorServiceCommand(DomainRouterVO router, List services, + boolean reconfigure, boolean deleteFromProcessedCache) { + final SetMonitorServiceCommand command = new SetMonitorServiceCommand(services); + command.setAccessDetail(NetworkElementCommand.ROUTER_IP, getRouterControlIP(router)); + command.setAccessDetail(NetworkElementCommand.ROUTER_NAME, router.getInstanceName()); + command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ENABLED, RouterHealthChecksEnabled.value().toString()); + command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_BASIC_INTERVAL, RouterHealthChecksBasicInterval.value().toString()); + command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_ADVANCED_INTERVAL, RouterHealthChecksAdvancedInterval.value().toString()); + command.setAccessDetail(SetMonitorServiceCommand.ROUTER_HEALTH_CHECKS_EXCLUDED, RouterHealthChecksToExclude.valueIn(router.getDataCenterId())); + command.setHealthChecksConfig(getRouterHealthChecksConfig(router)); + command.setReconfigureAfterUpdate(reconfigure); + command.setDeleteFromProcessedCache(deleteFromProcessedCache); // As part of updating + return command; + } + + /** + * Sends the router health check config to the virtual router, which uses it for health checks. + * @param router - the router that the data needs to be sent to. 
+ * @return true if the router reported that the update succeeded, false otherwise + */ + private boolean updateRouterHealthChecksConfig(DomainRouterVO router) { + if (!RouterHealthChecksEnabled.value()) { + return false; + } + + SetMonitorServiceCommand command = createMonitorServiceCommand(router, null,true, true); + String controlIP = getRouterControlIP(router); + if (StringUtils.isBlank(controlIP) || controlIP.equals("0.0.0.0")) { + s_logger.debug("Skipping update data on router " + router.getUuid() + " because controlIp is not correct."); + return false; + } + + s_logger.info("Updating data for router health checks for router " + router.getUuid()); + Answer origAnswer = null; + try { + origAnswer = _agentMgr.easySend(router.getHostId(), command); + } catch (final Exception e) { + s_logger.error("Error while sending update data for health check to router: " + router.getInstanceName(), e); + return false; + } + + if (origAnswer == null) { + s_logger.error("Unable to update health checks data to router " + router.getHostName()); + return false; + } + + GroupAnswer answer = null; + if (origAnswer instanceof GroupAnswer) { + answer = (GroupAnswer) origAnswer; + } else { + s_logger.error("Unable to update health checks data to router " + router.getHostName() + " Received answer " + origAnswer.getDetails()); + return false; + } + + if (!answer.getResult()) { + s_logger.error("Unable to update health checks data to router " + router.getHostName() + ", details : " + answer.getDetails()); + } + + return answer.getResult(); + } + + private String getSystemThresholdsHealthChecksData(final DomainRouterVO router) { + return new StringBuilder() + .append("minDiskNeeded=" + RouterHealthChecksFreeDiskSpaceThreshold.valueIn(router.getDataCenterId())) + .append(",maxCpuUsage=" + RouterHealthChecksMaxCpuUsageThreshold.valueIn(router.getDataCenterId())) + .append(",maxMemoryUsage=" + RouterHealthChecksMaxMemoryUsageThreshold.valueIn(router.getDataCenterId()) + ";") + 
.toString(); + } + + private String 
getRouterVersionHealthChecksData(final DomainRouterVO router) { + if (router.getTemplateVersion() != null && router.getScriptsVersion() != null) { + StringBuilder routerVersion = new StringBuilder() + .append("templateVersion=" + router.getTemplateVersion()) + .append(",scriptsVersion=" + router.getScriptsVersion()); + return routerVersion.toString(); + } + return null; + } + + private void updateWithPortForwardingRules(final DomainRouterJoinVO routerJoinVO, final UserVmJoinVO vm, final StringBuilder portData) { + SearchBuilder sbpf = portForwardingDao.createSearchBuilder(); + sbpf.and("networkId", sbpf.entity().getNetworkId(), SearchCriteria.Op.EQ); + sbpf.and("instanceId", sbpf.entity().getVirtualMachineId(), SearchCriteria.Op.EQ); + SearchCriteria scpf = sbpf.create(); + scpf.setParameters("networkId", routerJoinVO.getNetworkId()); + scpf.setParameters("instanceId", vm.getId()); + List portForwardingRules = portForwardingDao.search(scpf, null); + for (PortForwardingRuleVO portForwardingRule : portForwardingRules) { + portData.append("sourceIp=").append(_ipAddressDao.findById(portForwardingRule.getSourceIpAddressId()).getAddress().toString()) + .append(",sourcePortStart=").append(portForwardingRule.getSourcePortStart()) + .append(",sourcePortEnd=").append(portForwardingRule.getSourcePortEnd()) + .append(",destIp=").append(portForwardingRule.getDestinationIpAddress()) + .append(",destPortStart=").append(portForwardingRule.getDestinationPortStart()) + .append(",destPortEnd=").append(portForwardingRule.getDestinationPortEnd()).append(";"); + } + } + + private String getStickinessPolicies(long loadBalancingRuleId) { + List stickinessPolicyVOs = lbStickinessPolicyDao.listByLoadBalancerId(loadBalancingRuleId, false); + if (stickinessPolicyVOs != null && stickinessPolicyVOs.size() > 0) { + StringBuilder stickiness = new StringBuilder(); + for (LBStickinessPolicyVO stickinessVO : stickinessPolicyVOs) { + stickiness.append(stickinessVO.getMethodName()).append(" "); + } + 
return stickiness.toString().trim(); + } + return "None"; + } + + private void updateWithLbRules(final DomainRouterJoinVO routerJoinVO, final StringBuilder loadBalancingData) { + List loadBalancerVOs = this.getLBRules(routerJoinVO); + for (FirewallRuleVO firewallRuleVO : loadBalancerVOs) { + List vmMapVOs = _loadBalancerVMMapDao.listByLoadBalancerId(firewallRuleVO.getId(), false); + if (vmMapVOs.size() > 0) { + + final NetworkOffering offering = _networkOfferingDao.findById(_networkDao.findById(routerJoinVO.getNetworkId()).getNetworkOfferingId()); + if (offering.getConcurrentConnections() == null) { + loadBalancingData.append("maxconn=").append(_configDao.getValue(Config.NetworkLBHaproxyMaxConn.key())); + } else { + loadBalancingData.append("maxconn=").append(offering.getConcurrentConnections().toString()); + } + + loadBalancingData.append(",sourcePortStart=").append(firewallRuleVO.getSourcePortStart()) + .append(",sourcePortEnd=").append(firewallRuleVO.getSourcePortEnd()); + if (firewallRuleVO instanceof LoadBalancerVO) { + LoadBalancerVO loadBalancerVO = (LoadBalancerVO) firewallRuleVO; + loadBalancingData.append(",sourceIp=").append(_ipAddressDao.findById(loadBalancerVO.getSourceIpAddressId()).getAddress().toString()) + .append(",destPortStart=").append(loadBalancerVO.getDefaultPortStart()) + .append(",destPortEnd=").append(loadBalancerVO.getDefaultPortEnd()) + .append(",algorithm=").append(loadBalancerVO.getAlgorithm()) + .append(",protocol=").append(loadBalancerVO.getLbProtocol()); + } else if (firewallRuleVO instanceof ApplicationLoadBalancerRuleVO) { + ApplicationLoadBalancerRuleVO appLoadBalancerVO = (ApplicationLoadBalancerRuleVO) firewallRuleVO; + loadBalancingData.append(",sourceIp=").append(appLoadBalancerVO.getSourceIp()) + .append(",destPortStart=").append(appLoadBalancerVO.getDefaultPortStart()) + .append(",destPortEnd=").append(appLoadBalancerVO.getDefaultPortEnd()) + .append(",algorithm=").append(appLoadBalancerVO.getAlgorithm()) + 
.append(",protocol=").append(appLoadBalancerVO.getLbProtocol()); + } + loadBalancingData.append(",stickiness=").append(getStickinessPolicies(firewallRuleVO.getId())); + loadBalancingData.append(",keepAliveEnabled=").append(offering.isKeepAliveEnabled()).append(",vmIps="); + for (LoadBalancerVMMapVO vmMapVO : vmMapVOs) { + loadBalancingData.append(vmMapVO.getInstanceIp()).append(" "); + } + loadBalancingData.setCharAt(loadBalancingData.length() - 1, ';'); + } + } + } + + private Map getRouterHealthChecksConfig(final DomainRouterVO router) { + Map data = new HashMap<>(); + List routerJoinVOs = domainRouterJoinDao.searchByIds(router.getId()); + StringBuilder vmsData = new StringBuilder(); + StringBuilder portData = new StringBuilder(); + StringBuilder loadBalancingData = new StringBuilder(); + StringBuilder gateways = new StringBuilder(); + gateways.append("gatewaysIps="); + for (DomainRouterJoinVO routerJoinVO : routerJoinVOs) { + if (StringUtils.isNotBlank(routerJoinVO.getGateway())) { + gateways.append(routerJoinVO.getGateway() + " "); + } + SearchBuilder sbvm = userVmJoinDao.createSearchBuilder(); + sbvm.and("networkId", sbvm.entity().getNetworkId(), SearchCriteria.Op.EQ); + SearchCriteria scvm = sbvm.create(); + scvm.setParameters("networkId", routerJoinVO.getNetworkId()); + List vms = userVmJoinDao.search(scvm, null); + for (UserVmJoinVO vm : vms) { + if (vm.getState() != VirtualMachine.State.Running) { + continue; + } + + vmsData.append("vmName=").append(vm.getName()) + .append(",macAddress=").append(vm.getMacAddress()) + .append(",ip=").append(vm.getIpAddress()).append(";"); + updateWithPortForwardingRules(routerJoinVO, vm, portData); + } + updateWithLbRules(routerJoinVO, loadBalancingData); + } + + String routerVersion = getRouterVersionHealthChecksData(router); + data.put("virtualMachines", vmsData.toString()); + data.put("gateways", gateways.toString()); + data.put("portForwarding", portData.toString()); + data.put("haproxyData", 
loadBalancingData.toString()); + data.put("systemThresholds", getSystemThresholdsHealthChecksData(router)); + if (routerVersion != null) { + data.put("routerVersion", routerVersion); + } + return data; + } + + private List getLBRules(final DomainRouterJoinVO router) { + if (router.getRole() == Role.VIRTUAL_ROUTER) { + SearchBuilder sblb = _loadBalancerDao.createSearchBuilder(); + sblb.and("networkId", sblb.entity().getNetworkId(), SearchCriteria.Op.EQ); + sblb.and("sourceIpAddressId", sblb.entity().getSourceIpAddressId(), SearchCriteria.Op.NNULL); + SearchCriteria sclb = sblb.create(); + sclb.setParameters("networkId", router.getNetworkId()); + return _loadBalancerDao.search(sclb, null); + } else if (router.getRole() == Role.INTERNAL_LB_VM) { + SearchBuilder sbalb = applicationLoadBalancerRuleDao.createSearchBuilder(); + sbalb.and("networkId", sbalb.entity().getNetworkId(), SearchCriteria.Op.EQ); + sbalb.and("sourceIpAddress", sbalb.entity().getSourceIp(), SearchCriteria.Op.NNULL); + SearchCriteria sclb = sbalb.create(); + sclb.setParameters("networkId", router.getNetworkId()); + return applicationLoadBalancerRuleDao.search(sclb, null); + } + return Collections.emptyList(); + } + protected class CheckRouterAlertsTask extends ManagedContextRunnable { public CheckRouterAlertsTask() { } @@ -1205,12 +1843,11 @@ Configurable, StateListener routers = _routerDao.listByStateAndManagementServer(VirtualMachine.State.Running, mgmtSrvrId); s_logger.debug("Found " + routers.size() + " running routers. 
"); - for (final DomainRouterVO router : routers) { final String serviceMonitoringFlag = SetServiceMonitor.valueIn(router.getDataCenterId()); // Skip the routers in VPC network or skip the routers where // Monitor service is not enabled in the corresponding Zone - if (!Boolean.parseBoolean(serviceMonitoringFlag) || router.getVpcId() != null) { + if (!Boolean.parseBoolean(serviceMonitoringFlag)) { continue; } String controlIP = getRouterControlIP(router); @@ -1253,7 +1890,7 @@ Configurable, StateListener services = new ArrayList(); - if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dhcp, Provider.VirtualRouter) - || _networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dns, Provider.VirtualRouter)) { + if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dhcp, provider) + || _networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Dns, provider)) { final MonitoringServiceVO dhcpService = _monitorServiceDao.getServiceByName(MonitoringService.Service.Dhcp.toString()); if (dhcpService != null) { services.add(dhcpService); } } - if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Lb, Provider.VirtualRouter)) { + if (_networkModel.isProviderSupportServiceInNetwork(network.getId(), Service.Lb, provider)) { final MonitoringServiceVO lbService = _monitorServiceDao.getServiceByName(MonitoringService.Service.LoadBalancing.toString()); if (lbService != null) { services.add(lbService); } } - final List defaultServices = _monitorServiceDao.listDefaultServices(true); - services.addAll(defaultServices); + + services.addAll(getDefaultServicesToMonitor(network)); final List servicesTO = new ArrayList(); for (final MonitoringServiceVO service : services) { @@ -1734,17 +2366,21 @@ Configurable, StateListener getDefaultServicesToMonitor(final NetworkVO network) { + return _monitorServiceDao.listDefaultServices(true); + } + protected NicProfile getControlNic(final 
VirtualMachineProfile profile) { final DomainRouterVO router = _routerDao.findById(profile.getId()); final DataCenterVO dcVo = _dcDao.findById(router.getDataCenterId()); @@ -2599,7 +3235,22 @@ Configurable, StateListener[] getConfigKeys() { - return new ConfigKey[] { UseExternalDnsServers, routerVersionCheckEnabled, SetServiceMonitor, RouterAlertsCheckInterval }; + return new ConfigKey[] { + UseExternalDnsServers, + RouterVersionCheckEnabled, + SetServiceMonitor, + RouterAlertsCheckInterval, + RouterHealthChecksEnabled, + RouterHealthChecksBasicInterval, + RouterHealthChecksAdvancedInterval, + RouterHealthChecksConfigRefreshInterval, + RouterHealthChecksResultFetchInterval, + RouterHealthChecksFailuresToRecreateVr, + RouterHealthChecksToExclude, + RouterHealthChecksFreeDiskSpaceThreshold, + RouterHealthChecksMaxCpuUsageThreshold, + RouterHealthChecksMaxMemoryUsageThreshold + }; } @Override diff --git a/server/src/main/java/com/cloud/network/router/VpcVirtualNetworkApplianceManagerImpl.java b/server/src/main/java/com/cloud/network/router/VpcVirtualNetworkApplianceManagerImpl.java index 80b1797e848..4b6da55b238 100644 --- a/server/src/main/java/com/cloud/network/router/VpcVirtualNetworkApplianceManagerImpl.java +++ b/server/src/main/java/com/cloud/network/router/VpcVirtualNetworkApplianceManagerImpl.java @@ -18,6 +18,7 @@ package com.cloud.network.router; import java.net.URI; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -26,6 +27,9 @@ import java.util.Map; import javax.inject.Inject; import javax.naming.ConfigurationException; +import org.apache.log4j.Logger; +import org.springframework.stereotype.Component; + import com.cloud.agent.api.Answer; import com.cloud.agent.api.Command; import com.cloud.agent.api.Command.OnError; @@ -34,6 +38,7 @@ import com.cloud.agent.api.PlugNicCommand; import com.cloud.agent.api.SetupGuestNetworkCommand; import 
com.cloud.agent.api.routing.AggregationControlCommand; import com.cloud.agent.api.routing.AggregationControlCommand.Action; +import com.cloud.agent.api.to.VirtualMachineTO; import com.cloud.agent.manager.Commands; import com.cloud.dc.DataCenter; import com.cloud.deploy.DeployDestination; @@ -42,7 +47,11 @@ import com.cloud.exception.ConcurrentOperationException; import com.cloud.exception.InsufficientCapacityException; import com.cloud.exception.OperationTimedoutException; import com.cloud.exception.ResourceUnavailableException; +import com.cloud.hypervisor.Hypervisor; +import com.cloud.hypervisor.HypervisorGuru; +import com.cloud.hypervisor.HypervisorGuruManager; import com.cloud.network.IpAddress; +import com.cloud.network.MonitoringService; import com.cloud.network.Network; import com.cloud.network.Network.Provider; import com.cloud.network.Network.Service; @@ -54,6 +63,8 @@ import com.cloud.network.Site2SiteVpnConnection; import com.cloud.network.VirtualRouterProvider; import com.cloud.network.addr.PublicIp; import com.cloud.network.dao.IPAddressVO; +import com.cloud.network.dao.MonitoringServiceVO; +import com.cloud.network.dao.NetworkVO; import com.cloud.network.dao.RemoteAccessVpnVO; import com.cloud.network.vpc.NetworkACLItemDao; import com.cloud.network.vpc.NetworkACLItemVO; @@ -72,6 +83,9 @@ import com.cloud.network.vpc.dao.PrivateIpDao; import com.cloud.network.vpc.dao.StaticRouteDao; import com.cloud.network.vpc.dao.VpcGatewayDao; import com.cloud.network.vpn.Site2SiteVpnManager; +import com.cloud.service.ServiceOfferingVO; +import com.cloud.template.VirtualMachineTemplate; +import com.cloud.user.Account; import com.cloud.user.UserStatisticsVO; import com.cloud.utils.Pair; import com.cloud.utils.db.EntityManager; @@ -87,14 +101,8 @@ import com.cloud.vm.VirtualMachine; import com.cloud.vm.VirtualMachine.State; import com.cloud.vm.VirtualMachineProfile; import com.cloud.vm.VirtualMachineProfile.Param; +import com.cloud.vm.VirtualMachineProfileImpl; import 
com.cloud.vm.dao.VMInstanceDao; -import com.cloud.agent.api.to.VirtualMachineTO; -import com.cloud.hypervisor.Hypervisor; -import com.cloud.hypervisor.HypervisorGuru; -import com.cloud.hypervisor.HypervisorGuruManager; - -import org.apache.log4j.Logger; -import org.springframework.stereotype.Component; @Component public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplianceManagerImpl implements VpcVirtualNetworkApplianceManager { @@ -151,8 +159,9 @@ public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplian result = false; } // 3) apply networking rules - if (result && params.get(Param.ReProgramGuestNetworks) != null && (Boolean) params.get(Param.ReProgramGuestNetworks) == true) { - sendNetworkRulesToRouter(router.getId(), network.getId()); + if (result) { + boolean reprogramNetwork = params != null && params.get(Param.ReProgramGuestNetworks) != null && (Boolean) params.get(Param.ReProgramGuestNetworks) == true; + sendNetworkRulesToRouter(router.getId(), network.getId(), reprogramNetwork); } } catch (final Exception ex) { s_logger.warn("Failed to add router " + router + " to network " + network + " due to ", ex); @@ -454,19 +463,25 @@ public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplian throw new CloudRuntimeException("Cannot find related provider of virtual router provider: " + vrProvider.getType().toString()); } + if (reprogramGuestNtwks && publicNics.size() > 0) { + finalizeMonitorService(cmds, profile, domainRouterVO, provider, publicNics.get(0).second().getId(), true); + } + for (final Pair nicNtwk : guestNics) { final Nic guestNic = nicNtwk.first(); + final long guestNetworkId = guestNic.getNetworkId(); final AggregationControlCommand startCmd = new AggregationControlCommand(Action.Start, domainRouterVO.getInstanceName(), controlNic.getIPv4Address(), _routerControlHelper.getRouterIpInNetwork( - guestNic.getNetworkId(), domainRouterVO.getId())); + guestNetworkId, domainRouterVO.getId())); 
cmds.addCommand(startCmd); if (reprogramGuestNtwks) { - finalizeIpAssocForNetwork(cmds, domainRouterVO, provider, guestNic.getNetworkId(), vlanMacAddress); - finalizeNetworkRulesForNetwork(cmds, domainRouterVO, provider, guestNic.getNetworkId()); + finalizeIpAssocForNetwork(cmds, domainRouterVO, provider, guestNetworkId, vlanMacAddress); + finalizeNetworkRulesForNetwork(cmds, domainRouterVO, provider, guestNetworkId); + finalizeMonitorService(cmds, profile, domainRouterVO, provider, guestNetworkId, true); } - finalizeUserDataAndDhcpOnStart(cmds, domainRouterVO, provider, guestNic.getNetworkId()); + finalizeUserDataAndDhcpOnStart(cmds, domainRouterVO, provider, guestNetworkId); final AggregationControlCommand finishCmd = new AggregationControlCommand(Action.Finish, domainRouterVO.getInstanceName(), controlNic.getIPv4Address(), _routerControlHelper.getRouterIpInNetwork( - guestNic.getNetworkId(), domainRouterVO.getId())); + guestNetworkId, domainRouterVO.getId())); cmds.addCommand(finishCmd); } @@ -476,6 +491,14 @@ public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplian return true; } + @Override + protected List getDefaultServicesToMonitor(NetworkVO network) { + if (network.getTrafficType() == TrafficType.Public) { + return Arrays.asList(_monitorServiceDao.getServiceByName(MonitoringService.Service.Ssh.toString())); + } + return super.getDefaultServicesToMonitor(network); + } + @Override protected void finalizeNetworkRulesForNetwork(final Commands cmds, final DomainRouterVO domainRouterVO, final Provider provider, final Long guestNetworkId) { @@ -495,7 +518,7 @@ public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplian } } - protected boolean sendNetworkRulesToRouter(final long routerId, final long networkId) throws ResourceUnavailableException { + protected boolean sendNetworkRulesToRouter(final long routerId, final long networkId, final boolean reprogramNetwork) throws ResourceUnavailableException { final 
DomainRouterVO router = _routerDao.findById(routerId); final Commands cmds = new Commands(OnError.Continue); @@ -508,10 +531,26 @@ public class VpcVirtualNetworkApplianceManagerImpl extends VirtualNetworkApplian throw new CloudRuntimeException("Cannot find related provider of virtual router provider: " + vrProvider.getType().toString()); } - finalizeNetworkRulesForNetwork(cmds, router, provider, networkId); + if (reprogramNetwork) { + finalizeNetworkRulesForNetwork(cmds, router, provider, networkId); + } + + finalizeMonitorService(cmds, getVirtualMachineProfile(router), router, provider, networkId, false); + return _nwHelper.sendCommandsToRouter(router, cmds); } + private VirtualMachineProfile getVirtualMachineProfile(DomainRouterVO router) { + final ServiceOfferingVO offering = _serviceOfferingDao.findById(router.getId(), router.getServiceOfferingId()); + final VirtualMachineTemplate template = _entityMgr.findByIdIncludingRemoved(VirtualMachineTemplate.class, router.getTemplateId()); + final Account owner = _entityMgr.findById(Account.class, router.getAccountId()); + final VirtualMachineProfileImpl profile = new VirtualMachineProfileImpl(router, template, offering, owner, null); + for (final NicProfile nic : _networkMgr.getNicProfiles(router)) { + profile.addNic(nic); + } + return profile; + } + /** * @param router * @param add diff --git a/server/src/main/java/com/cloud/network/vpc/VpcManagerImpl.java b/server/src/main/java/com/cloud/network/vpc/VpcManagerImpl.java index b1b1059c123..839280c3196 100644 --- a/server/src/main/java/com/cloud/network/vpc/VpcManagerImpl.java +++ b/server/src/main/java/com/cloud/network/vpc/VpcManagerImpl.java @@ -46,6 +46,7 @@ import org.apache.cloudstack.api.command.admin.vpc.UpdateVPCOfferingCmd; import org.apache.cloudstack.api.command.user.vpc.ListPrivateGatewaysCmd; import org.apache.cloudstack.api.command.user.vpc.ListStaticRoutesCmd; import org.apache.cloudstack.api.command.user.vpc.ListVPCOfferingsCmd; +import 
org.apache.cloudstack.api.command.user.vpc.RestartVPCCmd; import org.apache.cloudstack.context.CallContext; import org.apache.cloudstack.engine.orchestration.service.NetworkOrchestrationService; import org.apache.cloudstack.framework.config.dao.ConfigurationDao; @@ -1697,16 +1698,21 @@ public class VpcManagerImpl extends ManagerBase implements VpcManager, VpcProvis return success; } + @Override @ActionEvent(eventType = EventTypes.EVENT_VPC_RESTART, eventDescription = "restarting vpc") - public boolean restartVpc(final long vpcId, final boolean cleanUp, final boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, + public boolean restartVpc(final RestartVPCCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { - - final Account callerAccount = CallContext.current().getCallingAccount(); + final long vpcId = cmd.getId(); + final boolean cleanUp = cmd.getCleanup(); + final boolean makeRedundant = cmd.getMakeredundant(); final User callerUser = _accountMgr.getActiveUser(CallContext.current().getCallingUserId()); - final ReservationContext context = new ReservationContextImpl(null, null, callerUser, callerAccount); + return restartVpc(vpcId, cleanUp, makeRedundant, callerUser); + } - // Verify input parameters + @Override + @ActionEvent(eventType = EventTypes.EVENT_VPC_RESTART, eventDescription = "restarting vpc") + public boolean restartVpc(Long vpcId, boolean cleanUp, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { Vpc vpc = getActiveVpc(vpcId); if (vpc == null) { final InvalidParameterValueException ex = new InvalidParameterValueException("Unable to find Enabled VPC by id specified"); @@ -1714,6 +1720,8 @@ public class VpcManagerImpl extends ManagerBase implements VpcManager, VpcProvis throw ex; } + Account callerAccount = _accountMgr.getActiveAccountById(user.getAccountId()); + final 
ReservationContext context = new ReservationContextImpl(null, null, user, callerAccount); _accountMgr.checkAccess(callerAccount, null, false, vpc); s_logger.debug("Restarting VPC " + vpc); diff --git a/server/src/main/java/com/cloud/server/ManagementServerImpl.java b/server/src/main/java/com/cloud/server/ManagementServerImpl.java index 147c527b273..01fe2eb8833 100644 --- a/server/src/main/java/com/cloud/server/ManagementServerImpl.java +++ b/server/src/main/java/com/cloud/server/ManagementServerImpl.java @@ -171,6 +171,7 @@ import org.apache.cloudstack.api.command.admin.router.ConfigureOvsElementCmd; import org.apache.cloudstack.api.command.admin.router.ConfigureVirtualRouterElementCmd; import org.apache.cloudstack.api.command.admin.router.CreateVirtualRouterElementCmd; import org.apache.cloudstack.api.command.admin.router.DestroyRouterCmd; +import org.apache.cloudstack.api.command.admin.router.GetRouterHealthCheckResultsCmd; import org.apache.cloudstack.api.command.admin.router.ListOvsElementsCmd; import org.apache.cloudstack.api.command.admin.router.ListRoutersCmd; import org.apache.cloudstack.api.command.admin.router.ListVirtualRouterElementsCmd; @@ -3115,6 +3116,7 @@ public class ManagementServerImpl extends ManagerBase implements ManagementServe cmdList.add(ListMgmtsCmd.class); cmdList.add(GetUploadParamsForIsoCmd.class); cmdList.add(ListTemplateOVFProperties.class); + cmdList.add(GetRouterHealthCheckResultsCmd.class); // Out-of-band management APIs for admins cmdList.add(EnableOutOfBandManagementForHostCmd.class); diff --git a/server/src/test/java/com/cloud/keystore/KeystoreTest.java b/server/src/test/java/com/cloud/keystore/KeystoreTest.java index 1981a7315f4..24cc3a74d41 100644 --- a/server/src/test/java/com/cloud/keystore/KeystoreTest.java +++ b/server/src/test/java/com/cloud/keystore/KeystoreTest.java @@ -16,17 +16,16 @@ // under the License. 
package com.cloud.keystore; -import junit.framework.TestCase; - +import org.apache.cloudstack.api.response.AlertResponse; +import org.apache.cloudstack.api.response.UserVmResponse; import org.apache.log4j.Logger; import org.junit.After; import org.junit.Before; -import org.apache.cloudstack.api.response.AlertResponse; -import org.apache.cloudstack.api.response.UserVmResponse; - import com.cloud.api.ApiSerializerHelper; +import junit.framework.TestCase; + public class KeystoreTest extends TestCase { private final static Logger s_logger = Logger.getLogger(KeystoreTest.class); diff --git a/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java b/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java index 09fd997f618..7a6cc8bbeee 100644 --- a/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java +++ b/server/src/test/java/com/cloud/vpc/MockNetworkManagerImpl.java @@ -213,11 +213,16 @@ public class MockNetworkManagerImpl extends ManagerBase implements NetworkOrches return false; } + @Override + public boolean restartNetwork(Long networkId, boolean cleanup, boolean makeRedundant, User user) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { + return false; + } + /* (non-Javadoc) * @see com.cloud.network.NetworkService#restartNetwork(com.cloud.api.commands.RestartNetworkCmd, boolean) */ @Override - public boolean restartNetwork(RestartNetworkCmd cmd, boolean cleanup, boolean makeRedundant) throws ConcurrentOperationException, ResourceUnavailableException, + public boolean restartNetwork(RestartNetworkCmd cmd) throws ConcurrentOperationException, ResourceUnavailableException, InsufficientCapacityException { // TODO Auto-generated method stub return false; diff --git a/server/src/test/java/com/cloud/vpc/MockVpcVirtualNetworkApplianceManager.java b/server/src/test/java/com/cloud/vpc/MockVpcVirtualNetworkApplianceManager.java index a85d039cd13..d1b951a9201 100644 --- 
a/server/src/test/java/com/cloud/vpc/MockVpcVirtualNetworkApplianceManager.java +++ b/server/src/test/java/com/cloud/vpc/MockVpcVirtualNetworkApplianceManager.java @@ -248,6 +248,11 @@ public class MockVpcVirtualNetworkApplianceManager extends ManagerBase implement return null; //To change body of implemented methods use File | Settings | File Templates. } + @Override + public boolean performRouterHealthChecks(long routerId) { + return false; + } + @Override public boolean prepareAggregatedExecution(final Network network, final List routers) throws AgentUnavailableException { return true; //To change body of implemented methods use File | Settings | File Templates. diff --git a/systemvm/debian/etc/logrotate.d/monitor b/systemvm/debian/etc/logrotate.d/monitor new file mode 100644 index 00000000000..769f8d5e7e1 --- /dev/null +++ b/systemvm/debian/etc/logrotate.d/monitor @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+/var/log/monitor.log { + rotate 5 + maxsize 10M + missingok + notifempty + compress + copytruncate +} diff --git a/systemvm/debian/etc/logrotate.d/routerServiceMonitor b/systemvm/debian/etc/logrotate.d/routerServiceMonitor new file mode 100644 index 00000000000..7202441f66a --- /dev/null +++ b/systemvm/debian/etc/logrotate.d/routerServiceMonitor @@ -0,0 +1,24 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
class CsMonitor(CsDataBag):
    """ Schedules the router monitor script and writes the health check settings file """

    def get_basic_check_interval(self):
        # Value used for the "basic" cron entry; 3 when the data bag has no setting.
        if "health_checks_basic_run_interval" in self.dbag:
            return self.dbag["health_checks_basic_run_interval"]
        return 3

    def get_advanced_check_interval(self):
        # Value used for the "advanced" cron entry; 0 when the data bag has no setting.
        if "health_checks_advanced_run_interval" in self.dbag:
            return self.dbag["health_checks_advanced_run_interval"]
        return 0

    def setupMonitorConfigFile(self):
        # Nothing is written when the data bag carries no "config" string.
        if "config" not in self.dbag:
            return

        entries = [entry.strip() for entry in self.dbag['config'].split(',')]
        monitor_conf = CsFile(MON_CONFIG)
        for entry in entries:
            fields = entry.split(':')
            # Entries with fewer than five ':'-separated fields are ignored.
            if len(fields) < 5:
                continue
            # Only the first four fields are appended to the monitor config.
            for field in fields[:4]:
                monitor_conf.add(field, -1)
        monitor_conf.commit()

    def setupHealthCheckCronJobs(self):
        basic_every = self.get_basic_check_interval()
        advanced_every = self.get_advanced_check_interval()

        cron = CsFile("/etc/cron.d/process")
        cron.deleteLine("root /usr/bin/python /root/monitorServices.py")
        cron.add("SHELL=/bin/bash", 0)
        cron.add("PATH=/usr/local/sbin:/usr/local/bin:/sbin:/bin:/usr/sbin:/usr/bin", 1)
        # One cron line per check type, added only when its interval is above zero.
        for every, check_type in ((basic_every, "basic"), (advanced_every, "advanced")):
            if every > 0:
                cron.add("*/" + str(every) + " * * * * root /usr/bin/python /root/monitorServices.py " + check_type, -1)
        cron.commit()

    def setupHealthChecksConfigFile(self):
        settings = {}
        settings["health_checks_basic_run_interval"] = self.get_basic_check_interval()
        settings["health_checks_advanced_run_interval"] = self.get_advanced_check_interval()

        enabled = False
        if "health_checks_enabled" in self.dbag:
            enabled = self.dbag["health_checks_enabled"]
        settings["health_checks_enabled"] = enabled

        # Comma separated names become a list; an absent or empty value becomes [].
        excluded = []
        if "excluded_health_checks" in self.dbag:
            raw_excluded = self.dbag["excluded_health_checks"]
            if len(raw_excluded) > 0:
                excluded = [name.strip() for name in raw_excluded.split(",")]
        settings["excluded_health_checks"] = excluded

        extra_config = {}
        if "health_checks_config" in self.dbag:
            extra_config = self.dbag["health_checks_config"]
        settings["health_checks_config"] = extra_config

        with open(HC_CONFIG, 'w') as settings_file:
            json.dump(settings, settings_file, ensure_ascii=False, indent=4)

    def process(self):
        # Order: monitor config, then health check settings, then cron entries.
        self.setupMonitorConfigFile()
        self.setupHealthChecksConfigFile()
        self.setupHealthCheckCronJobs()
dbag["health_checks_advanced_run_interval"] = data["health_checks_advanced_run_interval"] + if "excluded_health_checks" in data: + dbag["excluded_health_checks"] = data["excluded_health_checks"] + if "health_checks_config" in data: + dbag["health_checks_config"] = data["health_checks_config"] + return dbag diff --git a/systemvm/debian/opt/cloud/bin/getRouterMonitorResults.sh b/systemvm/debian/opt/cloud/bin/getRouterMonitorResults.sh new file mode 100755 index 00000000000..bdc709d265f --- /dev/null +++ b/systemvm/debian/opt/cloud/bin/getRouterMonitorResults.sh @@ -0,0 +1,55 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
# getRouterMonitorResults.sh --- Send the monitor results to Management Server
#
# Output layout: a "FAILING CHECKS:" header followed by the contents of the
# failing-check files that exist, then a "MONITOR RESULTS:" header followed by
# a JSON object with "basic" and "advanced" members.

# When the first argument is "true", run the monitor script first and
# discard its standard output.
if [ "$1" == "true" ]; then
    python /root/monitorServices.py > /dev/null
fi

printf "FAILING CHECKS:\n"

# The command substitutions below are intentionally unquoted: word splitting
# makes echo print each file's content on a single line.
for failing_file in /root/basic_failing_health_checks /root/advanced_failing_health_checks; do
    if [ -f "$failing_file" ]; then
        echo $(cat "$failing_file")
    fi
done

printf "MONITOR RESULTS:\n"

echo "{\"basic\":"
if [ -f /root/basic_monitor_results.json ]; then
    echo $(cat /root/basic_monitor_results.json)
else
    echo "{}"
fi

echo ",\"advanced\":"
if [ -f /root/advanced_monitor_results.json ]; then
    echo $(cat /root/advanced_monitor_results.json)
else
    echo "{}"
fi

echo "}"
sys.argv[2].lower() == "false": + qf.keep = False + qf.setFile(sys.argv[1]) qf.load(None) # These can be safely deferred, dramatically speeding up loading times diff --git a/systemvm/debian/root/health_checks/__init__.py b/systemvm/debian/root/health_checks/__init__.py new file mode 100644 index 00000000000..3dcbe822698 --- /dev/null +++ b/systemvm/debian/root/health_checks/__init__.py @@ -0,0 +1,20 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Needed to expose utility as package outside for monitorServices.py. +# This directory should only contain executables for health checks. diff --git a/systemvm/debian/root/health_checks/cpu_usage_check.py b/systemvm/debian/root/health_checks/cpu_usage_check.py new file mode 100644 index 00000000000..5e6a2fe5e9e --- /dev/null +++ b/systemvm/debian/root/health_checks/cpu_usage_check.py @@ -0,0 +1,56 @@ +#!/usr/bin/python +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. 
def main():
    """
    Compares the current CPU usage with the "maxCpuUsage" value of the
    "systemThresholds" health check data.

    Exits 0 when the threshold is missing or the usage is within it, and
    exits 1 when the threshold is crossed or the usage command fails.
    """
    thresholds = getHealthChecksData("systemThresholds")
    # The thresholds are only used when exactly one entry was provided.
    limits = thresholds[0] if thresholds is not None and len(thresholds) == 1 else {}

    if "maxCpuUsage" not in limits:
        print("Missing maxCpuUsage in health_checks_data systemThresholds, skipping")
        exit(0)

    cpuLimit = float(limits["maxCpuUsage"])
    # Takes the last "Cpu(s)" line of two top iterations and prints 100 - idle.
    usageCmd = ("top -b -n2 -p 1 | fgrep \"Cpu(s)\" | tail -1 | "
                "awk -F 'id,' "
                "'{ split($1, vs, \",\"); idle=vs[length(vs)]; "
                "sub(\"%\", \"\", idle); printf \"%.2f\", 100 - idle }'")
    proc = Popen(usageCmd, shell=True, stdout=PIPE)
    if proc.wait() != 0:
        print("Failed to retrieve cpu usage using " + usageCmd)
        exit(1)

    cpuNow = float(proc.communicate()[0].strip())
    if cpuNow > cpuLimit:
        print("CPU Usage " + str(cpuNow) +
              "% has crossed threshold of " + str(cpuLimit) + "%")
        exit(1)

    print("CPU Usage within limits with current at " + str(cpuNow) + "%")
    exit(0)
def main():
    """
    Verifies that every VM listed under "virtualMachines" in the health check
    data has a matching line in /etc/dhcphosts.txt.

    Exits 0 when there is no VM data or all VMs are present, 1 otherwise.
    """
    vmEntries = getHealthChecksData("virtualMachines")

    if vmEntries is None or len(vmEntries) == 0:
        print("No VMs running data available, skipping")
        exit(0)

    with open('/etc/dhcphosts.txt', 'r') as dhcpHosts:
        rawLines = dhcpHosts.readlines()

    # Split each line into stripped comma separated fields once; lines with
    # fewer than four fields are never considered a match.
    hostRecords = []
    for rawLine in rawLines:
        fields = [field.strip() for field in rawLine.strip().split(',')]
        if len(fields) >= 4:
            hostRecords.append(fields)

    missing = []
    for vm in vmEntries:
        mac, ip, name = vm["macAddress"], vm["ip"], vm["vmName"]
        # Second accepted layout: mac, "set:<ip with dots as underscores>", ip, name.
        taggedIp = "set:" + ip.replace(".", "_")
        present = any(
            rec[0] == mac and (
                (rec[1] == ip and rec[2] == name) or
                (rec[1] == taggedIp and rec[2] == ip and rec[3] == name))
            for rec in hostRecords)
        if not present:
            missing.append(mac + " " + ip + " " + name)

    if len(missing) > 0:
        print("Missing elements in dhcphosts.txt - \n" + ", ".join(missing))
        exit(1)
    else:
        print("All " + str(len(vmEntries)) + " VMs are present in dhcphosts.txt")
        exit(0)
def main():
    """
    Compares the free space of '/' with the "minDiskNeeded" value of the
    "systemThresholds" health check data.

    Exits 0 when the threshold is missing or enough space is free, 1 otherwise.
    """
    thresholds = getHealthChecksData("systemThresholds")
    # The thresholds are only used when exactly one entry was provided.
    limits = thresholds[0] if thresholds is not None and len(thresholds) == 1 else {}

    if "minDiskNeeded" not in limits:
        print("Missing minDiskNeeded in health_checks_data systemThresholds, skipping")
        exit(0)

    # The configured value is multiplied by 1024 so it can be compared with
    # the free space, which is computed below as bytes divided by 1024.
    requiredSpace = float(limits["minDiskNeeded"]) * 1024
    rootFs = statvfs('/')
    # '//' is the integer division the Python 2 '/' operator performs on ints.
    availableSpace = (rootFs.f_bavail * rootFs.f_frsize) // 1024
    availableText = str(availableSpace // 1024) + " MB"

    if availableSpace < requiredSpace:
        print("Insufficient free space is " + availableText)
        exit(1)

    print("Sufficient free space is " + availableText)
    exit(0)
def main():
    """
    Verifies that every VM listed under "virtualMachines" in the health check
    data has a "<ip><TAB><name>" line in /etc/hosts.

    Exits 0 when there is no VM data or all VMs are present, 1 otherwise.
    """
    vmEntries = getHealthChecksData("virtualMachines")

    if vmEntries is None or len(vmEntries) == 0:
        print("No VMs running data available, skipping")
        exit(0)

    with open('/etc/hosts', 'r') as hostsFile:
        hostLines = hostsFile.readlines()

    # Only lines made of exactly two tab separated parts can match a VM.
    hostPairs = [parts for parts in (line.split('\t') for line in hostLines)
                 if len(parts) == 2]

    missing = []
    for vm in vmEntries:
        listed = any(pair[0].strip() == vm["ip"] and pair[1].strip() == vm["vmName"]
                     for pair in hostPairs)
        if not listed:
            missing.append(vm["ip"] + " " + vm["vmName"])

    if len(missing) > 0:
        print("Missing entries for VMs in /etc/hosts -\n" + ", ".join(missing))
        exit(1)
    else:
        print("All " + str(len(vmEntries)) + " VMs are present in /etc/hosts")
        exit(0)
def parseGatewayIps(gws):
    """
    Returns the non-empty gateway IPs from the "gateways" health check data.

    gws is the list returned by getHealthChecksData("gateways"); the IPs are
    read from the space separated "gatewaysIps" value of its first entry.
    Returns [] when gws is None, empty, or has no "gatewaysIps" value.
    """
    if gws is None or len(gws) == 0:
        return []
    rawIps = gws[0].get("gatewaysIps", "")
    return [gw for gw in rawIps.strip().split(' ') if len(gw) > 0]


def isGatewayReachable(gw, attempts=5):
    """
    Pings gw up to `attempts` times (5 packets, 10 second deadline each) and
    returns True as soon as one ping run exits with status 0.
    """
    for _ in range(attempts):
        try:
            # Argument list instead of a shell string: the address is never
            # interpreted by a shell.
            pout = Popen(["ping", gw, "-c", "5", "-w", "10"], stdout=PIPE)
            # communicate() drains the pipe, so a chatty ping cannot block.
            pout.communicate()
        except OSError:
            # ping could not be started at all; retrying will not help.
            return False
        if pout.returncode == 0:
            return True
    return False


def main():
    """
    Checks that every configured gateway answers ping.

    Exits 0 when no gateway data is available or all gateways are reachable,
    1 when at least one gateway is unreachable.
    """
    gwsList = parseGatewayIps(getHealthChecksData("gateways"))
    if len(gwsList) == 0:
        print("No gateways data available, skipping")
        exit(0)

    unreachableGateWays = [gw for gw in gwsList if not isGatewayReachable(gw)]

    if len(unreachableGateWays) == 0:
        # Report the number of gateways that were actually pinged.
        print("All " + str(len(gwsList)) + " gateways are reachable via ping")
        exit(0)
    else:
        print("Unreachable gateways found-")
        print(unreachableGateWays)
        exit(1)
def checkMaxconn(haproxyData, haCfgSections):
    """
    Compares the expected "maxconn" value with the first "maxconn" directive
    of the parsed "global" section.

    Returns False (after printing a message) only on a mismatch. Returns True
    when either side has no maxconn value, including when the configuration
    has no "global" section at all.
    """
    configured = haCfgSections.get("global", {}).get("maxconn")
    if "maxconn" not in haproxyData or not configured:
        return True

    if haproxyData["maxconn"] != configured[0].strip():
        print("global maxconn mismatch occured")
        return False

    return True


def checkLoadBalance(haproxyData, haCfgSections):
    """
    Checks each expected load balancing rule against its "listen" section:
    balance method, bind string, HTTP mode where required, and one server
    line per expected VM IP.

    Prints a message for every problem found and returns True only when no
    problem was found.
    """
    correct = True
    for lbSec in haproxyData:
        srcPorts = formatPort(lbSec["sourcePortStart"], lbSec["sourcePortEnd"])
        srcServer = lbSec["sourceIp"].replace('.', '_') + "-" + srcPorts
        secName = "listen " + srcServer

        if secName not in haCfgSections:
            print("Missing section for load balancing " + secName + "\n")
            correct = False
            continue

        cfgSection = haCfgSections[secName]
        # Sections without any server line are not inspected further.
        if "server" not in cfgSection:
            continue

        # A missing directive is compared as an empty value and reported as a
        # mismatch instead of raising KeyError.
        foundBalance = cfgSection.get("balance", [""])[0]
        if lbSec["algorithm"] != foundBalance:
            print("Incorrect balance method for " + secName +
                  "Expected : " + lbSec["algorithm"] +
                  " but found " + foundBalance + "\n")
            correct = False

        bindStr = lbSec["sourceIp"] + ":" + srcPorts
        foundBind = cfgSection.get("bind", [""])[0]
        if foundBind != bindStr:
            print("Incorrect bind string found. Expected " + bindStr + " but found " + foundBind + ".")
            correct = False

        plainHttpPort = lbSec["sourcePortStart"] == "80" and lbSec["sourcePortEnd"] == "80" \
            and lbSec["keepAliveEnabled"] == "false"
        cookieStickiness = lbSec["stickiness"].find("AppCookie") != -1 \
            or lbSec["stickiness"].find("LbCookie") != -1
        if plainHttpPort or cookieStickiness:
            if not ("mode" in cfgSection and cfgSection["mode"][0] == "http"):
                print("Expected HTTP mode but not found")
                correct = False

        destPorts = formatPort(lbSec["destPortStart"], lbSec["destPortEnd"])
        for expectedServerIp in lbSec["vmIps"].split(" "):
            pattern = expectedServerIp + ":" + destPorts
            foundPattern = False
            for server in cfgSection["server"]:
                s = server.split()
                # Server lines with fewer than two tokens cannot match.
                if len(s) >= 2 and s[0].startswith(srcServer + "_") and s[1] == pattern:
                    foundPattern = True
                    break

            if not foundPattern:
                correct = False
                print("Missing load balancing for " + pattern + ". ")

    return correct


def parseHaproxyCfg(haCfgLines):
    """
    Groups the lines of a haproxy configuration into sections.

    Sections are separated by blank lines; the first line of a section is its
    name and every following line is stored as keyword -> list of the
    remaining text ('' when the keyword stands alone). Sections without any
    directive are not stored. Returns a dict of section name -> dict.
    """
    haCfgSections = {}
    currSection = None
    currSectionDict = {}
    for line in haCfgLines:
        line = line.strip()
        if len(line) == 0:
            if currSection is not None and len(currSectionDict) > 0:
                haCfgSections[currSection] = currSectionDict

            currSection = None
            currSectionDict = {}
            continue

        if currSection is None:
            currSection = line
        else:
            lineSec = line.split(' ', 1)
            currSectionDict.setdefault(lineSec[0], []).append(
                lineSec[1] if len(lineSec) > 1 else '')

    # Keep the last section even when the file does not end with a blank line.
    if currSection is not None and len(currSectionDict) > 0:
        haCfgSections[currSection] = currSectionDict

    return haCfgSections


def main():
    '''
    Checks for max con and each load balancing rule - source ip, ports and destination
    ips and ports. Also checks for http mode. Does not check for stickiness policies.
    '''
    haproxyData = getHealthChecksData("haproxyData")
    if haproxyData is None or len(haproxyData) == 0:
        print("No data provided to check, skipping")
        exit(0)

    with open("/etc/haproxy/haproxy.cfg", 'r') as haCfgFile:
        haCfgLines = haCfgFile.readlines()

    if len(haCfgLines) == 0:
        print("Unable to read config file /etc/haproxy/haproxy.cfg")
        exit(1)

    haCfgSections = parseHaproxyCfg(haCfgLines)

    # Both checks always run so that every problem is printed.
    checkMaxConn = checkMaxconn(haproxyData[0], haCfgSections)
    checkLbRules = checkLoadBalance(haproxyData, haCfgSections)

    if checkMaxConn and checkLbRules:
        print("All checks pass")
        exit(0)
    else:
        exit(1)
def main():
    """
    Verifies that each rule listed under "portForwarding" in the health check
    data has PREROUTING, OUTPUT and POSTROUTING entries in the iptables-save
    output.

    Exits 0 when there are no rules to check or all entries were found,
    1 otherwise.
    """
    rules = getHealthChecksData("portForwarding")
    if rules is None or len(rules) == 0:
        print("No portforwarding rules provided to check, skipping")
        exit(0)

    problems = []
    for rule in rules:
        targetIp = rule["destIp"]
        matchDest = "-d " + rule["sourceIp"]
        matchPort = "--dport " + formatPort(rule["sourcePortStart"], rule["sourcePortEnd"], ":")
        natTarget = targetIp + ":" + formatPort(rule["destPortStart"], rule["destPortEnd"], "-")
        # One list of text fragments per chain; all fragments of a list must
        # occur in the same saved rule.
        wanted = [[chain, matchDest, matchPort, option + " " + natTarget]
                  for chain, option in (("PREROUTING", "--to-destination"),
                                        ("OUTPUT", "--to-destination"),
                                        ("POSTROUTING", "--to-source"))]

        listCmd = "iptables-save | grep " + targetIp
        proc = Popen(listCmd, shell=True, stdout=PIPE)
        if proc.wait() != 0:
            problems.append("Unable to execute iptables-save command "
                            "for fetching rules by " + listCmd + "\n")
            continue

        savedRules = proc.communicate()[0].strip().split('\n')
        for fragments in wanted:
            matched = any(all(fragment in savedRule for fragment in fragments)
                          for savedRule in savedRules)
            if not matched:
                problems.append(str(fragments) + "\n")

    if len(problems) > 0:
        print("Missing port forwarding rules in Iptables-\n " + "".join(problems))
        exit(1)
    else:
        print("Found all entries (count " + str(len(rules)) + ") in iptables")
        exit(0)
def main():
    """
    Compares the current memory usage with the "maxMemoryUsage" value of the
    "systemThresholds" health check data.

    Exits 0 when the threshold is missing or the usage is within it, and
    exits 1 when the threshold is crossed or the usage command fails.
    """
    thresholds = getHealthChecksData("systemThresholds")
    # The thresholds are only used when exactly one entry was provided.
    limits = thresholds[0] if thresholds is not None and len(thresholds) == 1 else {}

    if "maxMemoryUsage" not in limits:
        print("Missing maxMemoryUsage in health_checks_data systemThresholds, skipping")
        exit(0)

    memoryLimit = float(limits["maxMemoryUsage"])
    # Third column as a percentage of the second, taken from line 2 of `free`.
    usageCmd = "free | awk 'FNR == 2 { print $3 * 100 / $2 }'"
    proc = Popen(usageCmd, shell=True, stdout=PIPE)

    if proc.wait() != 0:
        print("Failed to retrieve memory usage using " + usageCmd)
        exit(1)

    memoryNow = float(proc.communicate()[0].strip())
    if memoryNow > memoryLimit:
        print("Memory Usage " + str(memoryNow) +
              "% has crossed threshold of " + str(memoryLimit) + "%")
        exit(1)

    print("Memory Usage within limits with current at " + str(memoryNow) + "%")
    exit(0)
def getFirstLine(file=None):
    """
    Returns the first line of `file` with surrounding whitespace removed.

    Returns None when `file` is None, is not an existing regular file, or is
    empty.
    """
    if file is None or not path.isfile(file):
        return None

    with open(file, 'r') as oFile:
        firstLine = oFile.readline()

    # readline() returns '' only at end of file, i.e. for an empty file.
    return firstLine.strip() if firstLine else None


def checkVersion(expected, found, notSetupMessage, mismatchLabel):
    """
    Compares an expected version string with the one found on the router.

    When `found` is None the version is not set up yet: notSetupMessage is
    printed and the check counts as passed. On a mismatch a message starting
    with mismatchLabel is printed and False is returned. Returns True
    otherwise.
    """
    if found is None:
        print(notSetupMessage)
        return True

    if expected != found:
        print(mismatchLabel + " Version mismatch. Expected: " +
              expected + ", found: " + found)
        return False

    return True


def main():
    """
    Checks the template and scripts versions given under "routerVersion" in
    the health check data against the files on the router.

    Exits 0 when no version data is available or everything matches,
    1 on any mismatch.
    """
    entries = getHealthChecksData("routerVersion")
    data = {}
    if entries is not None and len(entries) == 1:
        data = entries[0]

    if len(data) == 0:
        print("Missing routerVersion in health_checks_data, skipping")
        exit(0)

    templateVersionMatches = True
    scriptVersionMatches = True

    if "templateVersion" in data:
        releaseFile = "/etc/cloudstack-release"
        templateVersionMatches = checkVersion(
            data["templateVersion"].strip(), getFirstLine(releaseFile),
            "Release version not yet setup at " + releaseFile + ", skipping.",
            "Template")

    if "scriptsVersion" in data:
        sigFile = "/var/cache/cloud/cloud-scripts-signature"
        # A missing signature file is skipped, exactly like a missing release
        # file, instead of being compared (and concatenated) as None.
        scriptVersionMatches = checkVersion(
            data["scriptsVersion"].strip(), getFirstLine(sigFile),
            "Scripts signature is not yet setup at " + sigFile + ", skipping",
            "Scripts")

    if templateVersionMatches and scriptVersionMatches:
        print("Template and scripts version match successful")
        exit(0)
    else:
        exit(1)
def getHealthChecksData(additionalDataKey=None, dataFile='/root/health_checks_data.json'):
    """
    Loads the health check data file and optionally parses one of the entries
    of its "health_checks_config" section.

    additionalDataKey -- name of the entry to parse; None returns the whole
                         JSON document unchanged.
    dataFile          -- path of the JSON file to read.

    The entry value is text of the form "k1=v1,k2=v2;k1=v3,...": records are
    separated by ';' and key/value pairs by ','. Returns a list with one dict
    per non-empty record, or None when the entry (or the whole
    "health_checks_config" section) is absent.
    """
    with open(dataFile, 'r') as hc_data_file:
        hc_data = json.load(hc_data_file)

    # If no specific key is requested return all the data as JSON
    if additionalDataKey is None:
        return hc_data

    config = hc_data.get("health_checks_config", {})
    if additionalDataKey not in config:
        return None

    addData = []
    for record in config[additionalDataKey].strip().split(";"):
        d = {}
        for entry in record.strip().split(','):
            # Split on the first '=' only, so a value may itself contain '='.
            # Pieces without any '=' (including empty ones) are ignored.
            keyVal = entry.strip().split("=", 1)
            if len(keyVal) == 2:
                d[keyVal[0].strip()] = keyVal[1].strip()
        if len(d) > 0:
            addData.append(d)

    return addData


def formatPort(portStart, portEnd, delim="-"):
    """
    Returns portStart alone when both ports are equal, otherwise the two
    ports joined by delim.
    """
    return portStart if portStart == portEnd else portStart + delim + portEnd
- - - - from ConfigParser import SafeConfigParser from subprocess import * -from os import path +from datetime import datetime import time import os import logging +import json +from os import sys, path +from health_checks.utility import getHealthChecksData class StatusCodes: SUCCESS = 0 @@ -42,15 +41,15 @@ class Log: NOTIF = 'NOTIF' class Config: - MONIT_AFTER_MINS = 30 SLEEP_SEC = 1 RETRY_ITERATIONS = 10 RETRY_FOR_RESTART = 5 MONITOR_LOG = '/var/log/monitor.log' - UNMONIT_PS_FILE = '/etc/unmonit_psList.txt' + HEALTH_CHECKS_DIR = 'health_checks' + MONITOR_RESULT_FILE_SUFFIX = 'monitor_results.json' + FAILING_CHECKS_FILE = 'failing_health_checks' - -def getConfig( config_file_path = "/etc/monitor.conf" ): +def getServicesConfig( config_file_path = "/etc/monitor.conf" ): """ Reads the process configuration from the config file. Config file contains the processes to be monitored. @@ -66,7 +65,7 @@ def getConfig( config_file_path = "/etc/monitor.conf" ): for name, value in parser.items(section): process_dict[section][name] = value -# printd (" %s = %r" % (name, value)) + printd (" %s = %r" % (name, value)) return process_dict @@ -77,12 +76,12 @@ def printd (msg): #for debug #print msg - return 0 - f= open(Config.MONITOR_LOG,'r+') + f= open(Config.MONITOR_LOG, 'w' if not path.isfile(Config.MONITOR_LOG) else 'r+') f.seek(0, 2) f.write(str(msg)+"\n") f.close() + print str(msg) def raisealert(severity, msg, process_name=None): """ Writes the alert message""" @@ -97,6 +96,7 @@ def raisealert(severity, msg, process_name=None): logging.info(log) msg = 'logger -t monit '+ log pout = Popen(msg, shell=True, stdout=PIPE) + print "[Alert] " + msg def isPidMatchPidFile(pidfile, pids): @@ -126,7 +126,7 @@ def isPidMatchPidFile(pidfile, pids): fd.close() return StatusCodes.FAILED - printd("file content "+str(inp)) + printd("file content of pidfile " + pidfile + " = " + str(inp).strip()) printd(pids) tocheck_pid = inp.strip() for item in pids: @@ -152,7 +152,7 @@ def 
checkProcessRunningStatus(process_name, pidFile): #check there is only one pid or not if exitStatus == 0: - pids = temp_out.split(' ') + pids = temp_out.strip().split(' ') printd("pid(s) of process %s are %s " %(process_name, pids)) #there is more than one process so match the pid file @@ -181,11 +181,10 @@ def restartService(service_name): return False - - def checkProcessStatus( process ): """ Check the process running status, if not running tries to restart + Returns the process status and if it was restarted """ process_name = process.get('processname') service_name = process.get('servicename') @@ -197,13 +196,13 @@ def checkProcessStatus( process ): cmd='' if process_name is None: printd ("\n Invalid Process Name") - return StatusCodes.INVALID_INP + return StatusCodes.INVALID_INP, False status, pids = checkProcessRunningStatus(process_name, pidfile) if status == True: printd("The process is running ....") - return StatusCodes.RUNNING + return StatusCodes.RUNNING, False else: printd("Process %s is not running trying to recover" %process_name) #Retry the process state for few seconds @@ -243,138 +242,151 @@ def checkProcessStatus( process ): raisealert(Log.ALERT,process_name,msg) printd("Restart failed after number of retries") - return StatusCodes.STOPPED + return StatusCodes.STOPPED, False - return StatusCodes.RUNNING + return StatusCodes.RUNNING, True def monitProcess( processes_info ): """ Monitors the processes which got from the config file """ + checkStartTime = time.time() + service_status = {} + failing_services = [] if len( processes_info ) == 0: - printd("Invalid Input") - return StatusCodes.INVALID_INP + printd("No config items provided - means a redundant VR or a VPC Router") + return service_status, failing_services - dict_unmonit={} - umonit_update={} - unMonitPs=False - - if not path.isfile(Config.UNMONIT_PS_FILE): - printd('Unmonit File not exist') - else: - #load the dictionary with unmonit process list - dict_unmonit = loadPsFromUnMonitFile() 
def _checkResult(success, checkStartTime, checkEndTime, message):
    """
    Builds one result record in the layout shared by the service checks and
    the health check scripts. Times are epoch seconds; they are written out
    as millisecond strings.
    """
    return {
        "success": "true" if success else "false",
        "lastUpdate": str(int(checkStartTime * 1000)),
        "lastRunDuration": str((checkEndTime - checkStartTime) * 1000),
        "message": message
    }


def monitProcess( processes_info ):
    """
    Monitors the processes which got from the config file

    processes_info -- dict mapping a service name to its properties dict,
                      which is handed to checkProcessStatus.

    Returns a tuple (service_status, failing_services):
      service_status   -- dict "<name>.service" -> result record
      failing_services -- list of "<name>.service" entries whose status was
                          not StatusCodes.RUNNING
    Both are empty when processes_info is empty.
    """
    service_status = {}
    failing_services = []
    if len( processes_info ) == 0:
        printd("No config items provided - means a redundant VR or a VPC Router")
        return service_status, failing_services

    print("[Process Info] " + json.dumps(processes_info))

    #time for noting process down time
    csec = repr(time.time()).split('.')[0]

    for process,properties in processes_info.items():
        printd ("---------------------------\nchecking the service %s\n---------------------------- " %process)
        serviceName = process + ".service"
        # Time each service on its own so lastRunDuration is the cost of this
        # check only, not the time elapsed since the first service was checked.
        checkStartTime = time.time()
        processStatus, wasRestarted = checkProcessStatus(properties)
        checkEndTime = time.time()
        if processStatus != StatusCodes.RUNNING:
            printd( "\n Service %s is not Running"%process)
            service_status[serviceName] = _checkResult(
                False, checkStartTime, checkEndTime,
                "service down at last check " + str(csec))
            failing_services.append(serviceName)
        else:
            service_status[serviceName] = _checkResult(
                True, checkStartTime, checkEndTime,
                "service is running" + (", was restarted" if wasRestarted else ""))

    return service_status, failing_services


def execute(script, checkType = "basic"):
    """
    Runs one health check script as "./<script> <checkType>" through the shell
    and turns its exit status and standard output into a result record.

    Returns:
      * a "success": "true" record carrying the output when the script exits
        with 0 and printed something;
      * {} when the script exits with 0 without output (the check is skipped);
      * a "success": "false" record carrying the output for any other exit
        status.
    """
    checkStartTime = time.time()
    cmd = "./" + script + " " + checkType
    printd ("Executing health check script command: " + cmd)

    pout = Popen(cmd, shell=True, stdout=PIPE)
    # communicate() drains stdout while waiting for the child to finish.
    # Calling wait() first can block forever once the child has filled the
    # pipe buffer, so the exit status is read after communicate() instead.
    output = pout.communicate()[0].strip()
    exitStatus = pout.returncode
    checkEndTime = time.time()

    if exitStatus == 0:
        if len(output) > 0:
            printd("Successful execution of " + script)
            return _checkResult(True, checkStartTime, checkEndTime, output)
        return {} #Skip script if no output is received
    else:
        printd("Script execution failed " + script)
        return _checkResult(False, checkStartTime, checkEndTime, output)
def main(checkType = "basic"):
    """
    Runs one monitoring pass of the given type ("basic" or "advanced") and
    stores the outcome in files named after the check type.

    Steps:
      1. read the services configuration;
      2. for the "basic" type only, check the configured services;
      3. when the health checks data says health checks are enabled, run
         every executable file found in Config.HEALTH_CHECKS_DIR that is not
         listed under "excluded_health_checks";
      4. write all results as JSON to
         "<checkType>_" + Config.MONITOR_RESULT_FILE_SUFFIX and the
         comma-separated names of failing checks to
         "<checkType>_" + Config.FAILING_CHECKS_FILE (that file is removed
         when nothing failed).

    All of these paths are relative, so they are resolved against the current
    working directory of the process.
    """
    startTime = time.time()

    # Step1 : Get Services Config
    printd("monitoring started")
    configDict = getServicesConfig()

    # Step2: Monitor services and Raise Alerts
    monitResult = {}
    failingChecks = []
    if checkType == "basic":
        monitResult, failingChecks = monitProcess(configDict)

    # Step3: Run health check scripts as needed
    hc_data = getHealthChecksData()

    if hc_data is not None and "health_checks_enabled" in hc_data and hc_data['health_checks_enabled']:
        hc_exclude = hc_data["excluded_health_checks"] if "excluded_health_checks" in hc_data else []
        for f in os.listdir(Config.HEALTH_CHECKS_DIR):
            if f in hc_exclude:
                continue
            fpath = path.join(Config.HEALTH_CHECKS_DIR, f)
            if path.isfile(fpath) and os.access(fpath, os.X_OK):
                ret = execute(fpath, checkType)
                # An empty result means the script opted out of this run.
                if len(ret) == 0:
                    continue
                if "success" in ret and ret["success"].lower() == "false":
                    failingChecks.append(f)
                monitResult[f] = ret

    # Step4: Write results to the json file for admins/management server to read
    endTime = time.time()
    monitResult["lastRun"] = {
        "start": str(datetime.fromtimestamp(startTime)),
        "end": str(datetime.fromtimestamp(endTime)),
        "duration": str(endTime - startTime)
    }

    with open(checkType + "_" + Config.MONITOR_RESULT_FILE_SUFFIX, 'w') as f:
        json.dump(monitResult, f, ensure_ascii=False)

    failChecksFile = checkType + "_" + Config.FAILING_CHECKS_FILE
    if len(failingChecks) > 0:
        with open(failChecksFile, 'w') as f:
            f.write(",".join(failingChecks))
    elif path.isfile(failChecksFile):
        # Nothing is failing any more: drop the stale marker file.
        os.remove(failChecksFile)


if __name__ == "__main__":
    # One argument selects a single check type; anything else runs both.
    if len(sys.argv) == 2:
        if sys.argv[1] == "advanced":
            main("advanced")
        elif sys.argv[1] == "basic":
            main("basic")
        else:
            # sys.argv is a list; log the offending argument itself rather
            # than concatenating the list to a string (which raises TypeError).
            printd("Error: Unknown type of test: " + sys.argv[1])
    else:
        main("basic")
        main("advanced")
created router may not have results + healthData = self.api_client.getRouterHealthCheckResults(cmd) + self.info("Router ID: %s & Router state: %s" % ( + router.id, router.state + )) + + self.assertEqual(router.id, healthData.routerid, + "Router response should contain it's health check result so id should match" + ) + self.assertEqual(isinstance(healthData.healthchecks, list), True, + "Router response should contain it's health check result as list" + ) + + self.verifyCheckTypes(healthData.healthchecks) + self.verifyCheckNames(healthData.healthchecks) + + def verifyCheckTypes(self, healthChecks): + for checkType in ["basic", "advanced"]: + foundType = False + for check in healthChecks: + if check.checktype == checkType: + foundType = True + break + self.assertTrue(foundType, + "Router should contain health check results info for type: " + checkType + ) + + def verifyCheckNames(self, healthChecks): + for checkName in ["dns_check.py", "dhcp_check.py", "haproxy_check.py", "disk_space_check.py", "iptables_check.py", "gateways_check.py", "router_version_check.py"]: + foundCheck = False + for check in healthChecks: + if check.checkname == checkName: + foundCheck = True + break + self.assertTrue(foundCheck, + "Router should contain health check results info for check name: " + checkName + ) + class TestRouterStopCreatePF(cloudstackTestCase): diff --git a/tools/appliance/systemvmtemplate/scripts/configure_systemvm_services.sh b/tools/appliance/systemvmtemplate/scripts/configure_systemvm_services.sh index 56406b711f5..e9814dd06ce 100644 --- a/tools/appliance/systemvmtemplate/scripts/configure_systemvm_services.sh +++ b/tools/appliance/systemvmtemplate/scripts/configure_systemvm_services.sh @@ -48,6 +48,7 @@ function install_cloud_scripts() { /root/{clearUsageRules.sh,reconfigLB.sh,monitorServices.py} \ /etc/profile.d/cloud.sh /etc/cron.daily/* /etc/cron.hourly/* + chmod +x /root/health_checks/* chmod -x /etc/systemd/system/* systemctl daemon-reload diff --git 
a/ui/l10n/en.js b/ui/l10n/en.js index 66f45ea19d0..c9fd01b5873 100644 --- a/ui/l10n/en.js +++ b/ui/l10n/en.js @@ -291,6 +291,8 @@ var dictionary = { "label.action.stop.instance.processing":"Stopping Instance....", "label.action.stop.router":"Stop Router", "label.action.stop.router.processing":"Stopping Router....", +"label.action.router.health.checks":"Get health checks result", +"label.perform.fresh.checks":"Perform fresh checks", "label.action.stop.systemvm":"Stop System VM", "label.action.stop.systemvm.processing":"Stopping System VM....", "label.action.take.snapshot":"Take Snapshot", @@ -579,6 +581,7 @@ var dictionary = { "label.continue":"Continue", "label.continue.basic.install":"Continue with basic installation", "label.copying.iso":"Copying ISO", +"label.copy.text": "Copy Text", "label.corrections.saved":"Corrections saved", "label.counter":"Counter", "label.cpu":"CPU", @@ -1505,6 +1508,12 @@ var dictionary = { "label.root.disk.offering":"Root Disk Offering", "label.root.disk.size":"Root disk size (GB)", "label.router.vm.scaled.up":"Router VM Scaled Up", +"label.router.health.checks":"Health Checks", +"label.router.health.check.name":"Check name", +"label.router.health.check.type":"Type", +"label.router.health.check.success":"Success", +"label.router.health.check.last.updated":"Last updated", +"label.router.health.check.details":"Details", "label.routing":"Routing", "label.routing.host":"Routing Host", "label.rule":"Rule", @@ -1974,6 +1983,7 @@ var dictionary = { "message.action.start.systemvm":"Please confirm that you want to start this system VM.", "message.action.stop.instance":"Please confirm that you want to stop this instance.", "message.action.stop.router":"All services provided by this virtual router will be interrupted. 
Please confirm that you want to stop this router.", +"message.action.router.health.checks":"Health checks result will be fetched from router.", "message.action.stop.systemvm":"Please confirm that you want to stop this system VM.", "message.action.take.snapshot":"Please confirm that you want to take a snapshot of this volume.", "message.action.unmanage.cluster":"Please confirm that you want to unmanage the cluster.", diff --git a/ui/scripts/cloudStack.js b/ui/scripts/cloudStack.js index 9b5f0117058..97eec9ce653 100644 --- a/ui/scripts/cloudStack.js +++ b/ui/scripts/cloudStack.js @@ -102,6 +102,64 @@ var $container = $('#cloudStack3-container'); + var updateSharedConfigs = function() { + // Update global pagesize for list APIs in UI + $.ajax({ + type: 'GET', + url: createURL('listConfigurations'), + data: {name: 'default.ui.page.size'}, + dataType: 'json', + async: false, + success: function(data, textStatus, xhr) { + if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) { + var config = data.listconfigurationsresponse.configuration[0]; + if (config && config.name == 'default.ui.page.size') { + pageSize = parseInt(config.value); + } + } + }, + error: function(xhr) { // ignore any errors, fallback to the default + } + }); + + // Update global pagesize for sort key in UI + $.ajax({ + type: 'GET', + url: createURL('listConfigurations'), + data: {name: 'sortkey.algorithm'}, + dataType: 'json', + async: false, + success: function(data, textStatus, xhr) { + if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) { + var config = data.listconfigurationsresponse.configuration[0]; + if (config && config.name == 'sortkey.algorithm') { + g_sortKeyIsAscending = config.value == 'true'; + } + } + }, + error: function(xhr) { // ignore any errors, fallback to the default + } + }); + + // Update global router health checks enabled + $.ajax({ + type: 'GET', + url: createURL('listConfigurations'), + data: 
{name: 'router.health.checks.enabled'}, + dataType: 'json', + async: false, + success: function(data, textStatus, xhr) { + if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) { + var config = data.listconfigurationsresponse.configuration[0]; + if (config && config.name == 'router.health.checks.enabled') { + g_routerHealthChecksEnabled = config.value == 'true'; + } + } + }, + error: function(xhr) { // ignore any errors, fallback to the default + } + }); + } var loginArgs = { $container: $container, @@ -170,61 +228,25 @@ } }); - // Update global pagesize for list APIs in UI - $.ajax({ - type: 'GET', - url: createURL('listConfigurations'), - data: {name: 'default.ui.page.size'}, - dataType: 'json', - async: false, - success: function(data, textStatus, xhr) { - if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) { - var config = data.listconfigurationsresponse.configuration[0]; - if (config && config.name == 'default.ui.page.size') { - pageSize = parseInt(config.value); - } - } - }, - error: function(xhr) { // ignore any errors, fallback to the default - } - }); + updateSharedConfigs() - // Update global pagesize for sort key in UI - $.ajax({ - type: 'GET', - url: createURL('listConfigurations'), - data: {name: 'sortkey.algorithm'}, - dataType: 'json', - async: false, - success: function(data, textStatus, xhr) { - if (data && data.listconfigurationsresponse && data.listconfigurationsresponse.configuration) { - var config = data.listconfigurationsresponse.configuration[0]; - if (config && config.name == 'sortkey.algorithm') { - g_sortKeyIsAscending = config.value == 'true'; - } - } - }, - error: function(xhr) { // ignore any errors, fallback to the default - } - }); - - // Populate IDP list - $.ajax({ - type: 'GET', - url: createURL('listIdps'), - dataType: 'json', - async: false, - success: function(data, textStatus, xhr) { - if (data && data.listidpsresponse && 
data.listidpsresponse.idp) { - var idpList = data.listidpsresponse.idp.sort(function (a, b) { - return a.orgName.localeCompare(b.orgName); - }); - g_idpList = idpList; - } - }, - error: function(xhr) { - } - }); + // Populate IDP list + $.ajax({ + type: 'GET', + url: createURL('listIdps'), + dataType: 'json', + async: false, + success: function(data, textStatus, xhr) { + if (data && data.listidpsresponse && data.listidpsresponse.idp) { + var idpList = data.listidpsresponse.idp.sort(function (a, b) { + return a.orgName.localeCompare(b.orgName); + }); + g_idpList = idpList; + } + }, + error: function(xhr) { + } + }); return userValid ? { user: { @@ -337,6 +359,7 @@ }) } }); + updateSharedConfigs(); }, error: function(xmlHTTP) { args.response.error(); @@ -377,6 +400,7 @@ g_regionsecondaryenabled = null; g_loginCmdText = null; g_allowUserViewAllDomainAccounts = null; + g_routerHealthChecksEnabled = false; // Remove any cookies var cookies = document.cookie.split(";"); diff --git a/ui/scripts/sharedFunctions.js b/ui/scripts/sharedFunctions.js index d3e6fe870be..f8ad7bfff84 100644 --- a/ui/scripts/sharedFunctions.js +++ b/ui/scripts/sharedFunctions.js @@ -37,6 +37,7 @@ var g_idpList = null; var g_appendIdpDomain = false; var g_sortKeyIsAscending = false; var g_allowUserViewAllDomainAccounts = false; +var g_routerHealthChecksEnabled = false; //keyboard keycode var keycode_Enter = 13; diff --git a/ui/scripts/system.js b/ui/scripts/system.js index 2ae2f466043..b1c75a75779 100755 --- a/ui/scripts/system.js +++ b/ui/scripts/system.js @@ -9886,6 +9886,7 @@ listView: { id: 'routers', label: 'label.virtual.appliances', + horizontalOverflow: true, fields: { name: { label: 'label.name' @@ -9914,7 +9915,19 @@ indicator: { 'Running': 'on', 'Stopped': 'off', - 'Error': 'off' + 'Error': 'off', + 'Alert': 'warning' + } + }, + healthchecksfailed: { + converter: function (str) { + if (str) return 'Failed' + return 'Passed'; + }, + label: 'label.health.check', + indicator: { + false: 
'on', + true: 'warning' } }, requiresupgrade: { @@ -9922,6 +9935,12 @@ converter: cloudStack.converters.toBooleanText } }, + preFilter: function () { + if (!g_routerHealthChecksEnabled) { + return ['healthchecksfailed'] + } + return [] + }, dataProvider: function (args) { var array1 =[]; if (args.filterBy != null) { @@ -9982,44 +10001,47 @@ routers.push(item); }); - /* - * In project view, the first listRotuers API(without projectid=-1) will return the same objects as the second listRouters API(with projectid=-1), - * because in project view, all API calls are appended with projectid=[projectID]. - * Therefore, we only call the second listRouters API(with projectid=-1) in non-project view. - */ - if (cloudStack.context && cloudStack.context.projects == null) { //non-project view - /* - * account parameter(account+domainid) and project parameter(projectid) are not allowed to be passed together to listXXXXXXX API. - * So, remove account parameter(account+domainid) from data2 - */ - if ("account" in data2) { - delete data2.account; - } - if ("domainid" in data2) { - delete data2.domainid; - } - - $.ajax({ - url: createURL("listRouters&listAll=true&page=" + args.page + "&pagesize=" + pageSize + array1.join("") + "&projectid=-1"), - data: data2, - async: false, - success: function (json) { - var items = json.listroutersresponse.router ? - json.listroutersresponse.router:[]; - - $(items).map(function (index, item) { - routers.push(item); - }); - } - }); - } - - args.response.success({ - actionFilter: routerActionfilter, - data: $(routers).map(mapRouterType) - }); + /* + * In project view, the first listRotuers API(without projectid=-1) will return the same objects as the second listRouters API(with projectid=-1), + * because in project view, all API calls are appended with projectid=[projectID]. + * Therefore, we only call the second listRouters API(with projectid=-1) in non-project view. 
+ */ + if (cloudStack.context && cloudStack.context.projects == null) { //non-project view + /* + * account parameter(account+domainid) and project parameter(projectid) are not allowed to be passed together to listXXXXXXX API. + * So, remove account parameter(account+domainid) from data2 + */ + if ("account" in data2) { + delete data2.account; } + if ("domainid" in data2) { + delete data2.domainid; + } + + $.ajax({ + url: createURL("listRouters&listAll=true&page=" + args.page + "&pagesize=" + pageSize + array1.join("") + "&projectid=-1"), + data: data2, + async: false, + success: function (json) { + var items = json.listroutersresponse.router ? + json.listroutersresponse.router:[]; + + var items = json.listroutersresponse.router ? + json.listroutersresponse.router:[]; + + $(items).map(function (index, item) { + routers.push(item); + }); + } + }); + } + + args.response.success({ + actionFilter: routerActionfilter, + data: $(routers).map(mapRouterType) }); + } + }); }, detailView: { name: 'label.virtual.appliance.details', @@ -10542,6 +10564,56 @@ height: 640 } } + }, + + healthChecks: { + label: 'label.action.router.health.checks', + createForm: { + title: 'label.action.router.health.checks', + desc: 'message.action.router.health.checks', + fields: { + performfreshchecks: { + label: 'label.perform.fresh.checks', + isBoolean: true + } + } + }, + action: function (args) { + if (!g_routerHealthChecksEnabled) { + cloudStack.dialog.notice({ + message: 'Router health checks are disabled. 
Please enable router.health.checks.enabled to execute this action' + }) + args.response.success() + return + } + var data = { + 'routerid': args.context.routers[0].id, + 'performfreshchecks': (args.data.performfreshchecks === 'on') + }; + $.ajax({ + url: createURL('getRouterHealthCheckResults'), + dataType: 'json', + data: data, + async: true, + success: function (json) { + var healthChecks = json.getrouterhealthcheckresultsresponse.routerhealthchecks.healthchecks + var numChecks = healthChecks.length + var failedChecks = 0 + $.each(healthChecks, function(idx, check) { + if (!check.success) failedChecks = failedChecks + 1 + }) + cloudStack.dialog.notice({ + message: 'Found ' + numChecks + ' checks for router, with ' + failedChecks + ' failing checks. Please visit router > Health Checks tab to see details' + }) + args.response.success(); + } + }); + }, + messages: { + notification: function(args) { + return 'label.action.router.health.checks' + } + } } }, tabs: { @@ -10731,6 +10803,78 @@ } }); } + }, + healthCheckResults: { + title: 'label.router.health.checks', + listView: { + id: 'routerHealthCheckResults', + label: 'label.router.health.checks', + hideToolbar: true, + fields: { + checkname: { + label: 'label.router.health.check.name' + }, + checktype: { + label: 'label.router.health.check.type' + }, + success: { + label: 'label.router.health.check.success', + converter: function (args) { + if (args) { + return _l('True'); + } else { + return _l('False'); + } + }, + indicator: { + true: 'on', + false: 'off' + } + }, + lastupdated: { + label: 'label.router.health.check.last.updated' + } + }, + actions: { + details: { + label: 'label.router.health.check.details', + action: { + custom: function (args) { + cloudStack.dialog.notice({ + message: args.context.routerHealthCheckResults[0].details + }) + } + } + } + }, + dataProvider: function(args) { + if (!g_routerHealthChecksEnabled) { + cloudStack.dialog.notice({ + message: 'Router health checks are disabled. 
Please enable router.health.checks.enabled to get data' + }) + args.response.success({}) + return + } + if (args.page > 1) { + // Only one page is supported as it's not list command. + args.response.success({}); + return + } + + $.ajax({ + url: createURL('getRouterHealthCheckResults'), + data: { + 'routerid': args.context.routers[0].id + }, + success: function (json) { + var hcData = json.getrouterhealthcheckresultsresponse.routerhealthchecks.healthchecks + args.response.success({ + data: hcData + }); + } + }); + } + } } } } @@ -22273,6 +22417,7 @@ allowedActions.push("migrate"); allowedActions.push("diagnostics"); allowedActions.push("retrieveDiagnostics"); + allowedActions.push("healthChecks"); } } else if (jsonObj.state == 'Stopped') { allowedActions.push("start"); diff --git a/ui/scripts/ui/widgets/listView.js b/ui/scripts/ui/widgets/listView.js index 91578148e28..5245c10a3fa 100644 --- a/ui/scripts/ui/widgets/listView.js +++ b/ui/scripts/ui/widgets/listView.js @@ -168,8 +168,11 @@ } else { if (needsRefresh) { var $loading = $('
').addClass('loading-overlay'); - - $listView.prepend($loading); + if ($listView) { + $listView.prepend($loading); + } else { + $instanceRow.closest('.list-view').prepend($loading) + } } var actionArgs = {