diff --git a/agent/bindir/rolling-maintenance.in b/agent/bindir/rolling-maintenance.in
new file mode 100644
index 00000000000..572209cde8f
--- /dev/null
+++ b/agent/bindir/rolling-maintenance.in
@@ -0,0 +1,108 @@
+#!/usr/bin/python
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+# http://www.apache.org/licenses/LICENSE-2.0
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from subprocess import PIPE, Popen
+import sys
+import logging
+
+LOG_FILE='/var/log/cloudstack/agent/rolling-maintenance.log'
+# Exit status reported as success with the "avoidmaintenance" flag set (see execute_script)
+AVOID_MAINTENANCE_EXIT_STATUS=70
+
+logging.basicConfig(filename=LOG_FILE,
+                    filemode='a',
+                    format='%(asctime)s,%(msecs)d %(name)s %(levelname)s %(message)s',
+                    datefmt='%H:%M:%S',
+                    level=logging.INFO)
+logger = logging.getLogger('rolling-maintenance')
+
+
+def execute_script(stage, script, payload, timeout):
+    """Run a hook script through the ``timeout`` command and report its outcome.
+
+    :param stage: stage name, used for logging only
+    :param script: script command line to execute
+    :param payload: optional text appended to the command line, or None
+    :param timeout: duration handed to the ``timeout`` command
+    :return: dict with keys "success" (exit status 0 or
+        AVOID_MAINTENANCE_EXIT_STATUS), "message" (stripped stdout) and
+        "avoidmaintenance" (exit status equals AVOID_MAINTENANCE_EXIT_STATUS)
+
+    Any exception is logged and terminates the process with exit status 1.
+    """
+    logger.info("Executing script: %s for stage: %s" % (script, stage))
+
+    try:
+        command = "timeout %s %s " % (str(timeout), script)
+        if payload:
+            logger.info("Adding payload: %s" % payload)
+            command += " " + payload
+        # NOTE(review): the command is a concatenated string run with shell=True, so shell
+        # metacharacters in script/payload are interpreted - confirm the caller is trusted
+        # universal_newlines=True makes the pipes return text (not bytes) on Python 2 and 3
+        pout = Popen(command, shell=True, stdout=PIPE, stderr=PIPE, universal_newlines=True)
+        # communicate() drains both pipes while waiting; calling wait() before reading
+        # can deadlock once the child fills a pipe buffer
+        stdout, _ = pout.communicate()
+        exit_status = pout.returncode
+
+        avoid_maintenance = exit_status == AVOID_MAINTENANCE_EXIT_STATUS
+        success = exit_status == 0 or avoid_maintenance
+        return {"success": success, "message": stdout.strip(), "avoidmaintenance": avoid_maintenance}
+    except Exception as e:
+        logger.error("Error in stage %s: %s" % (stage, e))
+        sys.exit(1)
+
+
+if __name__ == '__main__':
+    try:
+        logger.info(sys.argv)
+        if len(sys.argv) < 2:
+            logger.error("Arguments missing")
+            sys.exit(0)
+
+        # Single comma-separated argument: STAGE,SCRIPT,TIMEOUT,RESULTS_FILE,OUTPUT_FILE[,PAYLOAD]
+        args = sys.argv[1]
+        params = args.split(',')
+        if len(params) < 5:
+            logger.error("Wrong number of parameters received, STAGE,SCRIPT,TIMEOUT,RESULTS_FILE,OUTPUT_FILE"
+                         "[,PAYLOAD] expected")
+            sys.exit(0)
+
+        stage = params[0]
+        script = params[1]
+        timeout = params[2]
+        results_file_path = params[3]
+        output_file_path = params[4]
+        payload = params[5] if len(params) > 5 else None
+        logger.info("Received parameters: stage: %s, script: %s, timeout: %s, results_file: %s, output_file: %s "
+                    "and payload: %s" % (stage, script, timeout, results_file_path, output_file_path, payload))
+
+        results = execute_script(stage, script, payload, timeout)
+
+        # Persist results and output on a file; "with" closes each file even if the write fails
+        with open(output_file_path, "w+") as output_file:
+            output_file.write(results['message'])
+
+        with open(results_file_path, "w+") as results_file:
+            results_file.write("%s,%s,%s" % (stage, str(results['success']), str(results['avoidmaintenance'])))
+
+        msg = "Successful execution of %s" if results['success'] else "Script execution failed: %s"
+        logger.info(results['message'])
+        logger.info(msg % script)
+    except Exception as e:
+        logger.error("Unexpected error on systemd service: %s" % e)
+        sys.exit(1)
diff --git a/agent/conf/agent.properties b/agent/conf/agent.properties
index 24592387b09..bb9bf4071b2 100644
--- a/agent/conf/agent.properties
+++ b/agent/conf/agent.properties
@@ -118,6 +118,12 @@ hypervisor.type=kvm
 # This parameter specifies a directory on the host local storage for temporary storing direct download templates
 #direct.download.temporary.download.location=/var/lib/libvirt/images
 
+# set the rolling maintenance hook scripts directory
+#rolling.maintenance.hooks.dir=/etc/cloudstack/agent/hooks.d
+
+# disable the rolling maintenance service execution
+#rolling.maintenance.service.executor.disabled=true
+
 # set the hypervisor URI. Usually there is no need for changing this
 # For KVM: qemu:///system
 # For LXC: lxc:///
diff --git a/agent/conf/cloudstack-agent.logrotate.in b/agent/conf/cloudstack-agent.logrotate.in
index d9a3dfbc569..2b3dc87f253 100644
--- a/agent/conf/cloudstack-agent.logrotate.in
+++ b/agent/conf/cloudstack-agent.logrotate.in
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-/var/log/cloudstack/agent/security_group.log /var/log/cloudstack/agent/resizevolume.log { +/var/log/cloudstack/agent/security_group.log /var/log/cloudstack/agent/resizevolume.log /var/log/cloudstack/agent/rolling-maintenance.log { copytruncate daily rotate 5 diff --git a/api/src/main/java/com/cloud/deploy/DataCenterDeployment.java b/api/src/main/java/com/cloud/deploy/DataCenterDeployment.java index 76faf25f726..3ee544cf4ee 100644 --- a/api/src/main/java/com/cloud/deploy/DataCenterDeployment.java +++ b/api/src/main/java/com/cloud/deploy/DataCenterDeployment.java @@ -33,6 +33,7 @@ public class DataCenterDeployment implements DeploymentPlan { boolean _recreateDisks; ReservationContext _context; List preferredHostIds = new ArrayList<>(); + boolean migrationPlan; public DataCenterDeployment(long dataCenterId) { this(dataCenterId, null, null, null, null, null); @@ -107,4 +108,13 @@ public class DataCenterDeployment implements DeploymentPlan { return this.preferredHostIds; } + public void setMigrationPlan(boolean migrationPlan) { + this.migrationPlan = migrationPlan; + } + + @Override + public boolean isMigrationPlan() { + return migrationPlan; + } + } diff --git a/api/src/main/java/com/cloud/deploy/DeploymentPlan.java b/api/src/main/java/com/cloud/deploy/DeploymentPlan.java index b57fec0cf41..c71bf3e9311 100644 --- a/api/src/main/java/com/cloud/deploy/DeploymentPlan.java +++ b/api/src/main/java/com/cloud/deploy/DeploymentPlan.java @@ -71,4 +71,6 @@ public interface DeploymentPlan { void setPreferredHosts(List hostIds); List getPreferredHosts(); + + boolean isMigrationPlan(); } diff --git a/api/src/main/java/com/cloud/event/EventTypes.java b/api/src/main/java/com/cloud/event/EventTypes.java index c74e9b7b353..30b6ac0b0a1 100644 --- a/api/src/main/java/com/cloud/event/EventTypes.java +++ b/api/src/main/java/com/cloud/event/EventTypes.java @@ -76,6 +76,10 @@ import com.cloud.user.User; import com.cloud.vm.Nic; import com.cloud.vm.NicSecondaryIp; import 
com.cloud.vm.VirtualMachine; +import org.apache.cloudstack.api.response.ClusterResponse; +import org.apache.cloudstack.api.response.HostResponse; +import org.apache.cloudstack.api.response.PodResponse; +import org.apache.cloudstack.api.response.ZoneResponse; public class EventTypes { @@ -591,6 +595,13 @@ public class EventTypes { // Diagnostics Events public static final String EVENT_SYSTEM_VM_DIAGNOSTICS = "SYSTEM.VM.DIAGNOSTICS"; + // Rolling Maintenance + public static final String EVENT_START_ROLLING_MAINTENANCE = "SYSTEM.ROLLING.MAINTENANCE"; + public static final String EVENT_HOST_ROLLING_MAINTENANCE = "HOST.ROLLING.MAINTENANCE"; + public static final String EVENT_CLUSTER_ROLLING_MAINTENANCE = "CLUSTER.ROLLING.MAINTENANCE"; + public static final String EVENT_POD_ROLLING_MAINTENANCE = "POD.ROLLING.MAINTENANCE"; + public static final String EVENT_ZONE_ROLLING_MAINTENANCE = "ZONE.ROLLING.MAINTENANCE"; + static { // TODO: need a way to force author adding event types to declare the entity details as well, with out braking @@ -990,6 +1001,11 @@ public class EventTypes { entityEventDetails.put(EVENT_TEMPLATE_DIRECT_DOWNLOAD_FAILURE, VirtualMachineTemplate.class); entityEventDetails.put(EVENT_ISO_DIRECT_DOWNLOAD_FAILURE, "Iso"); entityEventDetails.put(EVENT_SYSTEM_VM_DIAGNOSTICS, VirtualMachine.class); + + entityEventDetails.put(EVENT_ZONE_ROLLING_MAINTENANCE, ZoneResponse.class); + entityEventDetails.put(EVENT_POD_ROLLING_MAINTENANCE, PodResponse.class); + entityEventDetails.put(EVENT_CLUSTER_ROLLING_MAINTENANCE, ClusterResponse.class); + entityEventDetails.put(EVENT_HOST_ROLLING_MAINTENANCE, HostResponse.class); } public static String getEntityForEvent(String eventName) { diff --git a/api/src/main/java/com/cloud/resource/RollingMaintenanceManager.java b/api/src/main/java/com/cloud/resource/RollingMaintenanceManager.java new file mode 100644 index 00000000000..23999804c2f --- /dev/null +++ b/api/src/main/java/com/cloud/resource/RollingMaintenanceManager.java @@ 
-0,0 +1,161 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package com.cloud.resource;
+
+import com.cloud.host.Host;
+import com.cloud.utils.Pair;
+import com.cloud.utils.Ternary;
+import com.cloud.utils.exception.CloudRuntimeException;
+import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceCmd;
+import org.apache.cloudstack.framework.config.ConfigKey;
+import org.apache.cloudstack.framework.config.Configurable;
+
+import java.util.Date;
+import java.util.List;
+
+/**
+ * Contract of the rolling maintenance feature: its configuration keys, the per-host result
+ * holders, the stage and resource type enums, and the entry points used by StartRollingMaintenanceCmd.
+ */
+public interface RollingMaintenanceManager extends Configurable {
+
+    ConfigKey<Integer> KvmRollingMaintenanceStageTimeout = new ConfigKey<>("Advanced", Integer.class,
+            "kvm.rolling.maintenance.stage.timeout", "1800",
+            "Wait timeout (in seconds) for a rolling maintenance stage update from hosts",
+            true, ConfigKey.Scope.Global);
+    ConfigKey<Integer> KvmRollingMaintenancePingInterval = new ConfigKey<>("Advanced", Integer.class,
+            "kvm.rolling.maintenance.ping.interval", "10",
+            "Ping interval in seconds between management server and hosts performing stages during rolling maintenance",
+            true, ConfigKey.Scope.Global);
+    ConfigKey<Integer> KvmRollingMaintenanceWaitForMaintenanceTimeout = new ConfigKey<>("Advanced", Integer.class,
+            "kvm.rolling.maintenance.wait.maintenance.timeout", "1800",
+            "Timeout (in seconds) to wait for a host preparing to enter maintenance mode",
+            true, ConfigKey.Scope.Global);
+
+    /** Holder for a host that was skipped, together with the reason. */
+    class HostSkipped {
+        private Host host;
+        private String reason;
+
+        public HostSkipped(Host host, String reason) {
+            this.host = host;
+            this.reason = reason;
+        }
+
+        public Host getHost() {
+            return host;
+        }
+
+        public void setHost(Host host) {
+            this.host = host;
+        }
+
+        public String getReason() {
+            return reason;
+        }
+
+        public void setReason(String reason) {
+            this.reason = reason;
+        }
+    }
+
+    /** Holder for a host that was updated, with start/end dates and an output message. */
+    class HostUpdated {
+        private Host host;
+        private Date start;
+        private Date end;
+        private String outputMsg;
+
+        public HostUpdated(Host host, Date start, Date end, String outputMsg) {
+            this.host = host;
+            this.start = start;
+            this.end = end;
+            this.outputMsg = outputMsg;
+        }
+
+        public Host getHost() {
+            return host;
+        }
+
+        public void setHost(Host host) {
+            this.host = host;
+        }
+
+        public Date getStart() {
+            return start;
+        }
+
+        public void setStart(Date start) {
+            this.start = start;
+        }
+
+        public Date getEnd() {
+            return end;
+        }
+
+        public void setEnd(Date end) {
+            this.end = end;
+        }
+
+        public String getOutputMsg() {
+            return outputMsg;
+        }
+
+        public void setOutputMsg(String outputMsg) {
+            this.outputMsg = outputMsg;
+        }
+    }
+
+    /** Rolling maintenance stages, chained in declaration order by {@link #next()}. */
+    enum Stage {
+        PreFlight, PreMaintenance, Maintenance, PostMaintenance;
+
+        /** Returns the stage that follows this one, or null after PostMaintenance. */
+        public Stage next() {
+            switch (this) {
+                case PreFlight:
+                    return PreMaintenance;
+                case PreMaintenance:
+                    return Maintenance;
+                case Maintenance:
+                    return PostMaintenance;
+                case PostMaintenance:
+                    return null;
+            }
+            throw new CloudRuntimeException("Unexpected stage: " + this);
+        }
+    }
+
+    /** Kinds of resource returned by getResourceTypeIdPair. */
+    enum ResourceType {
+        Pod, Cluster, Zone, Host
+    }
+
+    /**
+     * Starts rolling maintenance as specified in cmd
+     * @param cmd command
+     * @return tuple: (SUCCESS, DETAILS, (HOSTS_UPDATED, HOSTS_SKIPPED))
+     */
+    Ternary<Boolean, String, Pair<List<HostUpdated>, List<HostSkipped>>> startRollingMaintenance(StartRollingMaintenanceCmd cmd);
+
+    /**
+     * Returns the resource type addressed by cmd together with the matching IDs
+     * @param cmd command
+     * @return pair: (RESOURCE_TYPE, IDS)
+     */
+    Pair<ResourceType, List<Long>> getResourceTypeIdPair(StartRollingMaintenanceCmd cmd);
+}
\ No newline at end of file
diff --git a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java
index ed7e39e3f92..04823644240 100644
--- a/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java
+++ b/api/src/main/java/org/apache/cloudstack/api/ApiConstants.java
@@ -74,6 +74,7 @@ public class ApiConstants {
     public static final String CLEANUP = "cleanup";
     public static final String MAKEREDUNDANT = "makeredundant";
     public static final String CLUSTER_ID = "clusterid";
+    public static final String CLUSTER_IDS = "clusterids";
     public static final String CLUSTER_NAME = "clustername";
     public static final String CLUSTER_TYPE = "clustertype";
     public static final String CN = "cn";
@@ -173,6 +174,7 @@ public class ApiConstants {
     public static final String HEALTH = "health";
     public static final String HIDE_IP_ADDRESS_USAGE = "hideipaddressusage";
     public static final String HOST_ID = "hostid";
+    public static final String HOST_IDS = "hostids";
     public static final String HOST_NAME = "hostname";
     public static final String HYPERVISOR = "hypervisor";
     public static final String INLINE = "inline";
@@ -256,6 +258,7 @@ public class ApiConstants {
     public static final String OS_NAME_FOR_HYPERVISOR = "osnameforhypervisor";
     public static final String OUTOFBANDMANAGEMENT_POWERSTATE = "outofbandmanagementpowerstate";
     public static final String OUTOFBANDMANAGEMENT_ENABLED = "outofbandmanagementenabled";
+    public static final String OUTPUT = "output";
     public static final String OVF_PROPERTIES = "ovfproperties";
     public static final String PARAMS = "params";
     public static final String PARENT_ID = "parentid";
@@ -267,6 +270,7 @@ public class ApiConstants {
     public static final String PASSWORD_ENABLED = "passwordenabled";
     public static final String SSHKEY_ENABLED = "sshkeyenabled";
     public static final String PATH = "path";
+    public static final String PAYLOAD =
"payload"; public static final String POD_ID = "podid"; public static final String POD_NAME = "podname"; public static final String POD_IDS = "podids"; diff --git a/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java b/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java index 57e03b35276..3f0d978b161 100644 --- a/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java +++ b/api/src/main/java/org/apache/cloudstack/api/ResponseGenerator.java @@ -22,6 +22,10 @@ import java.util.List; import java.util.Map; import java.util.Set; +import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; +import com.cloud.resource.RollingMaintenanceManager; +import org.apache.cloudstack.api.response.RollingMaintenanceResponse; +import org.apache.cloudstack.management.ManagementServerHost; import org.apache.cloudstack.affinity.AffinityGroup; import org.apache.cloudstack.affinity.AffinityGroupResponse; import org.apache.cloudstack.api.ApiConstants.HostDetails; @@ -88,7 +92,6 @@ import org.apache.cloudstack.api.response.RemoteAccessVpnResponse; import org.apache.cloudstack.api.response.ResourceCountResponse; import org.apache.cloudstack.api.response.ResourceLimitResponse; import org.apache.cloudstack.api.response.ResourceTagResponse; -import org.apache.cloudstack.api.response.RouterHealthCheckResultResponse; import org.apache.cloudstack.api.response.SSHKeyPairResponse; import org.apache.cloudstack.api.response.SecurityGroupResponse; import org.apache.cloudstack.api.response.ServiceOfferingResponse; @@ -125,7 +128,6 @@ import org.apache.cloudstack.backup.BackupOffering; import org.apache.cloudstack.backup.Backup; import org.apache.cloudstack.backup.BackupSchedule; import org.apache.cloudstack.config.Configuration; -import org.apache.cloudstack.management.ManagementServerHost; import org.apache.cloudstack.network.lb.ApplicationLoadBalancerRule; import org.apache.cloudstack.region.PortableIp; import 
org.apache.cloudstack.region.PortableIpRange; @@ -482,4 +484,7 @@ public interface ResponseGenerator { ManagementServerResponse createManagementResponse(ManagementServerHost mgmt); List createHealthCheckResponse(VirtualMachine router, List healthCheckResults); + + RollingMaintenanceResponse createRollingMaintenanceResponse(Boolean success, String details, List hostsUpdated, List hostsSkipped); + } diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java index f60812821d6..7083f0db133 100644 --- a/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/host/PrepareForMaintenanceCmd.java @@ -97,6 +97,10 @@ public class PrepareForMaintenanceCmd extends BaseAsyncCmd { return getId(); } + public void setId(Long id) { + this.id = id; + } + @Override public void execute() { try { diff --git a/api/src/main/java/org/apache/cloudstack/api/command/admin/resource/StartRollingMaintenanceCmd.java b/api/src/main/java/org/apache/cloudstack/api/command/admin/resource/StartRollingMaintenanceCmd.java new file mode 100644 index 00000000000..b5a91281047 --- /dev/null +++ b/api/src/main/java/org/apache/cloudstack/api/command/admin/resource/StartRollingMaintenanceCmd.java @@ -0,0 +1,178 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. 
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.cloudstack.api.command.admin.resource;
+
+import com.cloud.event.EventTypes;
+import com.cloud.exception.ConcurrentOperationException;
+import com.cloud.exception.InsufficientCapacityException;
+import com.cloud.exception.NetworkRuleConflictException;
+import com.cloud.exception.ResourceAllocationException;
+import com.cloud.exception.ResourceUnavailableException;
+import com.cloud.resource.RollingMaintenanceManager;
+import com.cloud.utils.Pair;
+import com.cloud.utils.Ternary;
+import org.apache.cloudstack.acl.RoleType;
+import org.apache.cloudstack.api.APICommand;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseAsyncCmd;
+import org.apache.cloudstack.api.BaseCmd;
+import org.apache.cloudstack.api.Parameter;
+import org.apache.cloudstack.api.ServerApiException;
+import org.apache.cloudstack.api.response.ClusterResponse;
+import org.apache.cloudstack.api.response.HostResponse;
+import org.apache.cloudstack.api.response.PodResponse;
+import org.apache.cloudstack.api.response.RollingMaintenanceResponse;
+import org.apache.cloudstack.api.response.ZoneResponse;
+import org.apache.cloudstack.context.CallContext;
+import org.apache.log4j.Logger;
+
+import javax.inject.Inject;
+import java.util.List;
+
+/**
+ * Admin-only asynchronous API command that starts rolling maintenance on the given pods, clusters, zones or hosts.
+ */
+@APICommand(name = StartRollingMaintenanceCmd.APINAME, description = "Start rolling maintenance",
+        responseObject = RollingMaintenanceResponse.class,
+        requestHasSensitiveInfo = false, responseHasSensitiveInfo = false,
+        authorized = {RoleType.Admin})
+public class StartRollingMaintenanceCmd extends BaseAsyncCmd {
+
+    @Inject
+    RollingMaintenanceManager manager;
+
+    public static final Logger s_logger = Logger.getLogger(StartRollingMaintenanceCmd.class.getName());
+
+    public static final String APINAME = "startRollingMaintenance";
+
+    /////////////////////////////////////////////////////
+    //////////////// API parameters /////////////////////
+    /////////////////////////////////////////////////////
+    @Parameter(name = ApiConstants.POD_IDS, type = CommandType.LIST, collectionType = CommandType.UUID,
+            entityType = PodResponse.class, description = "the IDs of the pods to start maintenance on")
+    private List<Long> podIds;
+
+    @Parameter(name = ApiConstants.CLUSTER_IDS, type = CommandType.LIST, collectionType = CommandType.UUID,
+            entityType = ClusterResponse.class, description = "the IDs of the clusters to start maintenance on")
+    private List<Long> clusterIds;
+
+    @Parameter(name = ApiConstants.ZONE_ID_LIST, type = CommandType.LIST, collectionType = CommandType.UUID,
+            entityType = ZoneResponse.class, description = "the IDs of the zones to start maintenance on")
+    private List<Long> zoneIds;
+
+    @Parameter(name = ApiConstants.HOST_IDS, type = CommandType.LIST, collectionType = CommandType.UUID,
+            entityType = HostResponse.class, description = "the IDs of the hosts to start maintenance on")
+    private List<Long> hostIds;
+
+    @Parameter(name = ApiConstants.FORCED, type = CommandType.BOOLEAN,
+            description = "if rolling mechanism should continue in case of an error")
+    private Boolean forced;
+
+    @Parameter(name = ApiConstants.PAYLOAD, type = CommandType.STRING,
+            description = "the command to execute while hosts are on maintenance")
+    private String payload;
+
+    @Parameter(name = ApiConstants.TIMEOUT, type = CommandType.INTEGER,
+            description = "optional operation timeout (in seconds) that overrides the global timeout setting")
+    private Integer timeout;
+
+    /////////////////////////////////////////////////////
+    /////////////////// Accessors ///////////////////////
+    /////////////////////////////////////////////////////
+
+    public List<Long> getPodIds() {
+        return podIds;
+    }
+
+    public List<Long> getClusterIds() {
+        return clusterIds;
+    }
+
+    public List<Long> getZoneIds() {
+        return zoneIds;
+    }
+
+    public List<Long> getHostIds() {
+        return hostIds;
+    }
+
+    public Boolean getForced() {
+        return forced != null && forced; // an omitted (null) parameter counts as false
+    }
+
+    public String getPayload() {
+        return payload;
+    }
+
+    public Integer getTimeout() {
+        return timeout;
+    }
+
+    /////////////////////////////////////////////////////
+    /////////////// API Implementation///////////////////
+    /////////////////////////////////////////////////////
+
+    @Override
+    public void execute() throws ResourceUnavailableException, InsufficientCapacityException, ServerApiException, ConcurrentOperationException, ResourceAllocationException, NetworkRuleConflictException {
+        Ternary<Boolean, String, Pair<List<RollingMaintenanceManager.HostUpdated>, List<RollingMaintenanceManager.HostSkipped>>>
+                result = manager.startRollingMaintenance(this);
+        Boolean success = result.first();
+        String details = result.second();
+        Pair<List<RollingMaintenanceManager.HostUpdated>, List<RollingMaintenanceManager.HostSkipped>> pair = result.third();
+        List<RollingMaintenanceManager.HostUpdated> hostsUpdated = pair.first();
+        List<RollingMaintenanceManager.HostSkipped> hostsSkipped = pair.second();
+
+        RollingMaintenanceResponse response = _responseGenerator.createRollingMaintenanceResponse(success, details, hostsUpdated, hostsSkipped);
+        response.setResponseName(getCommandName());
+        this.setResponseObject(response);
+    }
+
+    @Override
+    public String getCommandName() {
+        return APINAME.toLowerCase() + BaseCmd.RESPONSE_SUFFIX;
+    }
+
+    @Override
+    public long getEntityOwnerId() {
+        return CallContext.current().getCallingAccountId();
+    }
+
+    @Override
+    public String getEventType() {
+        Pair<RollingMaintenanceManager.ResourceType, List<Long>> pair = manager.getResourceTypeIdPair(this);
+        RollingMaintenanceManager.ResourceType type = pair.first();
+        switch (type) {
+            case Zone:
+                return EventTypes.EVENT_ZONE_ROLLING_MAINTENANCE;
+            case Pod:
+                return EventTypes.EVENT_POD_ROLLING_MAINTENANCE;
+            case Cluster:
+                return EventTypes.EVENT_CLUSTER_ROLLING_MAINTENANCE;
+            case Host:
+                return EventTypes.EVENT_HOST_ROLLING_MAINTENANCE;
+            default: // same fallback as before: an unmatched type yields an empty event type
+                return "";
+        }
+    }
+
+    @Override
+    public String getEventDescription() {
+        Pair<RollingMaintenanceManager.ResourceType, List<Long>> pair = manager.getResourceTypeIdPair(this);
+        return "Starting rolling maintenance on entity: " + pair.first() + " with IDs: " + pair.second();
+    }
+}
\ No newline at end of file
diff --git a/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostSkippedResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostSkippedResponse.java
new file mode 100644
index 00000000000..8d304543fb9
--- /dev/null
+++ b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostSkippedResponse.java
@@ -0,0 +1,61 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.api.response;
+
+import com.cloud.serializer.Param;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseResponse;
+
+/** Response entry for a host skipped by rolling maintenance: host ID, host name and reason. */
+public class RollingMaintenanceHostSkippedResponse extends BaseResponse {
+    @SerializedName(ApiConstants.HOST_ID)
+    @Param(description = "the ID of the skipped host")
+    private String hostId;
+
+    @SerializedName(ApiConstants.HOST_NAME)
+    @Param(description = "the name of the skipped host")
+    private String hostName;
+
+    @SerializedName(ApiConstants.ACL_REASON)
+    @Param(description = "the reason to skip the host")
+    private String reason;
+
+    public String getHostId() {
+        return this.hostId;
+    }
+
+    public String getHostName() {
+        return this.hostName;
+    }
+
+    public String getReason() {
+        return this.reason;
+    }
+
+    public void setHostId(String hostId) {
+        this.hostId = hostId;
+    }
+
+    public void setHostName(String hostName) {
+        this.hostName = hostName;
+    }
+
+    public void setReason(String reason) {
+        this.reason = reason;
+    }
+}
diff --git a/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostUpdatedResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostUpdatedResponse.java
new file mode 100644
index 00000000000..821257d4e07
--- /dev/null
+++ b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceHostUpdatedResponse.java
@@ -0,0 +1,85 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.
You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.api.response;
+
+import com.cloud.serializer.Param;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.ApiConstants;
+import org.apache.cloudstack.api.BaseResponse;
+
+/** Response entry for a host updated by rolling maintenance: ID, name, start/end dates and script output. */
+public class RollingMaintenanceHostUpdatedResponse extends BaseResponse {
+    @SerializedName(ApiConstants.HOST_ID)
+    @Param(description = "the ID of the updated host")
+    private String hostId;
+
+    @SerializedName(ApiConstants.HOST_NAME)
+    @Param(description = "the name of the updated host")
+    private String hostName;
+
+    @SerializedName(ApiConstants.START_DATE)
+    @Param(description = "start date of the update on the host")
+    private String startDate;
+
+    @SerializedName(ApiConstants.END_DATE)
+    @Param(description = "end date of the update on the host")
+    private String endDate;
+
+    @SerializedName(ApiConstants.OUTPUT)
+    @Param(description = "output of the maintenance script on the host")
+    private String output;
+
+    public String getHostId() {
+        return this.hostId;
+    }
+
+    public String getHostName() {
+        return this.hostName;
+    }
+
+    public String getStartDate() {
+        return this.startDate;
+    }
+
+    public String getEndDate() {
+        return this.endDate;
+    }
+
+    public String getOutput() {
+        return this.output;
+    }
+
+    public void setHostId(String hostId) {
+        this.hostId = hostId;
+    }
+
+    public void setHostName(String hostName) {
+        this.hostName = hostName;
+    }
+
+    public void setStartDate(String startDate) {
+        this.startDate = startDate;
+    }
+
+    public void setEndDate(String endDate) {
+        this.endDate = endDate;
+    }
+
+    public void setOutput(String output) {
+        this.output = output;
+    }
+}
diff --git a/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceResponse.java b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceResponse.java
new file mode 100644
index 00000000000..bfd4d9fa741
--- /dev/null
+++ b/api/src/main/java/org/apache/cloudstack/api/response/RollingMaintenanceResponse.java
@@ -0,0 +1,79 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership. The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License. You may obtain a copy of the License at
+//
+// http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied. See the License for the
+// specific language governing permissions and limitations
+// under the License.
+package org.apache.cloudstack.api.response;
+
+import com.cloud.serializer.Param;
+import com.google.gson.annotations.SerializedName;
+import org.apache.cloudstack.api.BaseResponse;
+
+import java.util.List;
+
+/** Result of a rolling maintenance operation: success flag, details, and the hosts updated or skipped. */
+public class RollingMaintenanceResponse extends BaseResponse {
+    @SerializedName("success")
+    @Param(description = "indicates if the rolling maintenance operation was successful")
+    private Boolean success;
+
+    @SerializedName("details")
+    @Param(description = "in case of failure, details are displayed")
+    private String details;
+
+    @SerializedName("hostsupdated")
+    @Param(description = "the hosts updated", responseObject = RollingMaintenanceHostUpdatedResponse.class)
+    private List<RollingMaintenanceHostUpdatedResponse> updatedHosts;
+
+    @SerializedName("hostsskipped")
+    @Param(description = "the hosts skipped", responseObject = RollingMaintenanceHostSkippedResponse.class)
+    private List<RollingMaintenanceHostSkippedResponse> skippedHosts;
+
+    public RollingMaintenanceResponse(Boolean success, String details) {
+        this.success = success;
+        this.details = details;
+    }
+
+    public Boolean getSuccess() {
+        return success;
+    }
+
+    public void setSuccess(Boolean success) {
+        this.success = success;
+    }
+
+    public String getDetails() {
+        return details;
+    }
+
+    public void setDetails(String details) {
+        this.details = details;
+    }
+
+    public List<RollingMaintenanceHostUpdatedResponse> getUpdatedHosts() {
+        return updatedHosts;
+    }
+
+    public void setUpdatedHosts(List<RollingMaintenanceHostUpdatedResponse> updatedHosts) {
+        this.updatedHosts = updatedHosts;
+    }
+
+    public List<RollingMaintenanceHostSkippedResponse> getSkippedHosts() {
+        return skippedHosts;
+    }
+
+    public void setSkippedHosts(List<RollingMaintenanceHostSkippedResponse> skippedHosts) {
+        this.skippedHosts = skippedHosts;
+    }
+}
\ No newline at end of file
diff --git a/core/src/main/java/com/cloud/agent/api/RollingMaintenanceAnswer.java b/core/src/main/java/com/cloud/agent/api/RollingMaintenanceAnswer.java
new file mode 100644
index 00000000000..de7b1ba223e
--- /dev/null
+++ b/core/src/main/java/com/cloud/agent/api/RollingMaintenanceAnswer.java
@@ -0,0 +1,56 @@
+//
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// +package com.cloud.agent.api; + +public class RollingMaintenanceAnswer extends Answer { + + private boolean finished; + private boolean avoidMaintenance; + private boolean maintenaceScriptDefined; + + public RollingMaintenanceAnswer(Command command, boolean success, String details, boolean finished) { + super(command, success, details); + this.finished = finished; + } + + public RollingMaintenanceAnswer(Command command, boolean isMaintenanceScript) { + super(command, true, ""); + this.maintenaceScriptDefined = isMaintenanceScript; + } + + public boolean isFinished() { + return finished; + } + + public boolean isAvoidMaintenance() { + return avoidMaintenance; + } + + public void setAvoidMaintenance(boolean avoidMaintenance) { + this.avoidMaintenance = avoidMaintenance; + } + + public boolean isMaintenaceScriptDefined() { + return maintenaceScriptDefined; + } + + public void setMaintenaceScriptDefined(boolean maintenaceScriptDefined) { + this.maintenaceScriptDefined = maintenaceScriptDefined; + } +} diff --git a/core/src/main/java/com/cloud/agent/api/RollingMaintenanceCommand.java b/core/src/main/java/com/cloud/agent/api/RollingMaintenanceCommand.java new file mode 100644 index 00000000000..ae1f4935f7e --- /dev/null +++ 
b/core/src/main/java/com/cloud/agent/api/RollingMaintenanceCommand.java @@ -0,0 +1,70 @@ +// +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// + +package com.cloud.agent.api; + +public class RollingMaintenanceCommand extends Command { + + private String stage; + private String payload; + private boolean started; + private boolean checkMaintenanceScript; + + public RollingMaintenanceCommand(boolean checkMaintenanceScript) { + this.checkMaintenanceScript = checkMaintenanceScript; + } + + public RollingMaintenanceCommand(String stage) { + this.stage = stage; + } + + public void setStage(String stage) { + this.stage = stage; + } + + public String getStage() { + return this.stage; + } + + public String getPayload() { + return payload; + } + + public void setPayload(String payload) { + this.payload = payload; + } + + public boolean isStarted() { + return started; + } + + public void setStarted(boolean started) { + this.started = started; + } + + public boolean isCheckMaintenanceScript() { + return checkMaintenanceScript; + } + + @Override + public boolean executeInSequence() { + return false; + } + +} diff --git a/debian/rules b/debian/rules index 4220a839eb5..9055ee14f54 100755 --- a/debian/rules +++ b/debian/rules @@ 
-45,6 +45,7 @@ override_dh_auto_install: install -d -m0755 debian/$(PACKAGE)-agent/lib/systemd/system install -m0644 packaging/systemd/$(PACKAGE)-agent.service debian/$(PACKAGE)-agent/lib/systemd/system/$(PACKAGE)-agent.service install -m0644 packaging/systemd/$(PACKAGE)-agent.default $(DESTDIR)/$(SYSCONFDIR)/default/$(PACKAGE)-agent + install -m0644 packaging/systemd/$(PACKAGE)-rolling-maintenance@.service debian/$(PACKAGE)-agent/lib/systemd/system/$(PACKAGE)-rolling-maintenance@.service install -D -m0644 agent/target/transformed/cloudstack-agent.logrotate $(DESTDIR)/$(SYSCONFDIR)/logrotate.d/cloudstack-agent @@ -54,6 +55,7 @@ override_dh_auto_install: install -D agent/target/transformed/cloudstack-agent-upgrade $(DESTDIR)/usr/bin/cloudstack-agent-upgrade install -D agent/target/transformed/cloud-guest-tool $(DESTDIR)/usr/bin/cloudstack-guest-tool install -D agent/target/transformed/libvirtqemuhook $(DESTDIR)/usr/share/$(PACKAGE)-agent/lib/ + install -D agent/target/transformed/rolling-maintenance $(DESTDIR)/usr/share/$(PACKAGE)-agent/lib/ install -D agent/target/transformed/* $(DESTDIR)/$(SYSCONFDIR)/$(PACKAGE)/agent # cloudstack-management @@ -139,7 +141,7 @@ override_dh_auto_install: cp -r test/integration/* $(DESTDIR)/usr/share/$(PACKAGE)-integration-tests/ override_dh_systemd_enable: - dh_systemd_enable -pcloudstack-management -pcloudstack-agent -pcloudstack-usage + dh_systemd_enable -pcloudstack-management -pcloudstack-agent -pcloudstack-usage -pcloudstack-rolling-maintenance@ override_dh_strip_nondeterminism: # Disable dh_strip_nondeterminism to speed up the build diff --git a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java index 387fa7f6415..db7a27ff41c 100755 --- a/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java +++ b/engine/components-api/src/main/java/com/cloud/resource/ResourceManager.java @@ -205,4 +205,6 @@ public 
interface ResourceManager extends ResourceService, Configurable { HashMap> getGPUStatistics(HostVO host); HostVO findOneRandomRunningHostByHypervisor(HypervisorType type); + + boolean cancelMaintenance(final long hostId); } diff --git a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java index e96181b96d7..45df2311f3a 100644 --- a/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java +++ b/engine/orchestration/src/main/java/com/cloud/agent/manager/AgentAttache.java @@ -34,6 +34,7 @@ import java.util.concurrent.TimeUnit; import com.cloud.agent.api.ModifySshKeysCommand; import com.cloud.agent.api.ModifyStoragePoolCommand; import org.apache.cloudstack.agent.lb.SetupMSListCommand; +import com.cloud.agent.api.RollingMaintenanceCommand; import org.apache.cloudstack.managed.context.ManagedContextRunnable; import org.apache.log4j.Logger; @@ -117,7 +118,7 @@ public abstract class AgentAttache { StopCommand.class.toString(), CheckVirtualMachineCommand.class.toString(), PingTestCommand.class.toString(), CheckHealthCommand.class.toString(), ReadyCommand.class.toString(), ShutdownCommand.class.toString(), SetupCommand.class.toString(), CleanupNetworkRulesCmd.class.toString(), CheckNetworkCommand.class.toString(), PvlanSetupCommand.class.toString(), CheckOnHostCommand.class.toString(), - ModifyTargetsCommand.class.toString(), ModifySshKeysCommand.class.toString(), ModifyStoragePoolCommand.class.toString(), SetupMSListCommand.class.toString()}; + ModifyTargetsCommand.class.toString(), ModifySshKeysCommand.class.toString(), ModifyStoragePoolCommand.class.toString(), SetupMSListCommand.class.toString(), RollingMaintenanceCommand.class.toString()}; protected final static String[] s_commandsNotAllowedInConnectingMode = new String[] { StartCommand.class.toString(), CreateCommand.class.toString() }; static { Arrays.sort(s_commandsAllowedInMaintenanceMode); diff 
--git a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java index 8e52c38902c..7765611744f 100755 --- a/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java +++ b/engine/orchestration/src/main/java/com/cloud/vm/VirtualMachineManagerImpl.java @@ -2997,6 +2997,7 @@ public class VirtualMachineManagerImpl extends ManagerBase implements VirtualMac while (true) { try { + plan.setMigrationPlan(true); dest = _dpMgr.planDeployment(profile, plan, excludes, planner); } catch (final AffinityConflictException e2) { s_logger.warn("Unable to create deployment, affinity rules associted to the VM conflict", e2); diff --git a/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java b/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java index 781f82fc3be..34b89634edc 100644 --- a/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java +++ b/engine/schema/src/main/java/com/cloud/host/dao/HostDao.java @@ -111,4 +111,6 @@ public interface HostDao extends GenericDao, StateDao listAllHostsUpByZoneAndHypervisor(long zoneId, HypervisorType hypervisorType); + + List listByClusterAndHypervisorType(long clusterId, HypervisorType hypervisorType); } diff --git a/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java b/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java index 2b2a80bb69a..20d817cbf45 100644 --- a/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java +++ b/engine/schema/src/main/java/com/cloud/host/dao/HostDaoImpl.java @@ -109,6 +109,7 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao protected SearchBuilder ClusterStatusSearch; protected SearchBuilder TypeNameZoneSearch; protected SearchBuilder AvailHypevisorInZone; + protected SearchBuilder ClusterHypervisorSearch; protected SearchBuilder DirectConnectSearch; protected SearchBuilder ManagedDirectConnectSearch; @@ -293,6 +294,13 @@ 
public class HostDaoImpl extends GenericDaoBase implements HostDao DirectlyConnectedSearch.and("resourceState", DirectlyConnectedSearch.entity().getResourceState(), SearchCriteria.Op.NOTIN); DirectlyConnectedSearch.done(); + ClusterHypervisorSearch = createSearchBuilder(); + ClusterHypervisorSearch.and("clusterId", ClusterHypervisorSearch.entity().getClusterId(), SearchCriteria.Op.EQ); + ClusterHypervisorSearch.and("hypervisor", ClusterHypervisorSearch.entity().getHypervisorType(), SearchCriteria.Op.EQ); + ClusterHypervisorSearch.and("type", ClusterHypervisorSearch.entity().getType(), SearchCriteria.Op.EQ); + ClusterHypervisorSearch.and("status", ClusterHypervisorSearch.entity().getStatus(), SearchCriteria.Op.EQ); + ClusterHypervisorSearch.done(); + UnmanagedDirectConnectSearch = createSearchBuilder(); UnmanagedDirectConnectSearch.and("resource", UnmanagedDirectConnectSearch.entity().getResource(), SearchCriteria.Op.NNULL); UnmanagedDirectConnectSearch.and("server", UnmanagedDirectConnectSearch.entity().getManagementServerId(), SearchCriteria.Op.NULL); @@ -1213,6 +1221,16 @@ public class HostDaoImpl extends GenericDaoBase implements HostDao .collect(Collectors.toList()); } + @Override + public List listByClusterAndHypervisorType(long clusterId, HypervisorType hypervisorType) { + SearchCriteria sc = ClusterHypervisorSearch.create(); + sc.setParameters("clusterId", clusterId); + sc.setParameters("hypervisor", hypervisorType); + sc.setParameters("type", Type.Routing); + sc.setParameters("status", Status.Up); + return listBy(sc); + } + private ResultSet executeSqlGetResultsetForMethodFindHostInZoneToExecuteCommand(HypervisorType hypervisorType, long zoneId, TransactionLegacy tx, String sql) throws SQLException { PreparedStatement pstmt = tx.prepareAutoCloseStatement(sql); pstmt.setString(1, Objects.toString(hypervisorType)); diff --git a/packaging/centos7/cloud.spec b/packaging/centos7/cloud.spec index 3a0828922fb..6a8a160902c 100644 --- a/packaging/centos7/cloud.spec 
+++ b/packaging/centos7/cloud.spec @@ -292,6 +292,7 @@ mkdir -p ${RPM_BUILD_ROOT}%{_localstatedir}/log/%{name}/agent mkdir -p ${RPM_BUILD_ROOT}%{_datadir}/%{name}-agent/lib mkdir -p ${RPM_BUILD_ROOT}%{_datadir}/%{name}-agent/plugins install -D packaging/systemd/cloudstack-agent.service ${RPM_BUILD_ROOT}%{_unitdir}/%{name}-agent.service +install -D packaging/systemd/cloudstack-rolling-maintenance@.service ${RPM_BUILD_ROOT}%{_unitdir}/%{name}-rolling-maintenance@.service install -D packaging/systemd/cloudstack-agent.default ${RPM_BUILD_ROOT}%{_sysconfdir}/default/%{name}-agent install -D agent/target/transformed/agent.properties ${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}/agent/agent.properties install -D agent/target/transformed/environment.properties ${RPM_BUILD_ROOT}%{_sysconfdir}/%{name}/agent/environment.properties @@ -300,6 +301,7 @@ install -D agent/target/transformed/cloud-setup-agent ${RPM_BUILD_ROOT}%{_bindir install -D agent/target/transformed/cloudstack-agent-upgrade ${RPM_BUILD_ROOT}%{_bindir}/%{name}-agent-upgrade install -D agent/target/transformed/cloud-guest-tool ${RPM_BUILD_ROOT}%{_bindir}/%{name}-guest-tool install -D agent/target/transformed/libvirtqemuhook ${RPM_BUILD_ROOT}%{_datadir}/%{name}-agent/lib/libvirtqemuhook +install -D agent/target/transformed/rolling-maintenance ${RPM_BUILD_ROOT}%{_datadir}/%{name}-agent/lib/rolling-maintenance install -D agent/target/transformed/cloud-ssh ${RPM_BUILD_ROOT}%{_bindir}/%{name}-ssh install -D agent/target/transformed/cloudstack-agent-profile.sh ${RPM_BUILD_ROOT}%{_sysconfdir}/profile.d/%{name}-agent-profile.sh install -D agent/target/transformed/cloudstack-agent.logrotate ${RPM_BUILD_ROOT}%{_sysconfdir}/logrotate.d/%{name}-agent @@ -428,6 +430,7 @@ cp -a ${RPM_BUILD_ROOT}%{_datadir}/%{name}-agent/lib/libvirtqemuhook %{_sysconfd mkdir -m 0755 -p /usr/share/cloudstack-agent/tmp /sbin/service libvirtd restart /sbin/systemctl enable cloudstack-agent > /dev/null 2>&1 || true +/sbin/systemctl enable 
cloudstack-rolling-maintenance@p > /dev/null 2>&1 || true # if saved configs from upgrade exist, copy them over if [ -f "%{_sysconfdir}/cloud.rpmsave/agent/agent.properties" ]; then @@ -519,6 +522,7 @@ pip install --upgrade /usr/share/cloudstack-marvin/Marvin-*.tar.gz %attr(0755,root,root) %{_bindir}/%{name}-guest-tool %attr(0755,root,root) %{_bindir}/%{name}-ssh %attr(0644,root,root) %{_unitdir}/%{name}-agent.service +%attr(0644,root,root) %{_unitdir}/%{name}-rolling-maintenance@.service %config(noreplace) %{_sysconfdir}/default/%{name}-agent %attr(0644,root,root) %{_sysconfdir}/profile.d/%{name}-agent-profile.sh %config(noreplace) %attr(0644,root,root) %{_sysconfdir}/logrotate.d/%{name}-agent @@ -527,6 +531,7 @@ pip install --upgrade /usr/share/cloudstack-marvin/Marvin-*.tar.gz %dir %{_localstatedir}/log/%{name}/agent %attr(0644,root,root) %{_datadir}/%{name}-agent/lib/*.jar %attr(0755,root,root) %{_datadir}/%{name}-agent/lib/libvirtqemuhook +%attr(0755,root,root) %{_datadir}/%{name}-agent/lib/rolling-maintenance %dir %{_datadir}/%{name}-agent/plugins %{_defaultdocdir}/%{name}-agent-%{version}/LICENSE %{_defaultdocdir}/%{name}-agent-%{version}/NOTICE diff --git a/packaging/systemd/cloudstack-rolling-maintenance@.service b/packaging/systemd/cloudstack-rolling-maintenance@.service new file mode 100644 index 00000000000..8c793a73855 --- /dev/null +++ b/packaging/systemd/cloudstack-rolling-maintenance@.service @@ -0,0 +1,31 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Systemd unit file for CloudStack Rolling Maintenance + +[Unit] +Description=Rolling maintenance executor %I +After=network.target local-fs.target + +[Install] +WantedBy=multi-user.target + +[Service] +Type=simple +WorkingDirectory=/usr/share/cloudstack-agent/lib/ +ExecStart=/usr/share/cloudstack-agent/lib/rolling-maintenance %I +Restart=no diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java index 9404be2bd71..1be67858fdb 100644 --- a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/LibvirtComputingResource.java @@ -46,6 +46,9 @@ import javax.xml.parsers.DocumentBuilder; import javax.xml.parsers.DocumentBuilderFactory; import javax.xml.parsers.ParserConfigurationException; +import com.cloud.hypervisor.kvm.resource.rolling.maintenance.RollingMaintenanceAgentExecutor; +import com.cloud.hypervisor.kvm.resource.rolling.maintenance.RollingMaintenanceExecutor; +import com.cloud.hypervisor.kvm.resource.rolling.maintenance.RollingMaintenanceServiceExecutor; import org.apache.cloudstack.storage.to.PrimaryDataStoreTO; import org.apache.cloudstack.storage.to.TemplateObjectTO; import org.apache.cloudstack.storage.to.VolumeObjectTO; @@ -276,6 +279,7 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv protected int _migrateDowntime; protected 
int _migratePauseAfter; protected boolean _diskActivityCheckEnabled; + protected RollingMaintenanceExecutor rollingMaintenanceExecutor; protected long _diskActivityCheckFileSizeMin = 10485760; // 10MB protected int _diskActivityCheckTimeoutSeconds = 120; // 120s protected long _diskActivityInactiveThresholdMilliseconds = 30000; // 30s @@ -426,6 +430,10 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv return _migrateSpeed; } + public RollingMaintenanceExecutor getRollingMaintenanceExecutor() { + return rollingMaintenanceExecutor; + } + public String getPingTestPath() { return _pingTestPath; } @@ -790,6 +798,11 @@ public class LibvirtComputingResource extends ServerResourceBase implements Serv _hypervisorType = HypervisorType.KVM; } + String hooksDir = (String)params.get("rolling.maintenance.hooks.dir"); + value = (String) params.get("rolling.maintenance.service.executor.disabled"); + rollingMaintenanceExecutor = Boolean.parseBoolean(value) ? new RollingMaintenanceAgentExecutor(hooksDir) : + new RollingMaintenanceServiceExecutor(hooksDir); + _hypervisorURI = (String)params.get("hypervisor.uri"); if (_hypervisorURI == null) { _hypervisorURI = LibvirtConnection.getHypervisorURI(_hypervisorType.toString()); diff --git a/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceAgentExecutor.java b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceAgentExecutor.java new file mode 100644 index 00000000000..110c4a8acd9 --- /dev/null +++ b/plugins/hypervisors/kvm/src/main/java/com/cloud/hypervisor/kvm/resource/rolling/maintenance/RollingMaintenanceAgentExecutor.java @@ -0,0 +1,88 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. 
package com.cloud.hypervisor.kvm.resource.rolling.maintenance;

import com.cloud.utils.Pair;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.script.OutputInterpreter;
import com.cloud.utils.script.Script;
import com.google.common.base.Strings;
import org.apache.log4j.Logger;
import org.joda.time.Duration;

import java.io.File;

/**
 * Rolling maintenance executor that runs the stage script directly through
 * {@link Script} and keeps the outcome of the last execution in memory.
 */
public class RollingMaintenanceAgentExecutor extends RollingMaintenanceExecutorBase implements RollingMaintenanceExecutor {

    private static final Logger s_logger = Logger.getLogger(RollingMaintenanceAgentExecutor.class);

    // Outcome of the most recent startStageExecution() call.
    private String output;
    private boolean success;

    public RollingMaintenanceAgentExecutor(String hooksDir) {
        super(hooksDir);
    }

    /**
     * Runs the stage script, waiting at most {@code timeout} seconds.
     *
     * @param stage      name of the stage being executed (used for logging)
     * @param scriptFile script to run
     * @param timeout    timeout in seconds
     * @param payload    optional single argument passed to the script
     * @return {@code (false, message)} when the script timed out, otherwise
     *         {@code (true, message)}; the script's own result is exposed through
     *         {@link #getStageExecutionSuccess(String, File)}
     * @throws CloudRuntimeException if the hooks directory is not configured or the
     *                               script exits with the "terminated" exit value
     */
    @Override
    public Pair<Boolean, String> startStageExecution(String stage, File scriptFile, int timeout, String payload) {
        checkHooksDirectory();

        // Forget the previous stage so that a timeout or a terminated script
        // cannot report results that belong to an earlier execution.
        output = null;
        success = false;
        setAvoidMaintenance(false);

        Duration duration = Duration.standardSeconds(timeout);
        final Script script = new Script(scriptFile.getAbsolutePath(), duration, s_logger);
        final OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser();
        if (!Strings.isNullOrEmpty(payload)) {
            script.add(payload);
        }
        s_logger.info("Executing stage: " + stage + " script: " + script);

        // Join the execution result and the captured lines without emitting the
        // literal "null" when either of them is absent.
        final String result = script.execute(parser);
        final String lines = Strings.nullToEmpty(parser.getLines());
        output = Strings.isNullOrEmpty(result) ? lines : result + " " + lines;

        if (script.isTimeout()) {
            String msg = "Script " + scriptFile + " timed out";
            s_logger.error(msg);
            return new Pair<>(false, msg);
        }

        int exitValue = script.getExitValue();
        if (exitValue == exitValueTerminatedSignal) {
            throw new CloudRuntimeException("Script " + scriptFile + " terminated");
        }
        // The "avoid maintenance" exit value is a successful outcome too.
        success = exitValue == 0 || exitValue == exitValueAvoidMaintenance;
        setAvoidMaintenance(exitValue == exitValueAvoidMaintenance);
        s_logger.info("Execution finished for stage: " + stage + " script: " + script + ": " + exitValue);
        if (s_logger.isDebugEnabled()) {
            s_logger.debug(output);
            s_logger.debug("Stage " + stage + " execution finished: " + exitValue);
        }
        return new Pair<>(true, "Stage " + stage + " finished");
    }

    /** @return the output captured by the last stage execution, or {@code null} if none */
    @Override
    public String getStageExecutionOutput(String stage, File scriptFile) {
        return output;
    }

    /** @return always {@code false} */
    @Override
    public boolean isStageRunning(String stage, File scriptFile, String payload) {
        // In case of reconnection, it is assumed that the stage is finished
        return false;
    }

    /** @return whether the last stage execution ended with a successful exit value */
    @Override
    public boolean getStageExecutionSuccess(String stage, File scriptFile) {
        return success;
    }
}
package com.cloud.hypervisor.kvm.resource.rolling.maintenance;

import com.cloud.utils.Pair;

import java.io.File;

/**
 * Strategy used to run the rolling maintenance stage scripts and to query
 * their outcome.
 */
public interface RollingMaintenanceExecutor {

    /**
     * Locates the script associated with a stage.
     *
     * @param stage name of the stage
     * @return the script file; the base implementation in this package returns
     *         {@code null} when no script is found
     */
    File getStageScriptFile(String stage);

    /**
     * Starts the execution of a stage script.
     *
     * @param stage      name of the stage
     * @param scriptFile script to execute
     * @param timeout    timeout for the script (the agent executor treats it as seconds)
     * @param payload    optional payload passed to the script; may be {@code null}
     * @return a pair of (started/ok flag, human readable message)
     */
    Pair<Boolean, String> startStageExecution(String stage, File scriptFile, int timeout, String payload);

    /**
     * @param stage      name of the stage
     * @param scriptFile script that was executed
     * @return the output produced by the stage execution
     */
    String getStageExecutionOutput(String stage, File scriptFile);

    /**
     * @param stage      name of the stage
     * @param scriptFile script being executed
     * @param payload    payload the stage was started with; may be {@code null}
     * @return whether the stage is still running
     */
    boolean isStageRunning(String stage, File scriptFile, String payload);

    /**
     * @param stage      name of the stage
     * @param scriptFile script that was executed
     * @return whether the stage execution was successful
     */
    boolean getStageExecutionSuccess(String stage, File scriptFile);

    /**
     * @param stage      name of the stage
     * @param scriptFile script that was executed
     * @return whether the stage execution requested that maintenance be avoided
     */
    boolean getStageAvoidMaintenance(String stage, File scriptFile);
}
package com.cloud.hypervisor.kvm.resource.rolling.maintenance;

import com.cloud.utils.exception.CloudRuntimeException;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import java.io.File;

/**
 * Shared state and helpers for the rolling maintenance executors: the hooks
 * directory, the stage timeout, the "avoid maintenance" flag and the lookup of
 * the script that implements a stage.
 */
public abstract class RollingMaintenanceExecutorBase implements RollingMaintenanceExecutor {

    // Exit value used by a stage script to request that maintenance be avoided.
    static final int exitValueAvoidMaintenance = 70;
    // Exit value treated as "script was terminated" (presumably 128 + SIGTERM).
    static final int exitValueTerminatedSignal = 143;

    private static final Logger s_logger = Logger.getLogger(RollingMaintenanceExecutorBase.class);

    // Suffixes tried, in order, when looking for the script of a stage.
    private static final String[] SCRIPT_SUFFIXES = {"", ".sh", ".py"};

    private String hooksDir;
    private int timeout;
    private boolean avoidMaintenance = false;

    /**
     * @param hooksDir directory containing the stage scripts; a trailing "/" is
     *                 appended when missing. May be blank, in which case
     *                 {@link #checkHooksDirectory()} fails.
     */
    RollingMaintenanceExecutorBase(String hooksDir) {
        this.hooksDir = hooksDir;
        sanitizeHooksDirFormat();
    }

    void setTimeout(int timeout) {
        this.timeout = timeout;
    }

    long getTimeout() {
        return timeout;
    }

    /** Makes sure a non-blank hooks directory ends with "/" so names can be appended to it. */
    private void sanitizeHooksDirFormat() {
        if (StringUtils.isNotBlank(this.hooksDir) && !this.hooksDir.endsWith("/")) {
            this.hooksDir += "/";
        }
    }

    /** @return whether {@code filepath} denotes an existing regular file */
    protected boolean existsAndIsFile(String filepath) {
        File file = new File(filepath);
        return file.exists() && file.isFile();
    }

    /**
     * Looks for the script of a stage in the hooks directory, trying the bare
     * stage name first and then the ".sh" and ".py" variants.
     *
     * @param stage name of the stage
     * @return the first matching script, or {@code null} (after logging a warning)
     *         when none exists or the hooks directory / stage name is blank
     */
    @Override
    public File getStageScriptFile(String stage) {
        // Without a hooks directory the concatenation below would probe paths
        // such as "null<stage>" relative to the working directory.
        if (StringUtils.isNotBlank(hooksDir) && StringUtils.isNotBlank(stage)) {
            String scriptPath = hooksDir + stage;
            for (String suffix : SCRIPT_SUFFIXES) {
                String candidate = scriptPath + suffix;
                if (existsAndIsFile(candidate)) {
                    return new File(candidate);
                }
            }
        }
        String msg = "Unable to locate script for stage: " + stage + " in directory: " + hooksDir;
        s_logger.warn(msg);
        return null;
    }

    /**
     * @throws CloudRuntimeException if no hooks directory has been configured
     */
    void checkHooksDirectory() {
        if (StringUtils.isBlank(hooksDir)) {
            throw new CloudRuntimeException("Hooks directory is empty, please specify it on agent.properties and restart the agent");
        }
    }

    String getHooksDir() {
        return hooksDir;
    }

    public void setAvoidMaintenance(boolean avoidMaintenance) {
        this.avoidMaintenance = avoidMaintenance;
    }

    /** @return the "avoid maintenance" flag recorded by the last stage execution */
    @Override
    public boolean getStageAvoidMaintenance(String stage, File scriptFile) {
        return avoidMaintenance;
    }
}
package com.cloud.hypervisor.kvm.resource.rolling.maintenance;

import com.cloud.utils.Pair;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.script.OutputInterpreter;
import com.cloud.utils.script.Script;
import org.apache.commons.lang.StringUtils;
import org.apache.log4j.Logger;

import java.io.File;
import java.io.IOException;
import java.nio.charset.StandardCharsets;
import java.nio.file.Files;
import java.nio.file.Paths;
import java.util.stream.Stream;

/**
 * Rolling maintenance executor that delegates the stage execution to the
 * {@code cloudstack-rolling-maintenance@} systemd template service and reads
 * the outcome back from the results and output files in the hooks directory.
 */
public class RollingMaintenanceServiceExecutor extends RollingMaintenanceExecutorBase implements RollingMaintenanceExecutor {

    private static final String servicePrefix = "cloudstack-rolling-maintenance";
    private static final String resultsFileSuffix = "rolling-maintenance-results";
    private static final String outputFileSuffix = "rolling-maintenance-output";

    private static final Logger s_logger = Logger.getLogger(RollingMaintenanceServiceExecutor.class);

    public RollingMaintenanceServiceExecutor(String hooksDir) {
        super(hooksDir);
    }

    /**
     * Generate and return escaped instance name to use on systemd service invocation.
     *
     * <p>The instance name is the comma separated list
     * {@code stage,script,timeout,resultsFile,outputFile[,payload]}.</p>
     *
     * @throws CloudRuntimeException if {@code systemd-escape} fails or prints nothing
     */
    private String generateInstanceName(String stage, String file, String payload) {
        String instanceName = String.format("%s,%s,%s,%s,%s", stage, file, getTimeout(),
                getResultsFilePath(), getOutputFilePath());
        if (StringUtils.isNotBlank(payload)) {
            instanceName += "," + payload;
        }
        // Hand the name to systemd-escape as one argument instead of embedding it
        // in a shell command line: the payload is caller supplied and a quote in
        // it must not be interpreted by a shell.
        final OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser();
        final Script escape = new Script("systemd-escape", s_logger);
        escape.add(instanceName);
        final String error = escape.execute(parser);
        final String escaped = StringUtils.trimToNull(parser.getLines());
        if (StringUtils.isNotBlank(error) || escaped == null) {
            throw new CloudRuntimeException("Unable to escape rolling maintenance service instance name: " + instanceName
                    + (StringUtils.isNotBlank(error) ? ": " + error : ""));
        }
        return escaped;
    }

    /**
     * Runs {@code systemctl <action>} on the service instance of a stage.
     *
     * @return the result of the execution when it is not blank, otherwise the
     *         captured output with newlines replaced by spaces
     */
    private String invokeService(String action, String stage, String file, String payload) {
        s_logger.debug("Invoking rolling maintenance service for stage: " + stage + " and file " + file + " with action: " + action);
        final OutputInterpreter.AllLinesParser parser = new OutputInterpreter.AllLinesParser();
        Script command = new Script("/bin/systemctl", s_logger);
        command.add(action);
        String service = servicePrefix + "@" + generateInstanceName(stage, file, payload);
        command.add(service);
        String result = command.execute(parser);
        int exitValue = command.getExitValue();
        // The parser may not have captured anything; never dereference null.
        final String lines = StringUtils.defaultString(parser.getLines());
        s_logger.trace("Execution: " + command.toString() + " - exit code: " + exitValue +
                ": " + result + (StringUtils.isNotBlank(lines) ? lines : ""));
        return StringUtils.isBlank(result) ? lines.replace("\n", " ") : result;
    }

    /**
     * Starts the service instance that runs the stage script.
     *
     * @return {@code (true, "OK")} once the service has been started
     * @throws CloudRuntimeException if the hooks directory is not configured or
     *                               starting the service produced any output
     */
    @Override
    public Pair<Boolean, String> startStageExecution(String stage, File scriptFile, int timeout, String payload) {
        checkHooksDirectory();
        setTimeout(timeout);
        String result = invokeService("start", stage, scriptFile.getAbsolutePath(), payload);
        if (StringUtils.isNotBlank(result)) {
            throw new CloudRuntimeException("Error starting stage: " + stage + " execution: " + result);
        }
        s_logger.trace("Stage " + stage + " execution started");
        return new Pair<>(true, "OK");
    }

    private String getResultsFilePath() {
        return getHooksDir() + resultsFileSuffix;
    }

    private String getOutputFilePath() {
        return getHooksDir() + outputFileSuffix;
    }

    /**
     * Reads a UTF-8 text file, returning its lines each terminated by "\n".
     * Reading is best effort: on I/O failure the error is logged and whatever
     * was read so far (possibly nothing) is returned.
     */
    private String readFromFile(String filePath) {
        StringBuilder contentBuilder = new StringBuilder();

        try (Stream<String> stream = Files.lines(Paths.get(filePath), StandardCharsets.UTF_8)) {
            stream.forEach(s -> contentBuilder.append(s).append("\n"));
        } catch (IOException e) {
            s_logger.error("Unable to read file " + filePath + ": " + e.getMessage(), e);
        }

        return contentBuilder.toString();
    }

    /** @return the content of the stage output file (empty if it could not be read) */
    @Override
    public String getStageExecutionOutput(String stage, File scriptFile) {
        return readFromFile(getOutputFilePath());
    }

    /**
     * @return whether systemd reports the stage service instance as "active"
     * @throws CloudRuntimeException if systemd reports the instance as "failed"
     */
    @Override
    public boolean isStageRunning(String stage, File scriptFile, String payload) {
        // invokeService() turns newlines into spaces, so the state may carry
        // trailing whitespace; compare the trimmed value.
        String result = StringUtils.trimToEmpty(invokeService("is-active", stage, scriptFile.getAbsolutePath(), payload));
        if (result.equals("failed")) {
            String status = invokeService("status", stage, scriptFile.getAbsolutePath(), payload);
            String errorMsg = "Stage " + stage + " execution failed, status: " + status;
            s_logger.error(errorMsg);
            throw new CloudRuntimeException(errorMsg);
        }
        return result.equals("active");
    }

    /**
     * Parses the results file, expected to contain
     * {@code stage,success,avoidMaintenance}, and records the avoid-maintenance flag.
     *
     * @return the success flag stored in the results file
     * @throws CloudRuntimeException if the file is empty, malformed or belongs to
     *                               a different stage
     */
    @Override
    public boolean getStageExecutionSuccess(String stage, File scriptFile) {
        String fileContent = readFromFile(getResultsFilePath());
        if (StringUtils.isBlank(fileContent)) {
            throw new CloudRuntimeException("Empty content in file " + getResultsFilePath());
        }
        fileContent = fileContent.replace("\n", "");
        String[] parts = fileContent.split(",");
        if (parts.length < 3) {
            throw new CloudRuntimeException("Results file " + getResultsFilePath() + " unexpected content: " + fileContent);
        }
        // Tolerate stray whitespace around the fields.
        final String resultStage = parts[0].trim();
        if (!resultStage.equalsIgnoreCase(stage)) {
            throw new CloudRuntimeException("Expected stage " + stage + " results but got stage " + resultStage);
        }
        setAvoidMaintenance(Boolean.parseBoolean(parts[2].trim()));
        return Boolean.parseBoolean(parts[1].trim());
    }
}
You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +// + +package com.cloud.hypervisor.kvm.resource.wrapper; + +import com.cloud.agent.api.RollingMaintenanceAnswer; +import com.cloud.agent.api.RollingMaintenanceCommand; +import com.cloud.hypervisor.kvm.resource.LibvirtComputingResource; +import com.cloud.hypervisor.kvm.resource.rolling.maintenance.RollingMaintenanceAgentExecutor; +import com.cloud.hypervisor.kvm.resource.rolling.maintenance.RollingMaintenanceExecutor; +import com.cloud.resource.CommandWrapper; +import com.cloud.resource.ResourceWrapper; +import com.cloud.resource.RollingMaintenanceManager; +import com.cloud.utils.exception.CloudRuntimeException; +import org.apache.log4j.Logger; + +import java.io.File; + +@ResourceWrapper(handles = RollingMaintenanceCommand.class) +public class LibvirtRollingMaintenanceCommandWrapper extends CommandWrapper { + + private static final Logger s_logger = Logger.getLogger(LibvirtRollingMaintenanceCommandWrapper.class); + + @Override + public RollingMaintenanceAnswer execute(RollingMaintenanceCommand command, LibvirtComputingResource resource) { + RollingMaintenanceExecutor executor = resource.getRollingMaintenanceExecutor(); + String stage = command.isCheckMaintenanceScript() ? 
RollingMaintenanceManager.Stage.Maintenance.toString() : command.getStage(); + int timeout = command.getWait(); + String payload = command.getPayload(); + + try { + File scriptFile = executor.getStageScriptFile(stage); + if (command.isCheckMaintenanceScript()) { + return new RollingMaintenanceAnswer(command, scriptFile != null); + } else if (scriptFile == null) { + s_logger.info("No script file defined for stage " + stage + ". Skipping stage..."); + return new RollingMaintenanceAnswer(command, true, "Skipped stage " + stage, true); + } + + if (command.isStarted() && executor instanceof RollingMaintenanceAgentExecutor) { + String msg = "Stage has been started previously and the agent restarted, setting stage as finished"; + s_logger.info(msg); + return new RollingMaintenanceAnswer(command, true, msg, true); + } + s_logger.info("Processing stage " + stage); + if (!command.isStarted()) { + executor.startStageExecution(stage, scriptFile, timeout, payload); + } + if (executor.isStageRunning(stage, scriptFile, payload)) { + return new RollingMaintenanceAnswer(command, true, "Stage " + stage + " still running", false); + } + boolean success = executor.getStageExecutionSuccess(stage, scriptFile); + String output = executor.getStageExecutionOutput(stage, scriptFile); + RollingMaintenanceAnswer answer = new RollingMaintenanceAnswer(command, success, output, true); + if (executor.getStageAvoidMaintenance(stage, scriptFile)) { + s_logger.info("Avoid maintenance flag added to the answer for the stage " + stage); + answer.setAvoidMaintenance(true); + } + s_logger.info("Finished processing stage " + stage); + return answer; + } catch (CloudRuntimeException e) { + return new RollingMaintenanceAnswer(command, false, e.getMessage(), false); + } + } +} diff --git a/server/src/main/java/com/cloud/api/ApiResponseHelper.java b/server/src/main/java/com/cloud/api/ApiResponseHelper.java index 9da3ae4f710..9bec40894c9 100644 --- a/server/src/main/java/com/cloud/api/ApiResponseHelper.java 
+++ b/server/src/main/java/com/cloud/api/ApiResponseHelper.java
@@ -31,6 +31,7 @@ import java.util.stream.Collectors;
 
 import javax.inject.Inject;
 
+import com.cloud.resource.RollingMaintenanceManager;
 import org.apache.cloudstack.acl.ControlledEntity;
 import org.apache.cloudstack.acl.ControlledEntity.ACLType;
 import org.apache.cloudstack.affinity.AffinityGroup;
@@ -44,6 +45,9 @@ import org.apache.cloudstack.api.command.user.job.QueryAsyncJobResultCmd;
 import org.apache.cloudstack.api.response.AccountResponse;
 import org.apache.cloudstack.api.response.ApplicationLoadBalancerInstanceResponse;
 import org.apache.cloudstack.api.response.ApplicationLoadBalancerResponse;
+import org.apache.cloudstack.api.response.RollingMaintenanceHostSkippedResponse;
+import org.apache.cloudstack.api.response.RollingMaintenanceHostUpdatedResponse;
+import org.apache.cloudstack.api.response.RollingMaintenanceResponse;
 import org.apache.cloudstack.api.response.ApplicationLoadBalancerRuleResponse;
 import org.apache.cloudstack.api.response.AsyncJobResponse;
 import org.apache.cloudstack.api.response.AutoScalePolicyResponse;
@@ -4281,4 +4285,31 @@ public class ApiResponseHelper implements ResponseGenerator {
         }
         return responses;
     }
+
+    /**
+     * Builds the API response for a rolling maintenance run.
+     * One RollingMaintenanceHostUpdatedResponse is created per entry of hostsUpdated (host UUID and name, start
+     * and end dates formatted with getDateStringInternal, and the output message) and one
+     * RollingMaintenanceHostSkippedResponse per entry of hostsSkipped (host UUID and name, and the skip reason).
+     * The response object name is set to "rollingmaintenance".
+     * NOTE(review): both lists are iterated without a null check, so callers are presumably expected to pass
+     * empty lists rather than null - confirm.
+     */
+    @Override
+    public RollingMaintenanceResponse createRollingMaintenanceResponse(Boolean success, String details, List hostsUpdated, List hostsSkipped) {
+        RollingMaintenanceResponse response = new RollingMaintenanceResponse(success, details);
+        List updated = new ArrayList<>();
+        for (RollingMaintenanceManager.HostUpdated h : hostsUpdated) {
+            RollingMaintenanceHostUpdatedResponse r = new RollingMaintenanceHostUpdatedResponse();
+            r.setHostId(h.getHost().getUuid());
+            r.setHostName(h.getHost().getName());
+            r.setStartDate(getDateStringInternal(h.getStart()));
+            r.setEndDate(getDateStringInternal(h.getEnd()));
+            r.setOutput(h.getOutputMsg());
+            updated.add(r);
+        }
+        List skipped = new ArrayList<>();
+        for (RollingMaintenanceManager.HostSkipped h : hostsSkipped) {
+            RollingMaintenanceHostSkippedResponse r = new RollingMaintenanceHostSkippedResponse();
+            r.setHostId(h.getHost().getUuid());
+            r.setHostName(h.getHost().getName());
+            r.setReason(h.getReason());
+            skipped.add(r);
+        }
+        response.setUpdatedHosts(updated);
+        response.setSkippedHosts(skipped);
+        response.setObjectName("rollingmaintenance");
+        return response;
+    }
 }
diff --git a/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java b/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java
index a95f4ef2d69..1315cdfd070 100644
--- a/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java
+++ b/server/src/main/java/com/cloud/deploy/DeploymentPlanningManagerImpl.java
@@ -1041,7 +1041,7 @@ StateListener {
 
         for (Long clusterId : clusterList) {
             ClusterVO clusterVO = _clusterDao.findById(clusterId);
-            if (clusterVO.getAllocationState() == Grouping.AllocationState.Disabled) {
+            if (clusterVO.getAllocationState() == Grouping.AllocationState.Disabled && !plan.isMigrationPlan()) {
                 s_logger.debug("Cannot deploy in disabled cluster " + clusterId + ", skipping this cluster");
                 avoid.addCluster(clusterVO.getId());
             }
diff --git a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java
index 29f7e68e08c..c1c221b4f7e 100755
--- a/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java
+++ b/server/src/main/java/com/cloud/resource/ResourceManagerImpl.java
@@ -2485,7 +2485,7 @@ public class ResourceManagerImpl extends ManagerBase implements ResourceManager,
         }
     }
 
-    private boolean cancelMaintenance(final long hostId) {
+    public boolean cancelMaintenance(final long hostId) {
         try {
             final Boolean result = propagateResourceEvent(hostId, ResourceState.Event.AdminCancelMaintenance);
 
diff --git a/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java
new file mode 
100644 index 00000000000..62bb30e1323 --- /dev/null +++ b/server/src/main/java/com/cloud/resource/RollingMaintenanceManagerImpl.java @@ -0,0 +1,734 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. +package com.cloud.resource; + +import com.cloud.agent.AgentManager; +import com.cloud.agent.api.Answer; +import com.cloud.agent.api.RollingMaintenanceAnswer; +import com.cloud.agent.api.RollingMaintenanceCommand; +import com.cloud.alert.AlertManager; +import com.cloud.capacity.CapacityManager; +import com.cloud.dc.ClusterDetailsDao; +import com.cloud.dc.ClusterDetailsVO; +import com.cloud.deploy.DeployDestination; +import com.cloud.event.ActionEventUtils; +import com.cloud.event.EventVO; +import com.cloud.exception.AgentUnavailableException; +import com.cloud.exception.InvalidParameterValueException; +import com.cloud.exception.OperationTimedoutException; +import com.cloud.host.Host; +import com.cloud.host.HostVO; +import com.cloud.host.Status; +import com.cloud.host.dao.HostDao; +import com.cloud.host.dao.HostTagsDao; +import com.cloud.hypervisor.Hypervisor; +import com.cloud.org.Cluster; +import com.cloud.org.Grouping; +import com.cloud.service.ServiceOfferingVO; +import com.cloud.service.dao.ServiceOfferingDao; 
+import com.cloud.utils.Pair; +import com.cloud.utils.Ternary; +import com.cloud.utils.component.ManagerBase; +import com.cloud.utils.exception.CloudRuntimeException; +import com.cloud.vm.VMInstanceVO; +import com.cloud.vm.VirtualMachine.State; +import com.cloud.vm.VirtualMachineProfileImpl; +import com.cloud.vm.dao.VMInstanceDao; +import org.apache.cloudstack.affinity.AffinityGroupProcessor; +import org.apache.cloudstack.api.command.admin.host.PrepareForMaintenanceCmd; +import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceCmd; +import org.apache.cloudstack.context.CallContext; +import org.apache.cloudstack.framework.config.ConfigKey; +import org.apache.commons.collections.CollectionUtils; +import org.apache.log4j.Logger; + +import javax.inject.Inject; +import javax.naming.ConfigurationException; +import java.util.ArrayList; +import java.util.Collections; +import java.util.Date; +import java.util.HashMap; +import java.util.HashSet; +import java.util.List; +import java.util.Map; +import java.util.Set; +import java.util.stream.Collectors; + +public class RollingMaintenanceManagerImpl extends ManagerBase implements RollingMaintenanceManager { + + @Inject + private HostDao hostDao; + @Inject + private AgentManager agentManager; + @Inject + private ResourceManager resourceManager; + @Inject + private CapacityManager capacityManager; + @Inject + private VMInstanceDao vmInstanceDao; + @Inject + private ServiceOfferingDao serviceOfferingDao; + @Inject + private ClusterDetailsDao clusterDetailsDao; + @Inject + private HostTagsDao hostTagsDao; + @Inject + private AlertManager alertManager; + + protected List _affinityProcessors; + + public void setAffinityGroupProcessors(List affinityProcessors) { + _affinityProcessors = affinityProcessors; + } + + public static final Logger s_logger = Logger.getLogger(RollingMaintenanceManagerImpl.class.getName()); + + private Pair> getResourceTypeAndIdPair(List podIds, List clusterIds, List zoneIds, List hostIds) 
{ + Pair> pair = CollectionUtils.isNotEmpty(podIds) ? new Pair<>(ResourceType.Pod, podIds) : + CollectionUtils.isNotEmpty(clusterIds) ? new Pair<>(ResourceType.Cluster, clusterIds) : + CollectionUtils.isNotEmpty(zoneIds) ? new Pair<>(ResourceType.Zone, zoneIds) : + CollectionUtils.isNotEmpty(hostIds) ? new Pair<>(ResourceType.Host, hostIds) : null; + if (pair == null) { + throw new CloudRuntimeException("Parameters podId, clusterId, zoneId, hostId are mutually exclusive, " + + "please set only one of them"); + } + return pair; + } + + @Override + public boolean configure(String name, Map params) throws ConfigurationException { + return true; + } + + private void updateCluster(long clusterId, String state) { + Cluster cluster = resourceManager.getCluster(clusterId); + if (cluster == null) { + throw new InvalidParameterValueException("Unable to find the cluster by id=" + clusterId); + } + resourceManager.updateCluster(cluster, "", "", state, ""); + } + + private void generateReportAndFinishingEvent(StartRollingMaintenanceCmd cmd, boolean success, String details, + List hostsUpdated, List hostsSkipped) { + Pair> pair = getResourceTypeIdPair(cmd); + ResourceType entity = pair.first(); + List ids = pair.second(); + + String description = String.format("Success: %s, details: %s, hosts updated: %s, hosts skipped: %s", success, details, + generateReportHostsUpdated(hostsUpdated), generateReportHostsSkipped(hostsSkipped)); + ActionEventUtils.onCompletedActionEvent(CallContext.current().getCallingUserId(), CallContext.current().getCallingAccountId(), + EventVO.LEVEL_INFO, cmd.getEventType(), + "Completed rolling maintenance for entity " + entity + " with IDs: " + ids + " - " + description, 0); + } + + private String generateReportHostsUpdated(List hostsUpdated) { + StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append(hostsUpdated.size()); + return stringBuilder.toString(); + } + + private String generateReportHostsSkipped(List hostsSkipped) { + 
StringBuilder stringBuilder = new StringBuilder(); + stringBuilder.append(hostsSkipped.size()); + return stringBuilder.toString(); + } + + @Override + public Ternary, List>> startRollingMaintenance(StartRollingMaintenanceCmd cmd) { + Pair> pair = getResourceTypeAndIdPair(cmd.getPodIds(), cmd.getClusterIds(), cmd.getZoneIds(), cmd.getHostIds()); + ResourceType type = pair.first(); + List ids = pair.second(); + int timeout = cmd.getTimeout() == null ? KvmRollingMaintenanceStageTimeout.value() : cmd.getTimeout(); + String payload = cmd.getPayload(); + Boolean forced = cmd.getForced(); + + Set disabledClusters = new HashSet<>(); + Map hostsToAvoidMaintenance = new HashMap<>(); + + boolean success = false; + String details = null; + List hostsUpdated = new ArrayList<>(); + List hostsSkipped = new ArrayList<>(); + + if (timeout <= KvmRollingMaintenancePingInterval.value()) { + return new Ternary<>(success, "The timeout value provided must be greater or equal than the ping interval " + + "defined in '" + KvmRollingMaintenancePingInterval.key() + "'", new Pair<>(hostsUpdated, hostsSkipped)); + } + + try { + Map> hostsByCluster = getHostsByClusterForRollingMaintenance(type, ids); + + for (Long clusterId : hostsByCluster.keySet()) { + Cluster cluster = resourceManager.getCluster(clusterId); + List hosts = hostsByCluster.get(clusterId); + + if (!isMaintenanceAllowedByVMStates(cluster, hosts, hostsSkipped)) { + if (forced) { + continue; + } + success = false; + details = "VMs in invalid states in cluster: " + cluster.getUuid(); + return new Ternary<>(success, details, new Pair<>(hostsUpdated, hostsSkipped)); + } + disableClusterIfEnabled(cluster, disabledClusters); + + s_logger.debug("State checks on the hosts in the cluster"); + performStateChecks(cluster, hosts, forced, hostsSkipped); + s_logger.debug("Checking hosts capacity before attempting rolling maintenance"); + performCapacityChecks(cluster, hosts, forced); + s_logger.debug("Attempting pre-flight stages on each host 
before starting rolling maintenance"); + performPreFlightChecks(hosts, timeout, payload, forced, hostsToAvoidMaintenance); + + for (Host host: hosts) { + Ternary hostResult = startRollingMaintenanceHostInCluster(cluster, host, + timeout, payload, forced, hostsToAvoidMaintenance, hostsUpdated, hostsSkipped); + if (hostResult.second()) { + continue; + } + if (hostResult.first()) { + success = false; + details = hostResult.third(); + return new Ternary<>(success, details, new Pair<>(hostsUpdated, hostsSkipped)); + } + } + enableClusterIfDisabled(cluster, disabledClusters); + } + } catch (AgentUnavailableException | InterruptedException | CloudRuntimeException e) { + String err = "Error starting rolling maintenance: " + e.getMessage(); + s_logger.error(err, e); + success = false; + details = err; + return new Ternary<>(success, details, new Pair<>(hostsUpdated, hostsSkipped)); + } finally { + // Enable back disabled clusters + for (Long clusterId : disabledClusters) { + Cluster cluster = resourceManager.getCluster(clusterId); + if (cluster.getAllocationState() == Grouping.AllocationState.Disabled) { + updateCluster(clusterId, "Enabled"); + } + } + generateReportAndFinishingEvent(cmd, success, details, hostsUpdated, hostsSkipped); + } + success = true; + details = "OK"; + return new Ternary<>(success, details, new Pair<>(hostsUpdated, hostsSkipped)); + } + + /** + * Perform state checks on the hosts in a cluster + */ + protected void performStateChecks(Cluster cluster, List hosts, Boolean forced, List hostsSkipped) { + List hostsToDrop = new ArrayList<>(); + for (Host host : hosts) { + if (host.getStatus() != Status.Up) { + String msg = "Host " + host.getUuid() + " is not connected, state = " + host.getStatus().toString(); + if (forced) { + hostsSkipped.add(new HostSkipped(host, msg)); + hostsToDrop.add(host); + continue; + } + throw new CloudRuntimeException(msg); + } + if (host.getResourceState() != ResourceState.Enabled) { + String msg = "Host " + host.getUuid() + " 
is not enabled, state = " + host.getResourceState().toString(); + if (forced) { + hostsSkipped.add(new HostSkipped(host, msg)); + hostsToDrop.add(host); + continue; + } + throw new CloudRuntimeException(msg); + } + } + if (CollectionUtils.isNotEmpty(hostsToDrop)) { + hosts.removeAll(hostsToDrop); + } + } + + /** + * Do not allow rolling maintenance if there are VMs in Starting/Stopping/Migrating/Error/Unknown state + */ + private boolean isMaintenanceAllowedByVMStates(Cluster cluster, List hosts, List hostsSkipped) { + for (Host host : hosts) { + List notAllowedStates = vmInstanceDao.findByHostInStates(host.getId(), State.Starting, State.Stopping, + State.Migrating, State.Error, State.Unknown); + if (notAllowedStates.size() > 0) { + String msg = "There are VMs in starting/stopping/migrating/error/unknown state, not allowing rolling maintenance in the cluster"; + HostSkipped skipped = new HostSkipped(host, msg); + hostsSkipped.add(skipped); + return false; + } + } + return true; + } + + /** + * Start rolling maintenance for a single host + * @return tuple: (FAIL, SKIP, DETAILS), where: + * - FAIL: True if rolling maintenance must fail + * - SKIP: True if host must be skipped + * - DETAILS: Information retrieved by the host + */ + private Ternary startRollingMaintenanceHostInCluster(Cluster cluster, Host host, int timeout, + String payload, Boolean forced, + Map hostsToAvoidMaintenance, + List hostsUpdated, + List hostsSkipped) throws InterruptedException, AgentUnavailableException { + Ternary result; + if (!isMaintenanceScriptDefinedOnHost(host, hostsSkipped)) { + String msg = "There is no maintenance script on the host"; + hostsSkipped.add(new HostSkipped(host, msg)); + return new Ternary<>(false, true, msg); + } + + result = performPreMaintenanceStageOnHost(host, timeout, payload, forced, hostsToAvoidMaintenance, hostsSkipped); + if (result.first() || result.second()) { + return result; + } + + if (isMaintenanceStageAvoided(host, hostsToAvoidMaintenance, 
hostsSkipped)) { + return new Ternary<>(false, true, "Maintenance stage must be avoided"); + } + + s_logger.debug("Updating capacity before re-checking capacity"); + alertManager.recalculateCapacity(); + result = reCheckCapacityBeforeMaintenanceOnHost(cluster, host, forced, hostsSkipped); + if (result.first() || result.second()) { + return result; + } + + Date startTime = new Date(); + putHostIntoMaintenance(host); + result = performMaintenanceStageOnHost(host, timeout, payload, forced, hostsToAvoidMaintenance, hostsSkipped); + if (result.first() || result.second()) { + cancelHostMaintenance(host); + return result; + } + cancelHostMaintenance(host); + Date endTime = new Date(); + + HostUpdated hostUpdated = new HostUpdated(host, startTime, endTime, result.third()); + hostsUpdated.add(hostUpdated); + + result = performPostMaintenanceStageOnHost(host, timeout, payload, forced, hostsToAvoidMaintenance, hostsSkipped); + if (result.first() || result.second()) { + return result; + } + return new Ternary<>(false, false, "Completed rolling maintenance on host " + host.getUuid()); + } + + /** + * Perform Post-Maintenance stage on host + * @return tuple: (FAIL, SKIP, DETAILS), where: + * - FAIL: True if rolling maintenance must fail + * - SKIP: True if host must be skipped + * - DETAILS: Information retrieved by the host after executing the stage + * @throws InterruptedException + */ + private Ternary performPostMaintenanceStageOnHost(Host host, int timeout, String payload, Boolean forced, Map hostsToAvoidMaintenance, List hostsSkipped) throws InterruptedException { + Ternary result = performStageOnHost(host, Stage.PostMaintenance, timeout, payload, forced); + if (!result.first()) { + if (forced) { + String msg = "Post-maintenance script failed: " + result.second(); + hostsSkipped.add(new HostSkipped(host, msg)); + return new Ternary<>(true, true, msg); + } + return new Ternary<>(true, false, result.second()); + } + return new Ternary<>(false, false, result.second()); + } + 
+ /** + * Cancel maintenance mode on host + * @param host host + */ + private void cancelHostMaintenance(Host host) { + if (!resourceManager.cancelMaintenance(host.getId())) { + String message = "Could not cancel maintenance on host " + host.getUuid(); + s_logger.error(message); + throw new CloudRuntimeException(message); + } + } + + /** + * Perform Maintenance stage on host + * @return tuple: (FAIL, SKIP, DETAILS), where: + * - FAIL: True if rolling maintenance must fail + * - SKIP: True if host must be skipped + * - DETAILS: Information retrieved by the host after executing the stage + * @throws InterruptedException + */ + private Ternary performMaintenanceStageOnHost(Host host, int timeout, String payload, Boolean forced, Map hostsToAvoidMaintenance, List hostsSkipped) throws InterruptedException { + Ternary result = performStageOnHost(host, Stage.Maintenance, timeout, payload, forced); + if (!result.first()) { + if (forced) { + String msg = "Maintenance script failed: " + result.second(); + hostsSkipped.add(new HostSkipped(host, msg)); + return new Ternary<>(true, true, msg); + } + return new Ternary<>(true, false, result.second()); + } + return new Ternary<>(false, false, result.second()); + } + + /** + * Puts host into maintenance and waits for its completion + * @param host host + * @throws InterruptedException + * @throws AgentUnavailableException + */ + private void putHostIntoMaintenance(Host host) throws InterruptedException, AgentUnavailableException { + s_logger.debug("Trying to set the host " + host.getId() + " into maintenance"); + PrepareForMaintenanceCmd cmd = new PrepareForMaintenanceCmd(); + cmd.setId(host.getId()); + resourceManager.maintain(cmd); + waitForHostInMaintenance(host.getId()); + } + + /** + * Enable back disabled cluster + * @param cluster cluster to enable if it has been disabled + * @param disabledClusters set of disabled clusters + */ + private void enableClusterIfDisabled(Cluster cluster, Set disabledClusters) { + if 
(cluster.getAllocationState() == Grouping.AllocationState.Disabled && disabledClusters.contains(cluster.getId())) { + updateCluster(cluster.getId(), "Enabled"); + } + } + + /** + * Re-check capacity to ensure the host can transit into maintenance state + * @return tuple: (FAIL, SKIP, DETAILS), where: + * - FAIL: True if rolling maintenance must fail + * - SKIP: True if host must be skipped + * - DETAILS: Information retrieved after capacity checks + */ + private Ternary reCheckCapacityBeforeMaintenanceOnHost(Cluster cluster, Host host, Boolean forced, List hostsSkipped) { + Pair capacityCheckBeforeMaintenance = performCapacityChecksBeforeHostInMaintenance(host, cluster); + if (!capacityCheckBeforeMaintenance.first()) { + String errorMsg = "Capacity check failed for host " + host.getUuid() + ": " + capacityCheckBeforeMaintenance.second(); + if (forced) { + s_logger.info("Skipping host " + host.getUuid() + " as: " + errorMsg); + hostsSkipped.add(new HostSkipped(host, errorMsg)); + return new Ternary<>(true, true, capacityCheckBeforeMaintenance.second()); + } + return new Ternary<>(true, false, capacityCheckBeforeMaintenance.second()); + } + return new Ternary<>(false, false, capacityCheckBeforeMaintenance.second()); + } + + /** + * Indicates if the maintenance stage must be avoided + */ + private boolean isMaintenanceStageAvoided(Host host, Map hostsToAvoidMaintenance, List hostsSkipped) { + if (hostsToAvoidMaintenance.containsKey(host.getId())) { + s_logger.debug("Host " + host.getId() + " is not being put into maintenance, skipping it"); + HostSkipped hostSkipped = new HostSkipped(host, hostsToAvoidMaintenance.get(host.getId())); + hostsSkipped.add(hostSkipped); + return true; + } + return false; + } + + /** + * Perform Pre-Maintenance stage on host + * @return tuple: (FAIL, SKIP, DETAILS), where: + * - FAIL: True if rolling maintenance must fail + * - SKIP: True if host must be skipped + * - DETAILS: Information retrieved by the host after executing the stage + * 
@throws InterruptedException + */ + private Ternary performPreMaintenanceStageOnHost(Host host, int timeout, String payload, Boolean forced, + Map hostsToAvoidMaintenance, + List hostsSkipped) throws InterruptedException { + Ternary result = performStageOnHost(host, Stage.PreMaintenance, timeout, payload, forced); + if (!result.first()) { + if (forced) { + String msg = "Pre-maintenance script failed: " + result.second(); + hostsSkipped.add(new HostSkipped(host, msg)); + return new Ternary<>(true, true, result.second()); + } + return new Ternary<>(true, false, result.second()); + } + if (result.third() && !hostsToAvoidMaintenance.containsKey(host.getId())) { + s_logger.debug("Host " + host.getId() + " added to the avoid maintenance set"); + hostsToAvoidMaintenance.put(host.getId(), "Pre-maintenance stage set to avoid maintenance"); + } + return new Ternary<>(false, false, result.second()); + } + + /** + * Disable cluster (if hasn't been disabled yet) + * @param cluster cluster to disable + * @param disabledClusters set of disabled cluster ids. 
cluster is added if it is disabled + */ + private void disableClusterIfEnabled(Cluster cluster, Set disabledClusters) { + if (cluster.getAllocationState() == Grouping.AllocationState.Enabled && !disabledClusters.contains(cluster.getId())) { + updateCluster(cluster.getId(), "Disabled"); + disabledClusters.add(cluster.getId()); + } + } + + private boolean isMaintenanceScriptDefinedOnHost(Host host, List hostsSkipped) { + try { + RollingMaintenanceAnswer answer = (RollingMaintenanceAnswer) agentManager.send(host.getId(), new RollingMaintenanceCommand(true)); + return answer.isMaintenaceScriptDefined(); + } catch (AgentUnavailableException | OperationTimedoutException e) { + String msg = "Could not check for maintenance script on host " + host.getId() + " due to: " + e.getMessage(); + s_logger.error(msg, e); + return false; + } + } + + /** + * Execute stage on host + * @return tuple: (SUCCESS, DETAILS, AVOID_MAINTENANCE) where: + * - SUCCESS: True if stage is successfull + * - DETAILS: Information retrieved by the host after executing the stage + * - AVOID_MAINTENANCE: True if maintenance stage must be avoided + */ + private Ternary performStageOnHost(Host host, Stage stage, int timeout, + String payload, Boolean forced) throws InterruptedException { + Ternary result = sendRollingMaintenanceCommandToHost(host, stage, timeout, payload); + if (!result.first() && !forced) { + throw new CloudRuntimeException("Stage: " + stage.toString() + " failed on host " + host.getUuid() + ": " + result.second()); + } + return result; + } + + /** + * Send rolling maintenance command to a host to perform a certain stage specified in cmd + * @return tuple: (SUCCESS, DETAILS, AVOID_MAINTENANCE) where: + * - SUCCESS: True if stage is successfull + * - DETAILS: Information retrieved by the host after executing the stage + * - AVOID_MAINTENANCE: True if maintenance stage must be avoided + */ + private Ternary sendRollingMaintenanceCommandToHost(Host host, Stage stage, + int timeout, String 
payload) throws InterruptedException { + boolean completed = false; + Answer answer = null; + long timeSpent = 0L; + long pingInterval = KvmRollingMaintenancePingInterval.value() * 1000L; + boolean avoidMaintenance = false; + + RollingMaintenanceCommand cmd = new RollingMaintenanceCommand(stage.toString()); + cmd.setWait(timeout); + cmd.setPayload(payload); + + while (!completed && timeSpent < timeout * 1000L) { + try { + answer = agentManager.send(host.getId(), cmd); + } catch (AgentUnavailableException | OperationTimedoutException e) { + // Agent may be restarted on the scripts - continue polling until it is up + String msg = "Cannot send command to host: " + host.getId() + ", waiting " + pingInterval + "ms - " + e.getMessage(); + s_logger.warn(msg); + cmd.setStarted(true); + Thread.sleep(pingInterval); + timeSpent += pingInterval; + continue; + } + cmd.setStarted(true); + + RollingMaintenanceAnswer rollingMaintenanceAnswer = (RollingMaintenanceAnswer) answer; + completed = rollingMaintenanceAnswer.isFinished(); + if (!completed) { + Thread.sleep(pingInterval); + timeSpent += pingInterval; + } else { + avoidMaintenance = rollingMaintenanceAnswer.isAvoidMaintenance(); + } + } + if (timeSpent >= timeout * 1000L) { + return new Ternary<>(false, + "Timeout exceeded for rolling maintenance on host " + host.getUuid() + " and stage " + stage.toString(), + avoidMaintenance); + } + return new Ternary<>(answer.getResult(), answer.getDetails(), avoidMaintenance); + } + + /** + * Pre flight checks on hosts + */ + private void performPreFlightChecks(List hosts, int timeout, String payload, Boolean forced, + Map hostsToAvoidMaintenance) throws InterruptedException { + for (Host host : hosts) { + Ternary result = performStageOnHost(host, Stage.PreFlight, timeout, payload, forced); + if (result.third() && !hostsToAvoidMaintenance.containsKey(host.getId())) { + s_logger.debug("Host " + host.getId() + " added to the avoid maintenance set"); + 
hostsToAvoidMaintenance.put(host.getId(), "Pre-flight stage set to avoid maintenance"); + } + } + } + + /** + * Capacity checks on hosts + */ + private void performCapacityChecks(Cluster cluster, List hosts, Boolean forced) { + for (Host host : hosts) { + Pair result = performCapacityChecksBeforeHostInMaintenance(host, cluster); + if (!result.first() && !forced) { + throw new CloudRuntimeException("Capacity check failed for host " + host.getUuid() + ": " + result.second()); + } + } + } + + /** + * Check if there is enough capacity for host to enter maintenance + */ + private Pair performCapacityChecksBeforeHostInMaintenance(Host host, Cluster cluster) { + List hosts = hostDao.findByClusterId(cluster.getId()); + List hostsInCluster = hosts.stream() + .filter(x -> x.getId() != host.getId() && + x.getClusterId().equals(cluster.getId()) && + x.getResourceState() == ResourceState.Enabled && + x.getStatus() == Status.Up) + .collect(Collectors.toList()); + if (CollectionUtils.isEmpty(hostsInCluster)) { + throw new CloudRuntimeException("No host available in cluster " + cluster.getUuid() + " (" + cluster.getName() + ") to support host " + + host.getUuid() + " (" + host.getName() + ") in maintenance"); + } + List vmsRunning = vmInstanceDao.listByHostId(host.getId()); + if (CollectionUtils.isEmpty(vmsRunning)) { + return new Pair<>(true, "OK"); + } + List hostTags = hostTagsDao.gethostTags(host.getId()); + + int sucessfullyCheckedVmMigrations = 0; + for (VMInstanceVO runningVM : vmsRunning) { + boolean canMigrateVm = false; + ServiceOfferingVO serviceOffering = serviceOfferingDao.findById(runningVM.getServiceOfferingId()); + for (Host hostInCluster : hostsInCluster) { + if (!checkHostTags(hostTags, hostTagsDao.gethostTags(hostInCluster.getId()), serviceOffering.getHostTag())) { + s_logger.debug("Host tags mismatch between host " + host.getUuid() + " and host " + hostInCluster.getUuid() + + ". 
Skipping it from the capacity check"); + continue; + } + DeployDestination deployDestination = new DeployDestination(null, null, null, host); + VirtualMachineProfileImpl vmProfile = new VirtualMachineProfileImpl(runningVM); + boolean affinityChecks = true; + for (AffinityGroupProcessor affinityProcessor : _affinityProcessors) { + affinityChecks = affinityChecks && affinityProcessor.check(vmProfile, deployDestination); + } + if (!affinityChecks) { + s_logger.debug("Affinity check failed between host " + host.getUuid() + " and host " + hostInCluster.getUuid() + + ". Skipping it from the capacity check"); + continue; + } + boolean maxGuestLimit = capacityManager.checkIfHostReachMaxGuestLimit(host); + boolean hostHasCPUCapacity = capacityManager.checkIfHostHasCpuCapability(hostInCluster.getId(), serviceOffering.getCpu(), serviceOffering.getSpeed()); + int cpuRequested = serviceOffering.getCpu() * serviceOffering.getSpeed(); + long ramRequested = serviceOffering.getRamSize() * 1024L * 1024L; + ClusterDetailsVO clusterDetailsCpuOvercommit = clusterDetailsDao.findDetail(cluster.getId(), "cpuOvercommitRatio"); + ClusterDetailsVO clusterDetailsRamOvercommmt = clusterDetailsDao.findDetail(cluster.getId(), "memoryOvercommitRatio"); + Float cpuOvercommitRatio = Float.parseFloat(clusterDetailsCpuOvercommit.getValue()); + Float memoryOvercommitRatio = Float.parseFloat(clusterDetailsRamOvercommmt.getValue()); + boolean hostHasCapacity = capacityManager.checkIfHostHasCapacity(hostInCluster.getId(), cpuRequested, ramRequested, false, + cpuOvercommitRatio, memoryOvercommitRatio, false); + if (!maxGuestLimit && hostHasCPUCapacity && hostHasCapacity) { + canMigrateVm = true; + break; + } + } + if (!canMigrateVm) { + String msg = "VM " + runningVM.getUuid() + " cannot be migrated away from host " + host.getUuid() + + " to any other host in the cluster"; + s_logger.error(msg); + return new Pair<>(false, msg); + } + sucessfullyCheckedVmMigrations++; + } + if 
(sucessfullyCheckedVmMigrations != vmsRunning.size()) { + return new Pair<>(false, "Host " + host.getId() + " cannot enter maintenance mode as capacity check failed for hosts in cluster " + cluster.getUuid()); + } + return new Pair<>(true, "OK"); + } + + /** + * Check hosts tags + */ + private boolean checkHostTags(List hostTags, List hostInClusterTags, String offeringTag) { + if (CollectionUtils.isEmpty(hostTags) && CollectionUtils.isEmpty(hostInClusterTags)) { + return true; + } else if ((CollectionUtils.isNotEmpty(hostTags) && CollectionUtils.isEmpty(hostInClusterTags)) || + (CollectionUtils.isEmpty(hostTags) && CollectionUtils.isNotEmpty(hostInClusterTags))) { + return false; + } else { + return hostInClusterTags.contains(offeringTag); + } + } + + /** + * Retrieve all the hosts in 'Up' state within the scope for starting rolling maintenance + */ + protected Map> getHostsByClusterForRollingMaintenance(ResourceType type, List ids) { + Set hosts = new HashSet<>(); + List hostsInScope = null; + for (Long id : ids) { + if (type == ResourceType.Host) { + hostsInScope = Collections.singletonList(hostDao.findById(id)); + } else if (type == ResourceType.Cluster) { + hostsInScope = hostDao.findByClusterId(id); + } else if (type == ResourceType.Pod) { + hostsInScope = hostDao.findByPodId(id); + } else if (type == ResourceType.Zone) { + hostsInScope = hostDao.findByDataCenterId(id); + } + List hostsUp = hostsInScope.stream() + .filter(x -> x.getHypervisorType() == Hypervisor.HypervisorType.KVM) + .collect(Collectors.toList()); + hosts.addAll(hostsUp); + } + return hosts.stream().collect(Collectors.groupingBy(Host::getClusterId)); + } + + @Override + public Pair> getResourceTypeIdPair(StartRollingMaintenanceCmd cmd) { + return getResourceTypeAndIdPair(cmd.getPodIds(), cmd.getClusterIds(), cmd.getZoneIds(), cmd.getHostIds()); + } + + /* + Wait for to be in maintenance mode + */ + private void waitForHostInMaintenance(long hostId) throws CloudRuntimeException, 
InterruptedException { + HostVO host = hostDao.findById(hostId); + long timeout = KvmRollingMaintenanceWaitForMaintenanceTimeout.value() * 1000L; + long timeSpent = 0; + long step = 30 * 1000L; + while (timeSpent < timeout && host.getResourceState() != ResourceState.Maintenance) { + Thread.sleep(step); + timeSpent += step; + host = hostDao.findById(hostId); + } + + if (host.getResourceState() != ResourceState.Maintenance) { + String errorMsg = "Timeout: waited " + timeout + "ms for host " + host.getUuid() + "(" + host.getName() + ")" + + " to be in Maintenance state, but after timeout it is in " + host.getResourceState().toString() + " state"; + s_logger.error(errorMsg); + throw new CloudRuntimeException(errorMsg); + } + s_logger.debug("Host " + host.getUuid() + "(" + host.getName() + ") is in maintenance"); + } + + @Override + public String getConfigComponentName() { + return RollingMaintenanceManagerImpl.class.getSimpleName(); + } + + @Override + public ConfigKey[] getConfigKeys() { + return new ConfigKey[] {KvmRollingMaintenanceStageTimeout, KvmRollingMaintenancePingInterval, KvmRollingMaintenanceWaitForMaintenanceTimeout}; + } +} \ No newline at end of file diff --git a/server/src/main/java/com/cloud/server/ManagementServerImpl.java b/server/src/main/java/com/cloud/server/ManagementServerImpl.java index e02f0167cb8..ff29f1d8634 100644 --- a/server/src/main/java/com/cloud/server/ManagementServerImpl.java +++ b/server/src/main/java/com/cloud/server/ManagementServerImpl.java @@ -166,6 +166,7 @@ import org.apache.cloudstack.api.command.admin.resource.CleanVMReservationsCmd; import org.apache.cloudstack.api.command.admin.resource.DeleteAlertsCmd; import org.apache.cloudstack.api.command.admin.resource.ListAlertsCmd; import org.apache.cloudstack.api.command.admin.resource.ListCapacityCmd; +import org.apache.cloudstack.api.command.admin.resource.StartRollingMaintenanceCmd; import org.apache.cloudstack.api.command.admin.resource.UploadCustomCertificateCmd; import 
org.apache.cloudstack.api.command.admin.router.ConfigureOvsElementCmd; import org.apache.cloudstack.api.command.admin.router.ConfigureVirtualRouterElementCmd; @@ -3128,6 +3129,7 @@ public class ManagementServerImpl extends ManagerBase implements ManagementServe cmdList.add(GetUploadParamsForIsoCmd.class); cmdList.add(ListTemplateOVFProperties.class); cmdList.add(GetRouterHealthCheckResultsCmd.class); + cmdList.add(StartRollingMaintenanceCmd.class); // Out-of-band management APIs for admins cmdList.add(EnableOutOfBandManagementForHostCmd.class); diff --git a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml index 17a9b946dd8..1c90a97a70f 100644 --- a/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml +++ b/server/src/main/resources/META-INF/cloudstack/core/spring-server-core-managers-context.xml @@ -313,4 +313,8 @@ + + + diff --git a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java index 26cd820fd67..8ce60df715d 100755 --- a/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java +++ b/server/src/test/java/com/cloud/resource/MockResourceManagerImpl.java @@ -621,6 +621,11 @@ public class MockResourceManagerImpl extends ManagerBase implements ResourceMana return null; } + @Override + public boolean cancelMaintenance(long hostId) { + return false; + } + @Override public boolean isHostGpuEnabled(final long hostId) { // TODO Auto-generated method stub diff --git a/server/src/test/java/com/cloud/resource/RollingMaintenanceManagerImplTest.java b/server/src/test/java/com/cloud/resource/RollingMaintenanceManagerImplTest.java new file mode 100644 index 00000000000..ef0277fd372 --- /dev/null +++ b/server/src/test/java/com/cloud/resource/RollingMaintenanceManagerImplTest.java @@ -0,0 +1,167 @@ +// 
Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package com.cloud.resource; + +import com.cloud.host.Host; +import com.cloud.host.HostVO; +import com.cloud.host.Status; +import com.cloud.host.dao.HostDao; +import com.cloud.hypervisor.Hypervisor; +import com.cloud.org.Cluster; +import com.cloud.utils.exception.CloudRuntimeException; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.InjectMocks; +import org.mockito.Mock; +import org.mockito.Mockito; +import org.mockito.MockitoAnnotations; +import org.mockito.Spy; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.Map; + +public class RollingMaintenanceManagerImplTest { + + @Mock + HostDao hostDao; + @Mock + HostVO host1; + @Mock + HostVO host2; + @Mock + HostVO host3; + @Mock + HostVO host4; + @Mock + Cluster cluster; + + @Spy + @InjectMocks + private RollingMaintenanceManagerImpl manager = new RollingMaintenanceManagerImpl(); + + // Hosts in cluster 1 + private static final long hostId1 = 1L; + private static final long hostId2 = 2L; + + // Hosts in cluster 2 + private static final long hostId3 = 3L; + private static final long hostId4 = 4L; + + 
private static final long clusterId1 = 1L; + private static final long clusterId2 = 2L; + + private static final long podId = 1L; + private static final long zoneId = 1L; + + @Before + public void setup() throws Exception { + MockitoAnnotations.initMocks(this); + Mockito.when(hostDao.findByClusterId(clusterId1)).thenReturn(Arrays.asList(host1, host2)); + Mockito.when(hostDao.findByClusterId(clusterId2)).thenReturn(Arrays.asList(host3, host4)); + List hosts = Arrays.asList(host1, host2, host3, host4); + Mockito.when(hostDao.findByPodId(podId)).thenReturn(hosts); + Mockito.when(hostDao.findByDataCenterId(zoneId)).thenReturn(hosts); + for (HostVO host : hosts) { + Mockito.when(host.getHypervisorType()).thenReturn(Hypervisor.HypervisorType.KVM); + Mockito.when(host.getState()).thenReturn(Status.Up); + Mockito.when(host.isInMaintenanceStates()).thenReturn(false); + } + Mockito.when(host1.getClusterId()).thenReturn(clusterId1); + Mockito.when(host2.getClusterId()).thenReturn(clusterId1); + + Mockito.when(host3.getClusterId()).thenReturn(clusterId2); + Mockito.when(host4.getClusterId()).thenReturn(clusterId2); + + Mockito.when(hostDao.findById(hostId1)).thenReturn(host1); + Mockito.when(hostDao.findById(hostId2)).thenReturn(host2); + Mockito.when(hostDao.findById(hostId3)).thenReturn(host3); + Mockito.when(hostDao.findById(hostId4)).thenReturn(host4); + + Mockito.when(host1.getStatus()).thenReturn(Status.Up); + Mockito.when(host2.getStatus()).thenReturn(Status.Up); + Mockito.when(host1.getResourceState()).thenReturn(ResourceState.Enabled); + Mockito.when(host2.getResourceState()).thenReturn(ResourceState.Enabled); + } + + private void checkResults(Map> result) { + Assert.assertEquals(2, result.size()); + Assert.assertTrue(result.containsKey(clusterId1)); + Assert.assertTrue(result.containsKey(clusterId2)); + List cluster1Hosts = result.get(clusterId1); + List cluster2Hosts = result.get(clusterId2); + Assert.assertEquals(2, cluster1Hosts.size()); + 
Assert.assertTrue(cluster1Hosts.contains(host1)); + Assert.assertTrue(cluster1Hosts.contains(host2)); + Assert.assertEquals(2, cluster2Hosts.size()); + Assert.assertTrue(cluster2Hosts.contains(host3)); + Assert.assertTrue(cluster2Hosts.contains(host4)); + } + + @Test + public void testGetHostsByClusterForRollingMaintenanceZoneScope() { + Map> result = manager.getHostsByClusterForRollingMaintenance(RollingMaintenanceManager.ResourceType.Zone, Collections.singletonList(zoneId)); + checkResults(result); + } + + @Test + public void testGetHostsByClusterForRollingMaintenancePodScope() { + Map> result = manager.getHostsByClusterForRollingMaintenance(RollingMaintenanceManager.ResourceType.Pod, Collections.singletonList(podId)); + checkResults(result); + } + + @Test + public void testGetHostsByClusterForRollingMaintenanceClusterScope() { + List clusterIds = Arrays.asList(clusterId1, clusterId2); + Map> result = manager.getHostsByClusterForRollingMaintenance(RollingMaintenanceManager.ResourceType.Cluster, clusterIds); + checkResults(result); + } + + @Test + public void testGetHostsByClusterForRollingMaintenanceHostScope() { + List hostIds = Arrays.asList(hostId1, hostId2, hostId3, hostId4); + Map> result = manager.getHostsByClusterForRollingMaintenance(RollingMaintenanceManager.ResourceType.Host, hostIds); + checkResults(result); + } + + @Test(expected = CloudRuntimeException.class) + public void testPerformStateChecksNotForce() { + List hosts = Arrays.asList(host1, host2); + Mockito.when(host1.getStatus()).thenReturn(Status.Error); + manager.performStateChecks(cluster, hosts, false, new ArrayList<>()); + } + + @Test + public void testPerformStateChecksForce() { + List hosts = new ArrayList<>(); + hosts.add(host1); + hosts.add(host2); + Mockito.when(host1.getStatus()).thenReturn(Status.Error); + List skipped = new ArrayList<>(); + manager.performStateChecks(cluster, hosts, true, skipped); + + Assert.assertFalse(skipped.isEmpty()); + Assert.assertEquals(1, skipped.size()); + 
Assert.assertEquals(host1, skipped.get(0).getHost()); + + Assert.assertEquals(1, hosts.size()); + } +} diff --git a/tools/apidoc/gen_toc.py b/tools/apidoc/gen_toc.py index 068b6850021..ef98b135898 100644 --- a/tools/apidoc/gen_toc.py +++ b/tools/apidoc/gen_toc.py @@ -193,7 +193,9 @@ known_categories = { 'Restore' : 'Backup and Recovery', 'UnmanagedInstance': 'Virtual Machine', 'KubernetesSupportedVersion': 'Kubernetes Service', - 'KubernetesCluster': 'Kubernetes Service' + 'KubernetesCluster': 'Kubernetes Service', + 'UnmanagedInstance': 'Virtual Machine', + 'Rolling': 'Rolling Maintenance' } diff --git a/ui/css/cloudstack3.css b/ui/css/cloudstack3.css index cc69a39cf9e..a2da335463d 100644 --- a/ui/css/cloudstack3.css +++ b/ui/css/cloudstack3.css @@ -12669,6 +12669,14 @@ div.ui-dialog div.autoscaler div.field-group div.form-container form div.form-it background-position: -100px -614px; } +.startRollingMaintenance .icon { + background-position: -138px -65px; +} + +.startRollingMaintenance:hover .icon { + background-position: -138px -65px; +} + .addVlanRange .icon, .addVmwareDc .icon { background-position: -37px -62px; diff --git a/ui/l10n/en.js b/ui/l10n/en.js index 36f6d6d42f4..d022e025f32 100644 --- a/ui/l10n/en.js +++ b/ui/l10n/en.js @@ -1669,6 +1669,9 @@ var dictionary = { "label.start.lb.vm":"Start LB VM", "label.start.port":"Start Port", "label.start.reserved.system.IP":"Start Reserved system IP", +"label.start.rolling.maintenance":"Start Rolling Maintenance", +"label.start.rolling.maintenance.force":"Force", +"label.start.rolling.maintenance.payload":"Payload", "label.start.vlan":"Start VLAN", "label.start.vxlan":"Start VXLAN", "label.state":"State", diff --git a/ui/scripts/system.js b/ui/scripts/system.js index ea180fdcf37..1e29500e16d 100755 --- a/ui/scripts/system.js +++ b/ui/scripts/system.js @@ -258,6 +258,100 @@ return allowedActions; }; + var rollingMaintenanceAction = function(args) { + var isCluster = args.entity === 'clusters'; + var isZone = 
args.entity === 'zones'; + var isPod = args.entity === 'pods'; + var isHost = args.entity === 'hosts'; + var action = { + messages: { + notification: function(args) { + return 'label.start.rolling.maintenance'; + } + }, + label: 'label.start.rolling.maintenance', + addRow: 'false', + createForm: { + title: 'label.start.rolling.maintenance', + fields: { + timeout: { + label: 'label.timeout', + }, + force: { + isBoolean: true, + label: 'label.start.rolling.maintenance.force' + }, + payload: { + label: 'label.start.rolling.maintenance.payload' + } + } + }, + action: function(args) { + var selectedIds; + if (isCluster) { + selectedIds = args.context.clusters.map(x => x.id); + } else if (isZone) { + selectedIds = args.context.physicalResources.map(x => x.id); + } else if (isPod) { + selectedIds = args.context.pods.map(x => x.id); + } else if (isHost) { + selectedIds = args.context.hosts.map(x => x.id); + } + var ids = selectedIds.join(','); + var data = { + force: args.data.force, + timeout: args.data.timeout, + payload: args.data.payload + }; + if (isCluster) { + $.extend(data, { + clusterids : ids + }); + } else if (isZone) { + $.extend(data, { + zoneids : ids + }); + } else if (isPod) { + $.extend(data, { + podids : ids + }); + } else if (isHost) { + $.extend(data, { + hostids : ids + }); + } + + $.ajax({ + url: createURL("startRollingMaintenance"), + dataType: "json", + data: data, + async: true, + success: function (json) { + var item = json.startrollingmaintenanceresponse; + var jid = item.jobid; + args.response.success({ + _custom: { + jobId: jid + } + }); + } + }); + }, + notification: { + poll: pollAsyncJobResult + } + }; + + if (args && args.listView) { + $.extend(action, { + isHeader: true, + isMultiSelectAction: true + }); + } + + return action; + }; + cloudStack.sections.system = { title: 'label.menu.infrastructure', id: 'system', @@ -7666,6 +7760,7 @@ zones: { id: 'physicalResources', label: 'label.menu.physical.resources', + multiSelect: true, fields: { 
name: { label: 'label.zone' @@ -7755,12 +7850,65 @@ return 'label.metrics'; } } - } + }, + startRollingMaintenance: rollingMaintenanceAction({ listView: true, entity: 'zones' }) }, detailView: { isMaximized: true, actions: { + + startRollingMaintenance: { + label: 'label.start.rolling.maintenance', + textLabel: 'label.start.rolling.maintenance', + messages: { + notification: function (args) { + return 'label.start.rolling.maintenance'; + } + }, + createForm: { + title: 'label.start.rolling.maintenance', + fields: { + timeout: { + label: 'label.timeout', + }, + force: { + isBoolean: true, + label: 'label.start.rolling.maintenance.force' + }, + payload: { + label: 'label.start.rolling.maintenance.payload' + } + } + }, + action: function (args) { + var data = { + zoneids: args.context.physicalResources[0].id, + force: args.data.force, + timeout: args.data.timeout, + payload: args.data.payload + }; + $.ajax({ + url: createURL("startRollingMaintenance"), + dataType: "json", + data: data, + async: true, + success: function (json) { + var item = json.rollingmaintenance; + args.response.success({ + actionFilter: zoneActionfilter, + data: item + }); + } + }); + }, + notification: { + poll: function (args) { + args.complete(); + } + } + }, + addVmwareDc: { label: 'label.add.vmware.datacenter', textLabel: 'label.add.vmware.datacenter', @@ -13792,6 +13940,7 @@ listView: { id: 'pods', section: 'pods', + multiSelect: true, fields: { name: { label: 'label.name' @@ -14053,7 +14202,8 @@ return 'label.add.pod'; } } - } + }, + startRollingMaintenance: rollingMaintenanceAction({ listView: true, entity: 'pods' }) }, detailView: { @@ -14075,6 +14225,57 @@ return hiddenTabs; }, actions: { + startRollingMaintenance: { + label: 'label.start.rolling.maintenance', + textLabel: 'label.start.rolling.maintenance', + messages: { + notification: function (args) { + return 'label.start.rolling.maintenance'; + } + }, + createForm: { + title: 'label.start.rolling.maintenance', + fields: { + timeout: 
{ + label: 'label.timeout', + }, + force: { + isBoolean: true, + label: 'label.start.rolling.maintenance.force' + }, + payload: { + label: 'label.start.rolling.maintenance.payload' + } + } + }, + action: function (args) { + var data = { + podids: args.context.pods[0].id, + force: args.data.force, + timeout: args.data.timeout, + payload: args.data.payload + }; + $.ajax({ + url: createURL("startRollingMaintenance"), + dataType: "json", + data: data, + async: true, + success: function (json) { + var item = json.rollingmaintenance; + args.response.success({ + actionFilter: zoneActionfilter, + data: item + }); + } + }); + }, + notification: { + poll: function (args) { + args.complete(); + } + } + }, + edit: { label: 'label.edit', action: function (args) { @@ -14446,6 +14647,7 @@ listView: { id: 'clusters', section: 'clusters', + multiSelect: true, fields: { name: { label: 'label.name' @@ -15184,7 +15386,8 @@ return 'label.metrics'; } } - } + }, + startRollingMaintenance: rollingMaintenanceAction({ listView: true, entity: 'clusters' }) }, detailView: { @@ -15215,6 +15418,56 @@ actions: { + startRollingMaintenance: { + label: 'label.start.rolling.maintenance', + textLabel: 'label.start.rolling.maintenance', + messages: { + notification: function (args) { + return 'label.start.rolling.maintenance'; + } + }, + createForm: { + title: 'label.start.rolling.maintenance', + fields: { + timeout: { + label: 'label.timeout', + }, + force: { + isBoolean: true, + label: 'label.start.rolling.maintenance.force' + }, + payload: { + label: 'label.start.rolling.maintenance.payload' + } + } + }, + action: function (args) { + var data = { + clusterids: args.context.clusters[0].id, + force: args.data.force, + timeout: args.data.timeout, + payload: args.data.payload + }; + $.ajax({ + url: createURL("startRollingMaintenance"), + dataType: "json", + data: data, + async: true, + success: function (json) { + var item = json.rollingmaintenance; + args.response.success({ + actionFilter: 
zoneActionfilter, + data: item + }); + } + }); + }, + notification: { + poll: function (args) { + args.complete(); + } + } + }, edit: { label: 'label.edit', action: function (args) { @@ -16002,6 +16255,7 @@ listView: { section: 'hosts', id: 'hosts', + multiSelect: true, fields: { name: { label: 'label.name' @@ -16697,7 +16951,8 @@ return 'label.metrics'; } } - } + }, + startRollingMaintenance: rollingMaintenanceAction({ listView: true, entity: 'hosts' }) }, detailView: { name: "Host details", @@ -16706,6 +16961,56 @@ path: 'instances' }, actions: { + startRollingMaintenance: { + label: 'label.start.rolling.maintenance', + textLabel: 'label.start.rolling.maintenance', + messages: { + notification: function (args) { + return 'label.start.rolling.maintenance'; + } + }, + createForm: { + title: 'label.start.rolling.maintenance', + fields: { + timeout: { + label: 'label.timeout', + }, + force: { + isBoolean: true, + label: 'label.start.rolling.maintenance.force' + }, + payload: { + label: 'label.start.rolling.maintenance.payload' + } + } + }, + action: function (args) { + var data = { + hostids: args.context.hosts[0].id, + force: args.data.force, + timeout: args.data.timeout, + payload: args.data.payload + }; + $.ajax({ + url: createURL("startRollingMaintenance"), + dataType: "json", + data: data, + async: true, + success: function (json) { + var item = json.rollingmaintenance; + args.response.success({ + actionFilter: zoneActionfilter, + data: item + }); + } + }); + }, + notification: { + poll: function (args) { + args.complete(); + } + } + }, edit: { label: 'label.edit', action: function (args) { @@ -22173,6 +22478,7 @@ allowedActions.push("disableHA"); } + allowedActions.push("startRollingMaintenance"); return allowedActions; } @@ -22224,6 +22530,7 @@ //$("#tab_ipallocation, #add_iprange_button, #tab_network_device, #add_network_device_button").hide(); } + allowedActions.push("startRollingMaintenance"); return allowedActions; } @@ -22270,6 +22577,7 @@ 
allowedActions.push("disableHA"); } + allowedActions.push("startRollingMaintenance"); return allowedActions; } @@ -22292,12 +22600,16 @@ if (jsonObj.hypervisor == "KVM") { allowedActions.push("secureKVMHost"); + allowedActions.push("startRollingMaintenance"); } } else if (jsonObj.resourcestate == "ErrorInMaintenance") { allowedActions.push("edit"); allowedActions.push("enableMaintenanceMode"); allowedActions.push("cancelMaintenanceMode"); + if (jsonObj.hypervisor == "KVM") { + allowedActions.push("startRollingMaintenance"); + } } else if (jsonObj.resourcestate == "PrepareForMaintenance" || jsonObj.resourcestate == 'ErrorInPrepareForMaintenance') { allowedActions.push("edit"); allowedActions.push("cancelMaintenanceMode"); diff --git a/utils/src/main/java/com/cloud/utils/script/Script.java b/utils/src/main/java/com/cloud/utils/script/Script.java index 35aa24b1a84..13845cda3a0 100644 --- a/utils/src/main/java/com/cloud/utils/script/Script.java +++ b/utils/src/main/java/com/cloud/utils/script/Script.java @@ -66,6 +66,10 @@ public class Script implements Callable { Process _process; Thread _thread; + public boolean isTimeout() { + return _isTimeOut; + } + public int getExitValue() { return _process.exitValue(); }