CLOUDSTACK-8530: KVM hosts without active agents should be in Disconnected state

KVM hosts which are actually up, but whose agents are shut down, should be put
in the Disconnected state. This would avoid getting the VMs HA'd, and other commands
such as deploying a VM will exclude that host and save us from errors.

The improvement is that we first try to contact the KVM host itself. If that fails,
we assume that it's disconnected, and then ask its KVM neighbours if they can
check its status. If all of the KVM neighbours tell us that it's Down and we're
unable to reach the KVM host, then the host is possibly down. If any of the
KVM neighbours tells us that it's Up but we're unable to reach the KVM host, then
we can be sure that the agent is offline but the host is running.

Signed-off-by: Rohit Yadav <rohit.yadav@shapeblue.com>
Signed-off-by: wilderrodrigues <wrodrigues@schubergphilis.com>

This closes #340
This commit is contained in:
Rohit Yadav 2015-06-01 14:53:58 +02:00 committed by wilderrodrigues
parent f2b1ec2c7d
commit f341246888

View File

@ -18,13 +18,6 @@
*/
package com.cloud.ha;
import java.util.List;
import javax.ejb.Local;
import javax.inject.Inject;
import org.apache.log4j.Logger;
import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.CheckOnHostCommand;
@ -35,6 +28,11 @@ import com.cloud.host.dao.HostDao;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.resource.ResourceManager;
import com.cloud.utils.component.AdapterBase;
import org.apache.log4j.Logger;
import javax.ejb.Local;
import javax.inject.Inject;
import java.util.List;
@Local(value = Investigator.class)
public class KVMInvestigator extends AdapterBase implements Investigator {
@ -64,22 +62,47 @@ public class KVMInvestigator extends AdapterBase implements Investigator {
if (agent.getHypervisorType() != Hypervisor.HypervisorType.KVM && agent.getHypervisorType() != Hypervisor.HypervisorType.LXC) {
return null;
}
Status hostStatus = null;
Status neighbourStatus = null;
CheckOnHostCommand cmd = new CheckOnHostCommand(agent);
try {
Answer answer = _agentMgr.easySend(agent.getId(), cmd);
if (answer != null) {
hostStatus = answer.getResult() ? Status.Down : Status.Up;
}
} catch (Exception e) {
s_logger.debug("Failed to send command to host: " + agent.getId());
}
if (hostStatus == null) {
hostStatus = Status.Disconnected;
}
List<HostVO> neighbors = _resourceMgr.listHostsInClusterByStatus(agent.getClusterId(), Status.Up);
for (HostVO neighbor : neighbors) {
if (neighbor.getId() == agent.getId() || (neighbor.getHypervisorType() != Hypervisor.HypervisorType.KVM && neighbor.getHypervisorType() != Hypervisor.HypervisorType.LXC)) {
continue;
}
s_logger.debug("Investigating host:" + agent.getId() + " via neighbouring host:" + neighbor.getId());
try {
Answer answer = _agentMgr.easySend(neighbor.getId(), cmd);
if (answer != null) {
return answer.getResult() ? Status.Down : Status.Up;
neighbourStatus = answer.getResult() ? Status.Down : Status.Up;
s_logger.debug("Neighbouring host:" + neighbor.getId() + " returned status:" + neighbourStatus + " for the investigated host:" + agent.getId());
if (neighbourStatus == Status.Up) {
break;
}
}
} catch (Exception e) {
s_logger.debug("Failed to send command to host: " + neighbor.getId());
}
}
return null;
if (neighbourStatus == Status.Up && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) {
hostStatus = Status.Disconnected;
}
if (neighbourStatus == Status.Down && (hostStatus == Status.Disconnected || hostStatus == Status.Down)) {
hostStatus = Status.Down;
}
return hostStatus;
}
}