bug 11910: full sync will only stop unknown VMs on agent

This commit is contained in:
Abhinandan Prateek 2011-11-23 16:48:12 +05:30
parent beb38c8310
commit da20d33e27
4 changed files with 27 additions and 43 deletions

View File

@ -25,8 +25,9 @@ import com.cloud.vm.VirtualMachine.State;
public class ClusterSyncAnswer extends Answer { public class ClusterSyncAnswer extends Answer {
private long _clusterId; private long _clusterId;
private HashMap<String, Pair<String, State>> _newStates; private HashMap<String, Pair<String, State>> _newStates;
private HashMap<String, Pair<String, State>> _allStates;
private int _type = -1; // 0 for full, 1 for delta private int _type = -1; // 0 for full, 1 for delta
private boolean isExecuted=false; // this is to avoidf double execution first time, due to framework ??? private boolean isExecuted=false; // this is to avoid double execution first time, due to framework ???
public static final int FULL_SYNC=0; public static final int FULL_SYNC=0;
@ -39,10 +40,19 @@ public class ClusterSyncAnswer extends Answer {
_type = -1; _type = -1;
} }
public ClusterSyncAnswer(long clusterId, HashMap<String, Pair<String, State>> newStates, int type){
public ClusterSyncAnswer(long clusterId, HashMap<String, Pair<String, State>> newStates){
_clusterId = clusterId; _clusterId = clusterId;
_newStates = newStates; _newStates = newStates;
_type = type; _type = DELTA_SYNC;
result = true;
}
public ClusterSyncAnswer(long clusterId, HashMap<String, Pair<String, State>> newStates, HashMap<String, Pair<String, State>> allStates){
_clusterId = clusterId;
_newStates = newStates;
_allStates = allStates;
_type = FULL_SYNC;
result = true; result = true;
} }
@ -53,6 +63,10 @@ public class ClusterSyncAnswer extends Answer {
public HashMap<String, Pair<String, State>> getNewStates() { public HashMap<String, Pair<String, State>> getNewStates() {
return _newStates; return _newStates;
} }
public HashMap<String, Pair<String, State>> getAllStates() {
return _allStates;
}
public boolean isFull(){ public boolean isFull(){
return _type==0; return _type==0;

View File

@ -6578,23 +6578,17 @@ public abstract class CitrixResourceBase implements ServerResource, HypervisorRe
s_logger.warn("Check for master failed, failing the Cluster sync command"); s_logger.warn("Check for master failed, failing the Cluster sync command");
return new ClusterSyncAnswer(cmd.getClusterId()); return new ClusterSyncAnswer(cmd.getClusterId());
} }
HashMap<String, Pair<String, State>> newStates; HashMap<String, Pair<String, State>> newStates = deltaClusterSync(conn);
int sync_type=-1;
if (cmd.isRightStep()){ if (cmd.isRightStep()){
// do full sync // do full sync
newStates=fullClusterSync(conn); HashMap<String, Pair<String, State>> allStates=fullClusterSync(conn);
sync_type = ClusterSyncAnswer.FULL_SYNC; cmd.incrStep();
return new ClusterSyncAnswer(cmd.getClusterId(), newStates, allStates);
} }
else { else {
// do delta sync cmd.incrStep();
newStates = deltaClusterSync(conn); return new ClusterSyncAnswer(cmd.getClusterId(), newStates);
if (newStates == null) {
s_logger.warn("Unable to get current status from sync");
}
sync_type = ClusterSyncAnswer.DELTA_SYNC;
} }
cmd.incrStep();
return new ClusterSyncAnswer(cmd.getClusterId(), newStates, sync_type);
} }

View File

@ -159,7 +159,7 @@ public enum Config {
PingInterval("Advanced", AgentManager.class, Integer.class, "ping.interval", "60", "Ping interval in seconds", null), PingInterval("Advanced", AgentManager.class, Integer.class, "ping.interval", "60", "Ping interval in seconds", null),
PingTimeout("Advanced", AgentManager.class, Float.class, "ping.timeout", "2.5", "Multiplier to ping.interval before announcing an agent has timed out", null), PingTimeout("Advanced", AgentManager.class, Float.class, "ping.timeout", "2.5", "Multiplier to ping.interval before announcing an agent has timed out", null),
ClusterDeltaSyncInterval("Advanced", AgentManager.class, Integer.class, "sync.interval", "60", "Cluster Delta sync interval in seconds", null), ClusterDeltaSyncInterval("Advanced", AgentManager.class, Integer.class, "sync.interval", "60", "Cluster Delta sync interval in seconds", null),
ClusterFullSyncSkipSteps("Advanced", AgentManager.class, Integer.class, "skip.steps", "30", "Cluster full sync skip steps count", null), ClusterFullSyncSkipSteps("Advanced", AgentManager.class, Integer.class, "skip.steps", "60", "Cluster full sync skip steps count", null),
Port("Advanced", AgentManager.class, Integer.class, "port", "8250", "Port to listen on for agent connection.", null), Port("Advanced", AgentManager.class, Integer.class, "port", "8250", "Port to listen on for agent connection.", null),
RouterCpuMHz("Advanced", NetworkManager.class, Integer.class, "router.cpu.mhz", String.valueOf(VirtualNetworkApplianceManager.DEFAULT_ROUTER_CPU_MHZ), "Default CPU speed (MHz) for router VM.", null), RouterCpuMHz("Advanced", NetworkManager.class, Integer.class, "router.cpu.mhz", String.valueOf(VirtualNetworkApplianceManager.DEFAULT_ROUTER_CPU_MHZ), "Default CPU speed (MHz) for router VM.", null),
RestartRetryInterval("Advanced", HighAvailabilityManager.class, Integer.class, "restart.retry.interval", "600", "Time (in seconds) between retries to restart a vm", null), RestartRetryInterval("Advanced", HighAvailabilityManager.class, Integer.class, "restart.retry.interval", "600", "Time (in seconds) between retries to restart a vm", null),

View File

@ -1703,7 +1703,6 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
} }
public Commands fullSync(final long clusterId, Map<String, Pair<String, State>> newStates) { public Commands fullSync(final long clusterId, Map<String, Pair<String, State>> newStates) {
Commands commands = new Commands(OnError.Continue); Commands commands = new Commands(OnError.Continue);
Map<Long, AgentVmInfo> infos = convertToInfos(newStates); Map<Long, AgentVmInfo> infos = convertToInfos(newStates);
@ -1711,30 +1710,6 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
final List<VMInstanceVO> vms = _vmDao.listByClusterId(clusterId); final List<VMInstanceVO> vms = _vmDao.listByClusterId(clusterId);
for (VMInstanceVO vm : vms) { for (VMInstanceVO vm : vms) {
AgentVmInfo info = infos.remove(vm.getId()); AgentVmInfo info = infos.remove(vm.getId());
VMInstanceVO castedVm = null;
if (info == null) {
// the vm is not there on cluster, check the vm status in DB
if (vm.getState() == State.Starting && (DateUtil.currentGMTTime().getTime() - vm.getUpdateTime().getTime()) < 10*60*1000){
continue; // ignoring this VM as it is still settling
}
info = new AgentVmInfo(vm.getInstanceName(), getVmGuru(vm), vm, State.Stopped);
castedVm = info.guru.findById(vm.getId());
hId = vm.getHostId() == null ? vm.getLastHostId() : vm.getHostId();
} else {
castedVm = info.vm;
String hostGuid = info.getHostUuid();
Host host = _resourceMgr.findHostByGuid(hostGuid);
if (host == null) {
infos.put(vm.getId(), info);
continue;
}
hId = host.getId();
}
HypervisorGuru hvGuru = _hvGuruMgr.getGuru(castedVm.getHypervisorType());
Command command = compareState(hId, castedVm, info, false, hvGuru.trackVmHostChange());
if (command != null) {
commands.addCommand(command);
}
} }
for (final AgentVmInfo left : infos.values()) { for (final AgentVmInfo left : infos.values()) {
s_logger.warn("Stopping a VM that we have no record of: " + left.name); s_logger.warn("Stopping a VM that we have no record of: " + left.name);
@ -2072,7 +2047,8 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
ClusterSyncAnswer hs = (ClusterSyncAnswer) answer; ClusterSyncAnswer hs = (ClusterSyncAnswer) answer;
if (hs.execute()){ if (hs.execute()){
if (hs.isFull()) { if (hs.isFull()) {
fullSync(hs.getClusterId(), hs.getNewStates()); deltaSync(hs.getNewStates());
fullSync(hs.getClusterId(), hs.getAllStates());
} else if (hs.isDelta()){ } else if (hs.isDelta()){
deltaSync(hs.getNewStates()); deltaSync(hs.getNewStates());
} }
@ -2149,7 +2125,7 @@ public class VirtualMachineManagerImpl implements VirtualMachineManager, Listene
Long clusterId = agent.getClusterId(); Long clusterId = agent.getClusterId();
long agentId = agent.getId(); long agentId = agent.getId();
if (agent.getHypervisorType() == HypervisorType.XenServer) { // only fro Xen if (agent.getHypervisorType() == HypervisorType.XenServer) { // only for Xen
ClusterSyncCommand syncCmd = new ClusterSyncCommand(Integer.parseInt(Config.ClusterDeltaSyncInterval.getDefaultValue()), ClusterSyncCommand syncCmd = new ClusterSyncCommand(Integer.parseInt(Config.ClusterDeltaSyncInterval.getDefaultValue()),
Integer.parseInt(Config.ClusterFullSyncSkipSteps.getDefaultValue()), clusterId); Integer.parseInt(Config.ClusterFullSyncSkipSteps.getDefaultValue()), clusterId);
try { try {