Merge pull request #1293 from miguelaferreira/nsx-heath-check

Add Health Check Command to NSX plugin

The NSX plugin does not support the CheckHealthCommand. Instead, it fakes a PingCommand as a call to the control cluster status API.
However, we have seen in production that the management server will sometimes find the NSX controller to be behind on ping, and that will trigger a CheckHealthCommand, which will return with an unsupported command answer.
Once this happens, the controller is put into Alert state and will not recover until the management server is restarted.

In addition, during the investigation, there will be a null pointer exception due to the fact that the NSX controllers do not live in a pod.

This PR tries to address those two issues.

* pr/1293:
  Implement CheckHealthCommand for NSX controllers
  Fix log message that refers to agent, not host
  Prevent NullPointerException when host does not belong to a pod

Signed-off-by: Remi Bergsma <github@remi.nl>
This commit is contained in:
Remi Bergsma 2016-01-16 20:39:43 +01:00
commit a767407fd2
3 changed files with 214 additions and 71 deletions

View File

@ -121,10 +121,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
protected static final Logger status_logger = Logger.getLogger(Status.class); protected static final Logger status_logger = Logger.getLogger(Status.class);
/** /**
* _agents is a ConcurrentHashMap, but it is used from within a synchronized block. * _agents is a ConcurrentHashMap, but it is used from within a synchronized block. This will be reported by findbugs as JLM_JSR166_UTILCONCURRENT_MONITORENTER. Maybe a
* This will be reported by findbugs as JLM_JSR166_UTILCONCURRENT_MONITORENTER. * ConcurrentHashMap is not the right thing to use here, but i'm not sure so i leave it alone.
* Maybe a ConcurrentHashMap is not the right thing to use here, but i'm not sure
* so i leave it alone.
*/ */
protected ConcurrentHashMap<Long, AgentAttache> _agents = new ConcurrentHashMap<Long, AgentAttache>(10007); protected ConcurrentHashMap<Long, AgentAttache> _agents = new ConcurrentHashMap<Long, AgentAttache>(10007);
protected List<Pair<Integer, Listener>> _hostMonitors = new ArrayList<Pair<Integer, Listener>>(17); protected List<Pair<Integer, Listener>> _hostMonitors = new ArrayList<Pair<Integer, Listener>>(17);
@ -208,7 +206,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
_nodeId = ManagementServerNode.getManagementServerId(); _nodeId = ManagementServerNode.getManagementServerId();
s_logger.info("Configuring AgentManagerImpl. management server node id(msid): " + _nodeId); s_logger.info("Configuring AgentManagerImpl. management server node id(msid): " + _nodeId);
final long lastPing = (System.currentTimeMillis() >> 10) - (long)(PingTimeout.value() * PingInterval.value()); final long lastPing = (System.currentTimeMillis() >> 10) - (long) (PingTimeout.value() * PingInterval.value());
_hostDao.markHostsAsDisconnected(_nodeId, lastPing); _hostDao.markHostsAsDisconnected(_nodeId, lastPing);
registerForHostEvents(new BehindOnPingListener(), true, true, false); registerForHostEvents(new BehindOnPingListener(), true, true, false);
@ -216,7 +214,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
_executor = new ThreadPoolExecutor(threads, threads, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentTaskPool")); _executor = new ThreadPoolExecutor(threads, threads, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentTaskPool"));
_connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentConnectTaskPool")); _connectExecutor = new ThreadPoolExecutor(100, 500, 60l, TimeUnit.SECONDS, new LinkedBlockingQueue<Runnable>(), new NamedThreadFactory("AgentConnectTaskPool"));
//allow core threads to time out even when there are no items in the queue // allow core threads to time out even when there are no items in the queue
_connectExecutor.allowCoreThreadTimeOut(true); _connectExecutor.allowCoreThreadTimeOut(true);
_connection = new NioServer("AgentManager", Port.value(), Workers.value() + 10, this); _connection = new NioServer("AgentManager", Port.value(), Workers.value() + 10, this);
@ -235,7 +233,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
} }
protected long getTimeout() { protected long getTimeout() {
return (long)(PingTimeout.value() * PingInterval.value()); return (long) (PingTimeout.value() * PingInterval.value());
} }
@Override @Override
@ -552,7 +550,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
monitor.second().processConnect(host, cmd[i], forRebalance); monitor.second().processConnect(host, cmd[i], forRebalance);
} catch (final Exception e) { } catch (final Exception e) {
if (e instanceof ConnectionException) { if (e instanceof ConnectionException) {
final ConnectionException ce = (ConnectionException)e; final ConnectionException ce = (ConnectionException) e;
if (ce.isSetupError()) { if (ce.isSetupError()) {
s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId + s_logger.warn("Monitor " + monitor.second().getClass().getSimpleName() + " says there is an error in the connect process for " + hostId +
" due to " + e.getMessage()); " due to " + e.getMessage());
@ -622,7 +620,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
try { try {
final Class<?> clazz = Class.forName(resourceName); final Class<?> clazz = Class.forName(resourceName);
final Constructor<?> constructor = clazz.getConstructor(); final Constructor<?> constructor = clazz.getConstructor();
resource = (ServerResource)constructor.newInstance(); resource = (ServerResource) constructor.newInstance();
} catch (final ClassNotFoundException e) { } catch (final ClassNotFoundException e) {
s_logger.warn("Unable to find class " + host.getResource(), e); s_logger.warn("Unable to find class " + host.getResource(), e);
} catch (final InstantiationException e) { } catch (final InstantiationException e) {
@ -690,7 +688,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
boolean initialized = false; boolean initialized = false;
ServerResource resource = null; ServerResource resource = null;
try { try {
//load the respective discoverer // load the respective discoverer
final Discoverer discoverer = _resourceMgr.getMatchingDiscover(host.getHypervisorType()); final Discoverer discoverer = _resourceMgr.getMatchingDiscover(host.getHypervisorType());
if (discoverer == null) { if (discoverer == null) {
s_logger.info("Could not to find a Discoverer to load the resource: " + host.getId() + " for hypervisor type: " + host.getHypervisorType()); s_logger.info("Could not to find a Discoverer to load the resource: " + host.getId() + " for hypervisor type: " + host.getHypervisorType());
@ -804,7 +802,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
} }
removeAgent(attache, nextStatus); removeAgent(attache, nextStatus);
//update the DB // update the DB
if (host != null && transitState) { if (host != null && transitState) {
disconnectAgent(host, event, _nodeId); disconnectAgent(host, event, _nodeId);
} }
@ -821,9 +819,9 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
try { try {
nextStatus = host.getStatus().getNextStatus(event); nextStatus = host.getStatus().getNextStatus(event);
} catch (final NoTransitionException ne) { } catch (final NoTransitionException ne) {
/* Agent may be currently in status of Down, Alert, Removed, namely there is no next status for some events. /*
* Why this can happen? Ask God not me. I hate there was no piece of comment for code handling race condition. * Agent may be currently in status of Down, Alert, Removed, namely there is no next status for some events. Why this can happen? Ask God not me. I hate there was
* God knew what race condition the code dealt with! * no piece of comment for code handling race condition. God knew what race condition the code dealt with!
*/ */
s_logger.debug("Caught exception while getting agent's next status", ne); s_logger.debug("Caught exception while getting agent's next status", ne);
} }
@ -845,7 +843,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
} }
final Status currentStatus = host.getStatus(); final Status currentStatus = host.getStatus();
s_logger.info("The agent " + hostId + " state determined is " + determinedState); s_logger.info("The agent from host " + hostId + " state determined is " + determinedState);
if (determinedState == Status.Down) { if (determinedState == Status.Down) {
final String message = "Host is down: " + host.getId() + "-" + host.getName() + ". Starting HA on the VMs"; final String message = "Host is down: " + host.getId() + "-" + host.getName() + ". Starting HA on the VMs";
@ -883,7 +881,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
// if we end up here we are in alert state, send an alert // if we end up here we are in alert state, send an alert
final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
final HostPodVO podVO = _podDao.findById(host.getPodId()); final HostPodVO podVO = _podDao.findById(host.getPodId());
final String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); final String podName = podVO != null ? podVO.getName() : "NO POD";
final String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podName;
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host in ALERT state, " + hostDesc, _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host in ALERT state, " + hostDesc,
"In availability zone " + host.getDataCenterId() + ", host is in alert state: " + host.getId() + "-" + host.getName()); "In availability zone " + host.getDataCenterId() + ", host is in alert state: " + host.getId() + "-" + host.getName());
} }
@ -1130,7 +1129,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
_request.logD("Processing the first command "); _request.logD("Processing the first command ");
final StartupCommand[] startups = new StartupCommand[_cmds.length]; final StartupCommand[] startups = new StartupCommand[_cmds.length];
for (int i = 0; i < _cmds.length; i++) { for (int i = 0; i < _cmds.length; i++) {
startups[i] = (StartupCommand)_cmds[i]; startups[i] = (StartupCommand) _cmds[i];
} }
final AgentAttache attache = handleConnectedAgent(_link, startups, _request); final AgentAttache attache = handleConnectedAgent(_link, startups, _request);
@ -1141,14 +1140,15 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
} }
protected void connectAgent(final Link link, final Command[] cmds, final Request request) { protected void connectAgent(final Link link, final Command[] cmds, final Request request) {
//send startupanswer to agent in the very beginning, so agent can move on without waiting for the answer for an undetermined time, if we put this logic into another thread pool. // send startupanswer to agent in the very beginning, so agent can move on without waiting for the answer for an undetermined time, if we put this logic into another
// thread pool.
final StartupAnswer[] answers = new StartupAnswer[cmds.length]; final StartupAnswer[] answers = new StartupAnswer[cmds.length];
Command cmd; Command cmd;
for (int i = 0; i < cmds.length; i++) { for (int i = 0; i < cmds.length; i++) {
cmd = cmds[i]; cmd = cmds[i];
if (cmd instanceof StartupRoutingCommand || cmd instanceof StartupProxyCommand || cmd instanceof StartupSecondaryStorageCommand || if (cmd instanceof StartupRoutingCommand || cmd instanceof StartupProxyCommand || cmd instanceof StartupSecondaryStorageCommand ||
cmd instanceof StartupStorageCommand) { cmd instanceof StartupStorageCommand) {
answers[i] = new StartupAnswer((StartupCommand)cmds[i], 0, getPingInterval()); answers[i] = new StartupAnswer((StartupCommand) cmds[i], 0, getPingInterval());
break; break;
} }
} }
@ -1168,7 +1168,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
} }
protected void processRequest(final Link link, final Request request) { protected void processRequest(final Link link, final Request request) {
final AgentAttache attache = (AgentAttache)link.attachment(); final AgentAttache attache = (AgentAttache) link.attachment();
final Command[] cmds = request.getCommands(); final Command[] cmds = request.getCommands();
Command cmd = cmds[0]; Command cmd = cmds[0];
boolean logD = true; boolean logD = true;
@ -1177,7 +1177,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
if (!(cmd instanceof StartupCommand)) { if (!(cmd instanceof StartupCommand)) {
s_logger.warn("Throwing away a request because it came through as the first command on a connect: " + request); s_logger.warn("Throwing away a request because it came through as the first command on a connect: " + request);
} else { } else {
//submit the task for execution // submit the task for execution
request.logD("Scheduling the first command "); request.logD("Scheduling the first command ");
connectAgent(link, cmds, request); connectAgent(link, cmds, request);
} }
@ -1207,40 +1207,40 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
Answer answer = null; Answer answer = null;
try { try {
if (cmd instanceof StartupRoutingCommand) { if (cmd instanceof StartupRoutingCommand) {
final StartupRoutingCommand startup = (StartupRoutingCommand)cmd; final StartupRoutingCommand startup = (StartupRoutingCommand) cmd;
answer = new StartupAnswer(startup, attache.getId(), getPingInterval()); answer = new StartupAnswer(startup, attache.getId(), getPingInterval());
} else if (cmd instanceof StartupProxyCommand) { } else if (cmd instanceof StartupProxyCommand) {
final StartupProxyCommand startup = (StartupProxyCommand)cmd; final StartupProxyCommand startup = (StartupProxyCommand) cmd;
answer = new StartupAnswer(startup, attache.getId(), getPingInterval()); answer = new StartupAnswer(startup, attache.getId(), getPingInterval());
} else if (cmd instanceof StartupSecondaryStorageCommand) { } else if (cmd instanceof StartupSecondaryStorageCommand) {
final StartupSecondaryStorageCommand startup = (StartupSecondaryStorageCommand)cmd; final StartupSecondaryStorageCommand startup = (StartupSecondaryStorageCommand) cmd;
answer = new StartupAnswer(startup, attache.getId(), getPingInterval()); answer = new StartupAnswer(startup, attache.getId(), getPingInterval());
} else if (cmd instanceof StartupStorageCommand) { } else if (cmd instanceof StartupStorageCommand) {
final StartupStorageCommand startup = (StartupStorageCommand)cmd; final StartupStorageCommand startup = (StartupStorageCommand) cmd;
answer = new StartupAnswer(startup, attache.getId(), getPingInterval()); answer = new StartupAnswer(startup, attache.getId(), getPingInterval());
} else if (cmd instanceof ShutdownCommand) { } else if (cmd instanceof ShutdownCommand) {
final ShutdownCommand shutdown = (ShutdownCommand)cmd; final ShutdownCommand shutdown = (ShutdownCommand) cmd;
final String reason = shutdown.getReason(); final String reason = shutdown.getReason();
s_logger.info("Host " + attache.getId() + " has informed us that it is shutting down with reason " + reason + " and detail " + s_logger.info("Host " + attache.getId() + " has informed us that it is shutting down with reason " + reason + " and detail " +
shutdown.getDetail()); shutdown.getDetail());
if (reason.equals(ShutdownCommand.Update)) { if (reason.equals(ShutdownCommand.Update)) {
//disconnectWithoutInvestigation(attache, Event.UpdateNeeded); // disconnectWithoutInvestigation(attache, Event.UpdateNeeded);
throw new CloudRuntimeException("Agent update not implemented"); throw new CloudRuntimeException("Agent update not implemented");
} else if (reason.equals(ShutdownCommand.Requested)) { } else if (reason.equals(ShutdownCommand.Requested)) {
disconnectWithoutInvestigation(attache, Event.ShutdownRequested); disconnectWithoutInvestigation(attache, Event.ShutdownRequested);
} }
return; return;
} else if (cmd instanceof AgentControlCommand) { } else if (cmd instanceof AgentControlCommand) {
answer = handleControlCommand(attache, (AgentControlCommand)cmd); answer = handleControlCommand(attache, (AgentControlCommand) cmd);
} else { } else {
handleCommands(attache, request.getSequence(), new Command[] {cmd}); handleCommands(attache, request.getSequence(), new Command[] { cmd });
if (cmd instanceof PingCommand) { if (cmd instanceof PingCommand) {
final long cmdHostId = ((PingCommand)cmd).getHostId(); final long cmdHostId = ((PingCommand) cmd).getHostId();
// if the router is sending a ping, verify the // if the router is sending a ping, verify the
// gateway was pingable // gateway was pingable
if (cmd instanceof PingRoutingCommand) { if (cmd instanceof PingRoutingCommand) {
final boolean gatewayAccessible = ((PingRoutingCommand)cmd).isGatewayAccessible(); final boolean gatewayAccessible = ((PingRoutingCommand) cmd).isGatewayAccessible();
final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId)); final HostVO host = _hostDao.findById(Long.valueOf(cmdHostId));
if (host != null) { if (host != null) {
@ -1250,7 +1250,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId());
final HostPodVO podVO = _podDao.findById(host.getPodId()); final HostPodVO podVO = _podDao.findById(host.getPodId());
final String hostDesc = final String hostDesc =
"name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: "
+ podVO.getName();
_alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId(), _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId(),
"Host lost connection to gateway, " + hostDesc, "Host [" + hostDesc + "Host lost connection to gateway, " + hostDesc, "Host [" + hostDesc +
@ -1263,7 +1264,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
"; can't find the host in the DB"); "; can't find the host in the DB");
} }
} }
answer = new PingAnswer((PingCommand)cmd); answer = new PingAnswer((PingCommand) cmd);
} else if (cmd instanceof ReadyAnswer) { } else if (cmd instanceof ReadyAnswer) {
final HostVO host = _hostDao.findById(attache.getId()); final HostVO host = _hostDao.findById(attache.getId());
if (host == null) { if (host == null) {
@ -1299,7 +1300,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
} }
protected void processResponse(final Link link, final Response response) { protected void processResponse(final Link link, final Response response) {
final AgentAttache attache = (AgentAttache)link.attachment(); final AgentAttache attache = (AgentAttache) link.attachment();
if (attache == null) { if (attache == null) {
s_logger.warn("Unable to process: " + response); s_logger.warn("Unable to process: " + response);
} else if (!attache.processAnswers(response.getSequence(), response)) { } else if (!attache.processAnswers(response.getSequence(), response)) {
@ -1317,7 +1318,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
try { try {
final Request event = Request.parse(data); final Request event = Request.parse(data);
if (event instanceof Response) { if (event instanceof Response) {
processResponse(task.getLink(), (Response)event); processResponse(task.getLink(), (Response) event);
} else { } else {
processRequest(task.getLink(), event); processRequest(task.getLink(), event);
} }
@ -1332,7 +1333,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
} else if (type == Task.Type.CONNECT) { } else if (type == Task.Type.CONNECT) {
} else if (type == Task.Type.DISCONNECT) { } else if (type == Task.Type.DISCONNECT) {
final Link link = task.getLink(); final Link link = task.getLink();
final AgentAttache attache = (AgentAttache)link.attachment(); final AgentAttache attache = (AgentAttache) link.attachment();
if (attache != null) { if (attache != null) {
disconnectWithInvestigation(attache, Event.AgentDisconnected); disconnectWithInvestigation(attache, Event.AgentDisconnected);
} else { } else {
@ -1533,11 +1534,9 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
final ResourceState resourceState = h.getResourceState(); final ResourceState resourceState = h.getResourceState();
if (resourceState == ResourceState.Disabled || resourceState == ResourceState.Maintenance || resourceState == ResourceState.ErrorInMaintenance) { if (resourceState == ResourceState.Disabled || resourceState == ResourceState.Maintenance || resourceState == ResourceState.ErrorInMaintenance) {
/* /*
* Host is in non-operation state, so no * Host is in non-operation state, so no investigation and direct put agent to Disconnected
* investigation and direct put agent to
* Disconnected
*/ */
status_logger.debug("Ping timeout but host " + agentId + " is in resource state of " + resourceState + ", so no investigation"); status_logger.debug("Ping timeout but agent " + agentId + " is in resource state of " + resourceState + ", so no investigation");
disconnectWithoutInvestigation(agentId, Event.ShutdownRequested); disconnectWithoutInvestigation(agentId, Event.ShutdownRequested);
} else { } else {
final HostVO host = _hostDao.findById(agentId); final HostVO host = _hostDao.findById(agentId);
@ -1547,7 +1546,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
s_logger.warn("Disconnect agent for CPVM/SSVM due to physical connection close. host: " + host.getId()); s_logger.warn("Disconnect agent for CPVM/SSVM due to physical connection close. host: " + host.getId());
disconnectWithoutInvestigation(agentId, Event.ShutdownRequested); disconnectWithoutInvestigation(agentId, Event.ShutdownRequested);
} else { } else {
status_logger.debug("Ping timeout for host " + agentId + ", do invstigation"); status_logger.debug("Ping timeout for agent " + agentId + ", do invstigation");
disconnectWithInvestigation(agentId, Event.PingTimeout); disconnectWithInvestigation(agentId, Event.PingTimeout);
} }
} }
@ -1653,7 +1652,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl
@Override @Override
public ConfigKey<?>[] getConfigKeys() { public ConfigKey<?>[] getConfigKeys() {
return new ConfigKey<?>[] {CheckTxnBeforeSending, Workers, Port, PingInterval, PingTimeout, Wait, AlertWait, DirectAgentLoadSize, DirectAgentPoolSize, DirectAgentThreadCap}; return new ConfigKey<?>[] { CheckTxnBeforeSending, Workers, Port, PingInterval, PingTimeout, Wait, AlertWait, DirectAgentLoadSize, DirectAgentPoolSize,
DirectAgentThreadCap };
} }
} }

View File

@ -0,0 +1,63 @@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
package com.cloud.network.resource.wrapper;
import org.apache.log4j.Logger;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.CheckHealthAnswer;
import com.cloud.agent.api.CheckHealthCommand;
import com.cloud.network.nicira.ControlClusterStatus;
import com.cloud.network.nicira.NiciraNvpApi;
import com.cloud.network.nicira.NiciraNvpApiException;
import com.cloud.network.resource.NiciraNvpResource;
import com.cloud.resource.CommandWrapper;
import com.cloud.resource.ResourceWrapper;
/**
 * Handles {@link CheckHealthCommand} for a {@link NiciraNvpResource} by asking the
 * Nicira NVP API for the control cluster status.
 *
 * The resource is reported healthy only when the control cluster status string is
 * exactly {@code "stable"}. Any other status (including a missing one), a missing
 * status object, or a {@link NiciraNvpApiException} raised by the API call results
 * in an unhealthy answer.
 */
@ResourceWrapper(handles = CheckHealthCommand.class)
public class NiciraCheckHealthCommandWrapper extends CommandWrapper<CheckHealthCommand, Answer, NiciraNvpResource> {

    /** The only cluster status value that is considered healthy. */
    private static final String CONTROL_CLUSTER_STATUS_IS_STABLE = "stable";
    private static final Logger s_logger = Logger.getLogger(NiciraCheckHealthCommandWrapper.class);

    /**
     * Executes the health check against the control cluster of the given resource.
     *
     * @param command the health check command being answered
     * @param serverResource the Nicira NVP resource whose API is queried
     * @return a {@link CheckHealthAnswer} built from the command and the computed health flag
     */
    @Override
    public Answer execute(final CheckHealthCommand command, final NiciraNvpResource serverResource) {
        final NiciraNvpApi niciraNvpApi = serverResource.getNiciraNvpApi();
        return new CheckHealthAnswer(command, isControlClusterHealthy(niciraNvpApi));
    }

    /**
     * Queries the control cluster status and decides whether it counts as healthy.
     *
     * @param niciraNvpApi the API client used to fetch the control cluster status
     * @return {@code true} only when a status object is returned and its status is stable
     */
    private boolean isControlClusterHealthy(final NiciraNvpApi niciraNvpApi) {
        try {
            final ControlClusterStatus clusterStatus = niciraNvpApi.getControlClusterStatus();
            if (clusterStatus == null) {
                // Without this guard a missing status object would surface as a
                // NullPointerException instead of an unhealthy answer.
                s_logger.warn("Control cluster status is not available. Reporting the control cluster as unhealthy");
                return false;
            }
            final String status = clusterStatus.getClusterStatus();
            if (clusterIsUnstable(status)) {
                s_logger.warn("Control cluster is not stable. Current status is " + status);
                return false;
            }
            return true;
        } catch (final NiciraNvpApiException e) {
            s_logger.error("Exception caught while checking control cluster status during health check", e);
            return false;
        }
    }

    /**
     * Tells whether the given status string denotes an unstable control cluster.
     *
     * @param clusterStatus the status string reported by the control cluster, may be {@code null}
     * @return {@code true} unless the status equals {@code "stable"}; a {@code null} status is unstable
     */
    protected boolean clusterIsUnstable(final String clusterStatus) {
        return !CONTROL_CLUSTER_STATUS_IS_STABLE.equals(clusterStatus);
    }
}

View File

@ -0,0 +1,80 @@
//
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
//
package com.cloud.network.resource.wrapper;
import static org.hamcrest.MatcherAssert.assertThat;
import static org.hamcrest.Matchers.equalTo;
import static org.mockito.Mockito.mock;
import static org.mockito.Mockito.when;
import org.junit.Before;
import org.junit.Test;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.CheckHealthCommand;
import com.cloud.network.nicira.ControlClusterStatus;
import com.cloud.network.nicira.NiciraNvpApi;
import com.cloud.network.nicira.NiciraNvpApiException;
import com.cloud.network.resource.NiciraNvpResource;
/**
 * Unit tests for {@link NiciraCheckHealthCommandWrapper}, run against a mocked
 * {@link NiciraNvpResource} and {@link NiciraNvpApi}.
 */
public class NiciraCheckHealthCommandWrapperTest {

    private final NiciraNvpResource niciraResource = mock(NiciraNvpResource.class);
    private final NiciraNvpApi niciraApi = mock(NiciraNvpApi.class);
    private final NiciraCheckHealthCommandWrapper commandWrapper = new NiciraCheckHealthCommandWrapper();

    /** Wires the mocked API into the mocked resource before each test. */
    @Before
    public void setup() {
        when(niciraResource.getNiciraNvpApi()).thenReturn(niciraApi);
    }

    /** A freshly constructed status object is not "stable", so the answer must be negative. */
    @Test
    public void testExecuteWhenClusterIsNotStable() throws Exception {
        when(niciraApi.getControlClusterStatus()).thenReturn(new ControlClusterStatus());

        final Answer answer = commandWrapper.execute(new CheckHealthCommand(), niciraResource);

        assertThat(answer.getResult(), equalTo(false));
    }

    /** An API exception while fetching the status must produce a negative answer. */
    @SuppressWarnings("unchecked")
    @Test
    public void testExecuteWhenApiThrowsException() throws Exception {
        when(niciraApi.getControlClusterStatus()).thenThrow(NiciraNvpApiException.class);

        final Answer answer = commandWrapper.execute(new CheckHealthCommand(), niciraResource);

        assertThat(answer.getResult(), equalTo(false));
    }

    /** A status of exactly "stable" must produce a positive answer. */
    @Test
    public void testExecuteWhenClusterIsStable() throws Exception {
        final ControlClusterStatus statusValue = mock(ControlClusterStatus.class);
        when(statusValue.getClusterStatus()).thenReturn("stable");
        when(niciraApi.getControlClusterStatus()).thenReturn(statusValue);

        final Answer answer = commandWrapper.execute(new CheckHealthCommand(), niciraResource);

        assertThat(answer.getResult(), equalTo(true));
    }
}