mirror of
				https://github.com/apache/cloudstack.git
				synced 2025-10-26 08:42:29 +01:00 
			
		
		
		
	Merge release branch 4.7 to master
* 4.7: Implement CheckHealthCommand for NSX controllers; fix log message that refers to agent, not host; prevent NullPointerException when host does not belong to a pod
This commit is contained in:
		
						commit
						1b8c464e6b
					
				| @ -121,10 +121,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl | ||||
|     protected static final Logger status_logger = Logger.getLogger(Status.class); | ||||
| 
 | ||||
|     /** | ||||
|      * _agents is a ConcurrentHashMap, but it is used from within a synchronized block. | ||||
|      * This will be reported by findbugs as JLM_JSR166_UTILCONCURRENT_MONITORENTER. | ||||
|      * Maybe a ConcurrentHashMap is not the right thing to use here, but i'm not sure | ||||
|      * so i leave it alone. | ||||
|      * _agents is a ConcurrentHashMap, but it is used from within a synchronized block. This will be reported by findbugs as JLM_JSR166_UTILCONCURRENT_MONITORENTER. Maybe a | ||||
|      * ConcurrentHashMap is not the right thing to use here, but i'm not sure so i leave it alone. | ||||
|      */ | ||||
|     protected ConcurrentHashMap<Long, AgentAttache> _agents = new ConcurrentHashMap<Long, AgentAttache>(10007); | ||||
|     protected List<Pair<Integer, Listener>> _hostMonitors = new ArrayList<Pair<Integer, Listener>>(17); | ||||
| @ -821,9 +819,9 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl | ||||
|             try { | ||||
|                 nextStatus = host.getStatus().getNextStatus(event); | ||||
|             } catch (final NoTransitionException ne) { | ||||
|                 /* Agent may be currently in status of Down, Alert, Removed, namely there is no next status for some events. | ||||
|                  * Why this can happen? Ask God not me. I hate there was no piece of comment for code handling race condition. | ||||
|                  * God knew what race condition the code dealt with! | ||||
|                 /* | ||||
|                  * Agent may be currently in status of Down, Alert, Removed, namely there is no next status for some events. Why this can happen? Ask God not me. I hate there was | ||||
|                  * no piece of comment for code handling race condition. God knew what race condition the code dealt with! | ||||
|                  */ | ||||
|                 s_logger.debug("Caught exception while getting agent's next status", ne); | ||||
|             } | ||||
| @ -845,7 +843,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl | ||||
|                 } | ||||
| 
 | ||||
|                 final Status currentStatus = host.getStatus(); | ||||
|                 s_logger.info("The agent " + hostId + " state determined is " + determinedState); | ||||
|                 s_logger.info("The agent from host " + hostId + " state determined is " + determinedState); | ||||
| 
 | ||||
|                 if (determinedState == Status.Down) { | ||||
|                     final String message = "Host is down: " + host.getId() + "-" + host.getName() + ". Starting HA on the VMs"; | ||||
| @ -883,7 +881,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl | ||||
|                     // if we end up here we are in alert state, send an alert | ||||
|                     final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); | ||||
|                     final HostPodVO podVO = _podDao.findById(host.getPodId()); | ||||
|                     final String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); | ||||
|                     final String podName = podVO != null ? podVO.getName() : "NO POD"; | ||||
|                     final String hostDesc = "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podName; | ||||
|                     _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_HOST, host.getDataCenterId(), host.getPodId(), "Host in ALERT state, " + hostDesc, | ||||
|                                     "In availability zone " + host.getDataCenterId() + ", host is in alert state: " + host.getId() + "-" + host.getName()); | ||||
|                 } | ||||
| @ -1141,7 +1140,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl | ||||
|     } | ||||
| 
 | ||||
|     protected void connectAgent(final Link link, final Command[] cmds, final Request request) { | ||||
|         //send startupanswer to agent in the very beginning, so agent can move on without waiting for the answer for an undetermined time, if we put this logic into another thread pool. | ||||
|         // send startupanswer to agent in the very beginning, so agent can move on without waiting for the answer for an undetermined time, if we put this logic into another | ||||
|         // thread pool. | ||||
|         final StartupAnswer[] answers = new StartupAnswer[cmds.length]; | ||||
|         Command cmd; | ||||
|         for (int i = 0; i < cmds.length; i++) { | ||||
| @ -1250,7 +1250,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl | ||||
|                                         final DataCenterVO dcVO = _dcDao.findById(host.getDataCenterId()); | ||||
|                                         final HostPodVO podVO = _podDao.findById(host.getPodId()); | ||||
|                                         final String hostDesc = | ||||
|                                                 "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " + podVO.getName(); | ||||
|                                                         "name: " + host.getName() + " (id:" + host.getId() + "), availability zone: " + dcVO.getName() + ", pod: " | ||||
|                                                                         + podVO.getName(); | ||||
| 
 | ||||
|                                         _alertMgr.sendAlert(AlertManager.AlertType.ALERT_TYPE_ROUTING, host.getDataCenterId(), host.getPodId(), | ||||
|                                                         "Host lost connection to gateway, " + hostDesc, "Host [" + hostDesc + | ||||
| @ -1533,11 +1534,9 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl | ||||
|                         final ResourceState resourceState = h.getResourceState(); | ||||
|                         if (resourceState == ResourceState.Disabled || resourceState == ResourceState.Maintenance || resourceState == ResourceState.ErrorInMaintenance) { | ||||
|                             /* | ||||
|                              * Host is in non-operation state, so no | ||||
|                              * investigation and direct put agent to | ||||
|                              * Disconnected | ||||
|                              * Host is in non-operation state, so no investigation and direct put agent to Disconnected | ||||
|                              */ | ||||
|                             status_logger.debug("Ping timeout but host " + agentId + " is in resource state of " + resourceState + ", so no investigation"); | ||||
|                             status_logger.debug("Ping timeout but agent " + agentId + " is in resource state of " + resourceState + ", so no investigation"); | ||||
|                             disconnectWithoutInvestigation(agentId, Event.ShutdownRequested); | ||||
|                         } else { | ||||
|                             final HostVO host = _hostDao.findById(agentId); | ||||
| @ -1547,7 +1546,7 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl | ||||
|                                 s_logger.warn("Disconnect agent for CPVM/SSVM due to physical connection close. host: " + host.getId()); | ||||
|                                 disconnectWithoutInvestigation(agentId, Event.ShutdownRequested); | ||||
|                             } else { | ||||
|                                 status_logger.debug("Ping timeout for host " + agentId + ", do invstigation"); | ||||
|                                 status_logger.debug("Ping timeout for agent " + agentId + ", do invstigation"); | ||||
|                                 disconnectWithInvestigation(agentId, Event.PingTimeout); | ||||
|                             } | ||||
|                         } | ||||
| @ -1653,7 +1652,8 @@ public class AgentManagerImpl extends ManagerBase implements AgentManager, Handl | ||||
| 
 | ||||
|     @Override | ||||
|     public ConfigKey<?>[] getConfigKeys() { | ||||
|         return new ConfigKey<?>[] {CheckTxnBeforeSending, Workers, Port, PingInterval, PingTimeout, Wait, AlertWait, DirectAgentLoadSize, DirectAgentPoolSize, DirectAgentThreadCap}; | ||||
|         return new ConfigKey<?>[] { CheckTxnBeforeSending, Workers, Port, PingInterval, PingTimeout, Wait, AlertWait, DirectAgentLoadSize, DirectAgentPoolSize, | ||||
|                         DirectAgentThreadCap }; | ||||
|     } | ||||
| 
 | ||||
| } | ||||
|  | ||||
| @ -0,0 +1,63 @@ | ||||
| // | ||||
| // Licensed to the Apache Software Foundation (ASF) under one | ||||
| // or more contributor license agreements.  See the NOTICE file | ||||
| // distributed with this work for additional information | ||||
| // regarding copyright ownership.  The ASF licenses this file | ||||
| // to you under the Apache License, Version 2.0 (the | ||||
| // "License"); you may not use this file except in compliance | ||||
| // with the License.  You may obtain a copy of the License at | ||||
| // | ||||
| //   http://www.apache.org/licenses/LICENSE-2.0 | ||||
| // | ||||
| // Unless required by applicable law or agreed to in writing, | ||||
| // software distributed under the License is distributed on an | ||||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||||
| // KIND, either express or implied.  See the License for the | ||||
| // specific language governing permissions and limitations | ||||
| // under the License. | ||||
| // | ||||
| 
 | ||||
| package com.cloud.network.resource.wrapper; | ||||
| 
 | ||||
| import org.apache.log4j.Logger; | ||||
| 
 | ||||
| import com.cloud.agent.api.Answer; | ||||
| import com.cloud.agent.api.CheckHealthAnswer; | ||||
| import com.cloud.agent.api.CheckHealthCommand; | ||||
| import com.cloud.network.nicira.ControlClusterStatus; | ||||
| import com.cloud.network.nicira.NiciraNvpApi; | ||||
| import com.cloud.network.nicira.NiciraNvpApiException; | ||||
| import com.cloud.network.resource.NiciraNvpResource; | ||||
| import com.cloud.resource.CommandWrapper; | ||||
| import com.cloud.resource.ResourceWrapper; | ||||
| 
 | ||||
| @ResourceWrapper(handles = CheckHealthCommand.class) | ||||
| public class NiciraCheckHealthCommandWrapper extends CommandWrapper<CheckHealthCommand, Answer, NiciraNvpResource> { | ||||
| 
 | ||||
|     private static final String CONTROL_CLUSTER_STATUS_IS_STABLE = "stable"; | ||||
|     private static final Logger s_logger = Logger.getLogger(NiciraCheckHealthCommandWrapper.class); | ||||
| 
 | ||||
|     @Override | ||||
|     public Answer execute(final CheckHealthCommand command, final NiciraNvpResource serverResource) { | ||||
|         final NiciraNvpApi niciraNvpApi = serverResource.getNiciraNvpApi(); | ||||
|         boolean healthy = true; | ||||
|         try { | ||||
|             final ControlClusterStatus clusterStatus = niciraNvpApi.getControlClusterStatus(); | ||||
|             final String status = clusterStatus.getClusterStatus(); | ||||
|             if (clusterIsUnstable(status)) { | ||||
|                 s_logger.warn("Control cluster is not stable. Current status is " + status); | ||||
|                 healthy = false; | ||||
|             } | ||||
|         } catch (final NiciraNvpApiException e) { | ||||
|             s_logger.error("Exception caught while checking control cluster status during health check", e); | ||||
|             healthy = false; | ||||
|         } | ||||
| 
 | ||||
|         return new CheckHealthAnswer(command, healthy); | ||||
|     } | ||||
| 
 | ||||
|     protected boolean clusterIsUnstable(final String clusterStatus) { | ||||
|         return !CONTROL_CLUSTER_STATUS_IS_STABLE.equals(clusterStatus); | ||||
|     } | ||||
| 
 | ||||
| } | ||||
| @ -0,0 +1,80 @@ | ||||
| // | ||||
| // Licensed to the Apache Software Foundation (ASF) under one | ||||
| // or more contributor license agreements.  See the NOTICE file | ||||
| // distributed with this work for additional information | ||||
| // regarding copyright ownership.  The ASF licenses this file | ||||
| // to you under the Apache License, Version 2.0 (the | ||||
| // "License"); you may not use this file except in compliance | ||||
| // with the License.  You may obtain a copy of the License at | ||||
| // | ||||
| //   http://www.apache.org/licenses/LICENSE-2.0 | ||||
| // | ||||
| // Unless required by applicable law or agreed to in writing, | ||||
| // software distributed under the License is distributed on an | ||||
| // "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY | ||||
| // KIND, either express or implied.  See the License for the | ||||
| // specific language governing permissions and limitations | ||||
| // under the License. | ||||
| // | ||||
| 
 | ||||
| package com.cloud.network.resource.wrapper; | ||||
| 
 | ||||
| import static org.hamcrest.MatcherAssert.assertThat; | ||||
| import static org.hamcrest.Matchers.equalTo; | ||||
| import static org.mockito.Mockito.mock; | ||||
| import static org.mockito.Mockito.when; | ||||
| 
 | ||||
| import org.junit.Before; | ||||
| import org.junit.Test; | ||||
| 
 | ||||
| import com.cloud.agent.api.Answer; | ||||
| import com.cloud.agent.api.CheckHealthCommand; | ||||
| import com.cloud.network.nicira.ControlClusterStatus; | ||||
| import com.cloud.network.nicira.NiciraNvpApi; | ||||
| import com.cloud.network.nicira.NiciraNvpApiException; | ||||
| import com.cloud.network.resource.NiciraNvpResource; | ||||
| 
 | ||||
| public class NiciraCheckHealthCommandWrapperTest { | ||||
| 
 | ||||
|     private final NiciraNvpResource niciraResource = mock(NiciraNvpResource.class); | ||||
|     private final NiciraNvpApi niciraApi = mock(NiciraNvpApi.class); | ||||
| 
 | ||||
|     @Before | ||||
|     public void setup() { | ||||
|         when(niciraResource.getNiciraNvpApi()).thenReturn(niciraApi); | ||||
|     } | ||||
| 
 | ||||
|     @Test | ||||
|     public void tetsExecuteWhenClusterIsNotStable() throws Exception { | ||||
|         when(niciraApi.getControlClusterStatus()).thenReturn(new ControlClusterStatus()); | ||||
| 
 | ||||
|         final NiciraCheckHealthCommandWrapper commandWrapper = new NiciraCheckHealthCommandWrapper(); | ||||
|         final Answer answer = commandWrapper.execute(new CheckHealthCommand(), niciraResource); | ||||
| 
 | ||||
|         assertThat(answer.getResult(), equalTo(false)); | ||||
|     } | ||||
| 
 | ||||
|     @SuppressWarnings("unchecked") | ||||
|     @Test | ||||
|     public void tetsExecuteWhenApiThrowsException() throws Exception { | ||||
|         when(niciraApi.getControlClusterStatus()).thenThrow(NiciraNvpApiException.class); | ||||
| 
 | ||||
|         final NiciraCheckHealthCommandWrapper commandWrapper = new NiciraCheckHealthCommandWrapper(); | ||||
|         final Answer answer = commandWrapper.execute(new CheckHealthCommand(), niciraResource); | ||||
| 
 | ||||
|         assertThat(answer.getResult(), equalTo(false)); | ||||
|     } | ||||
| 
 | ||||
|     @Test | ||||
|     public void tetsExecuteWhenClusterIsStable() throws Exception { | ||||
|         final ControlClusterStatus statusValue = mock(ControlClusterStatus.class); | ||||
|         when(statusValue.getClusterStatus()).thenReturn("stable"); | ||||
|         when(niciraApi.getControlClusterStatus()).thenReturn(statusValue); | ||||
| 
 | ||||
|         final NiciraCheckHealthCommandWrapper commandWrapper = new NiciraCheckHealthCommandWrapper(); | ||||
|         final Answer answer = commandWrapper.execute(new CheckHealthCommand(), niciraResource); | ||||
| 
 | ||||
|         assertThat(answer.getResult(), equalTo(true)); | ||||
|     } | ||||
| 
 | ||||
| } | ||||
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user