Improvement: management server peer states (#9885)

* Improvement: management server peer states

* Update pr9885: consider new mgmt server node which has msId=managementServerNodeId

* Update pr9885: update global config description

* Update pr9885: update label on UI

* framework: Do not update mshost_peer when mgmt server is Up as it will be updated by status update

* mgmt: Update state to Up when mgmt server writes heartbeat to db

* mgmt: change Service IP to Management IP

---------

Co-authored-by: Boris Stoyanov - a.k.a Bobby <bss.stoyanov@gmail.com>
This commit is contained in:
Wei Zhou 2024-12-02 05:56:20 +01:00 committed by GitHub
parent 22a6604491
commit 34056d956c
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
23 changed files with 782 additions and 37 deletions

View File

@ -32,6 +32,8 @@ public interface ManagementServerHostStats {
String getManagementServerHostUuid();
long getManagementServerRunId();
long getSessions();
double getCpuUtilization();

View File

@ -381,6 +381,14 @@ public class ApiConstants {
public static final String PATH = "path";
public static final String PAYLOAD = "payload";
public static final String PAYLOAD_URL = "payloadurl";
public static final String PEERS = "peers";
public static final String PEER_ID = "peerid";
public static final String PEER_NAME = "peername";
public static final String PEER_MSID = "peermsid";
public static final String PEER_RUNID = "peerrunid";
public static final String PEER_SERVICE_IP = "peerserviceip";
public static final String PEER_SERVICE_PORT = "peerserviceport";
public static final String PEER_STATE = "peerstate";
public static final String POD_ID = "podid";
public static final String POD_NAME = "podname";
public static final String POD_IDS = "podids";

View File

@ -23,6 +23,7 @@ import org.apache.cloudstack.api.BaseListCmd;
import org.apache.cloudstack.api.Parameter;
import org.apache.cloudstack.api.response.ListResponse;
import org.apache.cloudstack.api.response.ManagementServerResponse;
import org.apache.commons.lang3.BooleanUtils;
@APICommand(name = "listManagementServers", description = "Lists management servers.", responseObject = ManagementServerResponse.class,
requestHasSensitiveInfo = false, responseHasSensitiveInfo = false)
@ -39,6 +40,11 @@ public class ListMgmtsCmd extends BaseListCmd {
@Parameter(name = ApiConstants.NAME, type = CommandType.STRING, description = "the name of the management server")
private String hostName;
@Parameter(name = ApiConstants.PEERS, type = CommandType.BOOLEAN,
description = "Whether to return the management server peers or not. By default, the management server peers will not be returned.",
since = "4.20.0.0")
private Boolean peers;
/////////////////////////////////////////////////////
/////////////////// Accessors ///////////////////////
/////////////////////////////////////////////////////
@ -51,6 +57,10 @@ public class ListMgmtsCmd extends BaseListCmd {
return hostName;
}
/**
 * Reports whether the caller asked for management server peers to be returned.
 *
 * @return {@code true} only when the {@code peers} parameter was supplied as true;
 *         {@code false} when it was omitted or supplied as false
 */
public Boolean getPeers() {
    return BooleanUtils.isTrue(peers);
}
/////////////////////////////////////////////////////
/////////////// API Implementation///////////////////
/////////////////////////////////////////////////////

View File

@ -24,7 +24,9 @@ import org.apache.cloudstack.api.BaseResponse;
import org.apache.cloudstack.api.EntityReference;
import org.apache.cloudstack.management.ManagementServerHost.State;
import java.util.ArrayList;
import java.util.Date;
import java.util.List;
@EntityReference(value = ManagementServerHost.class)
public class ManagementServerResponse extends BaseResponse {
@ -76,6 +78,10 @@ public class ManagementServerResponse extends BaseResponse {
@Param(description = "the IP Address for this Management Server")
private String serviceIp;
@SerializedName(ApiConstants.PEERS)
@Param(description = "the Management Server Peers")
private List<PeerManagementServerNodeResponse> peers;
public String getId() {
return this.id;
}
@ -171,4 +177,19 @@ public class ManagementServerResponse extends BaseResponse {
public String getKernelVersion() {
return kernelVersion;
}
/**
 * @return the peer entries attached to this response; {@code null} when none were set or added
 */
public List<PeerManagementServerNodeResponse> getPeers() {
    return this.peers;
}
/**
 * Replaces the peer entries of this response.
 *
 * @param peers the list to store; it is kept by reference, not copied
 */
public void setPeers(List<PeerManagementServerNodeResponse> peers) {
    this.peers = peers;
}
/**
 * Appends one peer entry to this response.
 *
 * @param peer the peer entry to append
 */
public void addPeer(PeerManagementServerNodeResponse peer) {
    // The backing list is created on first use, so the field stays null until a peer is added.
    List<PeerManagementServerNodeResponse> current = this.peers;
    if (current == null) {
        current = new ArrayList<>();
        this.peers = current;
    }
    current.add(peer);
}
}

View File

@ -0,0 +1,100 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package org.apache.cloudstack.api.response;
import com.cloud.serializer.Param;
import com.google.gson.annotations.SerializedName;
import org.apache.cloudstack.api.ApiConstants;
import org.apache.cloudstack.api.BaseResponse;
import org.apache.cloudstack.management.ManagementServerHost.State;
import java.util.Date;
/**
 * API response element describing one management server peer: the state recorded for
 * the peer, when that state was last updated, and identifying details of the peer itself.
 * The class only exposes setters; values are written by the code building the response.
 */
public class PeerManagementServerNodeResponse extends BaseResponse {

    // ---- state recorded for the peer ----

    @SerializedName(ApiConstants.STATE)
    @Param(description = "the state of the management server peer")
    private State state;

    @SerializedName(ApiConstants.LAST_UPDATED)
    @Param(description = "the last updated time of the management server peer state")
    private Date lastUpdated;

    // ---- details of the peer management server ----

    @SerializedName(ApiConstants.PEER_ID)
    @Param(description = "the ID of the peer management server")
    private String peerId;

    @SerializedName(ApiConstants.PEER_NAME)
    @Param(description = "the name of the peer management server")
    private String peerName;

    @SerializedName(ApiConstants.PEER_MSID)
    @Param(description = "the management ID of the peer management server")
    private String peerMsId;

    @SerializedName(ApiConstants.PEER_RUNID)
    @Param(description = "the run ID of the peer management server")
    private String peerRunId;

    @SerializedName(ApiConstants.PEER_STATE)
    @Param(description = "the state of the peer management server")
    private String peerState;

    @SerializedName(ApiConstants.PEER_SERVICE_IP)
    @Param(description = "the IP Address for the peer Management Server")
    private String peerServiceIp;

    @SerializedName(ApiConstants.PEER_SERVICE_PORT)
    @Param(description = "the service port for the peer Management Server")
    private String peerServicePort;

    /** @param state the state of the management server peer */
    public void setState(State state) {
        this.state = state;
    }

    /** @param lastUpdated the time the peer state was last updated */
    public void setLastUpdated(Date lastUpdated) {
        this.lastUpdated = lastUpdated;
    }

    /** @param peerId the ID of the peer management server */
    public void setPeerId(String peerId) {
        this.peerId = peerId;
    }

    /** @param peerName the name of the peer management server */
    public void setPeerName(String peerName) {
        this.peerName = peerName;
    }

    /** @param peerMsId the management ID of the peer, as a string */
    public void setPeerMsId(String peerMsId) {
        this.peerMsId = peerMsId;
    }

    /** @param peerRunId the run ID of the peer, as a string */
    public void setPeerRunId(String peerRunId) {
        this.peerRunId = peerRunId;
    }

    /** @param peerState the state of the peer management server, as a string */
    public void setPeerState(String peerState) {
        this.peerState = peerState;
    }

    /** @param peerServiceIp the IP address of the peer management server */
    public void setPeerServiceIp(String peerServiceIp) {
        this.peerServiceIp = peerServiceIp;
    }

    /** @param peerServicePort the service port of the peer management server, as a string */
    public void setPeerServicePort(String peerServicePort) {
        this.peerServicePort = peerServicePort;
    }
}

View File

@ -117,6 +117,7 @@
<bean id="loadBalancerCertMapDaoImpl" class="com.cloud.network.dao.LoadBalancerCertMapDaoImpl" />
<bean id="managementServerHostDaoImpl" class="com.cloud.cluster.dao.ManagementServerHostDaoImpl" />
<bean id="managementServerHostPeerDaoImpl" class="com.cloud.cluster.dao.ManagementServerHostPeerDaoImpl" />
<bean id="managementServerHostPeerJoinDaoImpl" class="com.cloud.cluster.dao.ManagementServerHostPeerJoinDaoImpl" />
<bean id="managementServerStatusDaoImpl" class="com.cloud.cluster.dao.ManagementServerStatusDaoImpl" />
<bean id="networkAccountDaoImpl" class="com.cloud.network.dao.NetworkAccountDaoImpl" />
<bean id="networkACLDaoImpl" class="com.cloud.network.vpc.dao.NetworkACLDaoImpl" />

View File

@ -0,0 +1,28 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-- Adds a foreign key constraint (ON DELETE CASCADE) and tolerates the constraint already existing.
-- Parameters:
--   in_table_name  table to alter
--   in_key_name    name of the foreign key constraint
--   in_foreign_key parenthesised list of referencing columns
--   in_references  referenced table and columns
-- Tolerated errors:
--   1826 (ER_FK_DUP_NAME)       duplicate foreign key constraint name
--   1005 (ER_CANT_CREATE_TABLE) believed to be what some older or other servers raise for the same case - verify for the deployed version
--   1061 (ER_DUP_KEYNAME)       duplicate index name, kept so previously tolerated cases stay tolerated
-- NOTE(review): the procedure body stays on a single line, presumably because the script runner ends a statement at a line-ending semicolon - confirm before reformatting
DROP PROCEDURE IF EXISTS `cloud`.`IDEMPOTENT_ADD_FOREIGN_KEY`;
CREATE PROCEDURE `cloud`.`IDEMPOTENT_ADD_FOREIGN_KEY` (
    IN in_table_name VARCHAR(200)
    , IN in_key_name VARCHAR(200)
    , IN in_foreign_key VARCHAR(200)
    , IN in_references VARCHAR(1000)
)
BEGIN
    DECLARE CONTINUE HANDLER FOR 1005, 1061, 1826 BEGIN END; SET @ddl = CONCAT_WS(' ', 'ALTER TABLE ', in_table_name, ' ADD CONSTRAINT ', in_key_name, ' FOREIGN KEY ', in_foreign_key, ' REFERENCES ', in_references, ' ON DELETE CASCADE'); PREPARE stmt FROM @ddl; EXECUTE stmt; DEALLOCATE PREPARE stmt; END;

View File

@ -425,3 +425,10 @@ INSERT IGNORE INTO `cloud`.`guest_os_hypervisor` (uuid, hypervisor_type, hypervi
CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.vm_instance', 'delete_protection', 'boolean DEFAULT FALSE COMMENT "delete protection for vm" ');
CALL `cloud`.`IDEMPOTENT_ADD_COLUMN`('cloud.volumes', 'delete_protection', 'boolean DEFAULT FALSE COMMENT "delete protection for volumes" ');
-- Modify index for mshost_peer
-- Replaces index i_mshost_peer__owner_peer_runid with a unique key on (owner_mshost, peer_mshost)
-- The table is emptied first, presumably so that existing rows cannot violate the new unique key (TODO confirm)
DELETE FROM `cloud`.`mshost_peer`;
-- The owner foreign key is dropped before the index is dropped and is re-created after the unique key is added
CALL `cloud`.`IDEMPOTENT_DROP_FOREIGN_KEY`('cloud.mshost_peer','fk_mshost_peer__owner_mshost');
CALL `cloud`.`IDEMPOTENT_DROP_INDEX`('i_mshost_peer__owner_peer_runid','mshost_peer');
CALL `cloud`.`IDEMPOTENT_ADD_UNIQUE_KEY`('cloud.mshost_peer', 'i_mshost_peer__owner_peer', '(owner_mshost, peer_mshost)');
CALL `cloud`.`IDEMPOTENT_ADD_FOREIGN_KEY`('cloud.mshost_peer', 'fk_mshost_peer__owner_mshost', '(owner_mshost)', '`mshost`(`id`)');

View File

@ -0,0 +1,44 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
-- mshost_peer_view: one row per mshost_peer entry, joined with the mshost row of its owner and of its peer
-- Columns taken from the owner's mshost row are prefixed owner_mshost_, those from the peer's row peer_mshost_
-- Both joins are LEFT JOINs, so a mshost_peer row is kept even when a referenced mshost row is missing (those columns are then NULL)
DROP VIEW IF EXISTS `cloud`.`mshost_peer_view`;
CREATE VIEW `cloud`.`mshost_peer_view` AS
SELECT
`mshost_peer`.`id` AS `id`,
`mshost_peer`.`peer_state` AS `peer_state`,
`mshost_peer`.`last_update` AS `last_update`,
`owner_mshost`.`id` AS `owner_mshost_id`,
`owner_mshost`.`msid` AS `owner_mshost_msid`,
`owner_mshost`.`runid` AS `owner_mshost_runid`,
`owner_mshost`.`name` AS `owner_mshost_name`,
`owner_mshost`.`uuid` AS `owner_mshost_uuid`,
`owner_mshost`.`state` AS `owner_mshost_state`,
`owner_mshost`.`service_ip` AS `owner_mshost_service_ip`,
`owner_mshost`.`service_port` AS `owner_mshost_service_port`,
`peer_mshost`.`id` AS `peer_mshost_id`,
`peer_mshost`.`msid` AS `peer_mshost_msid`,
`peer_mshost`.`runid` AS `peer_mshost_runid`,
`peer_mshost`.`name` AS `peer_mshost_name`,
`peer_mshost`.`uuid` AS `peer_mshost_uuid`,
`peer_mshost`.`state` AS `peer_mshost_state`,
`peer_mshost`.`service_ip` AS `peer_mshost_service_ip`,
`peer_mshost`.`service_port` AS `peer_mshost_service_port`
FROM `cloud`.`mshost_peer`
LEFT JOIN `cloud`.`mshost` AS owner_mshost on `mshost_peer`.`owner_mshost` = `owner_mshost`.`id`
LEFT JOIN `cloud`.`mshost` AS peer_mshost on `mshost_peer`.`peer_mshost` = `peer_mshost`.`id`;

View File

@ -27,9 +27,9 @@ import com.cloud.utils.component.Manager;
public interface ClusterManager extends Manager {
static final String ALERT_SUBJECT = "cluster-alert";
final ConfigKey<Integer> HeartbeatInterval = new ConfigKey<Integer>(Integer.class, "cluster.heartbeat.interval", "management-server", "1500",
"Interval to check for the heart beat between management server nodes", false);
"Interval (in milliseconds) to check for the heart beat between management server nodes", false);
final ConfigKey<Integer> HeartbeatThreshold = new ConfigKey<Integer>(Integer.class, "cluster.heartbeat.threshold", "management-server", "150000",
"Threshold before self-fence the management server", true);
"Threshold (in milliseconds) before self-fence the management server. The threshold should be larger than management.server.stats.interval", true);
/**
* Adds a new packet to the incoming queue.

View File

@ -758,21 +758,16 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C
}
switch (msg.getMessageType()) {
case nodeAdded: {
final List<ManagementServerHostVO> l = msg.getNodes();
if (l != null && l.size() > 0) {
for (final ManagementServerHostVO mshost : l) {
_mshostPeerDao.updatePeerInfo(_mshostId, mshost.getId(), mshost.getRunid(), ManagementServerHost.State.Up);
}
}
}
case nodeAdded:
break;
case nodeRemoved: {
final List<ManagementServerHostVO> l = msg.getNodes();
if (l != null && l.size() > 0) {
for (final ManagementServerHostVO mshost : l) {
_mshostPeerDao.updatePeerInfo(_mshostId, mshost.getId(), mshost.getRunid(), ManagementServerHost.State.Down);
if (mshost.getId() != _mshostId) {
_mshostPeerDao.updatePeerInfo(_mshostId, mshost.getId(), mshost.getRunid(), ManagementServerHost.State.Down);
}
}
}
}
@ -823,8 +818,9 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C
final List<ManagementServerHostVO> downHostList = new ArrayList<ManagementServerHostVO>();
for (final ManagementServerHostVO host : inactiveList) {
if (!pingManagementNode(host)) {
logger.warn("Management node " + host.getId() + " is detected inactive by timestamp and also not pingable");
// Check if peer state is Up in the period
if (!_mshostPeerDao.isPeerUpState(_mshostId, host.getId(), new Date(cutTime.getTime() - HeartbeatThreshold.value()))) {
logger.warn("Management node " + host.getId() + " is detected inactive by timestamp and did not send node status to this node");
downHostList.add(host);
}
}
@ -898,6 +894,44 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C
final Profiler profilerInvalidatedNodeList = new Profiler();
profilerInvalidatedNodeList.start();
processInvalidatedNodes(invalidatedNodeList);
profilerInvalidatedNodeList.stop();
final Profiler profilerRemovedList = new Profiler();
profilerRemovedList.start();
processRemovedNodes(cutTime, removedNodeList);
profilerRemovedList.stop();
final Profiler profilerNewList = new Profiler();
profilerNewList.start();
processNewNodes(cutTime, currentList);
profilerNewList.stop();
final Profiler profilerInactiveList = new Profiler();
profilerInactiveList.start();
processInactiveNodes(cutTime);
profilerInactiveList.stop();
profiler.stop();
logger.debug(String.format("Peer scan is finished. profiler: %s , profilerQueryActiveList: %s, " +
", profilerSyncClusterInfo: %s, profilerInvalidatedNodeList: %s, profilerRemovedList: %s," +
", profilerNewList: %s, profilerInactiveList: %s",
profiler, profilerQueryActiveList, profilerSyncClusterInfo, profilerInvalidatedNodeList, profilerRemovedList,
profilerNewList, profilerInactiveList));
if (profiler.getDurationInMillis() >= HeartbeatInterval.value()) {
if (logger.isDebugEnabled()) {
logger.debug(String.format("Peer scan takes too long to finish. profiler: %s , profilerQueryActiveList: %s, " +
", profilerSyncClusterInfo: %s, profilerInvalidatedNodeList: %s, profilerRemovedList: %s," +
", profilerNewList: %s, profilerInactiveList: %s",
profiler, profilerQueryActiveList, profilerSyncClusterInfo, profilerInvalidatedNodeList, profilerRemovedList,
profilerNewList, profilerInactiveList));
}
}
}
private void processInvalidatedNodes(List<ManagementServerHostVO> invalidatedNodeList) {
// process invalidated node list
if (invalidatedNodeList.size() > 0) {
for (final ManagementServerHostVO mshost : invalidatedNodeList) {
@ -911,16 +945,16 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C
queueNotification(new ClusterManagerMessage(ClusterManagerMessage.MessageType.nodeRemoved, invalidatedNodeList));
}
profilerInvalidatedNodeList.stop();
}
final Profiler profilerRemovedList = new Profiler();
profilerRemovedList.start();
private void processRemovedNodes(Date cutTime, List<ManagementServerHostVO> removedNodeList) {
// process removed node list
final Iterator<ManagementServerHostVO> it = removedNodeList.iterator();
while (it.hasNext()) {
final ManagementServerHostVO mshost = it.next();
if (!pingManagementNode(mshost)) {
logger.warn("Management node " + mshost.getId() + " is detected inactive by timestamp and also not pingable");
// Check if peer state is Up in the period
if (!_mshostPeerDao.isPeerUpState(_mshostId, mshost.getId(), new Date(cutTime.getTime() - HeartbeatThreshold.value()))) {
logger.warn("Management node " + mshost.getId() + " is detected inactive by timestamp and did not send node status to this node");
_activePeers.remove(mshost.getId());
try {
JmxUtil.unregisterMBean("ClusterManager", "Node " + mshost.getId());
@ -928,7 +962,7 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C
logger.warn("Unable to deregiester cluster node from JMX monitoring due to exception " + e.toString());
}
} else {
logger.info("Management node " + mshost.getId() + " is detected inactive by timestamp but is pingable");
logger.info("Management node " + mshost.getId() + " is detected inactive by timestamp but sent node status to this node");
it.remove();
}
}
@ -936,8 +970,9 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C
if (removedNodeList.size() > 0) {
queueNotification(new ClusterManagerMessage(ClusterManagerMessage.MessageType.nodeRemoved, removedNodeList));
}
profilerRemovedList.stop();
}
private void processNewNodes(Date cutTime, List<ManagementServerHostVO> currentList) {
final List<ManagementServerHostVO> newNodeList = new ArrayList<ManagementServerHostVO>();
for (final ManagementServerHostVO mshost : currentList) {
if (!_activePeers.containsKey(mshost.getId())) {
@ -959,18 +994,31 @@ public class ClusterManagerImpl extends ManagerBase implements ClusterManager, C
if (newNodeList.size() > 0) {
queueNotification(new ClusterManagerMessage(ClusterManagerMessage.MessageType.nodeAdded, newNodeList));
}
}
profiler.stop();
if (profiler.getDurationInMillis() >= HeartbeatInterval.value()) {
if (logger.isDebugEnabled()) {
logger.debug("Peer scan takes too long to finish. profiler: " + profiler.toString() + ", profilerQueryActiveList: " +
profilerQueryActiveList.toString() + ", profilerSyncClusterInfo: " + profilerSyncClusterInfo.toString() + ", profilerInvalidatedNodeList: " +
profilerInvalidatedNodeList.toString() + ", profilerRemovedList: " + profilerRemovedList.toString());
/**
 * Marks management server nodes as Down when they look inactive by timestamp and no
 * peer entry reports them Up within the heartbeat threshold.
 *
 * @param cutTime reference time; the heartbeat threshold is subtracted from it to get the cut-off
 */
private void processInactiveNodes(Date cutTime) {
    final List<ManagementServerHostVO> staleHosts = _mshostDao.getInactiveList(new Date(cutTime.getTime() - HeartbeatThreshold.value()));
    if (staleHosts.isEmpty()) {
        logger.info("No inactive management server node found");
        return;
    }

    if (logger.isInfoEnabled()) {
        logger.info(String.format("Found %s inactive management server node based on timestamp", staleHosts.size()));
    }

    for (final ManagementServerHostVO node : staleHosts) {
        logger.info(String.format("management server node msid: %s, name: %s, service ip: %s, version: %s",
                node.getMsid(), node.getName(), node.getServiceIP(), node.getVersion()));

        // Only nodes still recorded as Up are candidates for being switched to Down.
        if (!ManagementServerHost.State.Up.equals(node.getState())) {
            continue;
        }

        // Check if any peer state is Up in the period
        final Date peerCutoff = new Date(cutTime.getTime() - HeartbeatThreshold.value());
        if (_mshostPeerDao.isPeerUpState(node.getId(), peerCutoff)) {
            continue;
        }

        logger.warn("Management node " + node.getId() + " is detected inactive by timestamp and did not send node status to all other nodes");
        node.setState(ManagementServerHost.State.Down);
        _mshostDao.update(node.getId(), node);
    }
}
private static ManagementServerHostVO getInListById(final Long id, final List<ManagementServerHostVO> l) {
for (final ManagementServerHostVO mshost : l) {
if (mshost.getId() == id) {

View File

@ -0,0 +1,177 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.cluster;
import java.util.Date;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.EnumType;
import javax.persistence.Enumerated;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Table;
import javax.persistence.Temporal;
import javax.persistence.TemporalType;
import org.apache.cloudstack.management.ManagementServerHost;
/**
 * Row of the {@code mshost_peer_view} view: a peer-state entry together with the
 * columns of the owning management server and of the peer management server.
 * The class exposes getters only.
 */
@Entity
@Table(name = "mshost_peer_view")
public class ManagementServerHostPeerJoinVO {

    // ---- columns of the peer-state entry itself ----

    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    @Column(name = "id")
    private long id;

    @Column(name = "peer_state")
    @Enumerated(EnumType.STRING)
    private ManagementServerHost.State peerState;

    @Temporal(TemporalType.TIMESTAMP)
    @Column(name = "last_update")
    private Date lastUpdateTime;

    // ---- columns of the owning management server ----

    @Column(name = "owner_mshost_id")
    private long ownerMshostId;

    @Column(name = "owner_mshost_msid")
    private long ownerMshostMsId;

    @Column(name = "owner_mshost_runid")
    private long ownerMshostRunId;

    @Column(name = "owner_mshost_name")
    private String ownerMshostName;

    @Column(name = "owner_mshost_uuid")
    private String ownerMshostUuid;

    @Column(name = "owner_mshost_state")
    private String ownerMshostState;

    @Column(name = "owner_mshost_service_ip")
    private String ownerMshostServiceIp;

    @Column(name = "owner_mshost_service_port")
    private Integer ownerMshostServicePort;

    // ---- columns of the peer management server ----

    @Column(name = "peer_mshost_id")
    private long peerMshostId;

    @Column(name = "peer_mshost_msid")
    private long peerMshostMsId;

    @Column(name = "peer_mshost_runid")
    private long peerMshostRunId;

    @Column(name = "peer_mshost_name")
    private String peerMshostName;

    @Column(name = "peer_mshost_uuid")
    private String peerMshostUuid;

    @Column(name = "peer_mshost_state")
    private String peerMshostState;

    @Column(name = "peer_mshost_service_ip")
    private String peerMshostServiceIp;

    @Column(name = "peer_mshost_service_port")
    private Integer peerMshostServicePort;

    /** No-argument constructor; fields keep their default values until populated. */
    public ManagementServerHostPeerJoinVO() {
    }

    /** @return the id of the peer-state entry */
    public long getId() {
        return this.id;
    }

    /** @return the state recorded for the peer in this entry */
    public ManagementServerHost.State getPeerState() {
        return this.peerState;
    }

    /** @return the time this entry was last updated */
    public Date getLastUpdateTime() {
        return this.lastUpdateTime;
    }

    /** @return the id of the owning management server */
    public long getOwnerMshostId() {
        return this.ownerMshostId;
    }

    /** @return the msid of the owning management server */
    public long getOwnerMshostMsId() {
        return this.ownerMshostMsId;
    }

    /** @return the run id of the owning management server */
    public long getOwnerMshostRunId() {
        return this.ownerMshostRunId;
    }

    /** @return the name of the owning management server */
    public String getOwnerMshostName() {
        return this.ownerMshostName;
    }

    /** @return the uuid of the owning management server */
    public String getOwnerMshostUuid() {
        return this.ownerMshostUuid;
    }

    /** @return the state column of the owning management server, as a string */
    public String getOwnerMshostState() {
        return this.ownerMshostState;
    }

    /** @return the service IP of the owning management server */
    public String getOwnerMshostServiceIp() {
        return this.ownerMshostServiceIp;
    }

    /** @return the service port of the owning management server */
    public Integer getOwnerMshostServicePort() {
        return this.ownerMshostServicePort;
    }

    /** @return the id of the peer management server */
    public long getPeerMshostId() {
        return this.peerMshostId;
    }

    /** @return the msid of the peer management server */
    public long getPeerMshostMsId() {
        return this.peerMshostMsId;
    }

    /** @return the run id of the peer management server */
    public long getPeerMshostRunId() {
        return this.peerMshostRunId;
    }

    /** @return the name of the peer management server */
    public String getPeerMshostName() {
        return this.peerMshostName;
    }

    /** @return the uuid of the peer management server */
    public String getPeerMshostUuid() {
        return this.peerMshostUuid;
    }

    /** @return the state column of the peer management server, as a string */
    public String getPeerMshostState() {
        return this.peerMshostState;
    }

    /** @return the service IP of the peer management server */
    public String getPeerMshostServiceIp() {
        return this.peerMshostServiceIp;
    }

    /** @return the service port of the peer management server */
    public Integer getPeerMshostServicePort() {
        return this.peerMshostServicePort;
    }
}

View File

@ -130,7 +130,7 @@ public class ManagementServerHostDaoImpl extends GenericDaoBase<ManagementServer
try {
txn.start();
pstmt = txn.prepareAutoCloseStatement("update mshost set last_update=?, removed=null, alert_count=0 where id=? and runid=?");
pstmt = txn.prepareAutoCloseStatement("update mshost set last_update=?, removed=null, alert_count=0, state='Up' where id=? and runid=?");
pstmt.setString(1, DateUtil.getDateDisplayString(TimeZone.getTimeZone("GMT"), lastUpdate));
pstmt.setLong(2, id);
pstmt.setLong(3, runid);

View File

@ -20,10 +20,17 @@ import org.apache.cloudstack.management.ManagementServerHost;
import com.cloud.cluster.ManagementServerHostPeerVO;
import com.cloud.utils.db.GenericDao;
import java.util.Date;
public interface ManagementServerHostPeerDao extends GenericDao<ManagementServerHostPeerVO, Long> {
void clearPeerInfo(long ownerMshost);
void updatePeerInfo(long ownerMshost, long peerMshost, long peerRunid, ManagementServerHost.State peerState);
int countStateSeenInPeers(long mshost, long runid, ManagementServerHost.State state);
int countStateSeenInPeers(long peerMshost, long runid, ManagementServerHost.State state);
boolean isPeerUpState(long peerMshost, Date cutTime);
boolean isPeerUpState(long ownerMshost, long peerMshost, Date cutTime);
}

View File

@ -16,10 +16,10 @@
// under the License.
package com.cloud.cluster.dao;
import java.util.Date;
import java.util.List;
import org.apache.cloudstack.management.ManagementServerHost;
import com.cloud.cluster.ManagementServerHostPeerVO;
import com.cloud.utils.db.DB;
@ -33,10 +33,12 @@ public class ManagementServerHostPeerDaoImpl extends GenericDaoBase<ManagementSe
private final SearchBuilder<ManagementServerHostPeerVO> ClearPeerSearch;
private final SearchBuilder<ManagementServerHostPeerVO> FindForUpdateSearch;
private final SearchBuilder<ManagementServerHostPeerVO> CountSearch;
private final SearchBuilder<ManagementServerHostPeerVO> ActiveSearch;
public ManagementServerHostPeerDaoImpl() {
ClearPeerSearch = createSearchBuilder();
ClearPeerSearch.and("ownerMshost", ClearPeerSearch.entity().getOwnerMshost(), SearchCriteria.Op.EQ);
ClearPeerSearch.or("peerMshost", ClearPeerSearch.entity().getPeerMshost(), SearchCriteria.Op.EQ);
ClearPeerSearch.done();
FindForUpdateSearch = createSearchBuilder();
@ -50,6 +52,13 @@ public class ManagementServerHostPeerDaoImpl extends GenericDaoBase<ManagementSe
CountSearch.and("peerRunid", CountSearch.entity().getPeerRunid(), SearchCriteria.Op.EQ);
CountSearch.and("peerState", CountSearch.entity().getPeerState(), SearchCriteria.Op.EQ);
CountSearch.done();
ActiveSearch = createSearchBuilder();
ActiveSearch.and("ownerMshost", ActiveSearch.entity().getOwnerMshost(), SearchCriteria.Op.EQ);
ActiveSearch.and("peerMshost", ActiveSearch.entity().getPeerMshost(), SearchCriteria.Op.EQ);
ActiveSearch.and("peerState", ActiveSearch.entity().getPeerState(), SearchCriteria.Op.EQ);
ActiveSearch.and("lastUpdateTime", ActiveSearch.entity().getLastUpdateTime(), SearchCriteria.Op.GT);
ActiveSearch.done();
}
@Override
@ -57,6 +66,7 @@ public class ManagementServerHostPeerDaoImpl extends GenericDaoBase<ManagementSe
/**
 * Expunges the peer entries matched by {@code ClearPeerSearch} for the given management server.
 *
 * @param ownerMshost id of the management server; used for both the owner and the peer condition
 */
public void clearPeerInfo(long ownerMshost) {
    final SearchCriteria<ManagementServerHostPeerVO> criteria = ClearPeerSearch.create();
    // The same id is bound to both search parameters.
    criteria.setParameters("peerMshost", ownerMshost);
    criteria.setParameters("ownerMshost", ownerMshost);
    expunge(criteria);
}
@ -71,11 +81,12 @@ public class ManagementServerHostPeerDaoImpl extends GenericDaoBase<ManagementSe
SearchCriteria<ManagementServerHostPeerVO> sc = FindForUpdateSearch.create();
sc.setParameters("ownerMshost", ownerMshost);
sc.setParameters("peerMshost", peerMshost);
sc.setParameters("peerRunid", peerRunid);
List<ManagementServerHostPeerVO> l = listBy(sc);
if (l.size() == 1) {
ManagementServerHostPeerVO peer = l.get(0);
peer.setPeerRunid(peerRunid);
peer.setPeerState(peerState);
peer.setLastUpdateTime(new Date());
update(peer.getId(), peer);
} else {
ManagementServerHostPeerVO peer = new ManagementServerHostPeerVO(ownerMshost, peerMshost, peerRunid, peerState);
@ -90,13 +101,36 @@ public class ManagementServerHostPeerDaoImpl extends GenericDaoBase<ManagementSe
@Override
@DB
public int countStateSeenInPeers(long mshost, long runid, ManagementServerHost.State state) {
public int countStateSeenInPeers(long peerMshost, long runid, ManagementServerHost.State state) {
SearchCriteria<ManagementServerHostPeerVO> sc = CountSearch.create();
sc.setParameters("peerMshost", mshost);
sc.setParameters("peerMshost", peerMshost);
sc.setParameters("peerRunid", runid);
sc.setParameters("peerState", state);
List<ManagementServerHostPeerVO> l = listBy(sc);
return l.size();
}
/**
 * Checks whether at least one peer entry records the given management server as Up
 * with a last-update time later than {@code cutTime}.
 * The owner parameter of the search is not set here; presumably this makes entries
 * of any owner match - confirm against the search framework.
 *
 * @param peerMshost id of the management server whose state is looked up
 * @param cutTime    entries must have been updated after this time
 * @return {@code true} when at least one matching entry exists
 */
@Override
@DB
public boolean isPeerUpState(long peerMshost, Date cutTime) {
    final SearchCriteria<ManagementServerHostPeerVO> criteria = ActiveSearch.create();
    criteria.setParameters("peerMshost", peerMshost);
    criteria.setParameters("peerState", ManagementServerHost.State.Up);
    criteria.setParameters("lastUpdateTime", cutTime);
    final List<ManagementServerHostPeerVO> matches = listBy(criteria);
    return !matches.isEmpty();
}
/**
 * Checks whether the entry owned by {@code ownerMshost} records {@code peerMshost}
 * as Up with a last-update time later than {@code cutTime}.
 *
 * @param ownerMshost id of the management server that owns the entry
 * @param peerMshost  id of the management server whose state is looked up
 * @param cutTime     the entry must have been updated after this time
 * @return {@code true} when at least one matching entry exists
 */
@Override
@DB
public boolean isPeerUpState(long ownerMshost, long peerMshost, Date cutTime) {
    final SearchCriteria<ManagementServerHostPeerVO> criteria = ActiveSearch.create();
    criteria.setParameters("ownerMshost", ownerMshost);
    criteria.setParameters("peerMshost", peerMshost);
    criteria.setParameters("peerState", ManagementServerHost.State.Up);
    criteria.setParameters("lastUpdateTime", cutTime);
    final List<ManagementServerHostPeerVO> matches = listBy(criteria);
    return !matches.isEmpty();
}
}

View File

@ -0,0 +1,27 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.cluster.dao;
import com.cloud.cluster.ManagementServerHostPeerJoinVO;
import com.cloud.utils.db.GenericDao;
import java.util.List;
/**
 * DAO for {@link ManagementServerHostPeerJoinVO} rows.
 */
public interface ManagementServerHostPeerJoinDao extends GenericDao<ManagementServerHostPeerJoinVO, Long> {

    /**
     * Lists the peer entries owned by one management server.
     *
     * @param ownerMshostId id of the owning management server
     * @return the matching rows
     */
    List<ManagementServerHostPeerJoinVO> listByOwnerMshostId(long ownerMshostId);
}

View File

@ -0,0 +1,42 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.cluster.dao;
import java.util.List;
import com.cloud.cluster.ManagementServerHostPeerJoinVO;
import com.cloud.utils.db.GenericDaoBase;
import com.cloud.utils.db.SearchBuilder;
import com.cloud.utils.db.SearchCriteria;
/**
 * {@link ManagementServerHostPeerJoinDao} implementation built on {@link GenericDaoBase}.
 */
public class ManagementServerHostPeerJoinDaoImpl extends GenericDaoBase<ManagementServerHostPeerJoinVO, Long> implements ManagementServerHostPeerJoinDao {

    /** Search template with a single equality condition on the owner management server host id. */
    private final SearchBuilder<ManagementServerHostPeerJoinVO> ownerMshostSearch;

    /**
     * Prepares the search template used by {@link #listByOwnerMshostId(long)}.
     */
    public ManagementServerHostPeerJoinDaoImpl() {
        final SearchBuilder<ManagementServerHostPeerJoinVO> builder = createSearchBuilder();
        builder.and("ownerMshostId", builder.entity().getOwnerMshostId(), SearchCriteria.Op.EQ);
        builder.done();
        ownerMshostSearch = builder;
    }

    /**
     * Lists the peer join records whose owner management server host id equals the given id.
     *
     * @param ownerMshostId id of the owner management server host
     * @return the records returned by the search
     */
    @Override
    public List<ManagementServerHostPeerJoinVO> listByOwnerMshostId(long ownerMshostId) {
        final SearchCriteria<ManagementServerHostPeerJoinVO> criteria = ownerMshostSearch.create();
        criteria.setParameters("ownerMshostId", ownerMshostId);
        return listBy(criteria);
    }
}

View File

@ -36,7 +36,6 @@ import java.util.stream.Stream;
import javax.inject.Inject;
import com.cloud.cpu.CPU;
import org.apache.cloudstack.acl.ControlledEntity;
import org.apache.cloudstack.acl.ControlledEntity.ACLType;
import org.apache.cloudstack.acl.SecurityChecker;
@ -114,6 +113,7 @@ import org.apache.cloudstack.api.response.IpQuarantineResponse;
import org.apache.cloudstack.api.response.ListResponse;
import org.apache.cloudstack.api.response.ManagementServerResponse;
import org.apache.cloudstack.api.response.ObjectStoreResponse;
import org.apache.cloudstack.api.response.PeerManagementServerNodeResponse;
import org.apache.cloudstack.api.response.ProjectAccountResponse;
import org.apache.cloudstack.api.response.ProjectInvitationResponse;
import org.apache.cloudstack.api.response.ProjectResponse;
@ -214,8 +214,11 @@ import com.cloud.api.query.vo.TemplateJoinVO;
import com.cloud.api.query.vo.UserAccountJoinVO;
import com.cloud.api.query.vo.UserVmJoinVO;
import com.cloud.api.query.vo.VolumeJoinVO;
import com.cloud.cluster.ManagementServerHostPeerJoinVO;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.cluster.dao.ManagementServerHostPeerJoinDao;
import com.cloud.cpu.CPU;
import com.cloud.dc.ClusterVO;
import com.cloud.dc.DataCenter;
import com.cloud.dc.DedicatedResourceVO;
@ -607,6 +610,9 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
@Inject
private ClusterDao clusterDao;
@Inject
private ManagementServerHostPeerJoinDao mshostPeerJoinDao;
private SearchCriteria<ServiceOfferingJoinVO> getMinimumCpuServiceOfferingJoinSearchCriteria(int cpu) {
SearchCriteria<ServiceOfferingJoinVO> sc = _srvOfferingJoinDao.createSearchCriteria();
@ -5342,7 +5348,7 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
List<ManagementServerResponse> hostResponses = new ArrayList<>();
for (ManagementServerJoinVO host : result.first()) {
ManagementServerResponse hostResponse = createManagementServerResponse(host);
ManagementServerResponse hostResponse = createManagementServerResponse(host, cmd.getPeers());
hostResponses.add(hostResponse);
}
@ -5365,7 +5371,7 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
return managementServerJoinDao.searchAndCount(sc, null);
}
protected ManagementServerResponse createManagementServerResponse(ManagementServerJoinVO mgmt) {
protected ManagementServerResponse createManagementServerResponse(ManagementServerJoinVO mgmt, boolean listPeers) {
ManagementServerResponse mgmtResponse = new ManagementServerResponse();
mgmtResponse.setId(mgmt.getUuid());
mgmtResponse.setName(mgmt.getName());
@ -5378,10 +5384,34 @@ public class QueryManagerImpl extends MutualExclusiveIdsManagerBase implements Q
mgmtResponse.setLastServerStop(mgmt.getLastJvmStop());
mgmtResponse.setLastBoot(mgmt.getLastSystemBoot());
mgmtResponse.setServiceIp(mgmt.getServiceIP());
if (listPeers) {
List<ManagementServerHostPeerJoinVO> peers = mshostPeerJoinDao.listByOwnerMshostId(mgmt.getId());
for (ManagementServerHostPeerJoinVO peer: peers) {
mgmtResponse.addPeer(createPeerManagementServerNodeResponse(peer));
}
}
mgmtResponse.setObjectName("managementserver");
return mgmtResponse;
}
/**
 * Builds the API response object for one peer join record.
 *
 * The ms id, run id and service port of the peer are passed to the response as strings
 * (via {@code String.valueOf}); all other values are copied through unchanged.
 *
 * @param peer the peer join record to convert
 * @return a response with object name "peermanagementserver"
 */
private PeerManagementServerNodeResponse createPeerManagementServerNodeResponse(ManagementServerHostPeerJoinVO peer) {
    final PeerManagementServerNodeResponse peerResponse = new PeerManagementServerNodeResponse();

    // State of the peer as recorded on the peer record, and when that record was last updated.
    peerResponse.setState(peer.getPeerState());
    peerResponse.setLastUpdated(peer.getLastUpdateTime());

    // Identity of the peer management server.
    peerResponse.setPeerId(peer.getPeerMshostUuid());
    peerResponse.setPeerName(peer.getPeerMshostName());
    final String peerMsId = String.valueOf(peer.getPeerMshostMsId());
    peerResponse.setPeerMsId(peerMsId);
    final String peerRunId = String.valueOf(peer.getPeerMshostRunId());
    peerResponse.setPeerRunId(peerRunId);

    // State and service endpoint taken from the peer management server's own fields.
    peerResponse.setPeerState(peer.getPeerMshostState());
    peerResponse.setPeerServiceIp(peer.getPeerMshostServiceIp());
    final String peerServicePort = String.valueOf(peer.getPeerMshostServicePort());
    peerResponse.setPeerServicePort(peerServicePort);

    peerResponse.setObjectName("peermanagementserver");
    return peerResponse;
}
@Override
public List<RouterHealthCheckResultResponse> listRouterHealthChecks(GetRouterHealthCheckResultsCmd cmd) {
logger.info("Executing health check command " + cmd);

View File

@ -24,6 +24,7 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats
private long managementServerHostId;
private String managementServerHostUuid;
private long managementServerRunId;
private Date collectionTime;
private long sessions;
@ -94,6 +95,15 @@ public class ManagementServerHostStatsEntry implements ManagementServerHostStats
this.managementServerHostUuid = managementServerHostUuid;
}
/**
 * Returns the management server run id stored on this stats entry.
 *
 * @return the value last passed to {@code setManagementServerRunId}
 */
@Override
public long getManagementServerRunId() {
return managementServerRunId;
}
/**
 * Stores the management server run id on this stats entry.
 *
 * @param managementServerRunId the run id to record
 */
public void setManagementServerRunId(long managementServerRunId) {
this.managementServerRunId = managementServerRunId;
}
@Override
public Date getCollectionTime(){
return collectionTime;

View File

@ -95,6 +95,7 @@ import com.cloud.cluster.ClusterServicePdu;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.ManagementServerStatusVO;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.cluster.dao.ManagementServerHostPeerDao;
import com.cloud.cluster.dao.ManagementServerStatusDao;
import com.cloud.dc.Vlan.VlanType;
import com.cloud.dc.VlanVO;
@ -346,6 +347,8 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
@Inject
private ManagementServerStatusDao managementServerStatusDao;
@Inject
private ManagementServerHostPeerDao managementServerHostPeerDao;
@Inject
VirtualMachineManager virtualMachineManager;
private final ConcurrentHashMap<String, ManagementServerHostStats> managementServerHostStats = new ConcurrentHashMap<>();
@ -796,6 +799,7 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
logger.trace("Metrics collection start...");
newEntry.setManagementServerHostId(mshost.getId());
newEntry.setManagementServerHostUuid(mshost.getUuid());
newEntry.setManagementServerRunId(mshost.getRunid());
newEntry.setDbLocal(isDbLocal());
newEntry.setUsageLocal(isUsageLocal());
retrieveSession(newEntry);
@ -1153,6 +1157,9 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
try {
hostStatsEntry = gson.fromJson(pdu.getJsonPackage(),new TypeToken<ManagementServerHostStatsEntry>(){}.getType());
managementServerHostStats.put(hostStatsEntry.getManagementServerHostUuid(), hostStatsEntry);
// Update peer state to Up in mshost_peer
updatePeerInfo(hostStatsEntry);
} catch (JsonParseException e) {
logger.error("Exception in decoding of other MS hosts status from : " + pdu.getSourcePeer());
if (logger.isDebugEnabled()) {
@ -1162,6 +1169,23 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc
return null;
}
/**
 * Marks the management server that produced the given stats entry as an Up peer of this
 * management server in mshost_peer.
 *
 * While {@code msId} still equals {@code managementServerNodeId}, it is first replaced with
 * the id of the management server row found by that node id. If no such row is found, a
 * warning is logged and nothing is updated. No update is made when the stats entry comes
 * from this management server itself.
 *
 * @param hostStatsEntry stats received from a management server
 */
private void updatePeerInfo(ManagementServerHostStatsEntry hostStatsEntry) {
    // Update msId to id of the management server if msId is same as managementServerNodeId
    if (msId == managementServerNodeId) {
        ManagementServerHostVO self = managementServerHostDao.findByMsid(managementServerNodeId);
        if (self == null) {
            logger.warn(String.format("Cannot find management server with msid [%s]. Therefore, do not update peer info.", managementServerNodeId));
            return;
        }
        msId = self.getId();
    }

    long peerMshostId = hostStatsEntry.getManagementServerHostId();
    if (msId == peerMshostId) {
        // The entry describes this server; there is no peer row to update.
        return;
    }

    // Update peer state to Up in mshost_peer
    long peerRunId = hostStatsEntry.getManagementServerRunId();
    managementServerHostPeerDao.updatePeerInfo(msId, peerMshostId, peerRunId, ManagementServerHost.State.Up);
}
@Override
public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
// do nothing, but wait for the status to come through

View File

@ -1291,6 +1291,7 @@
"label.l3gatewayserviceuuid": "L3 Gateway Service UUID",
"label.label": "Label",
"label.last.updated": "Last update",
"label.lastupdated": "Last update",
"label.lastannotated": "Last annotation date",
"label.lastheartbeat": "Last heartbeat",
"label.lastsuccessfuljob": "Last successful job",
@ -1379,6 +1380,7 @@
"label.management.ips": "Management IP addresses",
"label.management.server": "Management server",
"label.management.servers": "Management servers",
"label.management.server.peers": "Peers",
"label.managementservers": "Number of management servers",
"label.matchall": "Match all",
"label.max": "Max.",
@ -1667,6 +1669,13 @@
"label.payload": "Payload",
"label.payloadurl": "Payload URL",
"label.pcidevice": "GPU",
"label.peername": "Management Server",
"label.peermsid": "Management Server Node ID",
"label.peerrunid": "Process Timestamp",
"label.peerserviceip": "Management IP",
"label.peerserviceport": "Service Port",
"label.peerstate": "Peer State",
"label.peerstate.lastupdated": "Peer State Updated Time",
"label.pending.jobs": "Pending Jobs",
"label.per.account": "Per Account",
"label.per.zone": "Per zone",
@ -2035,7 +2044,7 @@
"label.service.connectivity.distributedroutercapabilitycheckbox": "Distributed router",
"label.service.connectivity.regionlevelvpccapabilitycheckbox": "Region level VPC",
"label.service.group": "Service group",
"label.serviceip": "Service IP",
"label.serviceip": "Management IP",
"label.service.lb.elasticlbcheckbox": "Elastic LB",
"label.service.lb.inlinemodedropdown": "Mode",
"label.service.lb.lbisolationdropdown": "LB isolation",
@ -2150,6 +2159,7 @@
"label.startport": "Start port",
"label.startquota": "Quota value",
"label.state": "State",
"label.state.reported": "Reported State",
"label.staticnat": "Static NAT",
"label.static": "Static",
"label.static.routes": "Static routes",

View File

@ -43,6 +43,10 @@ export default {
name: 'pending.jobs',
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/AsyncJobsTab.vue')))
},
{
name: 'management.server.peers',
component: shallowRef(defineAsyncComponent(() => import('@/views/infra/ManagementServerPeerTab.vue')))
},
{
name: 'comments',
component: shallowRef(defineAsyncComponent(() => import('@/components/view/AnnotationsTab.vue')))

View File

@ -0,0 +1,111 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
<template>
<a-table
class="table"
size="small"
:columns="columns"
:dataSource="managementservers"
:rowKey="item => item.id"
:pagination="false" >
<!--
  Peer list of the selected management server. The peer name and management IP cells link
  to the peer's own management server page; the last-updated cell is shown as a localized date.
  NOTE(review): rows are keyed by item.id, whereas the links below identify a peer through
  record.peerid. Confirm that the peer objects returned by listManagementServers carry an
  id field, and if not, choose a key that is unique per row.
-->
<template #peername="{ text, record }">
<router-link :to="{ path: '/managementserver/' + record.peerid }">{{ text }}</router-link>
</template>
<template #peerserviceip="{ text, record }">
<router-link :to="{ path: '/managementserver/' + record.peerid }">{{ text }}</router-link>
</template>
<template #lastupdated="{ text }">
{{ $toLocaleDate(text) }}
</template>
</a-table>
</template>
<script>
import { api } from '@/api'
import Status from '@/components/widgets/Status'
// Tab that lists the peers of the management server passed in through the `resource` prop.
export default {
  name: 'ManagementServerPeerTab',
  components: { Status },
  props: {
    // The management server whose peers are listed; its `id` is sent to the API.
    resource: {
      type: Object,
      required: true
    }
  },
  data () {
    // Builds one table column; `slotName` is set only for cells rendered by a template slot.
    const column = (title, dataIndex, slotName) => {
      const definition = { title, dataIndex }
      if (slotName) {
        definition.slots = { customRender: slotName }
      }
      return definition
    }

    return {
      managementservers: [],
      columns: [
        column(this.$t('label.peername'), 'peername', 'peername'),
        column(this.$t('label.peermsid'), 'peermsid'),
        column(this.$t('label.peerrunid'), 'peerrunid'),
        column(this.$t('label.peerserviceip'), 'peerserviceip', 'peerserviceip'),
        column(this.$t('label.peerserviceport'), 'peerserviceport'),
        column(this.$t('label.state.reported'), 'state'),
        column(this.$t('label.peerstate.lastupdated'), 'lastupdated', 'lastupdated')
      ]
    }
  },
  created () {
    this.fetchData()
  },
  watch: {
    // Reload the peer list whenever a different resource is supplied.
    resource () {
      this.fetchData()
    }
  },
  methods: {
    // Clears the table, then loads the peers of the current resource from listManagementServers.
    fetchData () {
      this.managementservers = []
      const params = {
        peers: true,
        id: this.resource.id
      }
      api('listManagementServers', params).then(json => {
        const owner = json.listmanagementserversresponse.managementserver?.[0]
        // Fall back to an empty list when the server or its peers are missing from the response.
        this.managementservers = owner?.peers || []
      })
    }
  }
}
</script>