mirror of
https://github.com/apache/cloudstack.git
synced 2025-10-26 08:42:29 +01:00
Add missing hosts info to the prometheus exporter output. (#8328)
Sometimes the hostStats object of an agent becomes null in the management server. This is a rare situation, and we haven't found the root cause yet, but it occurs occasionally in our CloudStack deployments with many hosts. The hostStats object is null even though the agent is UP and hosting multiple VMs; it is still possible to access the VM consoles and execute tasks on them. This pull request doesn't address the issue directly; rather, it displays those hosts in Prometheus so we can restart the agent and get the necessary information.
This commit is contained in:
parent
c599011ef5
commit
2993c99363
@ -82,6 +82,24 @@ public class PrometheusExporterImpl extends ManagerBase implements PrometheusExp
|
||||
private static final String ONLINE = "online";
|
||||
private static final String OFFLINE = "offline";
|
||||
|
||||
/**
 * Kinds of per-host information that can be reported as missing.
 *
 * <p>Each constant carries a camelCase label; {@link #toString()} returns that
 * label instead of the constant's declared identifier.
 */
enum MissingInfoFilter {
    Host_Stats("hostStats"),
    CPU_CAPACITY("cpuCapacity"),
    MEM_CAPACITY("memCapacity"),
    CORE_CAPACITY("coreCapacity");

    /** Text produced whenever this constant is rendered as a string. */
    private final String label;

    MissingInfoFilter(final String label) {
        this.label = label;
    }

    /**
     * @return the camelCase label supplied at construction
     */
    @Override
    public String toString() {
        return label;
    }
}
|
||||
|
||||
private static List<Item> metricsItems = new ArrayList<>();
|
||||
|
||||
@Inject
|
||||
@ -129,8 +147,6 @@ public class PrometheusExporterImpl extends ManagerBase implements PrometheusExp
|
||||
Map<String, Integer> upHosts = new HashMap<>();
|
||||
Map<String, Integer> downHosts = new HashMap<>();
|
||||
|
||||
HostStats hostStats;
|
||||
|
||||
for (final HostVO host : hostDao.listAll()) {
|
||||
if (host == null || host.getType() != Host.Type.Routing || host.getDataCenterId() != dcId) {
|
||||
continue;
|
||||
@ -147,8 +163,6 @@ public class PrometheusExporterImpl extends ManagerBase implements PrometheusExp
|
||||
int isDedicated = (dr != null) ? 1 : 0;
|
||||
metricsList.add(new ItemHostIsDedicated(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), isDedicated));
|
||||
|
||||
String hostTags = markTagMaps(host, totalHosts, upHosts, downHosts);
|
||||
hostStats = ApiDBUtils.getHostStatistics(host.getId());
|
||||
|
||||
// Get account, domain details for dedicated hosts
|
||||
if (isDedicated == 1) {
|
||||
@ -160,16 +174,22 @@ public class PrometheusExporterImpl extends ManagerBase implements PrometheusExp
|
||||
metricsList.add(new ItemHostDedicatedToAccount(zoneName, host.getName(), accountName, domain.getPath(), isDedicated));
|
||||
}
|
||||
|
||||
String hostTags = markTagMaps(host, totalHosts, upHosts, downHosts);
|
||||
HostStats hostStats = ApiDBUtils.getHostStatistics(host.getId());
|
||||
|
||||
if (hostStats == null){
|
||||
metricsList.add(new MissingHostInfo(zoneName, host.getName(), MissingInfoFilter.Host_Stats));
|
||||
}
|
||||
|
||||
final String cpuFactor = String.valueOf(CapacityManager.CpuOverprovisioningFactor.valueIn(host.getClusterId()));
|
||||
final CapacityVO cpuCapacity = capacityDao.findByHostIdType(host.getId(), Capacity.CAPACITY_TYPE_CPU);
|
||||
final double cpuUsedMhz = hostStats.getCpuUtilization() * host.getCpus() * host.getSpeed() / 100.0 ;
|
||||
|
||||
if (host.isInMaintenanceStates()) {
|
||||
metricsList.add(new ItemHostCpu(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), cpuFactor, ALLOCATED, 0L, isDedicated, hostTags));
|
||||
metricsList.add(new ItemHostCpu(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), cpuFactor, USED, 0L, isDedicated, hostTags));
|
||||
metricsList.add(new ItemHostCpu(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), cpuFactor, TOTAL, 0L, isDedicated, hostTags));
|
||||
if (cpuCapacity == null && !host.isInMaintenanceStates()){
|
||||
metricsList.add(new MissingHostInfo(zoneName, host.getName(), MissingInfoFilter.CPU_CAPACITY));
|
||||
}
|
||||
else if (cpuCapacity != null && cpuCapacity.getCapacityState() == CapacityState.Enabled) {
|
||||
|
||||
if (hostStats != null && cpuCapacity != null && cpuCapacity.getCapacityState() == CapacityState.Enabled) {
|
||||
final double cpuUsedMhz = hostStats.getCpuUtilization() * host.getCpus() * host.getSpeed() / 100.0 ;
|
||||
metricsList.add(new ItemHostCpu(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), cpuFactor, ALLOCATED, cpuCapacity.getUsedCapacity(), isDedicated, hostTags));
|
||||
metricsList.add(new ItemHostCpu(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), cpuFactor, USED, cpuUsedMhz, isDedicated, hostTags));
|
||||
metricsList.add(new ItemHostCpu(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), cpuFactor, TOTAL, cpuCapacity.getTotalCapacity(), isDedicated, hostTags));
|
||||
@ -181,12 +201,12 @@ public class PrometheusExporterImpl extends ManagerBase implements PrometheusExp
|
||||
|
||||
final String memoryFactor = String.valueOf(CapacityManager.MemOverprovisioningFactor.valueIn(host.getClusterId()));
|
||||
final CapacityVO memCapacity = capacityDao.findByHostIdType(host.getId(), Capacity.CAPACITY_TYPE_MEMORY);
|
||||
if (host.isInMaintenanceStates()) {
|
||||
metricsList.add(new ItemHostMemory(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), memoryFactor, ALLOCATED, 0L, isDedicated, hostTags));
|
||||
metricsList.add(new ItemHostMemory(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), memoryFactor, USED, 0, isDedicated, hostTags));
|
||||
metricsList.add(new ItemHostMemory(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), memoryFactor, TOTAL, 0L, isDedicated, hostTags));
|
||||
|
||||
if (memCapacity == null && !host.isInMaintenanceStates()){
|
||||
metricsList.add(new MissingHostInfo(zoneName, host.getName(), MissingInfoFilter.MEM_CAPACITY));
|
||||
}
|
||||
else if (memCapacity != null && memCapacity.getCapacityState() == CapacityState.Enabled) {
|
||||
|
||||
if (hostStats != null && memCapacity != null && memCapacity.getCapacityState() == CapacityState.Enabled) {
|
||||
metricsList.add(new ItemHostMemory(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), memoryFactor, ALLOCATED, memCapacity.getUsedCapacity(), isDedicated, hostTags));
|
||||
metricsList.add(new ItemHostMemory(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), memoryFactor, USED, hostStats.getUsedMemory(), isDedicated, hostTags));
|
||||
metricsList.add(new ItemHostMemory(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), memoryFactor, TOTAL, memCapacity.getTotalCapacity(), isDedicated, hostTags));
|
||||
@ -197,13 +217,13 @@ public class PrometheusExporterImpl extends ManagerBase implements PrometheusExp
|
||||
}
|
||||
|
||||
metricsList.add(new ItemHostVM(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), vmDao.listByHostId(host.getId()).size()));
|
||||
|
||||
final CapacityVO coreCapacity = capacityDao.findByHostIdType(host.getId(), Capacity.CAPACITY_TYPE_CPU_CORE);
|
||||
if (host.isInMaintenanceStates()) {
|
||||
metricsList.add(new ItemVMCore(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), USED, 0L, isDedicated, hostTags));
|
||||
metricsList.add(new ItemVMCore(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), TOTAL, 0L, isDedicated, hostTags));
|
||||
|
||||
if (coreCapacity == null && !host.isInMaintenanceStates()){
|
||||
metricsList.add(new MissingHostInfo(zoneName, host.getName(), MissingInfoFilter.CORE_CAPACITY));
|
||||
}
|
||||
else if (coreCapacity != null && coreCapacity.getCapacityState() == CapacityState.Enabled) {
|
||||
|
||||
if (hostStats != null && coreCapacity != null && coreCapacity.getCapacityState() == CapacityState.Enabled) {
|
||||
metricsList.add(new ItemVMCore(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), USED, coreCapacity.getUsedCapacity(), isDedicated, hostTags));
|
||||
metricsList.add(new ItemVMCore(zoneName, zoneUuid, host.getName(), host.getUuid(), host.getPrivateIpAddress(), TOTAL, coreCapacity.getTotalCapacity(), isDedicated, hostTags));
|
||||
} else {
|
||||
@ -213,17 +233,17 @@ public class PrometheusExporterImpl extends ManagerBase implements PrometheusExp
|
||||
}
|
||||
|
||||
final List<CapacityDaoImpl.SummedCapacity> cpuCapacity = capacityDao.findCapacityBy((int) Capacity.CAPACITY_TYPE_CPU, dcId, null, null);
|
||||
if (cpuCapacity != null && cpuCapacity.size() > 0) {
|
||||
if (cpuCapacity != null && !cpuCapacity.isEmpty()) {
|
||||
metricsList.add(new ItemHostCpu(zoneName, zoneUuid, null, null, null, null, ALLOCATED, cpuCapacity.get(0).getAllocatedCapacity() != null ? cpuCapacity.get(0).getAllocatedCapacity() : 0, 0, ""));
|
||||
}
|
||||
|
||||
final List<CapacityDaoImpl.SummedCapacity> memCapacity = capacityDao.findCapacityBy((int) Capacity.CAPACITY_TYPE_MEMORY, dcId, null, null);
|
||||
if (memCapacity != null && memCapacity.size() > 0) {
|
||||
if (memCapacity != null && !memCapacity.isEmpty()) {
|
||||
metricsList.add(new ItemHostMemory(zoneName, zoneUuid, null, null, null, null, ALLOCATED, memCapacity.get(0).getAllocatedCapacity() != null ? memCapacity.get(0).getAllocatedCapacity() : 0, 0, ""));
|
||||
}
|
||||
|
||||
final List<CapacityDaoImpl.SummedCapacity> coreCapacity = capacityDao.findCapacityBy((int) Capacity.CAPACITY_TYPE_CPU_CORE, dcId, null, null);
|
||||
if (coreCapacity != null && coreCapacity.size() > 0) {
|
||||
if (coreCapacity != null && !coreCapacity.isEmpty()) {
|
||||
metricsList.add(new ItemVMCore(zoneName, zoneUuid, null, null, null, ALLOCATED, coreCapacity.get(0).getAllocatedCapacity() != null ? coreCapacity.get(0).getAllocatedCapacity() : 0, 0, ""));
|
||||
}
|
||||
|
||||
@ -626,6 +646,25 @@ public class PrometheusExporterImpl extends ManagerBase implements PrometheusExp
|
||||
}
|
||||
}
|
||||
|
||||
class MissingHostInfo extends Item {
|
||||
|
||||
String zoneName;
|
||||
String hostName;
|
||||
MissingInfoFilter filter;
|
||||
|
||||
public MissingHostInfo(String zoneName, String hostname, MissingInfoFilter filter) {
|
||||
super("cloudstack_host_missing_info");
|
||||
this.zoneName = zoneName;
|
||||
this.hostName = hostname;
|
||||
this.filter = filter;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toMetricsString() {
|
||||
return String.format("%s{zone=\"%s\",hostname=\"%s\",filter=\"%s\"} -1", name, zoneName, hostName, filter);
|
||||
}
|
||||
}
|
||||
|
||||
class ItemHostCpu extends Item {
|
||||
String zoneName;
|
||||
String zoneUuid;
|
||||
|
||||
Loading…
x
Reference in New Issue
Block a user