// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
package com.cloud.server;

import static com.cloud.configuration.ConfigurationManagerImpl.DELETE_QUERY_BATCH_SIZE;
import static com.cloud.utils.NumbersUtil.toHumanReadableSize;

import java.lang.management.ManagementFactory;
import java.lang.management.MemoryMXBean;
import java.lang.management.RuntimeMXBean;
import java.net.URI;
import java.net.URISyntaxException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Calendar;
import java.util.Collection;
import java.util.Date;
import java.util.HashMap;
import java.util.List;
import java.util.Locale;
import java.util.Map;
import java.util.Properties;
import java.util.Set;
import java.util.TimeZone;
import java.util.concurrent.ConcurrentHashMap;
import java.util.concurrent.Executors;
import java.util.concurrent.ScheduledExecutorService;
import java.util.concurrent.TimeUnit;
import java.util.stream.Collectors;

import javax.inject.Inject;

import com.cloud.utils.DateUtil;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStore;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreManager;
import org.apache.cloudstack.engine.subsystem.api.storage.DataStoreProvider;
import org.apache.cloudstack.engine.subsystem.api.storage.EndPoint;
import org.apache.cloudstack.engine.subsystem.api.storage.EndPointSelector;
import org.apache.cloudstack.framework.config.ConfigKey;
import org.apache.cloudstack.framework.config.Configurable;
import org.apache.cloudstack.framework.config.dao.ConfigurationDao;
import org.apache.cloudstack.managed.context.ManagedContextRunnable;
import org.apache.cloudstack.management.ManagementServerHost;
import org.apache.cloudstack.storage.datastore.db.PrimaryDataStoreDao;
import org.apache.cloudstack.storage.datastore.db.StoragePoolVO;
import org.apache.cloudstack.utils.bytescale.ByteScaleUtils;
import org.apache.cloudstack.utils.graphite.GraphiteClient;
import org.apache.cloudstack.utils.graphite.GraphiteException;
import org.apache.cloudstack.utils.identity.ManagementServerNode;
import org.apache.cloudstack.utils.usage.UsageUtils;
import org.apache.commons.collections.CollectionUtils;
import org.apache.commons.collections.MapUtils;
import org.apache.commons.lang3.BooleanUtils;
import org.apache.commons.lang3.StringUtils;
import org.apache.commons.lang3.time.DateUtils;
import org.apache.logging.log4j.Level;
import org.influxdb.BatchOptions;
import org.influxdb.InfluxDB;
import org.influxdb.InfluxDBFactory;
import org.influxdb.dto.BatchPoints;
import org.influxdb.dto.Point;
import org.influxdb.dto.Pong;
import org.jetbrains.annotations.NotNull;
import org.springframework.stereotype.Component;

import com.cloud.agent.AgentManager;
import com.cloud.agent.api.Answer;
import com.cloud.agent.api.GetStorageStatsCommand;
import com.cloud.agent.api.HostStatsEntry;
import com.cloud.agent.api.VgpuTypesInfo;
import com.cloud.agent.api.VmDiskStatsEntry;
import com.cloud.agent.api.VmNetworkStatsEntry;
import com.cloud.agent.api.VmStatsEntry;
import com.cloud.agent.api.VmStatsEntryBase;
import com.cloud.agent.api.VolumeStatsEntry;
import com.cloud.api.ApiSessionListener;
import com.cloud.capacity.CapacityManager;
import com.cloud.cluster.ClusterManager;
import com.cloud.cluster.ClusterManagerListener;
import com.cloud.cluster.ClusterServicePdu;
import com.cloud.cluster.ManagementServerHostVO;
import com.cloud.cluster.ManagementServerStatusVO;
import com.cloud.cluster.dao.ManagementServerHostDao;
import com.cloud.cluster.dao.ManagementServerHostPeerDao;
import com.cloud.cluster.dao.ManagementServerStatusDao;
import com.cloud.dc.Vlan.VlanType;
import com.cloud.dc.VlanVO;
import com.cloud.dc.dao.ClusterDao;
import com.cloud.dc.dao.VlanDao;
import com.cloud.exception.StorageUnavailableException;
import com.cloud.gpu.dao.HostGpuGroupsDao;
import com.cloud.host.Host;
import com.cloud.host.HostStats;
import com.cloud.host.HostVO;
import com.cloud.host.Status;
import com.cloud.host.dao.HostDao;
import com.cloud.hypervisor.Hypervisor;
import com.cloud.hypervisor.Hypervisor.HypervisorType;
import com.cloud.network.as.AutoScaleManager;
import com.cloud.org.Cluster;
import com.cloud.resource.ResourceManager;
import com.cloud.resource.ResourceState;
import com.cloud.serializer.GsonHelper;
import com.cloud.storage.ImageStoreDetailsUtil;
import com.cloud.storage.ScopeType;
import com.cloud.storage.Storage;
import com.cloud.storage.Storage.ImageFormat;
import com.cloud.storage.StorageManager;
import com.cloud.storage.StorageStats;
import com.cloud.storage.VolumeStats;
import com.cloud.storage.VolumeStatsVO;
import com.cloud.storage.VolumeVO;
import com.cloud.storage.dao.VolumeDao;
import com.cloud.storage.dao.VolumeStatsDao;
import com.cloud.user.UserStatisticsVO;
import com.cloud.user.VmDiskStatisticsVO;
import com.cloud.user.dao.UserStatisticsDao;
import com.cloud.user.dao.VmDiskStatisticsDao;
import com.cloud.utils.LogUtils;
import com.cloud.utils.NumbersUtil;
import com.cloud.utils.Pair;
import com.cloud.utils.component.ComponentMethodInterceptable;
import com.cloud.utils.component.ManagerBase;
import com.cloud.utils.concurrency.NamedThreadFactory;
import com.cloud.utils.db.DbProperties;
import com.cloud.utils.db.DbUtil;
import com.cloud.utils.db.Filter;
import com.cloud.utils.db.GlobalLock;
import com.cloud.utils.db.SearchCriteria;
import com.cloud.utils.db.Transaction;
import com.cloud.utils.db.TransactionCallbackNoReturn;
import com.cloud.utils.db.TransactionStatus;
import com.cloud.utils.exception.CloudRuntimeException;
import com.cloud.utils.net.MacAddress;
import com.cloud.utils.script.Script;
import com.cloud.vm.NicVO;
import com.cloud.vm.UserVmManager;
import com.cloud.vm.VMInstanceVO;
import com.cloud.vm.VirtualMachine;
import com.cloud.vm.VirtualMachineManager;
import com.cloud.vm.VmDiskStats;
import com.cloud.vm.VmNetworkStats;
import com.cloud.vm.VmStats;
import com.cloud.vm.VmStatsVO;
import com.cloud.vm.dao.NicDao;
import com.cloud.vm.dao.UserVmDao;
import com.cloud.vm.dao.VMInstanceDao;
import com.cloud.vm.dao.VmStatsDao;
import com.codahale.metrics.JvmAttributeGaugeSet;
import com.codahale.metrics.Metric;
import com.codahale.metrics.MetricRegistry;
import com.codahale.metrics.MetricSet;
import com.codahale.metrics.jvm.BufferPoolMetricSet;
import com.codahale.metrics.jvm.GarbageCollectorMetricSet;
import com.codahale.metrics.jvm.MemoryUsageGaugeSet;
import com.codahale.metrics.jvm.ThreadStatesGaugeSet;
import com.google.gson.Gson;
import com.google.gson.GsonBuilder;
import com.google.gson.JsonElement;
import com.google.gson.JsonObject;
import com.google.gson.JsonParseException;
import com.sun.management.OperatingSystemMXBean;

/**
 * Provides real-time stats for various agent resources up to x seconds
 *
 * @startuml
 *
 * StatsCollector -> ClusterManager : register
 * ClusterManager -> StatsCollector : onManagementNodeJoined
 * StatsCollector -> list : add MS
 * ClusterManager -> StatsCollector : onManagementNodeJoined
 * StatsCollector -> list : add MS to send list
 * StatsCollector -> collector : update own status
 * StatsCollector -> list : get all ms ids
 * StatsCollector -> ClusterManager : update status for my (ms id) to all ms_ids
 * ClusterManager -> ClusterManager : update ms_ids on status on (ms id)
 * ClusterManager -> StatsCollector : onManagementNodeLeft
 * StatsCollector -> list : add MS
 * ClusterManager -> StatsCollector : status data updated for (ms id)
 * StatsCollector -> StatsCollector : update entry for (ms id)
 * ClusterManager -> StatsCollector : onManagementNodeLeft
 * StatsCollector -> list : add MS
 * @enduml
 */
@Component
public class StatsCollector extends ManagerBase implements ComponentMethodInterceptable, Configurable, DbStatsCollection {

    public enum ExternalStatsProtocol {
        NONE("none"), GRAPHITE("graphite"), INFLUXDB("influxdb");
        String _type;

        ExternalStatsProtocol(String type) {
            _type = type;
        }

        @Override
        public String toString() {
            return _type;
        }
    }


    private static final int UNDEFINED_PORT_VALUE = -1;

    /**
     * Default value for the Graphite connection port: {@value}
     */
    private static final int GRAPHITE_DEFAULT_PORT = 2003;

    /**
     * Default value for the InfluxDB connection port: {@value}
     */
    private static final int INFLUXDB_DEFAULT_PORT = 8086;

    private static final String UUID_TAG = "uuid";

    private static final String TOTAL_MEMORY_KBS_FIELD = "total_memory_kb";
    private static final String FREE_MEMORY_KBS_FIELD = "free_memory_kb";
    private static final String CPU_UTILIZATION_FIELD = "cpu_utilization";
    private static final String CPUS_FIELD = "cpus";
    private static final String CPU_SOCKETS_FIELD = "cpu_sockets";
    private static final String NETWORK_READ_KBS_FIELD = "network_read_kbs";
    private static final String NETWORK_WRITE_KBS_FIELD = "network_write_kbs";
    private static final String MEMORY_TARGET_KBS_FIELD = "memory_target_kbs";
    private static final String DISK_READ_IOPS_FIELD = "disk_read_iops";
    private static final String DISK_READ_KBS_FIELD = "disk_read_kbs";
    private static final String DISK_WRITE_IOPS_FIELD = "disk_write_iops";
    private static final String DISK_WRITE_KBS_FIELD = "disk_write_kbs";

    private static final int HOURLY_TIME = 60;
    private static final int DAILY_TIME = HOURLY_TIME * 24;
    private static final Long ONE_MINUTE_IN_MILLISECONDS = 60000L;

    private static final String DEFAULT_DATABASE_NAME = "cloudstack";
    private static final String INFLUXDB_HOST_MEASUREMENT = "host_stats";
    private static final String INFLUXDB_VM_MEASUREMENT = "vm_stats";

    public static final ConfigKey<Integer> MANAGEMENT_SERVER_STATUS_COLLECTION_INTERVAL = new ConfigKey<>("Advanced",
            Integer.class, "management.server.stats.interval", "60",
            "Time interval, in seconds, for management server stats collection. Set to <= 0 to disable management server stats.", false);
    private static final ConfigKey<Integer> DATABASE_SERVER_STATUS_COLLECTION_INTERVAL = new ConfigKey<>("Advanced",
            Integer.class, "database.server.stats.interval", "60",
            "Time interval, in seconds, for database server stats collection. Set to <= 0 to disable database server stats.", false);
    private static final ConfigKey<Integer> DATABASE_SERVER_LOAD_HISTORY_RETENTION_NUMBER = new ConfigKey<>("Advanced",
            Integer.class, "database.server.stats.retention", "3",
            "The number of queries/second values to retain in history. This defines for how many periods of 'database.server.stats.interval' seconds the queries/second values will be kept in memory",
            true);
    private static final ConfigKey<Integer> vmDiskStatsInterval = new ConfigKey<>("Advanced", Integer.class, "vm.disk.stats.interval", "0",
            "Interval (in seconds) to report VM disk statistics. VM disk statistics will be disabled if this is set to 0 or less.", false);
    private static final ConfigKey<Integer> vmDiskStatsIntervalMin = new ConfigKey<>("Advanced", Integer.class, "vm.disk.stats.interval.min", "300",
            "Minimal interval (in seconds) to report VM disk statistics. If vm.disk.stats.interval is smaller than this, this value is used to report VM disk statistics.", false);
    private static final ConfigKey<Integer> vmNetworkStatsInterval = new ConfigKey<>("Advanced", Integer.class, "vm.network.stats.interval", "0",
            "Interval (in seconds) to report VM network statistics (for Shared networks). VM network statistics will be disabled if this is set to 0 or less.", false);
    private static final ConfigKey<Integer> vmNetworkStatsIntervalMin = new ConfigKey<>("Advanced", Integer.class, "vm.network.stats.interval.min", "300",
            "Minimal interval (in seconds) to report VM network statistics (for Shared networks). If vm.network.stats.interval is smaller than this, this value is used to report VM network statistics.",
            false);
    private static final ConfigKey<Integer> StatsTimeout = new ConfigKey<>("Advanced", Integer.class, "stats.timeout", "60000",
            "The timeout for stats calls in milliseconds.", true,
            ConfigKey.Scope.Cluster);
    private static final ConfigKey<String> statsOutputUri = new ConfigKey<>("Advanced", String.class, "stats.output.uri", "",
            "URI to send StatsCollector statistics to. The collector is defined by the URI scheme. Example: graphite://graphite-hostaddress:port or influxdb://influxdb-hostaddress/dbname. Note that the port is optional; if not added, the default port for the respective collector (graphite or influxdb) will be used. Additionally, the database name '/dbname' is also optional; the default db name is 'cloudstack'. You must create and configure the database if using influxdb.",
            true);
    protected static ConfigKey<Boolean> vmStatsIncrementMetrics = new ConfigKey<>("Advanced", Boolean.class, "vm.stats.increment.metrics", "false",
            "When set to 'true', VM metrics (NetworkReadKBs, NetworkWriteKBs, DiskWriteKBs, DiskReadKBs, DiskReadIOs and DiskWriteIOs) that are collected from the hypervisor are summed before being returned. "
            + "On the other hand, when set to 'false', the VM metrics API will just display the latest metrics collected.", true);
    protected static ConfigKey<Integer> vmStatsMaxRetentionTime = new ConfigKey<>("Advanced", Integer.class, "vm.stats.max.retention.time", "720",
            "The maximum time (in minutes) for keeping VM stats records in the database. The VM stats cleanup process will be disabled if this is set to 0 or less.", true);

    protected static ConfigKey<Boolean> vmStatsCollectUserVMOnly = new ConfigKey<>("Advanced", Boolean.class, "vm.stats.user.vm.only", "false",
            "When set to 'false', stats for system VMs will be collected; otherwise, stats collection will be done only for user VMs", true);

    protected static ConfigKey<Boolean> vmDiskStatsRetentionEnabled = new ConfigKey<>("Advanced", Boolean.class, "vm.disk.stats.retention.enabled", "false",
            "When set to 'true', stats for VM disks will be stored in the database; otherwise, disk stats will not be stored", true);

    protected static ConfigKey<Integer> vmDiskStatsMaxRetentionTime = new ConfigKey<>("Advanced", Integer.class, "vm.disk.stats.max.retention.time", "720",
            "The maximum time (in minutes) for keeping VM disk stats records in the database. The VM disk stats cleanup process will be disabled if this is set to 0 or less.", true);

    private static StatsCollector s_instance = null;

private static Gson gson = new Gson();
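    // Dedicated Gson instance for management server stats: Date fields are
    // (de)serialized with the explicit zoned date-time format so stats entries
    // exchanged between management servers parse consistently (see
    // ManagementServerCollector and ManagementServerStatusAdministrator.newStatus).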
    private static Gson msStatsGson = new GsonBuilder()
            .setDateFormat(DateUtil.ZONED_DATETIME_FORMAT)
            .create();

    private ScheduledExecutorService _executor = null;
    @Inject
    private AgentManager _agentMgr;
    @Inject
    private UserVmManager _userVmMgr;
    @Inject
    private HostDao _hostDao;
    @Inject
    private ClusterDao _clusterDao;
    @Inject
    protected UserVmDao _userVmDao;
    @Inject
    protected VmStatsDao vmStatsDao;
    @Inject
    private VolumeDao _volsDao;
    @Inject
    protected VolumeStatsDao volumeStatsDao;
    @Inject
    private PrimaryDataStoreDao _storagePoolDao;
    @Inject
    private StorageManager _storageManager;
    @Inject
    private DataStoreManager _dataStoreMgr;
    @Inject
    private ResourceManager _resourceMgr;
    @Inject
    private ConfigurationDao _configDao;
    @Inject
    private EndPointSelector _epSelector;
    @Inject
    private VmDiskStatisticsDao _vmDiskStatsDao;
    @Inject
    private UserStatisticsDao _userStatsDao;
    @Inject
    private NicDao _nicDao;
    @Inject
    private VlanDao _vlanDao;
    @Inject
    private AutoScaleManager _asManager;
    @Inject
    private VMInstanceDao _vmInstance;
    @Inject
    private HostGpuGroupsDao _hostGpuGroupsDao;
    @Inject
    private ImageStoreDetailsUtil imageStoreDetailsUtil;
    @Inject
    private ManagementServerHostDao managementServerHostDao;
    // stats collector is now a clustered agent
    @Inject
    private ClusterManager clusterManager;
    @Inject
    private ManagementServerStatusDao managementServerStatusDao;
    @Inject
    private ManagementServerHostPeerDao managementServerHostPeerDao;
    @Inject
    VirtualMachineManager virtualMachineManager;

    private final ConcurrentHashMap<String, ManagementServerHostStats> managementServerHostStats = new ConcurrentHashMap<>();
    private final ConcurrentHashMap<String, Object> dbStats = new ConcurrentHashMap<>();
    private final ConcurrentHashMap<Long, HostStats> _hostStats = new ConcurrentHashMap<>();
    protected ConcurrentHashMap<Long, VmStats> _VmStats = new ConcurrentHashMap<>();
    private final Map<String, VolumeStats> _volumeStats = new ConcurrentHashMap<>();
    private ConcurrentHashMap<Long, StorageStats> _storageStats = new ConcurrentHashMap<>();
    private ConcurrentHashMap<Long, StorageStats> _storagePoolStats = new ConcurrentHashMap<>();

    private static final long DEFAULT_INITIAL_DELAY = 15000L;

    private long hostStatsInterval = -1L;
    private long vmStatsInterval = -1L;
    private long storageStatsInterval = -1L;
    private long volumeStatsInterval = -1L;
    private long autoScaleStatsInterval = -1L;

    private String externalStatsPrefix = "";
    String externalStatsHost = null;
    int externalStatsPort = -1;
    private String externalStatsScheme;
    ExternalStatsProtocol externalStatsType = ExternalStatsProtocol.NONE;
    private String databaseName = DEFAULT_DATABASE_NAME;

    private ScheduledExecutorService _diskStatsUpdateExecutor;
    private int _usageAggregationRange = 1440;
    private String _usageTimeZone = "GMT";
    private final long mgmtSrvrId = MacAddress.getMacAddress().toLong();
    private static final int ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION = 5; // 5 seconds
    private boolean _dailyOrHourly = false;
    protected long managementServerNodeId = ManagementServerNode.getManagementServerId();
    protected long msId = managementServerNodeId;
    final static MetricRegistry METRIC_REGISTRY = new MetricRegistry();

    public static StatsCollector getInstance() {
        return s_instance;
    }

    public static StatsCollector getInstance(Map<String, String> configs) {
        s_instance.init(configs);
        return s_instance;
    }

    public StatsCollector() {
        s_instance = this;
    }

    @Override
    public boolean start() {
        init(_configDao.getConfiguration());
        registerAll("gc", new GarbageCollectorMetricSet(), METRIC_REGISTRY);
        registerAll("buffers", new BufferPoolMetricSet(ManagementFactory.getPlatformMBeanServer()), METRIC_REGISTRY);
        registerAll("memory", new MemoryUsageGaugeSet(), METRIC_REGISTRY);
        registerAll("threads", new ThreadStatesGaugeSet(), METRIC_REGISTRY);
        registerAll("jvm", new JvmAttributeGaugeSet(), METRIC_REGISTRY);
        return true;
    }

    @Override
    public boolean stop() {
        _executor.shutdown();
        return true;
    }
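
    /**
     * Recursively registers every metric of the given set with the registry, prepending
     * the prefix to each name; nested {@link MetricSet}s are flattened the same way
     * (e.g. the "memory" set yields gauge names such as "memoryheap.used").
     */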
    private void registerAll(String prefix, MetricSet metricSet, MetricRegistry registry) {
        String registryTemplate = prefix + "%s";
        for (Map.Entry<String, Metric> entry : metricSet.getMetrics().entrySet()) {
            String registryName = String.format(registryTemplate, entry.getKey());
            if (entry.getValue() instanceof MetricSet) {
                registerAll(registryName, (MetricSet) entry.getValue(), registry);
            } else {
                registry.register(registryName, entry.getValue());
            }
        }
    }

    protected void init(Map<String, String> configs) {
        _executor = Executors.newScheduledThreadPool(6, new NamedThreadFactory("StatsCollector"));

        hostStatsInterval = NumbersUtil.parseLong(configs.get("host.stats.interval"), ONE_MINUTE_IN_MILLISECONDS);
        vmStatsInterval = NumbersUtil.parseLong(configs.get("vm.stats.interval"), ONE_MINUTE_IN_MILLISECONDS);
        storageStatsInterval = NumbersUtil.parseLong(configs.get("storage.stats.interval"), ONE_MINUTE_IN_MILLISECONDS);
        volumeStatsInterval = NumbersUtil.parseLong(configs.get("volume.stats.interval"), ONE_MINUTE_IN_MILLISECONDS);
        autoScaleStatsInterval = AutoScaleManager.AutoScaleStatsInterval.value();
        ManagementServerStatusAdministrator managementServerStatusAdministrator = new ManagementServerStatusAdministrator();
        clusterManager.registerStatusAdministrator(managementServerStatusAdministrator);
        clusterManager.registerListener(managementServerStatusAdministrator);

        gson = GsonHelper.getGson();

        String statsUri = statsOutputUri.value();
        if (StringUtils.isNotBlank(statsUri)) {
            try {
                URI uri = new URI(statsUri);
                externalStatsScheme = uri.getScheme();

                try {
                    externalStatsType = ExternalStatsProtocol.valueOf(externalStatsScheme.toUpperCase());
                } catch (IllegalArgumentException e) {
                    logger.error(externalStatsScheme + " is not a valid protocol for external statistics. No statistics will be sent.");
                }

                if (StringUtils.isNotEmpty(uri.getHost())) {
                    externalStatsHost = uri.getHost();
                }

                externalStatsPort = retrieveExternalStatsPortFromUri(uri);

                databaseName = configureDatabaseName(uri);

                if (StringUtils.isNotEmpty(uri.getPath())) {
                    externalStatsPrefix = uri.getPath().substring(1);
                }

                /* Append a dot (.) to the prefix if it is set */
                if (StringUtils.isNotEmpty(externalStatsPrefix)) {
                    externalStatsPrefix += ".";
                } else {
                    externalStatsPrefix = "";
                }

            } catch (URISyntaxException e) {
                logger.error("Failed to parse external statistics URI: ", e);
            }
        }

        if (hostStatsInterval > 0) {
            _executor.scheduleWithFixedDelay(new HostCollector(), DEFAULT_INITIAL_DELAY, hostStatsInterval, TimeUnit.MILLISECONDS);
        }

        if (vmStatsInterval > 0) {
            _executor.scheduleWithFixedDelay(new VmStatsCollector(), DEFAULT_INITIAL_DELAY, vmStatsInterval, TimeUnit.MILLISECONDS);
        } else {
            logger.info("Skipping VM stats collection. The global parameter vm.stats.interval is set to 0 or less.");
        }

        _executor.scheduleWithFixedDelay(new VmStatsCleaner(), DEFAULT_INITIAL_DELAY, 60000L, TimeUnit.MILLISECONDS);

        _executor.scheduleWithFixedDelay(new VolumeStatsCleaner(), DEFAULT_INITIAL_DELAY, 60000L, TimeUnit.MILLISECONDS);

        scheduleCollection(MANAGEMENT_SERVER_STATUS_COLLECTION_INTERVAL, new ManagementServerCollector(), 1L);
        scheduleCollection(DATABASE_SERVER_STATUS_COLLECTION_INTERVAL, new DbCollector(), 0L);

        if (storageStatsInterval > 0) {
            _executor.scheduleWithFixedDelay(new StorageCollector(), DEFAULT_INITIAL_DELAY, storageStatsInterval, TimeUnit.MILLISECONDS);
        }

        if (autoScaleStatsInterval > 0) {
            _executor.scheduleWithFixedDelay(new AutoScaleMonitor(), DEFAULT_INITIAL_DELAY, autoScaleStatsInterval * 1000L, TimeUnit.MILLISECONDS);
        }

        if (vmDiskStatsInterval.value() > 0) {
            if (vmDiskStatsInterval.value() < vmDiskStatsIntervalMin.value()) {
                logger.debug("vm.disk.stats.interval - " + vmDiskStatsInterval.value() + " is smaller than vm.disk.stats.interval.min - " + vmDiskStatsIntervalMin.value()
                        + ", so using vm.disk.stats.interval.min");
                _executor.scheduleAtFixedRate(new VmDiskStatsTask(), vmDiskStatsIntervalMin.value(), vmDiskStatsIntervalMin.value(), TimeUnit.SECONDS);
            } else {
                _executor.scheduleAtFixedRate(new VmDiskStatsTask(), vmDiskStatsInterval.value(), vmDiskStatsInterval.value(), TimeUnit.SECONDS);
            }
        } else {
            logger.debug("vm.disk.stats.interval - " + vmDiskStatsInterval.value() + " is 0 or less, so not scheduling the VM disk stats thread");
        }

        if (vmNetworkStatsInterval.value() > 0) {
            if (vmNetworkStatsInterval.value() < vmNetworkStatsIntervalMin.value()) {
                logger.debug("vm.network.stats.interval - " + vmNetworkStatsInterval.value() + " is smaller than vm.network.stats.interval.min - "
                        + vmNetworkStatsIntervalMin.value() + ", so using vm.network.stats.interval.min");
                _executor.scheduleAtFixedRate(new VmNetworkStatsTask(), vmNetworkStatsIntervalMin.value(), vmNetworkStatsIntervalMin.value(), TimeUnit.SECONDS);
            } else {
                _executor.scheduleAtFixedRate(new VmNetworkStatsTask(), vmNetworkStatsInterval.value(), vmNetworkStatsInterval.value(), TimeUnit.SECONDS);
            }
        } else {
            logger.debug("vm.network.stats.interval - " + vmNetworkStatsInterval.value() + " is 0 or less, so not scheduling the VM network stats thread");
        }

        if (volumeStatsInterval > 0) {
            _executor.scheduleAtFixedRate(new VolumeStatsTask(), DEFAULT_INITIAL_DELAY, volumeStatsInterval, TimeUnit.MILLISECONDS);
        }

        //Schedule disk stats update task
        _diskStatsUpdateExecutor = Executors.newScheduledThreadPool(1, new NamedThreadFactory("DiskStatsUpdater"));

        String aggregationRange = configs.get("usage.stats.job.aggregation.range");
        _usageAggregationRange = NumbersUtil.parseInt(aggregationRange, 1440);
        _usageTimeZone = configs.get("usage.aggregation.timezone");
        if (_usageTimeZone == null) {
            _usageTimeZone = "GMT";
        }
        TimeZone usageTimezone = TimeZone.getTimeZone(_usageTimeZone);
        Calendar cal = Calendar.getInstance(usageTimezone);
        cal.setTime(new Date());
        long endDate = 0;
        if (_usageAggregationRange == DAILY_TIME) {
            cal.set(Calendar.HOUR_OF_DAY, 0);
            cal.set(Calendar.MINUTE, 0);
            cal.set(Calendar.SECOND, 0);
            cal.set(Calendar.MILLISECOND, 0);
            cal.roll(Calendar.DAY_OF_YEAR, true);
            cal.add(Calendar.MILLISECOND, -1);
            endDate = cal.getTime().getTime();
            _dailyOrHourly = true;
        } else if (_usageAggregationRange == HOURLY_TIME) {
            cal.set(Calendar.MINUTE, 0);
            cal.set(Calendar.SECOND, 0);
            cal.set(Calendar.MILLISECOND, 0);
            cal.roll(Calendar.HOUR_OF_DAY, true);
            cal.add(Calendar.MILLISECOND, -1);
            endDate = cal.getTime().getTime();
            _dailyOrHourly = true;
        } else {
            endDate = cal.getTime().getTime();
            _dailyOrHourly = false;
        }
        if (_usageAggregationRange < UsageUtils.USAGE_AGGREGATION_RANGE_MIN) {
            logger.warn("Usage stats job aggregation range is too small, using the minimum value of " + UsageUtils.USAGE_AGGREGATION_RANGE_MIN);
            _usageAggregationRange = UsageUtils.USAGE_AGGREGATION_RANGE_MIN;
        }

        long period = _usageAggregationRange * ONE_MINUTE_IN_MILLISECONDS;
        _diskStatsUpdateExecutor.scheduleAtFixedRate(new VmDiskStatsUpdaterTask(), (endDate - System.currentTimeMillis()), period, TimeUnit.MILLISECONDS);

        ManagementServerHostVO mgmtServerVo = managementServerHostDao.findByMsid(managementServerNodeId);
        if (mgmtServerVo != null) {
            msId = mgmtServerVo.getId();
        } else {
            logger.warn(String.format("Cannot find management server with msid [%s]. "
                    + "Therefore, VM stats will be recorded with the management server MAC address converted as a long in the mgmt_server_id column.", managementServerNodeId));
        }
    }
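
    /**
     * Schedules the given collector at a fixed rate when the configured interval (in
     * seconds) is positive; otherwise logs that this collector is disabled.
     */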
    private void scheduleCollection(ConfigKey<Integer> statusCollectionInterval, AbstractStatsCollector collector, long delay) {
        if (statusCollectionInterval.value() > 0) {
            _executor.scheduleAtFixedRate(collector,
                    delay,
                    statusCollectionInterval.value(),
                    TimeUnit.SECONDS);
        } else {
            logger.debug(String.format("%s - %d is 0 or less, so not scheduling the status collector thread",
                    statusCollectionInterval.key(), statusCollectionInterval.value()));
        }
    }

    /**
     * Configures the database name according to the URI path. For instance, if the URI is influxdb://address:port/dbname, the database name will be 'dbname'.
     */
    protected String configureDatabaseName(URI uri) {
        String dbname = StringUtils.removeStart(uri.getPath(), "/");
        if (StringUtils.isBlank(dbname)) {
            return DEFAULT_DATABASE_NAME;
        } else {
            return dbname;
        }
    }

    /**
     * Configures the port to be used when connecting with the stats collector service.
     * Default values are 8086 for InfluxDB and 2003 for Graphite.
     * Throws URISyntaxException if no port is configured and the external stats type is not recognized.
     */
    protected int retrieveExternalStatsPortFromUri(URI uri) throws URISyntaxException {
        int port = uri.getPort();
        if (externalStatsType != ExternalStatsProtocol.NONE) {
            if (port != UNDEFINED_PORT_VALUE) {
                return port;
            }
            if (externalStatsType == ExternalStatsProtocol.GRAPHITE) {
                return GRAPHITE_DEFAULT_PORT;
            }
            if (externalStatsType == ExternalStatsProtocol.INFLUXDB) {
                return INFLUXDB_DEFAULT_PORT;
            }
        }
        throw new URISyntaxException(uri.toString(), String.format(
                "Cannot define a port for the Stats Collector host %s://%s:%s or the URI scheme is incorrect. The configured URI in stats.output.uri is not supported. Please configure it as in the following examples: graphite://graphite-hostaddress:port, or influxdb://influxdb-hostaddress:port. Note that the port is optional; if not added, the default port for the respective collector (graphite or influxdb) will be used.",
                externalStatsScheme, externalStatsHost, externalStatsPort));
    }
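
    /**
     * Builds, for all running VMs on the given host (optionally restricted to user VMs
     * by vm.stats.user.vm.only), a pair of lookup maps: VM id to instance, and instance
     * name to VM id.
     */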
    protected Pair<Map<Long, VMInstanceVO>, Map<String, Long>> getVmMapForStatsForHost(Host host) {
        List<VMInstanceVO> vms = _vmInstance.listByHostAndState(host.getId(), VirtualMachine.State.Running);
        boolean collectUserVMStatsOnly = Boolean.TRUE.equals(vmStatsCollectUserVMOnly.value());
        if (collectUserVMStatsOnly) {
            vms = vms.stream().filter(vm -> VirtualMachine.Type.User.equals(vm.getType())).collect(Collectors.toList());
        }
        Map<Long, VMInstanceVO> idInstanceMap = new HashMap<>();
        Map<String, Long> instanceNameIdMap = new HashMap<>();
        vms.forEach(vm -> {
            if (!collectUserVMStatsOnly || VirtualMachine.Type.User.equals(vm.getType())) {
                idInstanceMap.put(vm.getId(), vm);
                instanceNameIdMap.put(vm.getInstanceName(), vm.getId());
            }
        });
        return new Pair<>(idInstanceMap, instanceNameIdMap);
    }

    class HostCollector extends AbstractStatsCollector {
        @Override
        protected void runInContext() {
            try {
                SearchCriteria<HostVO> sc = createSearchCriteriaForHostTypeRoutingStateUpAndNotInMaintenance();
                List<HostVO> hosts = _hostDao.search(sc, null);

                logger.debug(String.format("HostStatsCollector is running to process %d UP hosts", hosts.size()));

                Map<Object, Object> metrics = new HashMap<>();
                for (HostVO host : hosts) {
                    HostStatsEntry hostStatsEntry = (HostStatsEntry) _resourceMgr.getHostStatistics(host);
                    if (hostStatsEntry != null) {
                        hostStatsEntry.setHostVo(host);
                        metrics.put(hostStatsEntry.getHostId(), hostStatsEntry);
                        _hostStats.put(host.getId(), hostStatsEntry);
                    } else {
                        logger.warn("Host stats are null for host: {}", host);
                    }
                }

                if (externalStatsType == ExternalStatsProtocol.INFLUXDB) {
                    sendMetricsToInfluxdb(metrics);
                }

                updateGpuEnabledHostsDetails(hosts);
            } catch (Throwable t) {
                logger.error("Error trying to retrieve host stats", t);
            }
        }

        /**
         * Updates GPU details on hosts supporting GPU.
         */
        private void updateGpuEnabledHostsDetails(List<HostVO> hosts) {
            List<HostVO> gpuEnabledHosts = new ArrayList<>();
            List<Long> hostIds = _hostGpuGroupsDao.listHostIds();
            if (CollectionUtils.isEmpty(hostIds)) {
                return;
            }
            for (HostVO host : hosts) {
                if (hostIds.contains(host.getId())) {
                    gpuEnabledHosts.add(host);
                }
            }
            for (HostVO host : gpuEnabledHosts) {
                HashMap<String, HashMap<String, VgpuTypesInfo>> groupDetails = _resourceMgr.getGPUStatistics(host);
                if (!MapUtils.isEmpty(groupDetails)) {
                    _resourceMgr.updateGPUDetails(host.getId(), groupDetails);
                }
            }
        }

        @Override
        protected Point createInfluxDbPoint(Object metricsObject) {
            return createInfluxDbPointForHostMetrics(metricsObject);
        }
    }
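
    /**
     * Polls the database server's STATUS counters via DbUtil.getDbInfo and derives
     * the load as (queries delta) / (uptime delta) between two consecutive runs,
     * keeping a short in-memory history bounded by database.server.stats.retention.
     */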
    class DbCollector extends AbstractStatsCollector {
        List<Double> loadHistory = new ArrayList<>();

        DbCollector() {
            dbStats.put(loadAvarages, loadHistory);
        }

        @Override
        protected void runInContext() {
            logger.debug(String.format("%s is running...", this.getClass().getSimpleName()));

            try {
                long lastUptime = (dbStats.containsKey(uptime) ? (Long) dbStats.get(uptime) : 0);
                long lastQueries = (dbStats.containsKey(queries) ? (Long) dbStats.get(queries) : 0);
                getDynamicDataFromDB();
                long interval = (Long) dbStats.get(uptime) - lastUptime;
                long activity = (Long) dbStats.get(queries) - lastQueries;
                loadHistory.add(0, interval == 0 ? -1.0 : (double) activity / interval);
                int maxsize = DATABASE_SERVER_LOAD_HISTORY_RETENTION_NUMBER.value();
                while (loadHistory.size() > maxsize) {
                    loadHistory.remove(maxsize);
                }
            } catch (Throwable e) {
                // pokemon catch to make sure the thread stays running
                logger.error("db statistics collection failed due to " + e.getLocalizedMessage());
                if (logger.isDebugEnabled()) {
                    logger.debug("db statistics collection failed.", e);
                }
            }
        }

        private void getDynamicDataFromDB() {
            Map<String, String> stats = DbUtil.getDbInfo("STATUS", queries, uptime);
            dbStats.put(collectionTime, new Date());
            dbStats.put(queries, (Long.valueOf(stats.get(queries))));
            dbStats.put(uptime, (Long.valueOf(stats.get(uptime))));
        }

        @Override
        protected Point createInfluxDbPoint(Object metricsObject) {
            return null;
        }
    }
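
    /**
     * Collects this management server's own runtime statistics, publishes them as JSON
     * to peer management servers via the cluster manager, and persists a status summary
     * through the ManagementServerStatusDao.
     */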
    class ManagementServerCollector extends AbstractStatsCollector {
        @Override
        protected void runInContext() {
            logger.debug(String.format("%s is running...", this.getClass().getSimpleName()));
            long msid = ManagementServerNode.getManagementServerId();
            ManagementServerHostVO mshost = null;
            ManagementServerHostStatsEntry hostStatsEntry = null;
            try {
                mshost = managementServerHostDao.findByMsid(msid);
                // get local data
                hostStatsEntry = getDataFrom(mshost);
                managementServerHostStats.put(mshost.getUuid(), hostStatsEntry);
                // send to other hosts
                clusterManager.publishStatus(msStatsGson.toJson(hostStatsEntry));
            } catch (Throwable t) {
                // pokemon catch to make sure the thread stays running
                logger.error("Error trying to retrieve management server host statistics", t);
            }
            try {
                // send to DB
                storeStatus(hostStatsEntry, mshost);
            } catch (Throwable t) {
                // pokemon catch to make sure the thread stays running
                logger.error("Error trying to store management server host statistics", t);
            }
        }

        private void storeStatus(ManagementServerHostStatsEntry hostStatsEntry, ManagementServerHostVO mshost) {
            if (hostStatsEntry == null || mshost == null) {
                return;
            }
            ManagementServerStatusVO msStats = managementServerStatusDao.findByMsId(hostStatsEntry.getManagementServerHostUuid());
            if (msStats == null) {
                logger.info(String.format("creating new status info record for host %s - %s",
                        mshost.getName(),
                        hostStatsEntry.getManagementServerHostUuid()));
                msStats = new ManagementServerStatusVO();
                msStats.setMsId(hostStatsEntry.getManagementServerHostUuid());
            }
            msStats.setOsDistribution(hostStatsEntry.getOsDistribution()); // for now just this bunch; more details come later
            msStats.setJavaName(hostStatsEntry.getJvmVendor());
            msStats.setJavaVersion(hostStatsEntry.getJvmVersion());
            Date startTime = new Date(hostStatsEntry.getJvmStartTime());
            if (logger.isTraceEnabled()) {
                logger.trace(String.format("reporting starttime %s", startTime));
            }
            msStats.setLastJvmStart(startTime);
            msStats.setLastSystemBoot(hostStatsEntry.getSystemBootTime());
            msStats.setUpdated(new Date());
            managementServerStatusDao.persist(msStats);
        }

        @NotNull
        private ManagementServerHostStatsEntry getDataFrom(ManagementServerHostVO mshost) {
            ManagementServerHostStatsEntry newEntry = new ManagementServerHostStatsEntry();
            logger.trace("Metrics collection start...");
            newEntry.setManagementServerHostId(mshost.getId());
            newEntry.setManagementServerHostUuid(mshost.getUuid());
            newEntry.setManagementServerRunId(mshost.getRunid());
            newEntry.setDbLocal(isDbLocal());
            newEntry.setUsageLocal(isUsageLocal());
            retrieveSession(newEntry);
            getJvmDimensions(newEntry);
            logger.trace("Metrics collection extra...");
            getRuntimeData(newEntry);
            getMemoryData(newEntry);
            // newEntry must now include a pid!
            getProcFileSystemData(newEntry);
            // proc memory data has precedence over mbean memory data
            getCpuData(newEntry);
            getFileSystemData(newEntry);
            getDataBaseStatistics(newEntry, mshost.getMsid());
            gatherAllMetrics(newEntry);
            logger.trace("Metrics collection end!");
            return newEntry;
        }

        private void retrieveSession(ManagementServerHostStatsEntry newEntry) {
            long sessions = ApiSessionListener.getSessionCount();
            newEntry.setSessions(sessions);
            if (logger.isTraceEnabled()) {
                logger.trace(String.format("Sessions found in Api %d vs context %d", sessions, ApiSessionListener.getNumberOfSessions()));
            } else {
                logger.debug("Sessions active: " + sessions);
            }
        }

        private void getDataBaseStatistics(ManagementServerHostStatsEntry newEntry, long msid) {
            int count = _hostDao.countByMs(msid);
            newEntry.setAgentCount(count);
        }

        private void getMemoryData(@NotNull ManagementServerHostStatsEntry newEntry) {
            MemoryMXBean mxBean = ManagementFactory.getMemoryMXBean();
            newEntry.setTotalInit(mxBean.getHeapMemoryUsage().getInit() + mxBean.getNonHeapMemoryUsage().getInit());
            newEntry.setTotalUsed(mxBean.getHeapMemoryUsage().getUsed() + mxBean.getNonHeapMemoryUsage().getUsed());
            newEntry.setMaxJvmMemoryBytes(mxBean.getHeapMemoryUsage().getMax() + mxBean.getNonHeapMemoryUsage().getMax());
            newEntry.setTotalCommitted(mxBean.getHeapMemoryUsage().getCommitted() + mxBean.getNonHeapMemoryUsage().getCommitted());
        }

        private void getCpuData(@NotNull ManagementServerHostStatsEntry newEntry) {
            java.lang.management.OperatingSystemMXBean bean = ManagementFactory.getOperatingSystemMXBean();
            newEntry.setAvailableProcessors(bean.getAvailableProcessors());
            newEntry.setLoadAverage(bean.getSystemLoadAverage());
            if (logger.isTraceEnabled()) {
                logger.trace(String.format(
                        "Metrics processors - %d , loadavg - %f ",
                        newEntry.getAvailableProcessors(),
                        newEntry.getLoadAverage()));
            }
            if (bean instanceof OperatingSystemMXBean) {
                OperatingSystemMXBean mxBean = (OperatingSystemMXBean) bean;
                // if we got these from /proc, skip the bean
                if (newEntry.getSystemMemoryTotal() == 0) {
                    newEntry.setSystemMemoryTotal(mxBean.getTotalPhysicalMemorySize());
                }
                if (newEntry.getSystemMemoryFree() == 0) {
                    newEntry.setSystemMemoryFree(mxBean.getFreePhysicalMemorySize());
                }
                if (newEntry.getSystemMemoryUsed() <= 0) {
                    newEntry.setSystemMemoryUsed(mxBean.getCommittedVirtualMemorySize());
                }
                if (logger.isTraceEnabled()) {
                    logger.trace(String.format("data from 'OperatingSystemMXBean': total mem: %d, free mem: %d, used mem: %d",
                            newEntry.getSystemMemoryTotal(),
                            newEntry.getSystemMemoryFree(),
                            newEntry.getSystemMemoryUsed()));
                }
            }
        }

        private void getRuntimeData(@NotNull ManagementServerHostStatsEntry newEntry) {
            final RuntimeMXBean mxBean = ManagementFactory.getRuntimeMXBean();
            newEntry.setJvmUptime(mxBean.getUptime());
            newEntry.setJvmStartTime(mxBean.getStartTime());
            newEntry.setProcessId(mxBean.getPid());
            newEntry.setJvmName(mxBean.getName());
            newEntry.setJvmVendor(mxBean.getVmVendor());
            newEntry.setJvmVersion(mxBean.getVmVersion());
            if (logger.isTraceEnabled()) {
                logger.trace(String.format(
                        "Metrics uptime - %d , starttime - %d",
                        newEntry.getJvmUptime(),
                        newEntry.getJvmStartTime()));
            }
        }

        private void getJvmDimensions(@NotNull ManagementServerHostStatsEntry newEntry) {
            Runtime runtime = Runtime.getRuntime();
            newEntry.setTotalJvmMemoryBytes(runtime.totalMemory());
            newEntry.setFreeJvmMemoryBytes(runtime.freeMemory());
            newEntry.setMaxJvmMemoryBytes(runtime.maxMemory());
            if (logger.isTraceEnabled()) {
                logger.trace(String.format(
                        "Metrics proc - %d , maxMem - %d , totalMemory - %d , freeMemory - %d ",
                        newEntry.getAvailableProcessors(),
                        newEntry.getMaxJvmMemoryBytes(),
                        newEntry.getTotalJvmMemoryBytes(),
                        newEntry.getFreeJvmMemoryBytes()));
            }
        }

        /**
         * As for data from outside the JVM, we only rely on /proc/ contained data.
         *
         * @param newEntry item to add the information to
         */
        private void getProcFileSystemData(@NotNull ManagementServerHostStatsEntry newEntry) {
            // this should be taken from ("cat /proc/version"), not sure how standard this /etc entry is
            String os = Script.runSimpleBashScript("cat /etc/os-release | grep PRETTY_NAME | cut -f2 -d '=' | tr -d '\"'");
            newEntry.setOsDistribution(os);
            String kernel = Script.runSimpleBashScript("uname -r");
            newEntry.setKernelVersion(kernel);
            // if we got these from the bean, skip
            if (newEntry.getSystemMemoryTotal() == 0) {
                String mem = Script.runSimpleBashScript("cat /proc/meminfo | grep MemTotal | cut -f 2 -d ':' | tr -d 'a-zA-z '").trim();
                newEntry.setSystemMemoryTotal(Long.parseLong(mem) * ByteScaleUtils.KiB);
                logger.info(String.format("system memory from /proc: %d", newEntry.getSystemMemoryTotal()));
            }
            if (newEntry.getSystemMemoryFree() == 0) {
                String free = Script.runSimpleBashScript("cat /proc/meminfo | grep MemFree | cut -f 2 -d ':' | tr -d 'a-zA-z '").trim();
                newEntry.setSystemMemoryFree(Long.parseLong(free) * ByteScaleUtils.KiB);
                logger.info(String.format("free memory from /proc: %d", newEntry.getSystemMemoryFree()));
            }
            if (newEntry.getSystemMemoryUsed() <= 0) {
                String used = Script.runSimpleBashScript(String.format("ps -o rss= %d", newEntry.getPid()));
                newEntry.setSystemMemoryUsed(Long.parseLong(used));
                logger.info(String.format("used memory from /proc: %d", newEntry.getSystemMemoryUsed()));
            }
            try {
                String bootTime = Script.runSimpleBashScript("date -d @$(grep btime /proc/stat | awk '{print $2}') '+%Y-%m-%d %H:%M:%S'");
                SimpleDateFormat formatter = new SimpleDateFormat("yyyy-MM-dd HH:mm:ss", Locale.ENGLISH);
                Date date = formatter.parse(bootTime);
                newEntry.setSystemBootTime(date);
            } catch (ParseException e) {
                logger.error("cannot retrieve system boot time", e);
            }
            String maxuse = Script.runSimpleBashScript(String.format("ps -o vsz= %d", newEntry.getPid()));
            newEntry.setSystemMemoryVirtualSize(Long.parseLong(maxuse) * 1024);

            newEntry.setSystemTotalCpuCycles(getSystemCpuCyclesTotal());
            newEntry.setSystemLoadAverages(getCpuLoads());

            newEntry.setSystemCyclesUsage(getSystemCpuUsage());
            if (logger.isTraceEnabled()) {
                logger.trace(
                        String.format("cpu\ncapacities: %f\n loads: %s ; %s ; %s\n stats: %d ; %d ; %d",
                                newEntry.getSystemTotalCpuCycles(),
                                newEntry.getSystemLoadAverages()[0], newEntry.getSystemLoadAverages()[1], newEntry.getSystemLoadAverages()[2],
                                newEntry.getSystemCyclesUsage()[0], newEntry.getSystemCyclesUsage()[1], newEntry.getSystemCyclesUsage()[2]
                        )
                );
            }
        }

        @NotNull
        private double[] getCpuLoads() {
            String[] cpuloadString = Script.runSimpleBashScript("cat /proc/loadavg").split(" ");
            double[] cpuloads = {Double.parseDouble(cpuloadString[0]), Double.parseDouble(cpuloadString[1]), Double.parseDouble(cpuloadString[2])};
            return cpuloads;
        }

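        /**
         * Reads the aggregate "cpu" line from /proc/stat and returns three counters
         * in kernel clock ticks: user time (user + nice), system time, and idle time.
         */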
        private long[] getSystemCpuUsage() {
            String[] cpustats = Script.runSimpleBashScript("cat /proc/stat | grep \"cpu \" | tr -d \"cpu\"").trim().split(" ");
            long[] cycleUsage = {Long.parseLong(cpustats[0]) + Long.parseLong(cpustats[1]), Long.parseLong(cpustats[2]), Long.parseLong(cpustats[3])};
            return cycleUsage;
        }

        private double getSystemCpuCyclesTotal() {
            String cpucaps = Script.runSimpleBashScript("cat /proc/cpuinfo | grep \"cpu MHz\" | cut -f 2 -d : | tr -d ' '| tr '\\n' \" \"");
            double totalcpucap = 0;
            if (StringUtils.isEmpty(cpucaps)) {
                String totalCpus = Script.runSimpleBashScript("nproc --all| tr '\\n' \" \"");
                String maxCpuSpeed = Script.runSimpleBashScript("lscpu | egrep 'CPU max MHz' | head -1 | cut -f 2 -d : | tr -d ' '| tr '\\n' \" \"");
                if (StringUtils.isNotEmpty(totalCpus) && StringUtils.isNotEmpty(maxCpuSpeed)) {
                    totalcpucap = Double.parseDouble(totalCpus) * Double.parseDouble(maxCpuSpeed);
                }
            } else {
                for (String cpucap : cpucaps.split(" ")) {
                    totalcpucap += Double.parseDouble(cpucap);
                }
            }
            return totalcpucap;
        }

        private void getFileSystemData(@NotNull ManagementServerHostStatsEntry newEntry) {
            Set<String> logFileNames = LogUtils.getLogFileNames();
            StringBuilder logInfoBuilder = new StringBuilder();
            for (String fileName : logFileNames) {
                String du = Script.runSimpleBashScript(String.format("du -sh %s | cut -f '1'", fileName));
                String df = Script.runSimpleBashScript(String.format("df -h %s | grep -v Filesystem | awk '{print \"on disk \" $1 \" mounted on \" $6 \" (\" $5 \" full)\"}'", fileName));
                logInfoBuilder.append(fileName).append(" using: ").append(du).append('\n').append(df);
            }
            newEntry.setLogInfo(logInfoBuilder.toString());
            if (logger.isTraceEnabled()) {
                logger.trace("log stats:\n" + newEntry.getLogInfo());
            }
        }

        private void gatherAllMetrics(ManagementServerHostStatsEntry metricsEntry) {
            Map<String, Object> metricDetails = new HashMap<>();
            for (String metricName : METRIC_REGISTRY.getGauges().keySet()) {
                Object value = getMetric(metricName);
                metricDetails.put(metricName, value);
                if (logger.isTraceEnabled()) {
                    logger.trace(String.format("Metrics collection '%s'=%s", metricName, value));
                }
                // gather what we need from this list
                extractDetailToField(metricsEntry, metricName, value);
            }
        }

        /**
         * Stores a metric value in the corresponding field of the stats entry.
         *
         * @param metricsEntry the stats info we need to communicate
         * @param metricName the detail to extract
         * @param value the metric value to store
         */
        private void extractDetailToField(ManagementServerHostStatsEntry metricsEntry, String metricName, Object value) {
            switch (metricName) {
                case "memoryheap.used":
                    metricsEntry.setHeapMemoryUsed((Long) value);
                    break;
                case "memoryheap.max":
                    metricsEntry.setHeapMemoryTotal((Long) value);
                    break;
                case "threadsblocked.count":
                    metricsEntry.setThreadsBlockedCount((Integer) value);
                    break;
                case "threadscount":
                    metricsEntry.setThreadsTotalCount((Integer) value);
                    break;
                case "threadsdaemon.count":
                    metricsEntry.setThreadsDaemonCount((Integer) value);
                    break;
                case "threadsrunnable.count":
                    metricsEntry.setThreadsRunnableCount((Integer) value);
                    break;
                case "threadsterminated.count":
                    metricsEntry.setThreadsTerminatedCount((Integer) value);
                    break;
                case "threadswaiting.count":
                    metricsEntry.setThreadsWaitingCount((Integer) value);
                    break;
                case "threadsdeadlocks":
                case "threadsnew.count":
                case "threadstimed_waiting.count":
                default:
                    logger.debug(String.format("not storing detail %s, %s", metricName, value));
                    /*
                     * 'buffers.direct.capacity'=8192 type=Long
                     * 'buffers.direct.count'=1 type=Long
                     * 'buffers.direct.used'=8192 type=Long
                     * 'buffers.mapped.capacity'=0 type=Long
                     * 'buffers.mapped.count'=0 type=Long
                     * 'buffers.mapped.used'=0 type=Long
                     * 'gc.G1-Old-Generation.count'=0 type=Long
                     * 'gc.G1-Old-Generation.time'=0 type=Long
                     * 'gc.G1-Young-Generation.count'=36 type=Long
                     * 'gc.G1-Young-Generation.time'=678 type=Long
                     * 'jvm.name'=532601@matah type=String
                     * 'jvm.uptime'=272482 type=Long
                     * 'jvm.vendor'=Red Hat, Inc. OpenJDK 64-Bit Server VM 11.0.12+7 (11) type=String
                     * 'memory.heap.committed'=1200619520 type=Long
                     * 'memory.heap.init'=522190848 type=Long
                     *+ 'memoryheap.max'=4294967296 type=Long
                     * 'memory.heap.usage'=0.06405723094940186 type=Double
                     *+ 'memoryheap.used'=275123712 type=Long
                     * 'memory.non-heap.committed'=217051136 type=Long
                     * 'memory.non-heap.init'=7667712 type=Long
                     * 'memory.non-heap.max'=-1 type=Long
                     * 'memory.non-heap.usage'=-2.11503936E8 type=Double
                     * 'memory.non-heap.used'=211503936 type=Long
                     * 'memory.pools.CodeHeap-'non-nmethods'.usage'=0.3137061403508772 type=Double
                     * 'memory.pools.CodeHeap-'non-profiled-nmethods'.usage'=0.16057488836310319 type=Double
                     * 'memory.pools.CodeHeap-'profiled-nmethods'.usage'=0.3391885643349885 type=Double
                     * 'memory.pools.Compressed-Class-Space.usage'=0.012650594115257263 type=Double
                     * 'memory.pools.G1-Eden-Space.usage'=0.005822416302765648 type=Double
                     * 'memory.pools.G1-Old-Gen.usage'=0.054535746574401855 type=Double
                     * 'memory.pools.G1-Survivor-Space.usage'=1.0 type=Double
                     * 'memory.pools.Metaspace.usage'=0.9765298966718151 type=Double
                     * 'memory.total.committed'=1417670656 type=Long
                     * 'memory.total.init'=529858560 type=Long
                     * 'memory.total.max'=4294967295 type=Long
                     * 'memory.total.used'=486627648 type=Long
                     *+ 'threadsblocked.count'=1 type=Integer
                     *+ 'threadscount'=439 type=Integer
                     *+ 'threadsdaemon.count'=12 type=Integer
                     * 'threadsdeadlocks'=[] type=EmptySet
                     * 'threads.new.count'=0 type=Integer
                     *+ 'threadsrunnable.count'=5 type=Integer
                     *+ 'threadsterminated.count'=0 type=Integer
                     * 'threads.timed_waiting.count'=52 type=Integer
                     *+ 'threadswaiting.count'=381 type=Integer
                     */
                    break;
            }
        }

        private Object getMetric(String metricName) {
            return METRIC_REGISTRY.getGauges().get(metricName).getValue();
        }

        @Override
        protected Point createInfluxDbPoint(Object metricsObject) {
            return null;
        }

    }

    /**
     * @return true if there is a usage server installed locally.
     */
    protected boolean isUsageLocal() {
        boolean local = false;
        String usageInstall = Script.runSimpleBashScript("systemctl status cloudstack-usage | grep \" Loaded:\"");
        logger.debug(String.format("usage install: %s", usageInstall));

        if (StringUtils.isNotBlank(usageInstall)) {
            local = usageInstall.contains("enabled");
        }
        return local;
    }

    /**
     * @return true if the DB endpoint is local to this server
     */
    protected boolean isDbLocal() {
        Properties p = getDbProperties();
        String configuredHost = p.getProperty("db.cloud.host");
        String localHost = p.getProperty("cluster.node.IP");
        // see if these resolve to the same
        if ("localhost".equals(configuredHost)) return true;
        if ("127.0.0.1".equals(configuredHost)) return true;
        if ("::1".equals(configuredHost)) return true;
        if (StringUtils.isNotBlank(configuredHost) && StringUtils.isNotBlank(localHost) && configuredHost.equals(localHost)) return true;
        return false;
    }

    protected Properties getDbProperties() {
        return DbProperties.getDbProperties();
    }

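    /**
     * Receives the status updates published by peer management servers, caches the
     * decoded stats entries, updates peer state in mshost_peer, and invalidates cached
     * entries when nodes leave the cluster.
     */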
    protected class ManagementServerStatusAdministrator implements ClusterManager.StatusAdministrator, ClusterManagerListener {
        @Override
        public String newStatus(ClusterServicePdu pdu) {
            if (logger.isDebugEnabled()) {
                logger.debug(String.format("StatusUpdate from %s, json: %s", pdu.getSourcePeer(), pdu.getJsonPackage()));
            }

            ManagementServerHostStatsEntry hostStatsEntry;
            try {
                hostStatsEntry = msStatsGson.fromJson(pdu.getJsonPackage(), ManagementServerHostStatsEntry.class);
                managementServerHostStats.put(hostStatsEntry.getManagementServerHostUuid(), hostStatsEntry);

                // Update peer state to Up in mshost_peer
                updatePeerInfo(hostStatsEntry);
            } catch (JsonParseException e) {
                logger.error("Exception in decoding of other MS hosts status from: " + pdu.getSourcePeer());
                if (logger.isDebugEnabled()) {
                    logger.debug("Exception in decoding of other MS hosts status: ", e);
                }
            }
            return null;
        }

        private void updatePeerInfo(ManagementServerHostStatsEntry hostStatsEntry) {
            // Update msId to the id of the management server if msId is the same as managementServerNodeId
            if (msId == managementServerNodeId) {
                ManagementServerHostVO mgmtServerVo = managementServerHostDao.findByMsid(managementServerNodeId);
                if (mgmtServerVo != null) {
                    msId = mgmtServerVo.getId();
                } else {
                    logger.warn(String.format("Cannot find management server with msid [%s]. Therefore, do not update peer info.", managementServerNodeId));
                    return;
                }
            }
            // Update peer state to Up in mshost_peer
            if (msId != hostStatsEntry.getManagementServerHostId()) {
                managementServerHostPeerDao.updatePeerInfo(msId, hostStatsEntry.getManagementServerHostId(), hostStatsEntry.getManagementServerRunId(), ManagementServerHost.State.Up);
            }
        }

        @Override
        public void onManagementNodeJoined(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
            // do nothing, but wait for the status to come through
        }

        @Override
        public void onManagementNodeLeft(List<? extends ManagementServerHost> nodeList, long selfNodeId) {
            // remove the status for those ones
            for (ManagementServerHost node : nodeList) {
                logger.info(String.format("node %s (%s) at %s (%d) is reported to have left the cluster, invalidating status.", node.getName(), node.getUuid(), node.getServiceIP(), node.getMsid()));
                managementServerHostStats.remove(node.getUuid());
            }
        }

        @Override
        public void onManagementNodeIsolated() {
            logger.error(String.format("This management server is reported to be isolated (msid %d)", mgmtSrvrId));
            // not sure if anything should be done now.
        }
    }

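    /**
     * Collects per-VM statistics from each UP routing host, persists them, and
     * forwards them to Graphite or InfluxDB when an external collector is configured.
     */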
class VmStatsCollector extends AbstractStatsCollector {
|
|
@Override
|
|
protected void runInContext() {
|
|
try {
|
|
SearchCriteria<HostVO> sc = createSearchCriteriaForHostTypeRoutingStateUpAndNotInMaintenance();
|
|
List<HostVO> hosts = _hostDao.search(sc, null);
|
|
|
|
logger.debug(String.format("VmStatsCollector is running to process VMs across %d UP hosts", hosts.size()));
|
|
|
|
Map<Object, Object> metrics = new HashMap<>();
|
|
for (HostVO host : hosts) {
|
|
Date timestamp = new Date();
|
|
Pair<Map<Long, VMInstanceVO>, Map<String, Long>> vmsAndMap = getVmMapForStatsForHost(host);
|
|
Map<Long, VMInstanceVO> vmMap = vmsAndMap.first();
|
|
try {
|
|
Map<Long, ? extends VmStats> vmStatsById = virtualMachineManager.getVirtualMachineStatistics(
|
|
host, vmsAndMap.second());
|
|
if (MapUtils.isEmpty(vmStatsById)) {
|
|
continue;
|
|
}
|
|
Set<Long> vmIdSet = vmStatsById.keySet();
|
|
for (Long vmId : vmIdSet) {
|
|
VmStatsEntry statsForCurrentIteration = (VmStatsEntry)vmStatsById.get(vmId);
|
|
statsForCurrentIteration.setVmId(vmId);
|
|
VMInstanceVO vm = vmMap.get(vmId);
|
|
statsForCurrentIteration.setVmUuid(vm.getUuid());
|
|
|
|
persistVirtualMachineStats(statsForCurrentIteration, timestamp);
|
|
|
|
if (externalStatsType == ExternalStatsProtocol.GRAPHITE) {
|
|
prepareVmMetricsForGraphite(metrics, statsForCurrentIteration);
|
|
} else {
|
|
metrics.put(statsForCurrentIteration.getVmId(), statsForCurrentIteration);
|
|
}
|
|
}
|
|
|
|
if (!metrics.isEmpty()) {
|
|
if (externalStatsType == ExternalStatsProtocol.GRAPHITE) {
|
|
sendVmMetricsToGraphiteHost(metrics, host);
|
|
} else if (externalStatsType == ExternalStatsProtocol.INFLUXDB) {
|
|
sendMetricsToInfluxdb(metrics);
|
|
}
|
|
}
|
|
|
|
metrics.clear();
|
|
} catch (Exception e) {
|
|
logger.debug("Failed to get VM stats for : {}", host);
|
|
}
|
|
}
|
|
|
|
} catch (Throwable t) {
|
|
logger.error("Error trying to retrieve VM stats", t);
|
|
}
|
|
}
|
|
|
|
@Override
|
|
protected Point createInfluxDbPoint(Object metricsObject) {
|
|
return createInfluxDbPointForVmMetrics(metricsObject);
|
|
}
|
|
}
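
    // Note on the metrics map above: for Graphite it holds flat key/value pairs
    // (one entry per metric name), while for InfluxDB it maps vmId -> VmStatsEntry;
    // it is cleared after each host so external batches stay per-host.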

    /**
     * <p>Previously, the VM stats cleanup process was triggered during the data collection process.
     * So, when data collection was disabled, the cleaning process was also disabled.</p>
     *
     * <p>With the introduction of persistence of VM stats, as well as the provision of historical data,
     * we created this class to allow both the collection process and the data cleaning process
     * to be enabled/disabled independently.</p>
     */
    class VmStatsCleaner extends ManagedContextRunnable {
        @Override
        protected void runInContext() {
            cleanUpVirtualMachineStats();
        }
    }

    class VolumeStatsCleaner extends ManagedContextRunnable {
        @Override
        protected void runInContext() {
            cleanUpVolumeStats();
        }
    }

    /**
     * Gets the latest or the accumulation of the stats collected from a given VM.
     *
     * @param vmId the specific VM.
     * @param accumulate whether or not the stats data should be accumulated.
     * @return the latest or the accumulation of the stats for the specified VM.
     */
    public VmStats getVmStats(long vmId, Boolean accumulate) {
        List<VmStatsVO> vmStatsVOList = vmStatsDao.findByVmIdOrderByTimestampDesc(vmId);

        if (CollectionUtils.isEmpty(vmStatsVOList)) {
            return null;
        }

        if (accumulate != null) {
            return getLatestOrAccumulatedVmMetricsStats(vmStatsVOList, accumulate.booleanValue());
        }
        return getLatestOrAccumulatedVmMetricsStats(vmStatsVOList, BooleanUtils.toBoolean(vmStatsIncrementMetrics.value()));
    }
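
    /*
     * A minimal usage sketch for getVmStats (illustrative; assumes a StatsCollector
     * instance named statsCollector obtained elsewhere):
     *
     *   VmStats latest = statsCollector.getVmStats(vmId, false); // newest persisted snapshot only
     *   VmStats summed = statsCollector.getVmStats(vmId, true);  // I/O counters summed over all retained snapshots
     *   VmStats byConf = statsCollector.getVmStats(vmId, null);  // falls back to the vmStatsIncrementMetrics setting
     */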

    /**
     * Gets the latest or the accumulation of a list of VM stats.<br>
     * It extracts the stats data from the VmStatsVO.
     *
     * @param vmStatsVOList the list of VM stats.
     * @param accumulate {@code true} if the data should be accumulated, {@code false} otherwise.
     * @return the {@link VmStatsEntry} containing the latest or the accumulated stats.
     */
    protected VmStatsEntry getLatestOrAccumulatedVmMetricsStats(List<VmStatsVO> vmStatsVOList, boolean accumulate) {
        if (accumulate) {
            return accumulateVmMetricsStats(vmStatsVOList);
        }
        return gson.fromJson(vmStatsVOList.get(0).getVmStatsData(), VmStatsEntry.class);
    }

    /**
     * Accumulates (I/O) stats for a given VM.
     *
     * @param vmStatsVOList the list of stats for a given VM.
     * @return the {@link VmStatsEntry} containing the accumulated (I/O) stats.
     */
    protected VmStatsEntry accumulateVmMetricsStats(List<VmStatsVO> vmStatsVOList) {
        VmStatsEntry latestVmStats = gson.fromJson(vmStatsVOList.remove(0).getVmStatsData(), VmStatsEntry.class);

        VmStatsEntry vmStatsEntry = new VmStatsEntry();
        vmStatsEntry.setEntityType(latestVmStats.getEntityType());
        vmStatsEntry.setVmId(latestVmStats.getVmId());
        vmStatsEntry.setCPUUtilization(latestVmStats.getCPUUtilization());
        vmStatsEntry.setNumCPUs(latestVmStats.getNumCPUs());
        vmStatsEntry.setMemoryKBs(latestVmStats.getMemoryKBs());
        vmStatsEntry.setIntFreeMemoryKBs(latestVmStats.getIntFreeMemoryKBs());
        vmStatsEntry.setTargetMemoryKBs(latestVmStats.getTargetMemoryKBs());
        vmStatsEntry.setNetworkReadKBs(latestVmStats.getNetworkReadKBs());
        vmStatsEntry.setNetworkWriteKBs(latestVmStats.getNetworkWriteKBs());
        vmStatsEntry.setDiskWriteKBs(latestVmStats.getDiskWriteKBs());
        vmStatsEntry.setDiskReadIOs(latestVmStats.getDiskReadIOs());
        vmStatsEntry.setDiskWriteIOs(latestVmStats.getDiskWriteIOs());
        vmStatsEntry.setDiskReadKBs(latestVmStats.getDiskReadKBs());

        for (VmStatsVO vmStatsVO : vmStatsVOList) {
            VmStatsEntry currentVmStatsEntry = gson.fromJson(vmStatsVO.getVmStatsData(), VmStatsEntry.class);

            vmStatsEntry.setNetworkReadKBs(vmStatsEntry.getNetworkReadKBs() + currentVmStatsEntry.getNetworkReadKBs());
            vmStatsEntry.setNetworkWriteKBs(vmStatsEntry.getNetworkWriteKBs() + currentVmStatsEntry.getNetworkWriteKBs());
            vmStatsEntry.setDiskReadKBs(vmStatsEntry.getDiskReadKBs() + currentVmStatsEntry.getDiskReadKBs());
            vmStatsEntry.setDiskWriteKBs(vmStatsEntry.getDiskWriteKBs() + currentVmStatsEntry.getDiskWriteKBs());
            vmStatsEntry.setDiskReadIOs(vmStatsEntry.getDiskReadIOs() + currentVmStatsEntry.getDiskReadIOs());
            vmStatsEntry.setDiskWriteIOs(vmStatsEntry.getDiskWriteIOs() + currentVmStatsEntry.getDiskWriteIOs());
        }
        return vmStatsEntry;
    }
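
    // Worked example of the accumulation above: given two persisted snapshots with
    // diskReadKBs of 100 (latest) and 40 (older), the returned entry reports 140,
    // while non-counter fields (CPU utilization, memory sizes, number of CPUs)
    // are taken from the latest snapshot only.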

    class VmDiskStatsUpdaterTask extends ManagedContextRunnable {
        @Override
        protected void runInContext() {
            GlobalLock scanLock = GlobalLock.getInternLock("vm.disk.stats");
            try {
                if (scanLock.lock(ACQUIRE_GLOBAL_LOCK_TIMEOUT_FOR_COOPERATION)) {
                    // Check for ownership: the msHost in UP state with the minimum id should run the job
                    ManagementServerHostVO msHost = managementServerHostDao.findOneInUpState(new Filter(ManagementServerHostVO.class, "id", true, 0L, 1L));
                    if (msHost == null || (msHost.getMsid() != mgmtSrvrId)) {
                        logger.debug("Skipping aggregate disk stats update");
                        scanLock.unlock();
                        return;
                    }
                    try {
                        Transaction.execute(new TransactionCallbackNoReturn() {
                            @Override
                            public void doInTransactionWithoutResult(TransactionStatus status) {
                                // get all stats with delta > 0
                                List<VmDiskStatisticsVO> updatedVmNetStats = _vmDiskStatsDao.listUpdatedStats();
                                for (VmDiskStatisticsVO stat : updatedVmNetStats) {
                                    if (_dailyOrHourly) {
                                        // update aggregated bytes and IOs
                                        stat.setAggBytesRead(stat.getCurrentBytesRead() + stat.getNetBytesRead());
                                        stat.setAggBytesWrite(stat.getCurrentBytesWrite() + stat.getNetBytesWrite());
                                        stat.setAggIORead(stat.getCurrentIORead() + stat.getNetIORead());
                                        stat.setAggIOWrite(stat.getCurrentIOWrite() + stat.getNetIOWrite());
                                        _vmDiskStatsDao.update(stat.getId(), stat);
                                    }
                                }
                                logger.debug("Successfully updated aggregate vm disk stats");
                            }
                        });
                    } catch (Exception e) {
                        logger.debug("Failed to update aggregate disk stats", e);
                    } finally {
                        scanLock.unlock();
                    }
                }
            } catch (Exception e) {
                logger.debug("Exception while trying to acquire disk stats lock", e);
            } finally {
                scanLock.releaseRef();
            }
        }
    }

    private void logLessLatestStatDiscrepancy(String prefix, String hostName, String vmName, long reported, long stored, boolean toHumanReadable) {
        if (logger.isDebugEnabled()) {
            logger.debug(String.format("%s is less than the last one. Assuming something went wrong and persisting it. Host: %s. VM: %s. Reported: %s. Stored: %s.",
                    prefix, hostName, vmName, toHumanReadable ? toHumanReadableSize(reported) : reported, toHumanReadable ? toHumanReadableSize(stored) : stored));
        }
    }

    class VmDiskStatsTask extends ManagedContextRunnable {
        @Override
        protected void runInContext() {
            // Check for ownership: the msHost in UP state with the minimum id should run the job
            ManagementServerHostVO msHost = managementServerHostDao.findOneInUpState(new Filter(ManagementServerHostVO.class, "id", true, 0L, 1L));
            boolean persistVolumeStats = vmDiskStatsRetentionEnabled.value();
            if (msHost == null || (msHost.getMsid() != mgmtSrvrId)) {
                logger.debug("Skipping collection of vm disk stats from hosts");
                return;
            }
            // collect the (total) vm disk statistics from the hypervisor. added by weizhou, 2013.03.
            logger.debug("VmDiskStatsTask is running...");

            SearchCriteria<HostVO> sc = createSearchCriteriaForHostTypeRoutingStateUpAndNotInMaintenance();
            sc.addAnd("hypervisorType", SearchCriteria.Op.IN, HypervisorType.KVM, HypervisorType.VMware);
            List<HostVO> hosts = _hostDao.search(sc, null);

            for (HostVO host : hosts) {
                Date timestamp = new Date();
                try {
                    Transaction.execute(new TransactionCallbackNoReturn() {
                        @Override
                        public void doInTransactionWithoutResult(TransactionStatus status) {
                            Pair<Map<Long, VMInstanceVO>, Map<String, Long>> vmsAndMap = getVmMapForStatsForHost(host);
                            Map<Long, VMInstanceVO> vmMap = vmsAndMap.first();
                            HashMap<Long, List<? extends VmDiskStats>> vmDiskStatsById = virtualMachineManager.getVmDiskStatistics(host, vmsAndMap.second());
                            if (vmDiskStatsById == null) {
                                return;
                            }

                            Set<Long> vmIdSet = vmDiskStatsById.keySet();
                            for (Long vmId : vmIdSet) {
                                List<? extends VmDiskStats> vmDiskStats = vmDiskStatsById.get(vmId);
                                if (vmDiskStats == null) {
                                    continue;
                                }
                                VMInstanceVO vm = vmMap.get(vmId);
                                for (VmDiskStats vmDiskStat : vmDiskStats) {
                                    VmDiskStatsEntry vmDiskStatEntry = (VmDiskStatsEntry)vmDiskStat;
                                    SearchCriteria<VolumeVO> sc_volume = _volsDao.createSearchCriteria();
                                    sc_volume.addAnd("path", SearchCriteria.Op.LIKE, vmDiskStatEntry.getPath() + "%");
                                    List<VolumeVO> volumes = _volsDao.search(sc_volume, null);

                                    if (CollectionUtils.isEmpty(volumes)) {
                                        break;
                                    }

                                    VolumeVO volume = volumes.get(0);
                                    VmDiskStatisticsVO previousVmDiskStats = _vmDiskStatsDao.findBy(vm.getAccountId(), vm.getDataCenterId(), vmId, volume.getId());
                                    VmDiskStatisticsVO vmDiskStat_lock = _vmDiskStatsDao.lock(vm.getAccountId(), vm.getDataCenterId(), vmId, volume.getId());

                                    if (persistVolumeStats) {
                                        persistVolumeStats(volume.getId(), vmDiskStatEntry, vm.getHypervisorType(), timestamp);
                                    }

                                    if (areAllDiskStatsZero(vmDiskStatEntry)) {
                                        logger.debug("IO/bytes read and write are all 0. Not updating vm_disk_statistics");
                                        continue;
                                    }

                                    if (vmDiskStat_lock == null) {
                                        logger.warn("unable to find vm disk stats from host for account: {} with vm: {} and volume: {}", vm.getAccountId(), vm, volume);
                                        continue;
                                    }

                                    if (isCurrentVmDiskStatsDifferentFromPrevious(previousVmDiskStats, vmDiskStat_lock)) {
                                        logger.debug("vm disk stats changed from the time GetVmDiskStatsCommand was sent. Ignoring current answer. Host: " + host.getName()
                                                + ". VM: " + vmDiskStatEntry.getVmName() + " Read(Bytes): " + toHumanReadableSize(vmDiskStatEntry.getBytesRead()) + " Write(Bytes): " + toHumanReadableSize(vmDiskStatEntry.getBytesWrite())
                                                + " Read(IO): " + toHumanReadableSize(vmDiskStatEntry.getIORead()) + " Write(IO): " + toHumanReadableSize(vmDiskStatEntry.getIOWrite()));
                                        continue;
                                    }

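                                    // Counter-reset heuristic (descriptive note): hypervisor counters are
                                    // cumulative, so a reported value lower than the stored "current" value
                                    // is treated as a counter reset (e.g. a VM restart); the stored value is
                                    // folded into the "net" running total before "current" is overwritten
                                    // with the newly reported value.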
                                    if (vmDiskStat_lock.getCurrentBytesRead() > vmDiskStatEntry.getBytesRead()) {
                                        logLessLatestStatDiscrepancy("Read # of bytes", host.getName(), vmDiskStatEntry.getVmName(), vmDiskStatEntry.getBytesRead(), vmDiskStat_lock.getCurrentBytesRead(), true);
                                        vmDiskStat_lock.setNetBytesRead(vmDiskStat_lock.getNetBytesRead() + vmDiskStat_lock.getCurrentBytesRead());
                                    }
                                    vmDiskStat_lock.setCurrentBytesRead(vmDiskStatEntry.getBytesRead());
                                    if (vmDiskStat_lock.getCurrentBytesWrite() > vmDiskStatEntry.getBytesWrite()) {
                                        logLessLatestStatDiscrepancy("Write # of bytes", host.getName(), vmDiskStatEntry.getVmName(), vmDiskStatEntry.getBytesWrite(), vmDiskStat_lock.getCurrentBytesWrite(), true);
                                        vmDiskStat_lock.setNetBytesWrite(vmDiskStat_lock.getNetBytesWrite() + vmDiskStat_lock.getCurrentBytesWrite());
                                    }
                                    vmDiskStat_lock.setCurrentBytesWrite(vmDiskStatEntry.getBytesWrite());
                                    if (vmDiskStat_lock.getCurrentIORead() > vmDiskStatEntry.getIORead()) {
                                        logLessLatestStatDiscrepancy("Read # of IO", host.getName(), vmDiskStatEntry.getVmName(), vmDiskStatEntry.getIORead(), vmDiskStat_lock.getCurrentIORead(), false);
                                        vmDiskStat_lock.setNetIORead(vmDiskStat_lock.getNetIORead() + vmDiskStat_lock.getCurrentIORead());
                                    }
                                    vmDiskStat_lock.setCurrentIORead(vmDiskStatEntry.getIORead());
                                    if (vmDiskStat_lock.getCurrentIOWrite() > vmDiskStatEntry.getIOWrite()) {
                                        logLessLatestStatDiscrepancy("Write # of IO", host.getName(), vmDiskStatEntry.getVmName(), vmDiskStatEntry.getIOWrite(), vmDiskStat_lock.getCurrentIOWrite(), false);
                                        vmDiskStat_lock.setNetIOWrite(vmDiskStat_lock.getNetIOWrite() + vmDiskStat_lock.getCurrentIOWrite());
                                    }
                                    vmDiskStat_lock.setCurrentIOWrite(vmDiskStatEntry.getIOWrite());

                                    if (!_dailyOrHourly) {
                                        // update aggregated bytes and IOs
                                        vmDiskStat_lock.setAggBytesWrite(vmDiskStat_lock.getNetBytesWrite() + vmDiskStat_lock.getCurrentBytesWrite());
                                        vmDiskStat_lock.setAggBytesRead(vmDiskStat_lock.getNetBytesRead() + vmDiskStat_lock.getCurrentBytesRead());
                                        vmDiskStat_lock.setAggIOWrite(vmDiskStat_lock.getNetIOWrite() + vmDiskStat_lock.getCurrentIOWrite());
                                        vmDiskStat_lock.setAggIORead(vmDiskStat_lock.getNetIORead() + vmDiskStat_lock.getCurrentIORead());
                                    }

                                    _vmDiskStatsDao.update(vmDiskStat_lock.getId(), vmDiskStat_lock);
                                }
                            }
                        }
                    });
                } catch (Exception e) {
                    logger.warn("Error while collecting vm disk stats from host {}: ", host, e);
                }
            }
        }
    }

    class VmNetworkStatsTask extends ManagedContextRunnable {
        @Override
        protected void runInContext() {
            // Check for ownership: the msHost in UP state with the minimum id should run the job
            ManagementServerHostVO msHost = managementServerHostDao.findOneInUpState(new Filter(ManagementServerHostVO.class, "id", true, 0L, 1L));
            if (msHost == null || (msHost.getMsid() != mgmtSrvrId)) {
                logger.debug("Skipping collection of vm network stats from hosts");
                return;
            }
            // collect the (total) vm network statistics from the hypervisor
            logger.debug("VmNetworkStatsTask is running...");

            SearchCriteria<HostVO> sc = createSearchCriteriaForHostTypeRoutingStateUpAndNotInMaintenance();
            List<HostVO> hosts = _hostDao.search(sc, null);

            for (HostVO host : hosts) {
                try {
                    Transaction.execute(new TransactionCallbackNoReturn() {
                        @Override
                        public void doInTransactionWithoutResult(TransactionStatus status) {
                            Pair<Map<Long, VMInstanceVO>, Map<String, Long>> vmsAndMap = getVmMapForStatsForHost(host);
                            Map<Long, VMInstanceVO> vmMap = vmsAndMap.first();
                            HashMap<Long, List<? extends VmNetworkStats>> vmNetworkStatsById = virtualMachineManager.getVmNetworkStatistics(host, vmsAndMap.second());
                            if (vmNetworkStatsById == null) {
                                return;
                            }

                            Set<Long> vmIdSet = vmNetworkStatsById.keySet();
                            for (Long vmId : vmIdSet) {
                                List<? extends VmNetworkStats> vmNetworkStats = vmNetworkStatsById.get(vmId);
                                if (CollectionUtils.isEmpty(vmNetworkStats)) {
                                    continue;
                                }
                                VMInstanceVO userVm = vmMap.get(vmId);
                                if (!VirtualMachine.Type.User.equals(userVm.getType())) {
                                    logger.debug("VM with id: {} is not a user VM, skipping", vmId);
                                    continue;
                                }
                                logger.debug("Now we are updating the user_statistics table for VM: {} after collecting vm network statistics from host: {}", userVm, host);
                                for (VmNetworkStats vmNetworkStat : vmNetworkStats) {
                                    VmNetworkStatsEntry vmNetworkStatEntry = (VmNetworkStatsEntry)vmNetworkStat;
                                    SearchCriteria<NicVO> sc_nic = _nicDao.createSearchCriteria();
                                    sc_nic.addAnd("macAddress", SearchCriteria.Op.EQ, vmNetworkStatEntry.getMacAddress());
                                    NicVO nic = _nicDao.search(sc_nic, null).get(0);
                                    List<VlanVO> vlan = _vlanDao.listVlansByNetworkId(nic.getNetworkId());
                                    if (vlan == null || vlan.size() == 0 || vlan.get(0).getVlanType() != VlanType.DirectAttached) {
                                        continue; // only get network statistics for DirectAttached networks (shared networks in Basic zone and Advanced zone with/without SG)
                                    }
                                    UserStatisticsVO previousvmNetworkStats = _userStatsDao.findBy(userVm.getAccountId(), userVm.getDataCenterId(), nic.getNetworkId(),
                                            nic.getIPv4Address(), vmId, "UserVm");
                                    if (previousvmNetworkStats == null) {
                                        previousvmNetworkStats = new UserStatisticsVO(userVm.getAccountId(), userVm.getDataCenterId(), nic.getIPv4Address(), vmId, "UserVm",
                                                nic.getNetworkId());
                                        _userStatsDao.persist(previousvmNetworkStats);
                                    }
                                    UserStatisticsVO vmNetworkStat_lock = _userStatsDao.lock(userVm.getAccountId(), userVm.getDataCenterId(), nic.getNetworkId(),
                                            nic.getIPv4Address(), vmId, "UserVm");

                                    if ((vmNetworkStatEntry.getBytesSent() == 0) && (vmNetworkStatEntry.getBytesReceived() == 0)) {
                                        logger.debug("bytes sent and received are all 0. Not updating user_statistics");
                                        continue;
                                    }

                                    if (vmNetworkStat_lock == null) {
                                        logger.warn("unable to find vm network stats from host for account: {} with vm: {} and nic: {}", userVm.getAccountId(), userVm, nic);
                                        continue;
                                    }

                                    if (previousvmNetworkStats != null && ((previousvmNetworkStats.getCurrentBytesSent() != vmNetworkStat_lock.getCurrentBytesSent())
                                            || (previousvmNetworkStats.getCurrentBytesReceived() != vmNetworkStat_lock.getCurrentBytesReceived()))) {
                                        logger.debug("vm network stats changed from the time GetVmNetworkStatsCommand was sent. Ignoring current answer. Host: "
                                                + host.getName() + ". VM: " + vmNetworkStatEntry.getVmName() + " Sent(Bytes): " + vmNetworkStatEntry.getBytesSent() + " Received(Bytes): "
                                                + vmNetworkStatEntry.getBytesReceived());
                                        continue;
                                    }

                                    if (vmNetworkStat_lock.getCurrentBytesSent() > vmNetworkStatEntry.getBytesSent()) {
                                        logLessLatestStatDiscrepancy("Sent # of bytes", host.getName(), vmNetworkStatEntry.getVmName(), vmNetworkStatEntry.getBytesSent(), vmNetworkStat_lock.getCurrentBytesSent(), true);
                                        vmNetworkStat_lock.setNetBytesSent(vmNetworkStat_lock.getNetBytesSent() + vmNetworkStat_lock.getCurrentBytesSent());
                                    }
                                    vmNetworkStat_lock.setCurrentBytesSent(vmNetworkStatEntry.getBytesSent());

                                    if (vmNetworkStat_lock.getCurrentBytesReceived() > vmNetworkStatEntry.getBytesReceived()) {
                                        logLessLatestStatDiscrepancy("Received # of bytes", host.getName(), vmNetworkStatEntry.getVmName(), vmNetworkStatEntry.getBytesReceived(), vmNetworkStat_lock.getCurrentBytesReceived(), true);
                                        vmNetworkStat_lock.setNetBytesReceived(vmNetworkStat_lock.getNetBytesReceived() + vmNetworkStat_lock.getCurrentBytesReceived());
                                    }
                                    vmNetworkStat_lock.setCurrentBytesReceived(vmNetworkStatEntry.getBytesReceived());

                                    if (!_dailyOrHourly) {
                                        // update aggregated bytes
                                        vmNetworkStat_lock.setAggBytesReceived(vmNetworkStat_lock.getNetBytesReceived() + vmNetworkStat_lock.getCurrentBytesReceived());
                                        vmNetworkStat_lock.setAggBytesSent(vmNetworkStat_lock.getNetBytesSent() + vmNetworkStat_lock.getCurrentBytesSent());
                                    }

                                    _userStatsDao.update(vmNetworkStat_lock.getId(), vmNetworkStat_lock);
                                }
                            }
                        }
                    });
                } catch (Exception e) {
                    logger.warn("Error while collecting vm network stats from host {}: ", host, e);
                }
            }
        }
    }

    class VolumeStatsTask extends ManagedContextRunnable {
        @Override
        protected void runInContext() {
            try {
                List<StoragePoolVO> pools = _storagePoolDao.listAll();

                for (StoragePoolVO pool : pools) {
                    List<VolumeVO> volumes = _volsDao.findByPoolId(pool.getId(), null);
                    for (VolumeVO volume : volumes) {
                        if (!List.of(ImageFormat.QCOW2, ImageFormat.VHD, ImageFormat.OVA, ImageFormat.RAW).contains(volume.getFormat()) &&
                                !List.of(Storage.StoragePoolType.PowerFlex, Storage.StoragePoolType.FiberChannel).contains(pool.getPoolType())) {
                            logger.warn("Volume stats not implemented for volume format type " + volume.getFormat());
                            break;
                        }
                    }
                    try {
                        Map<String, VolumeStatsEntry> volumeStatsByUuid;
                        if (pool.getScope() == ScopeType.ZONE) {
                            volumeStatsByUuid = new HashMap<>();
                            for (final Cluster cluster : _clusterDao.listClustersByDcId(pool.getDataCenterId())) {
                                final Map<String, VolumeStatsEntry> volumeStatsForCluster = _userVmMgr.getVolumeStatistics(cluster.getId(), pool.getUuid(), pool.getPoolType(), StatsTimeout.value());
                                if (volumeStatsForCluster != null) {
                                    volumeStatsByUuid.putAll(volumeStatsForCluster);
                                }
                            }
                        } else {
                            volumeStatsByUuid = _userVmMgr.getVolumeStatistics(pool.getClusterId(), pool.getUuid(), pool.getPoolType(), StatsTimeout.value());
                        }
                        if (volumeStatsByUuid != null) {
                            for (final Map.Entry<String, VolumeStatsEntry> entry : volumeStatsByUuid.entrySet()) {
                                if (entry == null || entry.getKey() == null || entry.getValue() == null) {
                                    continue;
                                }
                                _volumeStats.put(entry.getKey(), entry.getValue());
                            }
                        }
                    } catch (Exception e) {
                        logger.warn("Failed to get volume stats for cluster with ID: " + pool.getClusterId(), e);
                    }
                }
            } catch (Throwable t) {
                logger.error("Error trying to retrieve volume stats", t);
            }
        }
    }

    public VolumeStats getVolumeStats(String volumeLocator) {
        if (volumeLocator != null && _volumeStats.containsKey(volumeLocator)) {
            return _volumeStats.get(volumeLocator);
        }
        return null;
    }

    class StorageCollector extends ManagedContextRunnable {
        @Override
        protected void runInContext() {
            try {
                if (logger.isDebugEnabled()) {
                    logger.debug("StorageCollector is running...");
                }

                List<DataStore> stores = _dataStoreMgr.listImageStores();
                ConcurrentHashMap<Long, StorageStats> storageStats = new ConcurrentHashMap<>();
                for (DataStore store : stores) {
                    if (store.getUri() == null) {
                        continue;
                    }

                    String nfsVersion = imageStoreDetailsUtil.getNfsVersion(store.getId());
                    GetStorageStatsCommand command = new GetStorageStatsCommand(store.getTO(), nfsVersion);
                    EndPoint ssAhost = _epSelector.select(store);
                    if (ssAhost == null) {
                        logger.debug("There is no secondary storage VM for secondary storage host " + store.getName());
                        continue;
                    }
                    long storeId = store.getId();
                    Answer answer = ssAhost.sendMessage(command);
                    if (answer != null && answer.getResult()) {
                        storageStats.put(storeId, (StorageStats)answer);
                        logger.trace("Store: {} Used: {} Total Available: {}", store, toHumanReadableSize(((StorageStats)answer).getByteUsed()), toHumanReadableSize(((StorageStats)answer).getCapacityBytes()));
                    }
                }
                updateStorageStats(storageStats);
                ConcurrentHashMap<Long, StorageStats> storagePoolStats = new ConcurrentHashMap<>();

                List<StoragePoolVO> storagePools = _storagePoolDao.listAll();
                for (StoragePoolVO pool : storagePools) {
                    // check if the pool has enabled hosts
                    List<Long> hostIds = _storageManager.getUpHostsInPool(pool.getId());
                    if (hostIds == null || hostIds.isEmpty()) {
                        continue;
                    }
                    GetStorageStatsCommand command = new GetStorageStatsCommand(pool.getUuid(), pool.getPoolType(), pool.getPath());
                    long poolId = pool.getId();
                    try {
                        Answer answer = _storageManager.sendToPool(pool, command);
                        if (answer != null && answer.getResult()) {
                            StorageStats stats = (StorageStats)answer;
                            storagePoolStats.put(pool.getId(), stats);

                            boolean poolNeedsUpdating = false;
                            long capacityBytes = stats.getCapacityBytes();
                            long usedBytes = stats.getByteUsed();
                            Long capacityIops = stats.getCapacityIops();
                            Long usedIops = stats.getUsedIops();
                            // Seems like we have dynamically updated the pool size, since the previous size and the current one do not match
                            if ((_storagePoolStats.get(poolId) != null && _storagePoolStats.get(poolId).getCapacityBytes() != capacityBytes)
                                    || pool.getCapacityBytes() != capacityBytes) {
                                if (capacityBytes > 0) {
                                    pool.setCapacityBytes(capacityBytes);
                                    poolNeedsUpdating = true;
                                } else {
                                    logger.warn("Not setting capacity bytes, received {} capacity for pool {}",
                                            NumbersUtil.toReadableSize(capacityBytes), pool);
                                }
                            }
                            if (((_storagePoolStats.get(poolId) != null && _storagePoolStats.get(poolId).getByteUsed() != usedBytes)
                                    || pool.getUsedBytes() != usedBytes) && (pool.getStorageProviderName().equalsIgnoreCase(DataStoreProvider.DEFAULT_PRIMARY) || _storageManager.canPoolProvideStorageStats(pool))) {
                                pool.setUsedBytes(usedBytes);
                                poolNeedsUpdating = true;
                            }
                            poolNeedsUpdating = isPoolNeedsIopsStatsUpdate(pool, capacityIops, usedIops) || poolNeedsUpdating;
                            if (poolNeedsUpdating) {
                                pool.setUpdateTime(new Date());
                                _storagePoolDao.update(pool.getId(), pool);
                            }
                        }
                    } catch (StorageUnavailableException e) {
                        logger.info("Unable to reach " + pool, e);
                    } catch (Exception e) {
                        logger.warn("Unable to get stats for " + pool, e);
                    }
                }
                _storagePoolStats = storagePoolStats;
            } catch (Throwable t) {
                logger.error("Error trying to retrieve storage stats", t);
            }
        }

        private void updateStorageStats(ConcurrentHashMap<Long, StorageStats> storageStats) {
            for (Long storeId : storageStats.keySet()) {
                if (_storageStats.containsKey(storeId)
                        && (_storageStats.get(storeId).getCapacityBytes() == 0L
                        || _storageStats.get(storeId).getCapacityBytes() != storageStats.get(storeId).getCapacityBytes())) {
                    // propagate the refreshed capacity to the DB so it stays consistent
                    _storageManager.updateImageStoreStatus(storeId, null, null, storageStats.get(storeId).getCapacityBytes());
                }
            }
            // stores present in _storageStats but not in storageStats are discarded
            _storageStats = storageStats;
        }
    }

    protected boolean isPoolNeedsIopsStatsUpdate(StoragePoolVO pool, Long capacityIops, Long usedIops) {
        boolean poolNeedsUpdating = false;
        long poolId = pool.getId();
        if (capacityIops != null && ((_storagePoolStats.get(poolId) != null &&
                !capacityIops.equals(_storagePoolStats.get(poolId).getCapacityIops())) ||
                !capacityIops.equals(pool.getCapacityIops()))) {
            pool.setCapacityIops(capacityIops);
            poolNeedsUpdating = true;
        }
        if (usedIops != null && ((_storagePoolStats.get(poolId) != null &&
                !usedIops.equals(_storagePoolStats.get(poolId).getUsedIops())) ||
                !usedIops.equals(pool.getUsedIops()))) {
            pool.setUsedIops(usedIops);
            poolNeedsUpdating = true;
        }
        return poolNeedsUpdating;
    }

    class AutoScaleMonitor extends ManagedContextRunnable {
        @Override
        protected void runInContext() {
            try {
                if (logger.isDebugEnabled()) {
                    logger.debug("AutoScaling Monitor is running...");
                }
                // the msHost in UP state with the minimum id should run the job
                ManagementServerHostVO msHost = managementServerHostDao.findOneInUpState(new Filter(ManagementServerHostVO.class, "id", true, 0L, 1L));
                if (msHost == null || (msHost.getMsid() != mgmtSrvrId)) {
                    logger.debug("Skipping AutoScaling Monitor");
                    return;
                }

                _asManager.checkAllAutoScaleVmGroups();

            } catch (Throwable t) {
                logger.error("Error trying to monitor autoscaling", t);
            }
        }
    }

    /**
     * This class allows writing metrics to the InfluxDB measurement that matches the Collector extending it.
     * Thus, VmStatsCollector and HostCollector can use the same method to write to different measurements (host_stats or vm_stats).
     */
    abstract class AbstractStatsCollector extends ManagedContextRunnable {
        /**
         * Sends metrics to the InfluxDB host. This method supports both VM and Host metrics.
         */
        protected void sendMetricsToInfluxdb(Map<Object, Object> metrics) {
            InfluxDB influxDbConnection = createInfluxDbConnection();

            try {
                Pong response = influxDbConnection.ping();
                if (response.getVersion().equalsIgnoreCase("unknown")) {
                    throw new CloudRuntimeException(String.format("Cannot ping influxdb host %s:%s.", externalStatsHost, externalStatsPort));
                }

                Collection<Object> metricsObjects = metrics.values();
                List<Point> points = new ArrayList<>();

                logger.debug(String.format("Sending stats to %s host %s:%s", externalStatsType, externalStatsHost, externalStatsPort));

                for (Object metricsObject : metricsObjects) {
                    Point vmPoint = createInfluxDbPoint(metricsObject);
                    points.add(vmPoint);
                }
                writeBatches(influxDbConnection, databaseName, points);
            } finally {
                influxDbConnection.close();
            }
        }

        /**
         * Creates an InfluxDB point for the given stats collector (VmStatsCollector, or HostCollector).
         */
        protected abstract Point createInfluxDbPoint(Object metricsObject);
    }
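
    // Concrete implementations of createInfluxDbPoint are provided further below:
    // createInfluxDbPointForVmMetrics (used by VmStatsCollector) and
    // createInfluxDbPointForHostMetrics (used by the host collector).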

    public boolean imageStoreHasEnoughCapacity(DataStore imageStore) {
        if (!_storageStats.containsKey(imageStore.getId())) { // Stats not available for this store yet; it can be a new store. Better to assume it has enough capacity.
            return true;
        }

        long imageStoreId = imageStore.getId();
        StorageStats imageStoreStats = _storageStats.get(imageStoreId);

        if (imageStoreStats == null) {
            logger.debug(String.format("Stats for image store [%s] not found.", imageStoreId));
            return false;
        }

        double totalCapacity = imageStoreStats.getCapacityBytes();
        double usedCapacity = imageStoreStats.getByteUsed();
        double threshold = getImageStoreCapacityThreshold();
        String readableTotalCapacity = NumbersUtil.toReadableSize((long) totalCapacity);
        String readableUsedCapacity = NumbersUtil.toReadableSize((long) usedCapacity);

        logger.printf(Level.DEBUG, "Verifying image storage [%s]. Capacity: total=[%s], used=[%s], threshold=[%.2f%%].", imageStore, readableTotalCapacity, readableUsedCapacity, threshold * 100);
        if (usedCapacity / totalCapacity <= threshold) {
            return true;
        }

        logger.printf(Level.WARN, "Image storage [%s] does not have enough capacity. Capacity: total=[%s], used=[%s], threshold=[%.2f%%].", imageStore, readableTotalCapacity, readableUsedCapacity, threshold * 100);
        return false;
    }
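
    // Worked example of the threshold check above: with total=500 GB, used=460 GB,
    // and a threshold of 0.90, the usage ratio is 460/500 = 0.92 > 0.90, so the
    // store is reported as not having enough capacity.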

    public long imageStoreCurrentFreeCapacity(DataStore imageStore) {
        StorageStats imageStoreStats = _storageStats.get(imageStore.getId());
        return imageStoreStats != null ? Math.max(0, imageStoreStats.getCapacityBytes() - imageStoreStats.getByteUsed()) : 0;
    }

    /**
     * Checks secondary storage disk capacity against a configurable threshold instead of the hardcoded default 95% value.
     * @param imageStore secondary storage
     * @param storeCapThreshold the capacity threshold used to decide whether the secondary storage has enough space
     * @return true if the used capacity is at or below the threshold, false otherwise
     */
    public boolean imageStoreHasEnoughCapacity(DataStore imageStore, Double storeCapThreshold) {
        StorageStats imageStoreStats = _storageStats.get(imageStore.getId());
        return imageStoreStats != null && (imageStoreStats.getByteUsed() / (imageStoreStats.getCapacityBytes() * 1.0)) <= storeCapThreshold;
    }

    /**
     * Sends VM metrics to the configured Graphite host.
     */
    protected void sendVmMetricsToGraphiteHost(Map<Object, Object> metrics, HostVO host) {
        logger.debug("Sending VmStats of host {} to {} host {}:{}", host, externalStatsType, externalStatsHost, externalStatsPort);
        try {
            GraphiteClient g = new GraphiteClient(externalStatsHost, externalStatsPort);
            g.sendMetrics(metrics);
        } catch (GraphiteException e) {
            logger.debug("Failed sending VmStats to Graphite host " + externalStatsHost + ":" + externalStatsPort + ": " + e.getMessage());
        }
    }

    /**
     * Prepares metrics for Graphite.
     * @note this method must only be executed in case the configured stats collector is a Graphite host;
     * otherwise, it will compromise the map of metrics used by another type of collector (e.g. InfluxDB).
     */
    private void prepareVmMetricsForGraphite(Map<Object, Object> metrics, VmStatsEntry statsForCurrentIteration) {
        VMInstanceVO vmVO = _vmInstance.findById(statsForCurrentIteration.getVmId());
        String vmName = vmVO.getUuid();

        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".cpu.num", statsForCurrentIteration.getNumCPUs());
        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".cpu.utilization", statsForCurrentIteration.getCPUUtilization());
        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".network.read_kbs", statsForCurrentIteration.getNetworkReadKBs());
        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".network.write_kbs", statsForCurrentIteration.getNetworkWriteKBs());
        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".disk.write_kbs", statsForCurrentIteration.getDiskWriteKBs());
        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".disk.read_kbs", statsForCurrentIteration.getDiskReadKBs());
        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".disk.write_iops", statsForCurrentIteration.getDiskWriteIOs());
        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".disk.read_iops", statsForCurrentIteration.getDiskReadIOs());
        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".memory.total_kbs", statsForCurrentIteration.getMemoryKBs());
        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".memory.internalfree_kbs", statsForCurrentIteration.getIntFreeMemoryKBs());
        metrics.put(externalStatsPrefix + "cloudstack.stats.instances." + vmName + ".memory.target_kbs", statsForCurrentIteration.getTargetMemoryKBs());
    }
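
    // Example of the Graphite keys produced above (with a hypothetical VM UUID
    // "9f2b-example"): entries such as
    //   <prefix>cloudstack.stats.instances.9f2b-example.cpu.utilization
    //   <prefix>cloudstack.stats.instances.9f2b-example.disk.read_kbs
    // are added to the map, where <prefix> is the configured externalStatsPrefix.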

    /**
     * Persists VM stats in the database.
     * @param statsForCurrentIteration the metrics stats data to persist.
     * @param timestamp the time that will be stamped.
     */
    protected void persistVirtualMachineStats(VmStatsEntry statsForCurrentIteration, Date timestamp) {
        VmStatsEntryBase vmStats = new VmStatsEntryBase(statsForCurrentIteration.getVmId(), statsForCurrentIteration.getMemoryKBs(), statsForCurrentIteration.getIntFreeMemoryKBs(),
                statsForCurrentIteration.getTargetMemoryKBs(), statsForCurrentIteration.getCPUUtilization(), statsForCurrentIteration.getNetworkReadKBs(),
                statsForCurrentIteration.getNetworkWriteKBs(), statsForCurrentIteration.getNumCPUs(), statsForCurrentIteration.getDiskReadKBs(),
                statsForCurrentIteration.getDiskWriteKBs(), statsForCurrentIteration.getDiskReadIOs(), statsForCurrentIteration.getDiskWriteIOs(),
                statsForCurrentIteration.getEntityType());
        VmStatsVO vmStatsVO = new VmStatsVO(statsForCurrentIteration.getVmId(), msId, timestamp, gson.toJson(vmStats));
        logger.trace(String.format("Recording VM stats: [%s].", vmStatsVO.toString()));
        vmStatsDao.persist(vmStatsVO);
    }

    private String getVmDiskStatsEntryAsString(VmDiskStatsEntry statsForCurrentIteration, Hypervisor.HypervisorType hypervisorType) {
        VmDiskStatsEntry entry;
        if (Hypervisor.HypervisorType.KVM.equals(hypervisorType)) {
            entry = new VmDiskStatsEntry(statsForCurrentIteration.getVmName(),
                    statsForCurrentIteration.getPath(),
                    statsForCurrentIteration.getDeltaIoWrite(),
                    statsForCurrentIteration.getDeltaIoRead(),
                    statsForCurrentIteration.getDeltaBytesWrite(),
                    statsForCurrentIteration.getDeltaBytesRead());
        } else {
            entry = statsForCurrentIteration;
        }
        JsonElement element = gson.toJsonTree(entry);
        JsonObject obj = element.getAsJsonObject();
        for (String key : Arrays.asList("deltaIoRead", "deltaIoWrite", "deltaBytesWrite", "deltaBytesRead")) {
            obj.remove(key);
        }
        return obj.toString();
    }
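
    // Note on the serialization above: for KVM the per-interval deltas are copied
    // into the counter fields of a fresh entry, and the "delta*" keys themselves
    // are then stripped from the JSON tree, so the persisted volume-stats JSON
    // carries one set of I/O counters regardless of hypervisor type.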

    /**
     * Persists VM disk stats in the database.
     * @param volumeId the id of the volume the stats refer to.
     * @param statsForCurrentIteration the metrics stats data to persist.
     * @param hypervisorType the hypervisor type of the VM owning the volume.
     * @param timestamp the time that will be stamped.
     */
    protected void persistVolumeStats(long volumeId, VmDiskStatsEntry statsForCurrentIteration, Hypervisor.HypervisorType hypervisorType, Date timestamp) {
        VolumeStatsVO volumeStatsVO = new VolumeStatsVO(volumeId, msId, timestamp, getVmDiskStatsEntryAsString(statsForCurrentIteration, hypervisorType));
        if (logger.isDebugEnabled()) {
            logger.debug(String.format("Recording volume stats: [%s].", volumeStatsVO));
        }
        volumeStatsDao.persist(volumeStatsVO);
    }

    /**
     * Removes the oldest VM stats records according to the global
     * parameter {@code vm.stats.max.retention.time}.
     */
    protected void cleanUpVirtualMachineStats() {
        Integer maxRetentionTime = vmStatsMaxRetentionTime.value();
        if (maxRetentionTime <= 0) {
            logger.debug(String.format("Skipping VM stats cleanup. The [%s] parameter [%s] is set to zero or a negative value.",
                    vmStatsMaxRetentionTime.scope(), vmStatsMaxRetentionTime.toString()));
            return;
        }
        logger.trace("Removing older VM stats records.");
        Date now = new Date();
        Date limit = DateUtils.addMinutes(now, -maxRetentionTime);
        vmStatsDao.removeAllByTimestampLessThan(limit, DELETE_QUERY_BATCH_SIZE.value());
    }
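
    // Retention arithmetic for the cleanup above: with a retention time of 720
    // (minutes), "limit" is now minus 12 hours, so every record with a timestamp
    // older than 12 hours is removed, in batches of DELETE_QUERY_BATCH_SIZE rows.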

    /**
     * Removes the oldest Volume stats records according to the global
     * parameter {@code vm.disk.stats.max.retention.time}.
     */
    protected void cleanUpVolumeStats() {
        Integer maxRetentionTime = vmDiskStatsMaxRetentionTime.value();
        if (maxRetentionTime <= 0) {
            if (logger.isDebugEnabled()) {
                logger.debug(String.format("Skipping Volume stats cleanup. The [%s] parameter [%s] is set to zero or a negative value.",
                        vmDiskStatsMaxRetentionTime.scope(), vmDiskStatsMaxRetentionTime.toString()));
            }
            return;
        }
        logger.trace("Removing older Volume stats records.");
        Date now = new Date();
        Date limit = DateUtils.addMinutes(now, -maxRetentionTime);
        volumeStatsDao.removeAllByTimestampLessThan(limit, DELETE_QUERY_BATCH_SIZE.value());
    }

    /**
     * Creates an InfluxDB point with host metrics, to be sent to a configured InfluxDB host.</br>
     * <b>Tag:</b> {@code UUID_TAG} (the host UUID)</br>
     * <b>Fields:</b> {@code TOTAL_MEMORY_KBS_FIELD}, {@code FREE_MEMORY_KBS_FIELD}, {@code CPU_UTILIZATION_FIELD}, {@code CPUS_FIELD},
     * {@code CPU_SOCKETS_FIELD}, {@code NETWORK_READ_KBS_FIELD}, {@code NETWORK_WRITE_KBS_FIELD}
     */
    protected Point createInfluxDbPointForHostMetrics(Object metricsObject) {
        HostStatsEntry hostStatsEntry = (HostStatsEntry)metricsObject;

        Map<String, String> tagsToAdd = new HashMap<>();
        tagsToAdd.put(UUID_TAG, hostStatsEntry.getHostVo().getUuid());

        Map<String, Object> fieldsToAdd = new HashMap<>();
        fieldsToAdd.put(TOTAL_MEMORY_KBS_FIELD, hostStatsEntry.getTotalMemoryKBs());
        fieldsToAdd.put(FREE_MEMORY_KBS_FIELD, hostStatsEntry.getFreeMemoryKBs());
        fieldsToAdd.put(CPU_UTILIZATION_FIELD, hostStatsEntry.getCpuUtilization());
        fieldsToAdd.put(CPUS_FIELD, hostStatsEntry.getHostVo().getCpus());
        fieldsToAdd.put(CPU_SOCKETS_FIELD, hostStatsEntry.getHostVo().getCpuSockets());
        fieldsToAdd.put(NETWORK_READ_KBS_FIELD, hostStatsEntry.getNetworkReadKBs());
        fieldsToAdd.put(NETWORK_WRITE_KBS_FIELD, hostStatsEntry.getNetworkWriteKBs());

        return Point.measurement(INFLUXDB_HOST_MEASUREMENT).tag(tagsToAdd).time(System.currentTimeMillis(), TimeUnit.MILLISECONDS).fields(fieldsToAdd).build();
    }

    /**
     * Creates an InfluxDB point with VM metrics, to be sent to a configured InfluxDB host.</br>
     * <b>Tag:</b> {@code UUID_TAG} (the VM UUID)</br>
     * <b>Fields:</b> {@code TOTAL_MEMORY_KBS_FIELD}, {@code FREE_MEMORY_KBS_FIELD}, {@code MEMORY_TARGET_KBS_FIELD}, {@code CPU_UTILIZATION_FIELD}, {@code CPUS_FIELD},
     * {@code NETWORK_READ_KBS_FIELD}, {@code NETWORK_WRITE_KBS_FIELD}, {@code DISK_READ_IOPS_FIELD}, {@code DISK_READ_KBS_FIELD}, {@code DISK_WRITE_IOPS_FIELD}, {@code DISK_WRITE_KBS_FIELD}
     */
    protected Point createInfluxDbPointForVmMetrics(Object metricsObject) {
        VmStatsEntry vmStatsEntry = (VmStatsEntry)metricsObject;

        Map<String, String> tagsToAdd = new HashMap<>();
        tagsToAdd.put(UUID_TAG, vmStatsEntry.getVmUuid());

        Map<String, Object> fieldsToAdd = new HashMap<>();
        fieldsToAdd.put(TOTAL_MEMORY_KBS_FIELD, vmStatsEntry.getMemoryKBs());
        fieldsToAdd.put(FREE_MEMORY_KBS_FIELD, vmStatsEntry.getIntFreeMemoryKBs());
        fieldsToAdd.put(MEMORY_TARGET_KBS_FIELD, vmStatsEntry.getTargetMemoryKBs());
        fieldsToAdd.put(CPU_UTILIZATION_FIELD, vmStatsEntry.getCPUUtilization());
        fieldsToAdd.put(CPUS_FIELD, vmStatsEntry.getNumCPUs());
        fieldsToAdd.put(NETWORK_READ_KBS_FIELD, vmStatsEntry.getNetworkReadKBs());
        fieldsToAdd.put(NETWORK_WRITE_KBS_FIELD, vmStatsEntry.getNetworkWriteKBs());
        fieldsToAdd.put(DISK_READ_IOPS_FIELD, vmStatsEntry.getDiskReadIOs());
        fieldsToAdd.put(DISK_READ_KBS_FIELD, vmStatsEntry.getDiskReadKBs());
        fieldsToAdd.put(DISK_WRITE_IOPS_FIELD, vmStatsEntry.getDiskWriteIOs());
        fieldsToAdd.put(DISK_WRITE_KBS_FIELD, vmStatsEntry.getDiskWriteKBs());

        return Point.measurement(INFLUXDB_VM_MEASUREMENT).tag(tagsToAdd).time(System.currentTimeMillis(), TimeUnit.MILLISECONDS).fields(fieldsToAdd).build();
    }

    /**
     * Creates a connection to InfluxDB. If the database does not exist, it throws a CloudRuntimeException.</br>
     * @note the user can configure the database name via the parameter 'stats.output.influxdb.database.name'; such a database must already be created and configured by the user.
     * The default name for the database is 'cloudstack_stats'.
     */
    protected InfluxDB createInfluxDbConnection() {
        String influxDbQueryUrl = String.format("http://%s:%s/", externalStatsHost, externalStatsPort);
        InfluxDB influxDbConnection = InfluxDBFactory.connect(influxDbQueryUrl);

        if (!influxDbConnection.databaseExists(databaseName)) {
            throw new CloudRuntimeException(String.format("Database with name %s does not exist in influxdb host %s:%s", databaseName, externalStatsHost, externalStatsPort));
        }

        return influxDbConnection;
    }

    /**
     * Writes batches of InfluxDB database points into a given database.
     */
    protected void writeBatches(InfluxDB influxDbConnection, String dbName, List<Point> points) {
        BatchPoints batchPoints = BatchPoints.database(dbName).build();
        if (!influxDbConnection.isBatchEnabled()) {
            influxDbConnection.enableBatch(BatchOptions.DEFAULTS);
        }

        for (Point point : points) {
            batchPoints.point(point);
        }

        influxDbConnection.write(batchPoints);
    }
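
    // Note on the batching above: all points are grouped into a single BatchPoints
    // container and written with one write() call; enabling client-side batching
    // with BatchOptions.DEFAULTS additionally lets the InfluxDB client buffer
    // writes internally.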

    /**
     * Returns true if at least one of the current disk stats is different from the previous ones.</br>
     * The considered disk stats are the following: bytes read, bytes write, IO read, and IO write.
     */
    protected boolean isCurrentVmDiskStatsDifferentFromPrevious(VmDiskStatisticsVO previousVmDiskStats, VmDiskStatisticsVO currentVmDiskStats) {
        if (previousVmDiskStats != null) {
            boolean bytesReadDifferentFromPrevious = previousVmDiskStats.getCurrentBytesRead() != currentVmDiskStats.getCurrentBytesRead();
            boolean bytesWriteDifferentFromPrevious = previousVmDiskStats.getCurrentBytesWrite() != currentVmDiskStats.getCurrentBytesWrite();
            boolean ioReadDifferentFromPrevious = previousVmDiskStats.getCurrentIORead() != currentVmDiskStats.getCurrentIORead();
            boolean ioWriteDifferentFromPrevious = previousVmDiskStats.getCurrentIOWrite() != currentVmDiskStats.getCurrentIOWrite();
            return bytesReadDifferentFromPrevious || bytesWriteDifferentFromPrevious || ioReadDifferentFromPrevious || ioWriteDifferentFromPrevious;
        }
        return true;
    }

    /**
     * Returns true if all the VmDiskStatsEntry counters are zero (bytes read, bytes write, IO read, and IO write must all equal zero).
     */
    protected boolean areAllDiskStatsZero(VmDiskStatsEntry vmDiskStat) {
        return (vmDiskStat.getBytesRead() == 0) && (vmDiskStat.getBytesWrite() == 0) && (vmDiskStat.getIORead() == 0) && (vmDiskStat.getIOWrite() == 0);
    }

    /**
     * Creates a HostVO SearchCriteria where:
     * <ul>
     *  <li>"status" is Up;</li>
     *  <li>"resourceState" is not in Maintenance, PrepareForMaintenance, ErrorInPrepareForMaintenance, or ErrorInMaintenance; and</li>
     *  <li>"type" is Routing.</li>
     * </ul>
     */
    private SearchCriteria<HostVO> createSearchCriteriaForHostTypeRoutingStateUpAndNotInMaintenance() {
        SearchCriteria<HostVO> sc = _hostDao.createSearchCriteria();
        sc.addAnd("status", SearchCriteria.Op.EQ, Status.Up.toString());
        sc.addAnd("resourceState", SearchCriteria.Op.NIN,
                ResourceState.Maintenance,
                ResourceState.PrepareForMaintenance,
                ResourceState.ErrorInPrepareForMaintenance,
                ResourceState.ErrorInMaintenance);
        sc.addAnd("type", SearchCriteria.Op.EQ, Host.Type.Routing.toString());
        return sc;
    }
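
    // The criteria above roughly correspond to the SQL predicate:
    //   status = 'Up' AND resource_state NOT IN ('Maintenance', 'PrepareForMaintenance',
    //   'ErrorInPrepareForMaintenance', 'ErrorInMaintenance') AND type = 'Routing'
    // (column names are illustrative; the actual mapping comes from the HostVO annotations).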

    public StorageStats getStorageStats(long id) {
        return _storageStats.get(id);
    }

    public HostStats getHostStats(long hostId) {
        return _hostStats.get(hostId);
    }

    public Map<String, Object> getDbStats() {
        return dbStats;
    }

    public ManagementServerHostStats getManagementServerHostStats(String managementServerUuid) {
        return managementServerHostStats.get(managementServerUuid);
    }

    public StorageStats getStoragePoolStats(long id) {
        return _storagePoolStats.get(id);
    }

    @Override
    public String getConfigComponentName() {
        return StatsCollector.class.getSimpleName();
    }

    @Override
    public ConfigKey<?>[] getConfigKeys() {
        return new ConfigKey<?>[] {vmDiskStatsInterval, vmDiskStatsIntervalMin, vmNetworkStatsInterval, vmNetworkStatsIntervalMin, StatsTimeout, statsOutputUri,
                vmStatsIncrementMetrics, vmStatsMaxRetentionTime, vmStatsCollectUserVMOnly, vmDiskStatsRetentionEnabled, vmDiskStatsMaxRetentionTime,
                MANAGEMENT_SERVER_STATUS_COLLECTION_INTERVAL,
                DATABASE_SERVER_STATUS_COLLECTION_INTERVAL,
                DATABASE_SERVER_LOAD_HISTORY_RETENTION_NUMBER};
    }

    public double getImageStoreCapacityThreshold() {
        return CapacityManager.SecondaryStorageCapacityThreshold.value();
    }
}