From 5c29d5ba453b3441f2fd84232f9d833c288c72ce Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Gabriel=20Beims=20Br=C3=A4scher?= Date: Tue, 1 Sep 2020 07:29:43 -0300 Subject: [PATCH] influxdb: Avoid out of memory by influxDB (#4291) After a few hours running with InfluxDB configured, CloudStack hangs because a java.lang.OutOfMemoryError is raised. The error happens at com.cloud.server.StatsCollector.writeBatches(StatsCollector.java:1510): 2020-08-12 21:19:00,972 ERROR [c.c.s.StatsCollector] (StatsCollector-6:ctx-0a4cfe6a) (logid:03a7ba48) Error trying to retrieve host stats java.lang.OutOfMemoryError: unable to create new native thread ... at org.influxdb.impl.BatchProcessor.<init>(BatchProcessor.java:294) at org.influxdb.impl.BatchProcessor$Builder.build(BatchProcessor.java:201) at org.influxdb.impl.InfluxDBImpl.enableBatch(InfluxDBImpl.java:311) at com.cloud.server.StatsCollector.writeBatches(StatsCollector.java:1510) at com.cloud.server.StatsCollector$AbstractStatsCollector.sendMetricsToInfluxdb(StatsCollector.java:1351) at com.cloud.server.StatsCollector$HostCollector.runInContext(StatsCollector.java:522) Context on InfluxDB Batch: Enabling batch on InfluxDB is great and speeds up writes, but it requires caution to avoid zombie threads. Solution: This happens because the batching feature creates an internal thread pool that needs to be shut down explicitly; therefore, it is important to add: influxDB.close(). 
--- pom.xml | 2 +- .../java/com/cloud/server/StatsCollector.java | 36 +++++++++++-------- 2 files changed, 22 insertions(+), 16 deletions(-) diff --git a/pom.xml b/pom.xml index c38b840b238..64a34f7ff2f 100644 --- a/pom.xml +++ b/pom.xml @@ -130,7 +130,7 @@ 23.6-jre 4.5.4 4.4.8 - 2.15 + 2.20 2.9.2 1.9.2 0.16 diff --git a/server/src/main/java/com/cloud/server/StatsCollector.java b/server/src/main/java/com/cloud/server/StatsCollector.java index 5683106931d..3937bd99666 100644 --- a/server/src/main/java/com/cloud/server/StatsCollector.java +++ b/server/src/main/java/com/cloud/server/StatsCollector.java @@ -1334,21 +1334,25 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc protected void sendMetricsToInfluxdb(Map metrics) { InfluxDB influxDbConnection = createInfluxDbConnection(); - Pong response = influxDbConnection.ping(); - if (response.getVersion().equalsIgnoreCase("unknown")) { - throw new CloudRuntimeException(String.format("Cannot ping influxdb host %s:%s.", externalStatsHost, externalStatsPort)); + try { + Pong response = influxDbConnection.ping(); + if (response.getVersion().equalsIgnoreCase("unknown")) { + throw new CloudRuntimeException(String.format("Cannot ping influxdb host %s:%s.", externalStatsHost, externalStatsPort)); + } + + Collection metricsObjects = metrics.values(); + List points = new ArrayList<>(); + + s_logger.debug(String.format("Sending stats to %s host %s:%s", externalStatsType, externalStatsHost, externalStatsPort)); + + for (Object metricsObject : metricsObjects) { + Point vmPoint = creteInfluxDbPoint(metricsObject); + points.add(vmPoint); + } + writeBatches(influxDbConnection, databaseName, points); + } finally { + influxDbConnection.close(); } - - Collection metricsObjects = metrics.values(); - List points = new ArrayList<>(); - - s_logger.debug(String.format("Sending stats to %s host %s:%s", externalStatsType, externalStatsHost, externalStatsPort)); - - for (Object metricsObject : metricsObjects) { - 
Point vmPoint = creteInfluxDbPoint(metricsObject); - points.add(vmPoint); - } - writeBatches(influxDbConnection, databaseName, points); } /** @@ -1507,7 +1511,9 @@ public class StatsCollector extends ManagerBase implements ComponentMethodInterc */ protected void writeBatches(InfluxDB influxDbConnection, String dbName, List points) { BatchPoints batchPoints = BatchPoints.database(dbName).build(); - influxDbConnection.enableBatch(BatchOptions.DEFAULTS); + if(!influxDbConnection.isBatchEnabled()){ + influxDbConnection.enableBatch(BatchOptions.DEFAULTS); + } for (Point point : points) { batchPoints.point(point);