From ebdb28ee4bd6c11c4947fbe49c1ddd9fe02652b6 Mon Sep 17 00:00:00 2001 From: Tyler Ouyang Date: Tue, 1 Aug 2023 09:57:09 -0700 Subject: [PATCH] Fix AgentJanitor bugs (#1225) I previously mis-used Java stream. Fixed them and added more logging. Validated in pre-prod --- .../pinterest/teletraan/worker/AgentJanitor.java | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) diff --git a/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java b/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java index 011e50ac8c..df830edbc9 100644 --- a/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java +++ b/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java @@ -23,6 +23,7 @@ import java.util.Map; import java.util.Set; import java.util.concurrent.TimeUnit; +import java.util.stream.Collectors; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -122,16 +123,18 @@ private boolean isHostStale(HostAgentBean hostAgentBean) { */ private void determineStaleHostCandidates() { long minThreshold = janitorStartTime - minStaleHostThreshold; - long maxThreshold = janitorStartTime - maxLaunchLatencyThreshold; + long maxThreshold = janitorStartTime - maxStaleHostThreshold; List unreachableHosts; try { + LOG.debug("getting hosts between {}, {}", maxThreshold, minThreshold); unreachableHosts = hostAgentDAO.getStaleHosts(maxThreshold, minThreshold); } catch (Exception ex) { LOG.error("failed to get unreachable hosts", ex); return; } - ArrayList unreachableHostIds = new ArrayList<>(); - unreachableHosts.stream().map(hostAgent -> unreachableHostIds.add(hostAgent.getHost_id())); + List unreachableHostIds = unreachableHosts.stream().map(HostAgentBean::getHost_id) + .collect(Collectors.toList()); + LOG.debug("fetched {} unreachable hosts", unreachableHostIds.size()); Set terminatedHosts = getTerminatedHostsFromSource(unreachableHostIds); for (String unreachableId : unreachableHostIds) { @@ -152,6 +155,7 @@ private void processStaleHosts() { long maxThreshold = janitorStartTime - maxStaleHostThreshold; List staleHosts; try { + LOG.debug("getting hosts before {}", maxThreshold); staleHosts = hostAgentDAO.getStaleHosts(maxThreshold); } catch (Exception ex) { LOG.error("failed to get stale hosts", ex); @@ -159,7 +163,8 @@ private void processStaleHosts() { } Map staleHostMap = new HashMap<>(); - staleHosts.stream().map(hostAgent -> staleHostMap.put(hostAgent.getHost_id(), hostAgent)); + staleHosts.stream().forEach(hostAgent -> staleHostMap.put(hostAgent.getHost_id(), hostAgent)); + LOG.debug("fetched {} unreachable hosts", staleHostMap.values().size()); Set terminatedHosts = getTerminatedHostsFromSource(new ArrayList<>(staleHostMap.keySet())); for (String staleId : staleHostMap.keySet()) { @@ -170,6 +175,8 @@ private void processStaleHosts() { if (isHostStale(hostAgent)) { LOG.warn("Agent ({}) is stale (not Pinging Teletraan), but might be running.", hostAgent); + } else { + LOG.debug("host {} is not stale", staleId); } } }