From 3f94aac39071bdff9d204ca150d97b29a04cc964 Mon Sep 17 00:00:00 2001 From: Tyler Ouyang Date: Wed, 26 Jul 2023 17:44:37 -0700 Subject: [PATCH] Use same start time --- .../deployservice/dao/HostAgentDAO.java | 17 +++++----- .../pinterest/deployservice/dao/HostDAO.java | 2 +- .../deployservice/db/DBHostAgentDAOImpl.java | 29 ++++++++++------- .../deployservice/db/DBHostDAOImpl.java | 4 +-- .../teletraan/worker/AgentJanitor.java | 31 +++++++++---------- 5 files changed, 44 insertions(+), 39 deletions(-) diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostAgentDAO.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostAgentDAO.java index e61447dec7..b5ad61d8aa 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostAgentDAO.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostAgentDAO.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,11 +15,10 @@ */ package com.pinterest.deployservice.dao; -import com.pinterest.deployservice.bean.HostAgentBean; - -import java.util.Collection; +import java.sql.SQLException; import java.util.List; -import java.util.Set; + +import com.pinterest.deployservice.bean.HostAgentBean; /** * A collection of methods to help hosts and groups mapping @@ -35,9 +34,11 @@ public interface HostAgentDAO { HostAgentBean getHostById(String hostId) throws Exception; - List getStaleHosts(long after) throws Exception; + List getStaleHosts(long lastUpdateBefore) throws SQLException; + + List getStaleHosts(long lastUpdateAfter, long lastUpdateBefore) throws SQLException; - List getStaleEnvHosts(long after) throws Exception; + List getStaleEnvHosts(long lastUpdateBefore) throws Exception; List getHostsByAgent(String agentVersion, long pageIndex, int pageSize) throws Exception; } diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostDAO.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostDAO.java index fc1714ec99..556fba8516 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostDAO.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/dao/HostDAO.java @@ -56,7 +56,7 @@ public interface HostDAO { List getTerminatingHosts() throws Exception; - List getStaleAgentlessHostIds(long noUpdateSince, int limit) throws SQLException; + List getStaleAgentlessHostIds(long lastUpdateBefore, int limit) throws SQLException; Collection getHostsByEnvId(String envId) throws Exception; diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostAgentDAOImpl.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostAgentDAOImpl.java index 2c25b1a426..f166686668 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostAgentDAOImpl.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostAgentDAOImpl.java @@ -4,9 +4,9 @@ * Licensed under the Apache License, Version 2.0 (the "License"); * you may not use this file except in compliance with the License. * You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. @@ -15,10 +15,8 @@ */ package com.pinterest.deployservice.db; -import com.pinterest.deployservice.bean.HostAgentBean; -import com.pinterest.deployservice.bean.HostState; -import com.pinterest.deployservice.bean.SetClause; -import com.pinterest.deployservice.dao.HostAgentDAO; +import java.sql.SQLException; +import java.util.List; import org.apache.commons.dbcp.BasicDataSource; import org.apache.commons.dbutils.QueryRunner; @@ -26,9 +24,9 @@ import org.apache.commons.dbutils.handlers.BeanHandler; import org.apache.commons.dbutils.handlers.BeanListHandler; -import java.util.Collection; -import java.util.List; -import java.util.Set; +import com.pinterest.deployservice.bean.HostAgentBean; +import com.pinterest.deployservice.bean.SetClause; +import com.pinterest.deployservice.dao.HostAgentDAO; public class DBHostAgentDAOImpl implements HostAgentDAO { private static final String INSERT_HOST_TEMPLATE = "INSERT INTO hosts_and_agents SET %s ON DUPLICATE KEY UPDATE %s"; @@ -36,7 +34,8 @@ public class DBHostAgentDAOImpl implements HostAgentDAO { private static final String DELETE_HOST_BY_ID = "DELETE FROM hosts_and_agents WHERE host_id=?"; private static final String GET_HOST_BY_NAME = "SELECT * FROM hosts_and_agents WHERE host_name=?"; private static final String GET_HOST_BY_HOSTID = "SELECT * FROM hosts_and_agents WHERE host_id=?"; - private static final String GET_STALE_HOST = "SELECT DISTINCT hosts_and_agents.* FROM hosts_and_agents WHERE hosts_and_agents.last_update? AND last_update getStaleHosts(long after) throws Exception { + public List getStaleHosts(long lastUpdateBefore) throws SQLException { + ResultSetHandler> h = new BeanListHandler<>(HostAgentBean.class); + return new QueryRunner(dataSource).query(GET_HOSTS_BY_LAST_UPDATE, h, lastUpdateBefore); + } + + @Override + public List getStaleHosts(long lastUpdateAfter, long lastUpdateBefore) throws SQLException { ResultSetHandler> h = new BeanListHandler<>(HostAgentBean.class); - return new QueryRunner(dataSource).query(GET_STALE_HOST, h, after); + return new QueryRunner(dataSource).query(GET_HOSTS_BY_LAST_UPDATES, h, lastUpdateAfter, lastUpdateBefore); } @Override diff --git a/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java b/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java index 2536ea82df..14f16a1770 100644 --- a/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java +++ b/deploy-service/common/src/main/java/com/pinterest/deployservice/db/DBHostDAOImpl.java @@ -195,9 +195,9 @@ public List getTerminatingHosts() throws Exception { } @Override - public List getStaleAgentlessHostIds(long noUpdateSince, int limit) throws SQLException { + public List getStaleAgentlessHostIds(long lastUpdateBefore, int limit) throws SQLException { return new QueryRunner(dataSource).query(GET_STALE_AGENTLESS_HOST_IDS, - SingleResultSetHandlerFactory.newListObjectHandler(), noUpdateSince, limit); + SingleResultSetHandlerFactory.newListObjectHandler(), lastUpdateBefore, limit); } @Override diff --git a/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java b/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java index fb6d762534..bb0cb86a2b 100644 --- a/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java +++ b/deploy-service/teletraanservice/src/main/java/com/pinterest/teletraan/worker/AgentJanitor.java @@ -46,9 +46,10 @@ public class AgentJanitor extends SimpleAgentJanitor { private static final Logger LOG = LoggerFactory.getLogger(AgentJanitor.class); private final RodimusManager rodimusManager; - private long maxLaunchLatencyThreshold; - private long absoluteThreshold = TimeUnit.DAYS.toMillis(7); - private int agentlessHostBatchSize = 300; + private final long maxLaunchLatencyThreshold; + private final long absoluteThreshold = TimeUnit.DAYS.toMillis(7); + private final int agentlessHostBatchSize = 300; + private long janitorStartTime; public AgentJanitor(ServiceContext serviceContext, int minStaleHostThresholdSeconds, int maxStaleHostThresholdSeconds, int maxLaunchLatencyThresholdSeconds) { @@ -88,8 +89,7 @@ private boolean isHostStale(HostAgentBean hostAgentBean) { return false; } - long current_time = System.currentTimeMillis(); - if (current_time - hostAgentBean.getLast_update() >= absoluteThreshold) { + if (janitorStartTime - hostAgentBean.getLast_update() >= absoluteThreshold) { return true; } @@ -103,11 +103,11 @@ private boolean isHostStale(HostAgentBean hostAgentBean) { Long launchGracePeriod = getInstanceLaunchGracePeriod(hostAgentBean.getAuto_scaling_group()); if ((hostBean.getState() == HostState.PROVISIONED) - && (current_time - hostAgentBean.getLast_update() >= launchGracePeriod)) { + && (janitorStartTime - hostAgentBean.getLast_update() >= launchGracePeriod)) { return true; } if (hostBean.getState() != HostState.TERMINATING && !hostBean.isPendingTerminate() && - (current_time - hostAgentBean.getLast_update() >= maxStaleHostThreshold)) { + (janitorStartTime - hostAgentBean.getLast_update() >= maxStaleHostThreshold)) { return true; } return false; @@ -115,17 +115,17 @@ private boolean isHostStale(HostAgentBean hostAgentBean) { /** * Process stale hosts which have not pinged since - * current_time - minStaleHostThreshold + * janitorStartTime - minStaleHostThreshold * They will be candidates for stale hosts which will be removed in future * executions. * Either mark them as UNREACHABLE, or remove if confirmed with source of truth. */ private void determineStaleHostCandidates() { - long current_time = System.currentTimeMillis(); - long minThreshold = current_time - minStaleHostThreshold; + long minThreshold = janitorStartTime - minStaleHostThreshold; + long maxThreshold = janitorStartTime - maxLaunchLatencyThreshold; List unreachableHosts; try { - unreachableHosts = hostAgentDAO.getStaleHosts(minThreshold); + unreachableHosts = hostAgentDAO.getStaleHosts(maxThreshold, minThreshold); } catch (Exception ex) { LOG.error("failed to get unreachable hosts", ex); return; @@ -145,12 +145,11 @@ private void determineStaleHostCandidates() { /** * Process stale hosts which have not pinged since - * current_time - maxStaleHostThreshold + * janitorStartTime - maxStaleHostThreshold * They are confirmed stale hosts, should be removed from Teletraan */ private void processStaleHosts() { - long current_time = System.currentTimeMillis(); - long maxThreshold = current_time - maxStaleHostThreshold; + long maxThreshold = janitorStartTime - maxStaleHostThreshold; List staleHosts; try { staleHosts = hostAgentDAO.getStaleHosts(maxThreshold); @@ -184,8 +183,7 @@ private void processStaleHosts() { * here. We wait 10x maxLaunchLatencyThreshold before doing cleanup. */ private void cleanUpAgentlessHosts() { - long current_time = System.currentTimeMillis(); - long noUpdateSince = current_time - 10 * maxLaunchLatencyThreshold; + long noUpdateSince = janitorStartTime - 10 * maxLaunchLatencyThreshold; List agentlessHosts; try { agentlessHosts = hostDAO.getStaleAgentlessHostIds(noUpdateSince, agentlessHostBatchSize); @@ -206,6 +204,7 @@ private void cleanUpAgentlessHosts() { @Override void processAllHosts() { + janitorStartTime = System.currentTimeMillis(); processStaleHosts(); determineStaleHostCandidates(); cleanUpAgentlessHosts();