From da447cdaba2335b7e04d76f5e0edbee4dba60735 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Thu, 12 Dec 2024 11:58:26 +0100
Subject: [PATCH 01/16] only tag node if needed

---
 .../src/simcore_service_autoscaling/utils/utils_docker.py | 8 +++++++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py
index 65caa0f40b1..4c5b5e6f79f 100644
--- a/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py
+++ b/services/autoscaling/src/simcore_service_autoscaling/utils/utils_docker.py
@@ -521,8 +521,14 @@ async def tag_node(
     tags: dict[DockerLabelKey, str],
     available: bool,
 ) -> Node:
+    assert node.spec  # nosec
+    if (node.spec.labels == tags) and (
+        (node.spec.availability is Availability.active) == available
+    ):
+        # nothing to do
+        return node
     with log_context(
-        logger, logging.DEBUG, msg=f"tagging {node.id=} with {tags=} and {available=}"
+        logger, logging.DEBUG, msg=f"tag {node.id=} with {tags=} and {available=}"
     ):
         assert node.id  # nosec
 

From 190d764a4969a6e8b8a04b45f1627b8cdd832898 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Thu, 12 Dec 2024 14:45:37 +0100
Subject: [PATCH 02/16] space

---
 .../autoscaling/src/simcore_service_autoscaling/modules/dask.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py
index 4c5ee00f86c..d57508babf8 100644
--- a/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py
+++ b/services/autoscaling/src/simcore_service_autoscaling/modules/dask.py
@@ -273,7 +273,7 @@ def _list_processing_tasks_on_worker(
     async with _scheduler_client(scheduler_url, authentication) as client:
         worker_url, _ = _dask_worker_from_ec2_instance(client, ec2_instance)
 
-        _logger.debug("looking for processing tasksfor %s", f"{worker_url=}")
+        _logger.debug("looking for processing tasks for %s", f"{worker_url=}")
 
         # now get the used resources
         worker_processing_tasks: list[

From faeb390dab3b18b3b5084f3f7b14d2b8dc7fda7e Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Thu, 12 Dec 2024 15:52:33 +0100
Subject: [PATCH 03/16] moved fixture down

---
 services/autoscaling/tests/unit/conftest.py   | 108 +++++++++++++++++-
 .../unit/test_modules_auto_scaling_dynamic.py |  13 ++-
 .../unit/test_modules_buffer_machine_core.py  |  97 +---------------
 3 files changed, 119 insertions(+), 99 deletions(-)

diff --git a/services/autoscaling/tests/unit/conftest.py b/services/autoscaling/tests/unit/conftest.py
index 4a48f2776b6..d4e58d5bf96 100644
--- a/services/autoscaling/tests/unit/conftest.py
+++ b/services/autoscaling/tests/unit/conftest.py
@@ -28,11 +28,16 @@
     EC2InstanceType,
     Resources,
 )
+from common_library.json_serialization import json_dumps
 from deepdiff import DeepDiff
 from faker import Faker
 from fakeredis.aioredis import FakeRedis
 from fastapi import FastAPI
-from models_library.docker import DockerLabelKey, StandardSimcoreDockerLabels
+from models_library.docker import (
+    DockerGenericTag,
+    DockerLabelKey,
+    StandardSimcoreDockerLabels,
+)
 from models_library.generated_models.docker_rest_api import Availability
 from models_library.generated_models.docker_rest_api import Node as DockerNode
 from models_library.generated_models.docker_rest_api import (
@@ -57,6 +62,7 @@
 )
 from settings_library.rabbit import RabbitSettings
 from settings_library.ssm import SSMSettings
+from simcore_service_autoscaling.constants import PRE_PULLED_IMAGES_EC2_TAG_KEY
 from simcore_service_autoscaling.core.application import create_app
 from simcore_service_autoscaling.core.settings import (
     AUTOSCALING_ENV_PREFIX,
@@ -71,8 +77,14 @@
     DaskTaskResources,
 )
 from simcore_service_autoscaling.modules import auto_scaling_core
+from simcore_service_autoscaling.modules.auto_scaling_mode_dynamic import (
+    DynamicAutoscaling,
+)
 from simcore_service_autoscaling.modules.docker import AutoscalingDocker
 from simcore_service_autoscaling.modules.ec2 import SimcoreEC2API
+from simcore_service_autoscaling.utils.buffer_machines_pool_core import (
+    get_deactivated_buffer_ec2_tags,
+)
 from simcore_service_autoscaling.utils.utils_docker import (
     _OSPARC_SERVICE_READY_LABEL_KEY,
     _OSPARC_SERVICES_READY_DATETIME_LABEL_KEY,
@@ -81,7 +93,9 @@
 from tenacity.retry import retry_if_exception_type
 from tenacity.stop import stop_after_delay
 from tenacity.wait import wait_fixed
-from types_aiobotocore_ec2.literals import InstanceTypeType
+from types_aiobotocore_ec2 import EC2Client
+from types_aiobotocore_ec2.literals import InstanceStateNameType, InstanceTypeType
+from types_aiobotocore_ec2.type_defs import TagTypeDef
 
 pytest_plugins = [
     "pytest_simcore.aws_server",
@@ -1042,3 +1056,93 @@ async def _(
         autospec=True,
         side_effect=_,
     )
+
+
+@pytest.fixture
+async def create_buffer_machines(
+    ec2_client: EC2Client,
+    aws_ami_id: str,
+    app_settings: ApplicationSettings,
+    initialized_app: FastAPI,
+) -> Callable[
+    [int, InstanceTypeType, InstanceStateNameType, list[DockerGenericTag]],
+    Awaitable[list[str]],
+]:
+    async def _do(
+        num: int,
+        instance_type: InstanceTypeType,
+        instance_state_name: InstanceStateNameType,
+        pre_pull_images: list[DockerGenericTag],
+    ) -> list[str]:
+        assert app_settings.AUTOSCALING_EC2_INSTANCES
+
+        assert instance_state_name in [
+            "running",
+            "stopped",
+        ], "only 'running' and 'stopped' are supported for testing"
+
+        resource_tags: list[TagTypeDef] = [
+            {"Key": tag_key, "Value": tag_value}
+            for tag_key, tag_value in get_deactivated_buffer_ec2_tags(
+                initialized_app, DynamicAutoscaling()
+            ).items()
+        ]
+        if pre_pull_images is not None and instance_state_name == "stopped":
+            resource_tags.append(
+                {
+                    "Key": PRE_PULLED_IMAGES_EC2_TAG_KEY,
+                    "Value": f"{json_dumps(pre_pull_images)}",
+                }
+            )
+        with log_context(
+            logging.INFO, f"creating {num} buffer machines of {instance_type}"
+        ):
+            instances = await ec2_client.run_instances(
+                ImageId=aws_ami_id,
+                MaxCount=num,
+                MinCount=num,
+                InstanceType=instance_type,
+                KeyName=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_KEY_NAME,
+                SecurityGroupIds=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_SECURITY_GROUP_IDS,
+                SubnetId=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_SUBNET_ID,
+                IamInstanceProfile={
+                    "Arn": app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ATTACHED_IAM_PROFILE
+                },
+                TagSpecifications=[
+                    {"ResourceType": "instance", "Tags": resource_tags},
+                    {"ResourceType": "volume", "Tags": resource_tags},
+                    {"ResourceType": "network-interface", "Tags": resource_tags},
+                ],
+                UserData="echo 'I am pytest'",
+            )
+            instance_ids = [
+                i["InstanceId"] for i in instances["Instances"] if "InstanceId" in i
+            ]
+
+        waiter = ec2_client.get_waiter("instance_exists")
+        await waiter.wait(InstanceIds=instance_ids)
+        instances = await ec2_client.describe_instances(InstanceIds=instance_ids)
+        assert "Reservations" in instances
+        assert instances["Reservations"]
+        assert "Instances" in instances["Reservations"][0]
+        assert len(instances["Reservations"][0]["Instances"]) == num
+        for instance in instances["Reservations"][0]["Instances"]:
+            assert "State" in instance
+            assert "Name" in instance["State"]
+            assert instance["State"]["Name"] == "running"
+
+        if instance_state_name == "stopped":
+            await ec2_client.stop_instances(InstanceIds=instance_ids)
+            instances = await ec2_client.describe_instances(InstanceIds=instance_ids)
+            assert "Reservations" in instances
+            assert instances["Reservations"]
+            assert "Instances" in instances["Reservations"][0]
+            assert len(instances["Reservations"][0]["Instances"]) == num
+            for instance in instances["Reservations"][0]["Instances"]:
+                assert "State" in instance
+                assert "Name" in instance["State"]
+                assert instance["State"]["Name"] == "stopped"
+
+        return instance_ids
+
+    return _do
diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
index ccdb2461c04..51be00eeea5 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
@@ -24,6 +24,7 @@
 from fastapi import FastAPI
 from models_library.docker import (
     DOCKER_TASK_EC2_INSTANCE_TYPE_PLACEMENT_CONSTRAINT_KEY,
+    DockerGenericTag,
     DockerLabelKey,
     StandardSimcoreDockerLabels,
 )
@@ -68,7 +69,7 @@
     _OSPARC_SERVICES_READY_DATETIME_LABEL_KEY,
 )
 from types_aiobotocore_ec2.client import EC2Client
-from types_aiobotocore_ec2.literals import InstanceTypeType
+from types_aiobotocore_ec2.literals import InstanceStateNameType, InstanceTypeType
 from types_aiobotocore_ec2.type_defs import FilterTypeDef, InstanceTypeDef
 
 
@@ -1790,3 +1791,13 @@ async def test__activate_drained_nodes_with_drained_node(
         },
         available=True,
     )
+
+
+async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
+    minimal_configuration: None,
+    create_buffer_machines: Callable[
+        [int, InstanceTypeType, InstanceStateNameType, list[DockerGenericTag]],
+        Awaitable[list[str]],
+    ],
+):
+    ...
diff --git a/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py b/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py
index 24a552f342b..6c03e44b275 100644
--- a/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py
+++ b/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py
@@ -17,7 +17,6 @@
 import pytest
 import tenacity
 from aws_library.ec2 import AWSTagKey, EC2InstanceBootSpecific
-from common_library.json_serialization import json_dumps
 from faker import Faker
 from fastapi import FastAPI
 from fastapi.encoders import jsonable_encoder
@@ -30,19 +29,15 @@
 from pytest_simcore.helpers.logging_tools import log_context
 from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict, setenvs_from_dict
 from simcore_service_autoscaling.constants import PRE_PULLED_IMAGES_EC2_TAG_KEY
-from simcore_service_autoscaling.core.settings import ApplicationSettings
 from simcore_service_autoscaling.modules.auto_scaling_mode_dynamic import (
     DynamicAutoscaling,
 )
 from simcore_service_autoscaling.modules.buffer_machines_pool_core import (
     monitor_buffer_machines,
 )
-from simcore_service_autoscaling.utils.buffer_machines_pool_core import (
-    get_deactivated_buffer_ec2_tags,
-)
 from types_aiobotocore_ec2 import EC2Client
 from types_aiobotocore_ec2.literals import InstanceStateNameType, InstanceTypeType
-from types_aiobotocore_ec2.type_defs import FilterTypeDef, TagTypeDef
+from types_aiobotocore_ec2.type_defs import FilterTypeDef
 
 
 @pytest.fixture
@@ -345,96 +340,6 @@ async def test_monitor_buffer_machines(
     )
 
 
-@pytest.fixture
-async def create_buffer_machines(
-    ec2_client: EC2Client,
-    aws_ami_id: str,
-    app_settings: ApplicationSettings,
-    initialized_app: FastAPI,
-) -> Callable[
-    [int, InstanceTypeType, InstanceStateNameType, list[DockerGenericTag]],
-    Awaitable[list[str]],
-]:
-    async def _do(
-        num: int,
-        instance_type: InstanceTypeType,
-        instance_state_name: InstanceStateNameType,
-        pre_pull_images: list[DockerGenericTag],
-    ) -> list[str]:
-        assert app_settings.AUTOSCALING_EC2_INSTANCES
-
-        assert instance_state_name in [
-            "running",
-            "stopped",
-        ], "only 'running' and 'stopped' are supported for testing"
-
-        resource_tags: list[TagTypeDef] = [
-            {"Key": tag_key, "Value": tag_value}
-            for tag_key, tag_value in get_deactivated_buffer_ec2_tags(
-                initialized_app, DynamicAutoscaling()
-            ).items()
-        ]
-        if pre_pull_images is not None and instance_state_name == "stopped":
-            resource_tags.append(
-                {
-                    "Key": PRE_PULLED_IMAGES_EC2_TAG_KEY,
-                    "Value": f"{json_dumps(pre_pull_images)}",
-                }
-            )
-        with log_context(
-            logging.INFO, f"creating {num} buffer machines of {instance_type}"
-        ):
-            instances = await ec2_client.run_instances(
-                ImageId=aws_ami_id,
-                MaxCount=num,
-                MinCount=num,
-                InstanceType=instance_type,
-                KeyName=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_KEY_NAME,
-                SecurityGroupIds=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_SECURITY_GROUP_IDS,
-                SubnetId=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_SUBNET_ID,
-                IamInstanceProfile={
-                    "Arn": app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ATTACHED_IAM_PROFILE
-                },
-                TagSpecifications=[
-                    {"ResourceType": "instance", "Tags": resource_tags},
-                    {"ResourceType": "volume", "Tags": resource_tags},
-                    {"ResourceType": "network-interface", "Tags": resource_tags},
-                ],
-                UserData="echo 'I am pytest'",
-            )
-            instance_ids = [
-                i["InstanceId"] for i in instances["Instances"] if "InstanceId" in i
-            ]
-
-        waiter = ec2_client.get_waiter("instance_exists")
-        await waiter.wait(InstanceIds=instance_ids)
-        instances = await ec2_client.describe_instances(InstanceIds=instance_ids)
-        assert "Reservations" in instances
-        assert instances["Reservations"]
-        assert "Instances" in instances["Reservations"][0]
-        assert len(instances["Reservations"][0]["Instances"]) == num
-        for instance in instances["Reservations"][0]["Instances"]:
-            assert "State" in instance
-            assert "Name" in instance["State"]
-            assert instance["State"]["Name"] == "running"
-
-        if instance_state_name == "stopped":
-            await ec2_client.stop_instances(InstanceIds=instance_ids)
-            instances = await ec2_client.describe_instances(InstanceIds=instance_ids)
-            assert "Reservations" in instances
-            assert instances["Reservations"]
-            assert "Instances" in instances["Reservations"][0]
-            assert len(instances["Reservations"][0]["Instances"]) == num
-            for instance in instances["Reservations"][0]["Instances"]:
-                assert "State" in instance
-                assert "Name" in instance["State"]
-                assert instance["State"]["Name"] == "stopped"
-
-        return instance_ids
-
-    return _do
-
-
 @dataclass
 class _BufferMachineParams:
     instance_state_name: InstanceStateNameType

From 78db78f8fee6581af872383e21a244d7268f7b6e Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Thu, 12 Dec 2024 16:20:32 +0100
Subject: [PATCH 04/16] renaming fixture

---
 services/autoscaling/tests/unit/conftest.py   | 22 ++++++++++++----
 .../unit/test_modules_auto_scaling_dynamic.py | 25 ++++++++-----------
 .../unit/test_utils_auto_scaling_core.py      | 10 +++-----
 3 files changed, 32 insertions(+), 25 deletions(-)

diff --git a/services/autoscaling/tests/unit/conftest.py b/services/autoscaling/tests/unit/conftest.py
index d4e58d5bf96..3239761efd2 100644
--- a/services/autoscaling/tests/unit/conftest.py
+++ b/services/autoscaling/tests/unit/conftest.py
@@ -50,7 +50,7 @@
     Service,
     TaskSpec,
 )
-from pydantic import ByteSize, PositiveInt, TypeAdapter
+from pydantic import ByteSize, NonNegativeInt, PositiveInt, TypeAdapter
 from pytest_mock import MockType
 from pytest_mock.plugin import MockerFixture
 from pytest_simcore.helpers.host import get_localhost_ip
@@ -1005,10 +1005,22 @@ def _creator(
 
 
 @pytest.fixture
-def mock_machines_buffer(monkeypatch: pytest.MonkeyPatch) -> int:
-    num_machines_in_buffer = 5
-    monkeypatch.setenv("EC2_INSTANCES_MACHINES_BUFFER", f"{num_machines_in_buffer}")
-    return num_machines_in_buffer
+def num_hot_buffer() -> NonNegativeInt:
+    return 5
+
+
+@pytest.fixture
+def with_instances_machines_hot_buffer(
+    num_hot_buffer: int,
+    app_environment: EnvVarsDict,
+    monkeypatch: pytest.MonkeyPatch,
+) -> EnvVarsDict:
+    return app_environment | setenvs_from_dict(
+        monkeypatch,
+        {
+            "EC2_INSTANCES_MACHINES_BUFFER": f"{num_hot_buffer}",
+        },
+    )
 
 
 @pytest.fixture
diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
index 51be00eeea5..e2ddc87a53c 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
@@ -308,7 +308,7 @@ async def test_cluster_scaling_with_no_services_does_nothing(
 async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expected_machines(
     patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock,
     minimal_configuration: None,
-    mock_machines_buffer: int,
+    with_instances_machines_hot_buffer: EnvVarsDict,
     app_settings: ApplicationSettings,
     initialized_app: FastAPI,
     aws_allowed_ec2_instance_type_names_env: list[str],
@@ -322,17 +322,13 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect
     instance_type_filters: Sequence[FilterTypeDef],
 ):
     assert app_settings.AUTOSCALING_EC2_INSTANCES
-    assert (
-        mock_machines_buffer
-        == app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
-    )
     await auto_scale_cluster(
         app=initialized_app, auto_scaling_mode=DynamicAutoscaling()
     )
     await assert_autoscaled_dynamic_ec2_instances(
         ec2_client,
         expected_num_reservations=1,
-        expected_num_instances=mock_machines_buffer,
+        expected_num_instances=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
         expected_instance_type=cast(
             InstanceTypeType,
             next(
@@ -347,7 +343,7 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect
         mock_rabbitmq_post_message,
         app_settings,
         initialized_app,
-        instances_pending=mock_machines_buffer,
+        instances_pending=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
     )
     mock_rabbitmq_post_message.reset_mock()
     # calling again should attach the new nodes to the reserve, but nothing should start
@@ -357,7 +353,7 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect
     await assert_autoscaled_dynamic_ec2_instances(
         ec2_client,
         expected_num_reservations=1,
-        expected_num_instances=mock_machines_buffer,
+        expected_num_instances=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
         expected_instance_type=cast(
             InstanceTypeType,
             next(
@@ -376,14 +372,15 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect
         mock_rabbitmq_post_message,
         app_settings,
         initialized_app,
-        nodes_total=mock_machines_buffer,
-        nodes_drained=mock_machines_buffer,
-        instances_running=mock_machines_buffer,
+        nodes_total=with_instances_machines_hot_buffer,
+        nodes_drained=with_instances_machines_hot_buffer,
+        instances_running=with_instances_machines_hot_buffer,
         cluster_total_resources={
-            "cpus": mock_machines_buffer
+            "cpus": app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
             * fake_node.description.resources.nano_cp_us
             / 1e9,
-            "ram": mock_machines_buffer * fake_node.description.resources.memory_bytes,
+            "ram": app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
+            * fake_node.description.resources.memory_bytes,
         },
     )
 
@@ -395,7 +392,7 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect
     await assert_autoscaled_dynamic_ec2_instances(
         ec2_client,
         expected_num_reservations=1,
-        expected_num_instances=mock_machines_buffer,
+        expected_num_instances=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
         expected_instance_type=cast(
             InstanceTypeType,
             next(
diff --git a/services/autoscaling/tests/unit/test_utils_auto_scaling_core.py b/services/autoscaling/tests/unit/test_utils_auto_scaling_core.py
index f576292ec6b..5a5a3240057 100644
--- a/services/autoscaling/tests/unit/test_utils_auto_scaling_core.py
+++ b/services/autoscaling/tests/unit/test_utils_auto_scaling_core.py
@@ -323,7 +323,7 @@ def test_sort_empty_drained_nodes(
 
 
 def test_sort_drained_nodes(
-    mock_machines_buffer: int,
+    with_instances_machines_hot_buffer: EnvVarsDict,
     minimal_configuration: None,
     app_settings: ApplicationSettings,
     random_fake_available_instances: list[EC2InstanceType],
@@ -332,7 +332,9 @@ def test_sort_drained_nodes(
 ):
     machine_buffer_type = get_machine_buffer_type(random_fake_available_instances)
     _NUM_DRAINED_NODES = 20
-    _NUM_NODE_WITH_TYPE_BUFFER = 3 * mock_machines_buffer
+    _NUM_NODE_WITH_TYPE_BUFFER = (
+        3 * app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
+    )
     _NUM_NODES_TERMINATING = 13
     fake_drained_nodes = []
     for _ in range(_NUM_DRAINED_NODES):
@@ -388,10 +390,6 @@ def test_sort_drained_nodes(
         app_settings, fake_drained_nodes, random_fake_available_instances
     )
     assert app_settings.AUTOSCALING_EC2_INSTANCES
-    assert (
-        mock_machines_buffer
-        == app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
-    )
     assert len(sorted_drained_nodes) == (
         _NUM_DRAINED_NODES
         + _NUM_NODE_WITH_TYPE_BUFFER

From c9b5771e1f090b5a2b320485caefc85a45bb2034 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Thu, 12 Dec 2024 18:30:55 +0100
Subject: [PATCH 05/16] moving fixtures and preparing test

---
 services/autoscaling/tests/unit/conftest.py   | 76 ++++++++++++++++++-
 .../unit/test_modules_auto_scaling_dynamic.py | 42 +++++++++-
 .../unit/test_modules_buffer_machine_core.py  | 75 +-----------------
 3 files changed, 114 insertions(+), 79 deletions(-)

diff --git a/services/autoscaling/tests/unit/conftest.py b/services/autoscaling/tests/unit/conftest.py
index 3239761efd2..9b7489268e6 100644
--- a/services/autoscaling/tests/unit/conftest.py
+++ b/services/autoscaling/tests/unit/conftest.py
@@ -1070,6 +1070,78 @@ async def _(
     )
 
 
+@pytest.fixture
+def fake_pre_pull_images() -> list[DockerGenericTag]:
+    return TypeAdapter(list[DockerGenericTag]).validate_python(
+        [
+            "nginx:latest",
+            "itisfoundation/my-very-nice-service:latest",
+            "simcore/services/dynamic/another-nice-one:2.4.5",
+            "asd",
+        ]
+    )
+
+
+@pytest.fixture
+def ec2_instances_allowed_types_with_only_1_buffered(
+    faker: Faker,
+    fake_pre_pull_images: list[DockerGenericTag],
+    external_ec2_instances_allowed_types: None | dict[str, EC2InstanceBootSpecific],
+) -> dict[InstanceTypeType, EC2InstanceBootSpecific]:
+    if not external_ec2_instances_allowed_types:
+        return {
+            "t2.micro": EC2InstanceBootSpecific(
+                ami_id=faker.pystr(),
+                pre_pull_images=fake_pre_pull_images,
+                buffer_count=faker.pyint(min_value=1, max_value=10),
+            )
+        }
+
+    allowed_ec2_types = external_ec2_instances_allowed_types
+    allowed_ec2_types_with_buffer_defined = dict(
+        filter(
+            lambda instance_type_and_settings: instance_type_and_settings[
+                1
+            ].buffer_count
+            > 0,
+            allowed_ec2_types.items(),
+        )
+    )
+    assert (
+        allowed_ec2_types_with_buffer_defined
+    ), "one type with buffer is needed for the tests!"
+    assert (
+        len(allowed_ec2_types_with_buffer_defined) == 1
+    ), "more than one type with buffer is disallowed in this test!"
+    return {
+        TypeAdapter(InstanceTypeType).validate_python(k): v
+        for k, v in allowed_ec2_types_with_buffer_defined.items()
+    }
+
+
+@pytest.fixture
+def buffer_count(
+    ec2_instances_allowed_types_with_only_1_buffered: dict[
+        InstanceTypeType, EC2InstanceBootSpecific
+    ],
+) -> int:
+    def _by_buffer_count(
+        instance_type_and_settings: tuple[InstanceTypeType, EC2InstanceBootSpecific]
+    ) -> bool:
+        _, boot_specific = instance_type_and_settings
+        return boot_specific.buffer_count > 0
+
+    allowed_ec2_types = ec2_instances_allowed_types_with_only_1_buffered
+    allowed_ec2_types_with_buffer_defined = dict(
+        filter(_by_buffer_count, allowed_ec2_types.items())
+    )
+    assert allowed_ec2_types_with_buffer_defined, "you need one type with buffer"
+    assert (
+        len(allowed_ec2_types_with_buffer_defined) == 1
+    ), "more than one type with buffer is disallowed in this test!"
+    return next(iter(allowed_ec2_types_with_buffer_defined.values())).buffer_count
+
+
 @pytest.fixture
 async def create_buffer_machines(
     ec2_client: EC2Client,
@@ -1077,14 +1149,14 @@ async def create_buffer_machines(
     app_settings: ApplicationSettings,
     initialized_app: FastAPI,
 ) -> Callable[
-    [int, InstanceTypeType, InstanceStateNameType, list[DockerGenericTag]],
+    [int, InstanceTypeType, InstanceStateNameType, list[DockerGenericTag] | None],
     Awaitable[list[str]],
 ]:
     async def _do(
         num: int,
         instance_type: InstanceTypeType,
         instance_state_name: InstanceStateNameType,
-        pre_pull_images: list[DockerGenericTag],
+        pre_pull_images: list[DockerGenericTag] | None,
     ) -> list[str]:
         assert app_settings.AUTOSCALING_EC2_INSTANCES
 
diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
index e2ddc87a53c..521ede32332 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
@@ -44,7 +44,10 @@
     assert_cluster_state,
     create_fake_association,
 )
-from pytest_simcore.helpers.aws_ec2 import assert_autoscaled_dynamic_ec2_instances
+from pytest_simcore.helpers.aws_ec2 import (
+    assert_autoscaled_dynamic_ec2_instances,
+    assert_autoscaled_dynamic_warm_pools_ec2_instances,
+)
 from pytest_simcore.helpers.logging_tools import log_context
 from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict
 from simcore_service_autoscaling.core.settings import ApplicationSettings
@@ -1792,9 +1795,42 @@ async def test__activate_drained_nodes_with_drained_node(
 
 async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
     minimal_configuration: None,
+    with_instances_machines_hot_buffer: EnvVarsDict,
+    ec2_client: EC2Client,
+    app_settings: ApplicationSettings,
+    ec2_instances_allowed_types_with_only_1_buffered: dict[
+        InstanceTypeType, EC2InstanceBootSpecific
+    ],
+    ec2_instance_custom_tags: dict[str, str],
+    buffer_count: int,
     create_buffer_machines: Callable[
-        [int, InstanceTypeType, InstanceStateNameType, list[DockerGenericTag]],
+        [int, InstanceTypeType, InstanceStateNameType, list[DockerGenericTag] | None],
         Awaitable[list[str]],
     ],
 ):
-    ...
+    # pre-requisites
+    assert app_settings.AUTOSCALING_EC2_INSTANCES
+    assert app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER > 0
+    # we have nothing running now
+    all_instances = await ec2_client.describe_instances()
+    assert not all_instances["Reservations"]
+
+    # have a few warm buffers ready
+    buffer_machines = await create_buffer_machines(
+        buffer_count,
+        next(iter(list(ec2_instances_allowed_types_with_only_1_buffered))),
+        "stopped",
+        None,
+    )
+    await assert_autoscaled_dynamic_warm_pools_ec2_instances(
+        ec2_client,
+        expected_num_reservations=1,
+        expected_num_instances=buffer_count,
+        expected_instance_type=next(
+            iter(ec2_instances_allowed_types_with_only_1_buffered)
+        ),
+        expected_instance_state="stopped",
+        expected_additional_tag_keys=list(ec2_instance_custom_tags),
+        expected_pre_pulled_images=None,
+        instance_filters=None,
+    )
diff --git a/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py b/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py
index 6c03e44b275..26375418417 100644
--- a/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py
+++ b/services/autoscaling/tests/unit/test_modules_buffer_machine_core.py
@@ -16,8 +16,7 @@
 
 import pytest
 import tenacity
-from aws_library.ec2 import AWSTagKey, EC2InstanceBootSpecific
-from faker import Faker
+from aws_library.ec2 import AWSTagKey
 from fastapi import FastAPI
 from fastapi.encoders import jsonable_encoder
 from models_library.docker import DockerGenericTag
@@ -40,55 +39,6 @@
 from types_aiobotocore_ec2.type_defs import FilterTypeDef
 
 
-@pytest.fixture
-def fake_pre_pull_images() -> list[DockerGenericTag]:
-    return TypeAdapter(list[DockerGenericTag]).validate_python(
-        [
-            "nginx:latest",
-            "itisfoundation/my-very-nice-service:latest",
-            "simcore/services/dynamic/another-nice-one:2.4.5",
-            "asd",
-        ]
-    )
-
-
-@pytest.fixture
-def ec2_instances_allowed_types_with_only_1_buffered(
-    faker: Faker,
-    fake_pre_pull_images: list[DockerGenericTag],
-    external_ec2_instances_allowed_types: None | dict[str, EC2InstanceBootSpecific],
-) -> dict[InstanceTypeType, EC2InstanceBootSpecific]:
-    if not external_ec2_instances_allowed_types:
-        return {
-            "t2.micro": EC2InstanceBootSpecific(
-                ami_id=faker.pystr(),
-                pre_pull_images=fake_pre_pull_images,
-                buffer_count=faker.pyint(min_value=1, max_value=10),
-            )
-        }
-
-    allowed_ec2_types = external_ec2_instances_allowed_types
-    allowed_ec2_types_with_buffer_defined = dict(
-        filter(
-            lambda instance_type_and_settings: instance_type_and_settings[
-                1
-            ].buffer_count
-            > 0,
-            allowed_ec2_types.items(),
-        )
-    )
-    assert (
-        allowed_ec2_types_with_buffer_defined
-    ), "one type with buffer is needed for the tests!"
-    assert (
-        len(allowed_ec2_types_with_buffer_defined) == 1
-    ), "more than one type with buffer is disallowed in this test!"
-    return {
-        TypeAdapter(InstanceTypeType).validate_python(k): v
-        for k, v in allowed_ec2_types_with_buffer_defined.items()
-    }
-
-
 @pytest.fixture
 def with_ec2_instance_allowed_types_env(
     app_environment: EnvVarsDict,
@@ -557,29 +507,6 @@ async def test_monitor_buffer_machines_terminates_unneeded_pool(
     )
 
 
-@pytest.fixture
-def buffer_count(
-    ec2_instances_allowed_types_with_only_1_buffered: dict[
-        InstanceTypeType, EC2InstanceBootSpecific
-    ],
-) -> int:
-    def _by_buffer_count(
-        instance_type_and_settings: tuple[InstanceTypeType, EC2InstanceBootSpecific]
-    ) -> bool:
-        _, boot_specific = instance_type_and_settings
-        return boot_specific.buffer_count > 0
-
-    allowed_ec2_types = ec2_instances_allowed_types_with_only_1_buffered
-    allowed_ec2_types_with_buffer_defined = dict(
-        filter(_by_buffer_count, allowed_ec2_types.items())
-    )
-    assert allowed_ec2_types_with_buffer_defined, "you need one type with buffer"
-    assert (
-        len(allowed_ec2_types_with_buffer_defined) == 1
-    ), "more than one type with buffer is disallowed in this test!"
-    return next(iter(allowed_ec2_types_with_buffer_defined.values())).buffer_count
-
-
 @pytest.fixture
 def pre_pull_images(
     ec2_instances_allowed_types_with_only_1_buffered: dict[InstanceTypeType, Any]

From 851d7a7f391bf274f98bc5c9bb2f0148efd10216 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Thu, 12 Dec 2024 18:35:49 +0100
Subject: [PATCH 06/16] ongoing

---
 .../unit/test_modules_auto_scaling_dynamic.py     | 15 +++++++++++++++
 1 file changed, 15 insertions(+)

diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
index 521ede32332..0669389db6a 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
@@ -1797,6 +1797,7 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
     minimal_configuration: None,
     with_instances_machines_hot_buffer: EnvVarsDict,
     ec2_client: EC2Client,
+    initialized_app: FastAPI,
     app_settings: ApplicationSettings,
     ec2_instances_allowed_types_with_only_1_buffered: dict[
         InstanceTypeType, EC2InstanceBootSpecific
@@ -1807,6 +1808,7 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
         [int, InstanceTypeType, InstanceStateNameType, list[DockerGenericTag] | None],
         Awaitable[list[str]],
     ],
+    spied_cluster_analysis: MockType,
 ):
     # pre-requisites
     assert app_settings.AUTOSCALING_EC2_INSTANCES
@@ -1834,3 +1836,16 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
         expected_pre_pulled_images=None,
         instance_filters=None,
     )
+
+    # let's autoscale, this should move the warm buffers to hot buffers
+    await auto_scale_cluster(
+        app=initialized_app, auto_scaling_mode=DynamicAutoscaling()
+    )
+    analyzed_cluster = assert_cluster_state(
+        spied_cluster_analysis,
+        expected_calls=1,
+        expected_num_machines=0,
+    )
+    assert not analyzed_cluster.active_nodes
+    assert analyzed_cluster.buffer_ec2s
+    assert len(analyzed_cluster.buffer_ec2s) == len(buffer_machines)

From b7026fdc05ee832238888b166d1eb452aa9341cc Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Thu, 12 Dec 2024 18:51:15 +0100
Subject: [PATCH 07/16] test ready

---
 .../unit/test_modules_auto_scaling_dynamic.py | 60 ++++++++++++++++---
 1 file changed, 53 insertions(+), 7 deletions(-)

diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
index 0669389db6a..2656c3a20b4 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
@@ -1794,14 +1794,12 @@ async def test__activate_drained_nodes_with_drained_node(
 
 
 async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
+    patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock,
     minimal_configuration: None,
     with_instances_machines_hot_buffer: EnvVarsDict,
     ec2_client: EC2Client,
     initialized_app: FastAPI,
     app_settings: ApplicationSettings,
-    ec2_instances_allowed_types_with_only_1_buffered: dict[
-        InstanceTypeType, EC2InstanceBootSpecific
-    ],
     ec2_instance_custom_tags: dict[str, str],
     buffer_count: int,
     create_buffer_machines: Callable[
@@ -1809,18 +1807,25 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
         Awaitable[list[str]],
     ],
     spied_cluster_analysis: MockType,
+    instance_type_filters: Sequence[FilterTypeDef],
 ):
     # pre-requisites
     assert app_settings.AUTOSCALING_EC2_INSTANCES
     assert app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER > 0
+
     # we have nothing running now
     all_instances = await ec2_client.describe_instances()
     assert not all_instances["Reservations"]
 
-    # have a few warm buffers ready
+    # have a few warm buffers ready with the same type as the hot buffer machines
     buffer_machines = await create_buffer_machines(
         buffer_count,
-        next(iter(list(ec2_instances_allowed_types_with_only_1_buffered))),
+        cast(
+            InstanceTypeType,
+            next(
+                iter(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES)
+            ),
+        ),
         "stopped",
         None,
     )
@@ -1828,8 +1833,11 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
         ec2_client,
         expected_num_reservations=1,
         expected_num_instances=buffer_count,
-        expected_instance_type=next(
-            iter(ec2_instances_allowed_types_with_only_1_buffered)
+        expected_instance_type=cast(
+            InstanceTypeType,
+            next(
+                iter(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES)
+            ),
         ),
         expected_instance_state="stopped",
         expected_additional_tag_keys=list(ec2_instance_custom_tags),
@@ -1841,6 +1849,7 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
     await auto_scale_cluster(
         app=initialized_app, auto_scaling_mode=DynamicAutoscaling()
     )
+    # at analysis time, we had no machines running
     analyzed_cluster = assert_cluster_state(
         spied_cluster_analysis,
         expected_calls=1,
@@ -1849,3 +1858,40 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
     assert not analyzed_cluster.active_nodes
     assert analyzed_cluster.buffer_ec2s
     assert len(analyzed_cluster.buffer_ec2s) == len(buffer_machines)
+
+    # now we should have a warm buffer moved to the hot buffer
+    await assert_autoscaled_dynamic_ec2_instances(
+        ec2_client,
+        expected_num_reservations=1,
+        expected_num_instances=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
+        expected_instance_type=cast(
+            InstanceTypeType,
+            next(
+                iter(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES)
+            ),
+        ),
+        expected_instance_state="running",
+        expected_additional_tag_keys=list(ec2_instance_custom_tags),
+        instance_filters=instance_type_filters,
+    )
+
+    # let's autoscale again, to check the cluster analysis
+    await auto_scale_cluster(
+        app=initialized_app, auto_scaling_mode=DynamicAutoscaling()
+    )
+    # at analysis time, we had no machines running
+    analyzed_cluster = assert_cluster_state(
+        spied_cluster_analysis,
+        expected_calls=1,
+        expected_num_machines=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
+    )
+    assert not analyzed_cluster.active_nodes
+    assert len(analyzed_cluster.buffer_ec2s) == max(
+        0,
+        buffer_count
+        - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
+    )
+    assert (
+        len(analyzed_cluster.buffer_drained_nodes)
+        == app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
+    )

From e09f5050a47a8ea598410af54287e25f47fc5f95 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Thu, 12 Dec 2024 22:44:16 +0100
Subject: [PATCH 08/16] ongoing

---
 .../modules/auto_scaling_core.py              | 19 +++++++++++++++----
 1 file changed, 15 insertions(+), 4 deletions(-)

diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py
index e2212195aed..69f9b73ce03 100644
--- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py
+++ b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py
@@ -418,15 +418,26 @@ async def _activate_drained_nodes(
     )
 
 
-async def _start_buffer_instances(
+async def _start_warm_buffer_instances(
     app: FastAPI, cluster: Cluster, auto_scaling_mode: BaseAutoscaling
 ) -> Cluster:
+    """starts warm buffer if there are assigned tasks, or if a hot buffer of the same type is needed"""
+
+    app_settings = get_application_settings(app)
+    needed_hot_buffers = (
+        app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
+    )
+    hot_buffer_instance_type = next(
+        iter(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES)
+    )
+    current_hot_buffers = cluster.buffer_drained_nodes
+
     instances_to_start = [
         i.ec2_instance for i in cluster.buffer_ec2s if i.assigned_tasks
     ]
     if not instances_to_start:
         return cluster
-    # change the buffer machine to an active one
+
     with log_context(
         _logger, logging.INFO, f"start {len(instances_to_start)} buffer machines"
     ):
@@ -1187,8 +1198,8 @@ async def _autoscale_cluster(
     # 2. activate available drained nodes to cover some of the tasks
     cluster = await _activate_drained_nodes(app, cluster, auto_scaling_mode)
 
-    # 3. start buffer instances to cover the remaining tasks
-    cluster = await _start_buffer_instances(app, cluster, auto_scaling_mode)
+    # 3. start warm buffer instances to cover the remaining tasks
+    cluster = await _start_warm_buffer_instances(app, cluster, auto_scaling_mode)
 
     # 4. scale down unused instances
     cluster = await _scale_down_unused_cluster_instances(

From 9677c6e8a857d3b2ef87c940e630431bf7ee7637 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Fri, 13 Dec 2024 09:36:14 +0100
Subject: [PATCH 09/16] better assert

---
 .../tests/unit/test_modules_auto_scaling_dynamic.py           | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
index 2656c3a20b4..6ba134907be 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
@@ -1890,6 +1890,10 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
         0,
         buffer_count
         - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
+    ), (
+        "the warm buffers were not used as expected there should be"
+        f" {buffer_count - app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER} remaining, "
+        f"found {len(analyzed_cluster.buffer_ec2s)}"
     )
     assert (
         len(analyzed_cluster.buffer_drained_nodes)

From 252e6741e0dcee34ce7b7367aac0e121d2f1fa72 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Fri, 13 Dec 2024 09:44:20 +0100
Subject: [PATCH 10/16] check tagging

---
 .../tests/unit/test_modules_auto_scaling_dynamic.py    | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
index 6ba134907be..a369296e07a 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
@@ -1808,6 +1808,8 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
     ],
     spied_cluster_analysis: MockType,
     instance_type_filters: Sequence[FilterTypeDef],
+    mock_find_node_with_name_returns_fake_node: mock.Mock,
+    mock_docker_tag_node: mock.Mock,
 ):
     # pre-requisites
     assert app_settings.AUTOSCALING_EC2_INSTANCES
@@ -1849,6 +1851,7 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
     await auto_scale_cluster(
         app=initialized_app, auto_scaling_mode=DynamicAutoscaling()
     )
+    mock_docker_tag_node.assert_not_called()
     # at analysis time, we had no machines running
     analyzed_cluster = assert_cluster_state(
         spied_cluster_analysis,
@@ -1875,10 +1878,15 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
         instance_filters=instance_type_filters,
     )
 
-    # let's autoscale again, to check the cluster analysis
+    # let's autoscale again, to check the cluster analysis and tag the nodes
     await auto_scale_cluster(
         app=initialized_app, auto_scaling_mode=DynamicAutoscaling()
     )
+    mock_docker_tag_node.assert_called()
+    assert (
+        mock_docker_tag_node.call_count
+        == app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
+    )
     # at analysis time, we had no machines running
     analyzed_cluster = assert_cluster_state(
         spied_cluster_analysis,

From b173f1b0b9508f714bd7a88f07be213341838500 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Fri, 13 Dec 2024 12:18:50 +0100
Subject: [PATCH 11/16] test passes

---
 .../src/pytest_simcore/helpers/aws_ec2.py     |  5 ++-
 .../modules/auto_scaling_core.py              | 31 ++++++++++++++-----
 .../unit/test_modules_auto_scaling_dynamic.py | 10 ++++--
 3 files changed, 36 insertions(+), 10 deletions(-)

diff --git a/packages/pytest-simcore/src/pytest_simcore/helpers/aws_ec2.py b/packages/pytest-simcore/src/pytest_simcore/helpers/aws_ec2.py
index 1e992f4ee45..7bb826149fe 100644
--- a/packages/pytest-simcore/src/pytest_simcore/helpers/aws_ec2.py
+++ b/packages/pytest-simcore/src/pytest_simcore/helpers/aws_ec2.py
@@ -42,7 +42,10 @@ async def assert_autoscaled_dynamic_ec2_instances(
     expected_instance_state: InstanceStateNameType,
     expected_additional_tag_keys: list[str],
     instance_filters: Sequence[FilterTypeDef] | None,
+    expected_user_data: list[str] | None = None,
 ) -> list[InstanceTypeDef]:
+    if expected_user_data is None:
+        expected_user_data = ["docker swarm join"]
     return await assert_ec2_instances(
         ec2_client,
         expected_num_reservations=expected_num_reservations,
@@ -54,7 +57,7 @@ async def assert_autoscaled_dynamic_ec2_instances(
             "io.simcore.autoscaling.monitored_services_labels",
             *expected_additional_tag_keys,
         ],
-        expected_user_data=["docker swarm join"],
+        expected_user_data=expected_user_data,
         instance_filters=instance_filters,
     )
 
diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py
index 69f9b73ce03..2100b9b67d2 100644
--- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py
+++ b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py
@@ -424,17 +424,34 @@ async def _start_warm_buffer_instances(
     """starts warm buffer if there are assigned tasks, or if a hot buffer of the same type is needed"""
 
     app_settings = get_application_settings(app)
-    needed_hot_buffers = (
-        app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
-    )
-    hot_buffer_instance_type = next(
-        iter(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES)
-    )
-    current_hot_buffers = cluster.buffer_drained_nodes
+    assert app_settings.AUTOSCALING_EC2_INSTANCES  # nosec
 
     instances_to_start = [
         i.ec2_instance for i in cluster.buffer_ec2s if i.assigned_tasks
     ]
+
+    if (
+        len(cluster.buffer_drained_nodes)
+        < app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
+    ):
+        # check if we can migrate warm buffers to hot buffers
+        hot_buffer_instance_type = cast(
+            InstanceTypeType,
+            next(
+                iter(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES)
+            ),
+        )
+        free_starteable_warm_buffers_to_replace_hot_buffers = [
+            warm_buffer.ec2_instance
+            for warm_buffer in cluster.buffer_ec2s
+            if (warm_buffer.ec2_instance.type == hot_buffer_instance_type)
+            and not warm_buffer.assigned_tasks
+        ]
+        instances_to_start += free_starteable_warm_buffers_to_replace_hot_buffers[
+            : app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
+            - len(cluster.buffer_drained_nodes)
+        ]
+
     if not instances_to_start:
         return cluster
 
diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
index a369296e07a..7e58a00e321 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
@@ -50,6 +50,7 @@
 )
 from pytest_simcore.helpers.logging_tools import log_context
 from pytest_simcore.helpers.monkeypatch_envs import EnvVarsDict
+from simcore_service_autoscaling.constants import BUFFER_MACHINE_TAG_KEY
 from simcore_service_autoscaling.core.settings import ApplicationSettings
 from simcore_service_autoscaling.models import AssociatedInstance, Cluster
 from simcore_service_autoscaling.modules.auto_scaling_core import (
@@ -1809,6 +1810,7 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
     spied_cluster_analysis: MockType,
     instance_type_filters: Sequence[FilterTypeDef],
     mock_find_node_with_name_returns_fake_node: mock.Mock,
+    mock_compute_node_used_resources: mock.Mock,
     mock_docker_tag_node: mock.Mock,
 ):
     # pre-requisites
@@ -1874,8 +1876,12 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
             ),
         ),
         expected_instance_state="running",
-        expected_additional_tag_keys=list(ec2_instance_custom_tags),
+        expected_additional_tag_keys=[
+            *list(ec2_instance_custom_tags),
+            BUFFER_MACHINE_TAG_KEY,
+        ],
         instance_filters=instance_type_filters,
+        expected_user_data=[],
     )
 
     # let's autoscale again, to check the cluster analysis and tag the nodes
@@ -1904,6 +1910,6 @@ async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
         f"found {len(analyzed_cluster.buffer_ec2s)}"
     )
     assert (
-        len(analyzed_cluster.buffer_drained_nodes)
+        len(analyzed_cluster.pending_ec2s)
         == app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
     )

From 6737c4c6f873f9cf730ca7e01ec4d42e74f15025 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Fri, 13 Dec 2024 14:04:05 +0100
Subject: [PATCH 12/16] wrong check

---
 .../tests/unit/test_modules_auto_scaling_dynamic.py         | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
index 7e58a00e321..315b03820d4 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
@@ -376,9 +376,9 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect
         mock_rabbitmq_post_message,
         app_settings,
         initialized_app,
-        nodes_total=with_instances_machines_hot_buffer,
-        nodes_drained=with_instances_machines_hot_buffer,
-        instances_running=with_instances_machines_hot_buffer,
+        nodes_total=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
+        nodes_drained=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
+        instances_running=app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER,
         cluster_total_resources={
             "cpus": app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
             * fake_node.description.resources.nano_cp_us

From 60ff0a71a934edf89750348edaaedb5dfd752eed Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Fri, 13 Dec 2024 14:04:22 +0100
Subject: [PATCH 13/16] only run upload docker logs if there is a failure

---
 .github/workflows/ci-testing-deploy.yml | 24 ++++++++++++------------
 1 file changed, 12 insertions(+), 12 deletions(-)

diff --git a/.github/workflows/ci-testing-deploy.yml b/.github/workflows/ci-testing-deploy.yml
index aa1efbee7a9..789c552cc81 100644
--- a/.github/workflows/ci-testing-deploy.yml
+++ b/.github/workflows/ci-testing-deploy.yml
@@ -772,7 +772,7 @@ jobs:
         if: ${{ !cancelled() }}
         run: ./ci/github/unit-testing/catalog.bash test
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -879,7 +879,7 @@ jobs:
         if: ${{ !cancelled() }}
         run: ./ci/github/unit-testing/datcore-adapter.bash test
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -930,7 +930,7 @@ jobs:
         if: ${{ !cancelled() }}
         run: ./ci/github/unit-testing/director.bash test
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -981,7 +981,7 @@ jobs:
         if: ${{ !cancelled() }}
         run: ./ci/github/unit-testing/director-v2.bash test
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -1910,7 +1910,7 @@ jobs:
       - name: test
         run: ./ci/github/integration-testing/webserver.bash test 01
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -1974,7 +1974,7 @@ jobs:
       - name: test
         run: ./ci/github/integration-testing/webserver.bash test 02
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -2038,7 +2038,7 @@ jobs:
       - name: test
         run: ./ci/github/integration-testing/director-v2.bash test 01
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -2111,7 +2111,7 @@ jobs:
       - name: test
         run: ./ci/github/integration-testing/director-v2.bash test 02
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -2177,7 +2177,7 @@ jobs:
       - name: test
         run: ./ci/github/integration-testing/dynamic-sidecar.bash test 01
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -2241,7 +2241,7 @@ jobs:
       - name: test
         run: ./ci/github/integration-testing/simcore-sdk.bash test
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -2330,7 +2330,7 @@ jobs:
       - name: test
         run: ./ci/github/system-testing/public-api.bash test
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs
@@ -2395,7 +2395,7 @@ jobs:
           name: ${{ github.job }}_services_settings_schemas
           path: ./services/**/settings-schema.json
       - name: upload failed tests logs
-        if: ${{ !cancelled() }}
+        if: ${{ failure() }}
         uses: actions/upload-artifact@v4
         with:
           name: ${{ github.job }}_docker_logs

From 66286319ec4201bc1db88b60fd108ff688ed11c9 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Fri, 13 Dec 2024 14:36:17 +0100
Subject: [PATCH 14/16] reduce number of test

---
 ...test_modules_auto_scaling_computational.py | 116 +++++++++++++-
 .../unit/test_modules_auto_scaling_dynamic.py | 150 +++++++++++++++++-
 2 files changed, 263 insertions(+), 3 deletions(-)

diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py
index 6e7a0d7c828..bad4215a65e 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_computational.py
@@ -305,6 +305,18 @@ async def _(scale_up_params: _ScaleUpParams) -> list[distributed.Future]:
     return _
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["without_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test_cluster_scaling_with_no_tasks_does_nothing(
     minimal_configuration: None,
     app_settings: ApplicationSettings,
@@ -330,6 +342,18 @@ async def test_cluster_scaling_with_no_tasks_does_nothing(
 @pytest.mark.acceptance_test(
     "Ensure this does not happen https://github.com/ITISFoundation/osparc-simcore/issues/6227"
 )
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["without_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test_cluster_scaling_with_disabled_ssm_does_not_block_autoscaling(
     minimal_configuration: None,
     disabled_ssm: None,
@@ -353,6 +377,18 @@ async def test_cluster_scaling_with_disabled_ssm_does_not_block_autoscaling(
     )
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["without_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test_cluster_scaling_with_task_with_too_much_resources_starts_nothing(
     minimal_configuration: None,
     app_settings: ApplicationSettings,
@@ -800,6 +836,18 @@ async def test_cluster_scaling_up_and_down(  # noqa: PLR0915
     mock_docker_compute_node_used_resources.assert_not_called()
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["without_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test_cluster_does_not_scale_up_if_defined_instance_is_not_allowed(
     minimal_configuration: None,
     app_settings: ApplicationSettings,
@@ -839,6 +887,18 @@ async def test_cluster_does_not_scale_up_if_defined_instance_is_not_allowed(
     assert "Unexpected error:" in error_messages[0]
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["without_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test_cluster_does_not_scale_up_if_defined_instance_is_not_fitting_resources(
     minimal_configuration: None,
     app_settings: ApplicationSettings,
@@ -878,6 +938,18 @@ async def test_cluster_does_not_scale_up_if_defined_instance_is_not_fitting_reso
     assert "Unexpected error:" in error_messages[0]
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["without_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 @pytest.mark.parametrize(
     "scale_up_params",
     [
@@ -948,6 +1020,18 @@ async def test_cluster_scaling_up_starts_multiple_instances(
     mock_rabbitmq_post_message.reset_mock()
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["without_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 @pytest.mark.parametrize(
     "scale_up_params",
     [
@@ -1044,6 +1128,18 @@ async def test_cluster_scaling_up_more_than_allowed_max_starts_max_instances_and
     )
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["without_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test_cluster_scaling_up_more_than_allowed_with_multiple_types_max_starts_max_instances_and_not_more(
     patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock,
     minimal_configuration: None,
@@ -1141,6 +1237,18 @@ async def test_cluster_scaling_up_more_than_allowed_with_multiple_types_max_star
     )
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["without_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 @pytest.mark.parametrize(
     "scale_up_params",
     [
@@ -1305,11 +1413,15 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted(
 
 
 @pytest.mark.parametrize(
-    "with_docker_join_drained", ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"], indirect=True
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
 )
 @pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
     "with_drain_nodes_labelled",
-    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    ["without_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
     indirect=True,
 )
 @pytest.mark.parametrize(
diff --git a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
index 315b03820d4..afd3c01e4a3 100644
--- a/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
+++ b/services/autoscaling/tests/unit/test_modules_auto_scaling_dynamic.py
@@ -291,6 +291,18 @@ async def _(scale_up_params: _ScaleUpParams) -> list[Service]:
     return _
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test_cluster_scaling_with_no_services_does_nothing(
     minimal_configuration: None,
     app_settings: ApplicationSettings,
@@ -309,6 +321,18 @@ async def test_cluster_scaling_with_no_services_does_nothing(
     )
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expected_machines(
     patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock,
     minimal_configuration: None,
@@ -409,6 +433,18 @@ async def test_cluster_scaling_with_no_services_and_machine_buffer_starts_expect
     )
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 @pytest.mark.parametrize(
     "scale_up_params",
     [
@@ -992,6 +1028,18 @@ async def test_cluster_scaling_up_and_down(
     )
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 @pytest.mark.parametrize(
     "scale_up_params",
     [
@@ -1068,6 +1116,18 @@ async def test_cluster_scaling_up_and_down_against_aws(
     )
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 @pytest.mark.parametrize(
     "scale_up_params",
     [
@@ -1150,9 +1210,13 @@ async def test_cluster_scaling_up_starts_multiple_instances(
 
 
 @pytest.mark.parametrize(
-    "with_docker_join_drained", ["with_AUTOSCALING_DOCKER_JOIN_DRAINED"], indirect=True
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
 )
 @pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
     "with_drain_nodes_labelled",
     ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
     indirect=True,
@@ -1447,6 +1511,18 @@ async def test_cluster_adapts_machines_on_the_fly(  # noqa: PLR0915
         assert instance["InstanceType"] == scale_up_params2.expected_instance_type
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 @pytest.mark.parametrize(
     "scale_up_params",
     [
@@ -1608,6 +1684,18 @@ async def test_long_pending_ec2_is_detected_as_broken_terminated_and_restarted(
     )
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test__find_terminateable_nodes_with_no_hosts(
     minimal_configuration: None,
     initialized_app: FastAPI,
@@ -1628,6 +1716,18 @@ async def test__find_terminateable_nodes_with_no_hosts(
     assert await _find_terminateable_instances(initialized_app, active_cluster) == []
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test__try_scale_down_cluster_with_no_nodes(
     minimal_configuration: None,
     with_valid_time_before_termination: datetime.timedelta,
@@ -1652,6 +1752,18 @@ async def test__try_scale_down_cluster_with_no_nodes(
     mock_remove_nodes.assert_not_called()
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test__activate_drained_nodes_with_no_tasks(
     minimal_configuration: None,
     with_valid_time_before_termination: datetime.timedelta,
@@ -1685,6 +1797,18 @@ async def test__activate_drained_nodes_with_no_tasks(
     mock_docker_tag_node.assert_not_called()
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test__activate_drained_nodes_with_no_drained_nodes(
     minimal_configuration: None,
     with_valid_time_before_termination: datetime.timedelta,
@@ -1726,6 +1850,18 @@ async def test__activate_drained_nodes_with_no_drained_nodes(
     mock_docker_tag_node.assert_not_called()
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test__activate_drained_nodes_with_drained_node(
     minimal_configuration: None,
     with_valid_time_before_termination: datetime.timedelta,
@@ -1794,6 +1930,18 @@ async def test__activate_drained_nodes_with_drained_node(
     )
 
 
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_docker_join_drained",
+    ["without_AUTOSCALING_DOCKER_JOIN_DRAINED"],
+    indirect=True,
+)
+@pytest.mark.parametrize(
+    # NOTE: only the main test test_cluster_scaling_up_and_down is run with all options
+    "with_drain_nodes_labelled",
+    ["with_AUTOSCALING_DRAIN_NODES_WITH_LABELS"],
+    indirect=True,
+)
 async def test_warm_buffers_are_started_to_replace_missing_hot_buffers(
     patch_ec2_client_launch_instances_min_number_of_instances: mock.Mock,
     minimal_configuration: None,

From c168917fd9bd75d8a7150ec4928df1fa8bddefdf Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Fri, 13 Dec 2024 16:34:39 +0100
Subject: [PATCH 15/16] @pcrespov review: typo

---
 .../simcore_service_autoscaling/modules/auto_scaling_core.py  | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py
index 2100b9b67d2..9c45de0524b 100644
--- a/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py
+++ b/services/autoscaling/src/simcore_service_autoscaling/modules/auto_scaling_core.py
@@ -441,13 +441,13 @@ async def _start_warm_buffer_instances(
                 iter(app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_ALLOWED_TYPES)
             ),
         )
-        free_starteable_warm_buffers_to_replace_hot_buffers = [
+        free_startable_warm_buffers_to_replace_hot_buffers = [
             warm_buffer.ec2_instance
             for warm_buffer in cluster.buffer_ec2s
             if (warm_buffer.ec2_instance.type == hot_buffer_instance_type)
             and not warm_buffer.assigned_tasks
         ]
-        instances_to_start += free_starteable_warm_buffers_to_replace_hot_buffers[
+        instances_to_start += free_startable_warm_buffers_to_replace_hot_buffers[
             : app_settings.AUTOSCALING_EC2_INSTANCES.EC2_INSTANCES_MACHINES_BUFFER
             - len(cluster.buffer_drained_nodes)
         ]

From 4d0234165d54ee16571d47f1622a2065ddc13211 Mon Sep 17 00:00:00 2001
From: sanderegg <35365065+sanderegg@users.noreply.github.com>
Date: Fri, 13 Dec 2024 16:42:46 +0100
Subject: [PATCH 16/16] codecov

---
 .codecov.yml | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/.codecov.yml b/.codecov.yml
index 02666df0a13..eb2e6697348 100644
--- a/.codecov.yml
+++ b/.codecov.yml
@@ -10,10 +10,10 @@ flag_management:
     statuses:
     - type: project
       target: auto
-      threshold: 1%
+      threshold: 2%
     - type: patch
       target: auto
-      threshold: 1%
+      threshold: 2%
 
 
 component_management:
@@ -22,7 +22,7 @@ component_management:
     statuses:
       - type: project
         target: auto
-        threshold: 1%
+        threshold: 2%
         branches:
           - "!master"
   individual_components:
@@ -116,12 +116,12 @@ coverage:
     project:
       default:
         informational: true
-        threshold: 1%
+        threshold: 2%
 
     patch:
       default:
         informational: true
-        threshold: 1%
+        threshold: 2%
 
 comment:
   layout: "header,diff,flags,components,footer"