Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix: Explicitly exit when container is out-of-memory #6476

Merged
merged 9 commits into from
Jan 4, 2024
2 changes: 1 addition & 1 deletion samcli/local/docker/container.py
Original file line number Diff line number Diff line change
Expand Up @@ -120,7 +120,7 @@ def __init__(
self.docker_client = docker_client or docker.from_env(version=DOCKER_MIN_API_VERSION)

# Runtime properties of the container. They won't have value until container is created or started
self.id = None
self.id: Optional[str] = None

# aws-lambda-rie defaults to 8080 as the port, however that's a common port. A port is chosen by
# selecting the first free port in a range that's not ephemeral.
Expand Down
48 changes: 48 additions & 0 deletions samcli/local/docker/container_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,48 @@
"""
Class for handling the analysis and inspection of Docker containers
"""
import logging
from dataclasses import dataclass

from samcli.local.docker.container import Container
from samcli.local.docker.manager import ContainerManager

LOG = logging.getLogger(__name__)

DEFAULT_OUT_OF_MEMORY = False


@dataclass
class ContainerState:
out_of_memory: bool


class ContainerAnalyzer:
def __init__(self, container_manager: ContainerManager, container: Container):
self.container_manager = container_manager
self.container = container

def inspect(self) -> ContainerState:
"""
Inspect the state of a container by calling the "inspect()" API that Docker provides.
Extract relevant information into a ContainerState object.

Returns
-------
ContainerState:
Returns a ContainerState object with relevant container data
"""
if not self.container.id:
LOG.debug("Container ID not defined, unable to fetch container state")
return ContainerState(DEFAULT_OUT_OF_MEMORY)

state = self.container_manager.inspect(self.container.id)

if isinstance(state, bool):
Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

This check is admittedly a little weird, but mypy won't like it if we don't do this, and the reason we return a bool here is to follow the legacy pattern in container_manager.

LOG.debug("Unable to fetch container state")
return ContainerState(DEFAULT_OUT_OF_MEMORY)

container_state = ContainerState(state.get("State", {}).get("OOMKilled", DEFAULT_OUT_OF_MEMORY))
LOG.debug("[Container state] OOMKilled %s", container_state.out_of_memory)

return container_state
7 changes: 7 additions & 0 deletions samcli/local/docker/exceptions.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
"""
Docker container related exceptions
"""
from samcli.commands.exceptions import UserException


class ContainerNotStartableException(Exception):
Expand All @@ -17,3 +18,9 @@ class PortAlreadyInUse(Exception):
"""
Exception to raise when the provided port is not available for use.
"""


class ContainerFailureError(UserException):
"""
Raised when the invoke container fails execution
"""
21 changes: 21 additions & 0 deletions samcli/local/docker/manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
import logging
import sys
import threading
from typing import Union, cast

import docker

Expand Down Expand Up @@ -193,6 +194,26 @@ def has_image(self, image_name):
except docker.errors.ImageNotFound:
return False

def inspect(self, container: str) -> Union[bool, dict]:
"""
Low-level Docker API for inspecting the container state

Parameters
----------
container: str
ID of the container

Returns
-------
Union[bool, dict]
Container inspection state if successful, False otherwise
"""
try:
return cast(dict, self.docker_client.api.inspect_container(container))
except (docker.errors.APIError, docker.errors.NullResource) as ex:
LOG.debug("Failed to call Docker inspect: %s", str(ex))
return False


class DockerImagePullFailedException(Exception):
pass
24 changes: 24 additions & 0 deletions samcli/local/lambdafn/runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,6 +13,9 @@
from samcli.lib.telemetry.metric import capture_parameter
from samcli.lib.utils.file_observer import LambdaFunctionObserver
from samcli.lib.utils.packagetype import ZIP
from samcli.local.docker.container import Container
from samcli.local.docker.container_analyzer import ContainerAnalyzer
from samcli.local.docker.exceptions import ContainerFailureError
from samcli.local.docker.lambda_container import LambdaContainer

from ...lib.providers.provider import LayerVersion
Expand Down Expand Up @@ -223,9 +226,30 @@ def _on_invoke_done(self, container):
The current running container
"""
if container:
self._check_exit_state(container)
self._container_manager.stop(container)
self._clean_decompressed_paths()

def _check_exit_state(self, container: Container):
"""
Check and validate the exit state of the invoke container.

Parameters
----------
container: Container
Docker container to be checked

Raises
-------
ContainerFailureError
If the exit reason is due to out-of-memory, return exit code 1

"""
container_analyzer = ContainerAnalyzer(self._container_manager, container)
exit_state = container_analyzer.inspect()
if exit_state.out_of_memory:
raise ContainerFailureError("Container invocation failed due to maximum memory usage")

def _configure_interrupt(self, function_full_path, timeout, container, is_debugging):
"""
When a Lambda function is executing, we setup certain interrupt handlers to stop the execution.
Expand Down
69 changes: 69 additions & 0 deletions tests/unit/local/docker/test_container_analyzer.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,69 @@
from unittest import TestCase
from unittest.mock import Mock, patch

from samcli.lib.utils.packagetype import IMAGE
from samcli.local.docker.container import Container
from samcli.local.docker.container_analyzer import ContainerAnalyzer, ContainerState
from samcli.local.docker.manager import ContainerManager


class TestContainerAnalyzer(TestCase):
def setUp(self) -> None:
self.image = IMAGE
self.cmd = "cmd"
self.working_dir = "working_dir"
self.host_dir = "host_dir"
self.memory_mb = 123
self.exposed_ports = {123: 123}
self.entrypoint = ["a", "b", "c"]
self.env_vars = {"key": "value"}

self.mock_docker_client = Mock()

self.container = Container(
self.image,
self.cmd,
self.working_dir,
self.host_dir,
self.memory_mb,
self.exposed_ports,
self.entrypoint,
self.env_vars,
self.mock_docker_client,
)

@patch("samcli.local.docker.container_analyzer.LOG")
def test_inspect_returns_container_state(self, mock_log):
self.container.id = "id"
manager = ContainerManager()
manager.inspect = Mock()
manager.inspect.return_value = {"State": {"OOMKilled": True}}

analyzer = ContainerAnalyzer(container_manager=manager, container=self.container)
state = analyzer.inspect()

manager.inspect.assert_called_once_with("id")
mock_log.debug.assert_called_once_with("[Container state] OOMKilled %s", True)
self.assertEqual(state, ContainerState(out_of_memory=True))

def test_inspect_no_container_id(self):
manager = ContainerManager()
manager.inspect = Mock()

analyzer = ContainerAnalyzer(container_manager=manager, container=self.container)
state = analyzer.inspect()

manager.inspect.assert_not_called()
self.assertEqual(state, ContainerState(out_of_memory=False))

def test_inspect_docker_call_fails(self):
self.container.id = "id"
manager = ContainerManager()
manager.inspect = Mock()
manager.inspect.return_value = False

analyzer = ContainerAnalyzer(container_manager=manager, container=self.container)
state = analyzer.inspect()

manager.inspect.assert_called_once_with("id")
self.assertEqual(state, ContainerState(out_of_memory=False))
24 changes: 24 additions & 0 deletions tests/unit/local/docker/test_manager.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,8 @@

import requests
from docker.errors import APIError, ImageNotFound

import docker
from samcli.local.docker.manager import ContainerManager, DockerImagePullFailedException
from samcli.local.docker.lambda_image import RAPID_IMAGE_TAG_PREFIX

Expand Down Expand Up @@ -384,3 +386,25 @@ def test_must_call_delete_on_container(self):

manager.stop(container)
container.delete.assert_called_with()


class TestContainerManager_inspect(TestCase):
def test_must_call_inspect_on_container(self):
manager = ContainerManager()
manager.docker_client = Mock()

container = "container_id"

manager.inspect(container)
manager.docker_client.docker_client.api.inspect_container(container)

@patch("samcli.local.docker.manager.LOG")
def test_must_fail_with_error_message(self, mock_log):
manager = ContainerManager()
manager.docker_client.api.inspect_container = Mock()
manager.docker_client.api.inspect_container.side_effect = [docker.errors.APIError("Failed")]

return_val = manager.inspect("container_id")

self.assertEqual(return_val, False)
mock_log.debug.assert_called_once_with("Failed to call Docker inspect: %s", "Failed")
6 changes: 6 additions & 0 deletions tests/unit/local/lambdafn/test_runtime.py
Original file line number Diff line number Diff line change
Expand Up @@ -310,6 +310,8 @@ def test_must_run_container_and_wait_for_result(self, LambdaContainerMock):
self.runtime._configure_interrupt = Mock()
self.runtime._configure_interrupt.return_value = start_timer

self.runtime._check_exit_state = Mock()

LambdaContainerMock.return_value = container
container.is_running.return_value = False

Expand Down Expand Up @@ -369,6 +371,8 @@ def test_exception_from_run_must_trigger_cleanup(self, LambdaContainerMock):
self.runtime._configure_interrupt = Mock()
self.runtime._configure_interrupt.return_value = start_timer

self.runtime._check_exit_state = Mock()

LambdaContainerMock.return_value = container
container.is_running.return_value = False

Expand Down Expand Up @@ -404,6 +408,7 @@ def test_exception_from_wait_for_result_must_trigger_cleanup(self, LambdaContain
self.runtime._get_code_dir.return_value = code_dir
self.runtime._configure_interrupt = Mock()
self.runtime._configure_interrupt.return_value = timer
self.runtime._check_exit_state = Mock()

LambdaContainerMock.return_value = container
container.is_running.return_value = False
Expand Down Expand Up @@ -437,6 +442,7 @@ def test_keyboard_interrupt_must_not_raise(self, LambdaContainerMock):
self.runtime._get_code_dir = MagicMock()
self.runtime._get_code_dir.return_value = code_dir
self.runtime._configure_interrupt = Mock()
self.runtime._check_exit_state = Mock()

LambdaContainerMock.return_value = container
container.is_running.return_value = False
Expand Down
Loading