Commit
(fix) Fix runtime (RT) tests and split tests in 2 actions (openhands/root) (All-Hands-AI#3791)

Co-authored-by: Engel Nyst <[email protected]>
tobitege and enyst authored Sep 14, 2024
1 parent 57390eb commit 554636c
Showing 21 changed files with 878 additions and 713 deletions.
89 changes: 81 additions & 8 deletions .github/workflows/ghcr_runtime.yml
@@ -1,5 +1,5 @@
# Workflow that builds, tests and then pushes the runtime docker images to the ghcr.io repository
name: Build, Test and Publish Runtime Image
name: Build, Test and Publish RT Image

# Only run one workflow of the same group at a time.
# There can be at most one running and one pending job in a concurrency group at any time.
@@ -104,9 +104,9 @@ jobs:
name: runtime-${{ matrix.base_image.tag }}
path: /tmp/runtime-${{ matrix.base_image.tag }}.tar

# Run unit tests with the EventStream runtime Docker images
test_runtime:
name: Test Runtime
# Run unit tests with the EventStream runtime Docker images as root
test_runtime_root:
name: RT Unit Tests (Root)
needs: [ghcr_build_runtime]
runs-on: ubuntu-latest
strategy:
@@ -164,19 +164,92 @@ jobs:
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
SKIP_CONTAINER_LOGS=true \
TEST_RUNTIME=eventstream \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
poetry run pytest -n 2 --reruns 2 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
RUN_AS_OPENHANDS=false \
poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}

# Run unit tests with the EventStream runtime Docker images as openhands user
test_runtime_oh:
name: RT Unit Tests (openhands)
runs-on: ubuntu-latest
needs: [ghcr_build_runtime]
strategy:
matrix:
base_image: ['nikolaik']
steps:
- uses: actions/checkout@v4
- name: Free Disk Space (Ubuntu)
uses: jlumbroso/free-disk-space@main
with:
tool-cache: true
android: true
dotnet: true
haskell: true
large-packages: true
swap-storage: true
# Forked repos can't push to GHCR, so we need to download the image as an artifact
- name: Download runtime image for fork
if: github.event.pull_request.head.repo.fork
uses: actions/download-artifact@v4
with:
name: runtime-${{ matrix.base_image }}
path: /tmp
- name: Load runtime image for fork
if: github.event.pull_request.head.repo.fork
run: |
docker load --input /tmp/runtime-${{ matrix.base_image }}.tar
- name: Cache Poetry dependencies
uses: actions/cache@v4
with:
path: |
~/.cache/pypoetry
~/.virtualenvs
key: ${{ runner.os }}-poetry-${{ hashFiles('**/poetry.lock') }}
restore-keys: |
${{ runner.os }}-poetry-
- name: Set up Python
uses: actions/setup-python@v5
with:
python-version: '3.11'
- name: Install poetry via pipx
run: pipx install poetry
- name: Install Python dependencies using Poetry
run: make install-python-dependencies
- name: Run runtime tests
run: |
# We install pytest-xdist in order to run tests across CPUs. However, tests start to fail when we run
# them across more than 2 CPUs for some reason
poetry run pip install pytest-xdist
# Install pytest-rerunfailures so flaky tests can be retried on failure
poetry run pip install pytest-rerunfailures
image_name=ghcr.io/${{ github.repository_owner }}/runtime:${{ github.sha }}-${{ matrix.base_image }}
image_name=$(echo $image_name | tr '[:upper:]' '[:lower:]')
SKIP_CONTAINER_LOGS=true \
TEST_RUNTIME=eventstream \
SANDBOX_USER_ID=$(id -u) \
SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
TEST_IN_CI=true \
RUN_AS_OPENHANDS=true \
poetry run pytest -n 3 --reruns 1 --reruns-delay 3 --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
- name: Upload coverage to Codecov
uses: codecov/codecov-action@v4
env:
CODECOV_TOKEN: ${{ secrets.CODECOV_TOKEN }}
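The two unit-test jobs are otherwise identical; they differ only in the RUN_AS_OPENHANDS value exported before pytest (false in the root job, true here). A minimal sketch of how a test setup might translate these environment variables into a runtime configuration — the helper name and config keys below are hypothetical, not the repository's actual fixtures:

```python
import os

def sandbox_config_from_env() -> dict:
    """Hypothetical helper: derive sandbox test settings from the CI environment."""
    return {
        # Runtime image built earlier in the workflow (loaded from an artifact on forks)
        'base_container_image': os.environ['SANDBOX_BASE_CONTAINER_IMAGE'],
        # UID of the CI runner user, so files written in the sandbox stay writable on the host
        'user_id': int(os.environ.get('SANDBOX_USER_ID', '1000')),
        # 'false' -> commands run as root inside the sandbox; 'true' -> as the openhands user
        'run_as_openhands': os.environ.get('RUN_AS_OPENHANDS', 'true').lower() == 'true',
    }
```

Splitting the matrix this way keeps each pytest invocation shorter and lets the root and openhands runs be retried and report coverage independently.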

# Run integration tests with the eventstream runtime Docker image
runtime_integration_tests_on_linux:
name: Runtime Integration Tests on Linux
name: RT Integration Tests (Linux)
runs-on: ubuntu-latest
needs: [ghcr_build_runtime]
strategy:
@@ -237,7 +310,7 @@ jobs:
name: All Runtime Tests Passed
if: ${{ !cancelled() && !contains(needs.*.result, 'failure') && !contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
needs: [test_runtime, runtime_integration_tests_on_linux]
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
steps:
- name: All tests passed
run: echo "All runtime tests have passed successfully!"
@@ -246,7 +319,7 @@
name: All Runtime Tests Passed
if: ${{ cancelled() || contains(needs.*.result, 'failure') || contains(needs.*.result, 'cancelled') }}
runs-on: ubuntu-latest
needs: [test_runtime, runtime_integration_tests_on_linux]
needs: [test_runtime_root, test_runtime_oh, runtime_integration_tests_on_linux]
steps:
- name: Some tests failed
run: |
12 changes: 6 additions & 6 deletions openhands/core/config.py
@@ -507,7 +507,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
if isinstance(value, dict):
try:
if key is not None and key.lower() == 'agent':
logger.openhands_logger.info(
logger.openhands_logger.debug(
'Attempt to load default agent config from config toml'
)
non_dict_fields = {
@@ -517,13 +517,13 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
cfg.set_agent_config(agent_config, 'agent')
for nested_key, nested_value in value.items():
if isinstance(nested_value, dict):
logger.openhands_logger.info(
logger.openhands_logger.debug(
f'Attempt to load group {nested_key} from config toml as agent config'
)
agent_config = AgentConfig(**nested_value)
cfg.set_agent_config(agent_config, nested_key)
elif key is not None and key.lower() == 'llm':
logger.openhands_logger.info(
logger.openhands_logger.debug(
'Attempt to load default LLM config from config toml'
)
non_dict_fields = {
@@ -533,7 +533,7 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):
cfg.set_llm_config(llm_config, 'llm')
for nested_key, nested_value in value.items():
if isinstance(nested_value, dict):
logger.openhands_logger.info(
logger.openhands_logger.debug(
f'Attempt to load group {nested_key} from config toml as llm config'
)
llm_config = LLMConfig(**nested_value)
@@ -584,10 +584,10 @@ def load_from_toml(cfg: AppConfig, toml_file: str = 'config.toml'):

def finalize_config(cfg: AppConfig):
"""More tweaks to the config after it's been loaded."""
cfg.workspace_base = os.path.abspath(cfg.workspace_base)
# Set workspace_mount_path if not set by the user
if cfg.workspace_mount_path is UndefinedString.UNDEFINED:
cfg.workspace_mount_path = os.path.abspath(cfg.workspace_base)
cfg.workspace_base = os.path.abspath(cfg.workspace_base)
cfg.workspace_mount_path = cfg.workspace_base

if cfg.workspace_mount_rewrite: # and not config.workspace_mount_path:
# TODO why do we need to check if workspace_mount_path is None?
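The finalize_config change above moves the abspath resolution into the fallback branch, so workspace_base is normalised and reused for workspace_mount_path only when the user left the mount path undefined. A rough illustration of that intent, using a simplified stand-in for AppConfig rather than the real class:

```python
import os
from dataclasses import dataclass

UNDEFINED = object()  # stand-in for UndefinedString.UNDEFINED

@dataclass
class FakeConfig:
    workspace_base: str = './workspace'
    workspace_mount_path: object = UNDEFINED

def finalize(cfg: FakeConfig) -> None:
    # Only resolve the base and fill in the mount path when the user did not set one
    if cfg.workspace_mount_path is UNDEFINED:
        cfg.workspace_base = os.path.abspath(cfg.workspace_base)
        cfg.workspace_mount_path = cfg.workspace_base

cfg = FakeConfig()
finalize(cfg)
assert cfg.workspace_mount_path == os.path.abspath('./workspace')
```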
4 changes: 4 additions & 0 deletions openhands/runtime/builder/docker.py
@@ -68,6 +68,10 @@ def image_exists(self, image_name: str) -> bool:
Returns:
bool: Whether the Docker image exists in the registry or in the local store
"""
if not image_name:
logger.error(f'Invalid image name: `{image_name}`')
return False

try:
logger.info(f'Checking, if image exists locally:\n{image_name}')
self.docker_client.images.get(image_name)
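For context, the guard added above returns early on an empty or None image name instead of handing an invalid reference to the Docker SDK. A simplified sketch of the same pattern (local check only; the real method also falls back to checking the remote registry):

```python
import logging

import docker
from docker.errors import ImageNotFound

logger = logging.getLogger(__name__)

def image_exists(client: docker.DockerClient, image_name: str | None) -> bool:
    """Return True if the image is present locally; reject empty names up front."""
    if not image_name:
        logger.error('Invalid image name: `%s`', image_name)
        return False
    try:
        client.images.get(image_name)
        return True
    except ImageNotFound:
        return False
```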
113 changes: 83 additions & 30 deletions openhands/runtime/client/client.py
Expand Up @@ -84,7 +84,6 @@ def __init__(
self.lock = asyncio.Lock()
self.plugins: dict[str, Plugin] = {}
self.browser = BrowserEnv(browsergym_eval_env)
self._initial_pwd = work_dir

@property
def initial_pwd(self):
@@ -116,44 +115,96 @@ async def ainit(self):
logger.info('Runtime client initialized.')

def _init_user(self, username: str, user_id: int) -> None:
"""Create user if not exists."""
"""Create working directory and user if not exists.
It performs the following steps effectively:
* Creates the Working Directory:
- Uses mkdir -p to create the directory.
- Sets ownership to username:root.
- Adjusts permissions to be readable and writable by group and others.
* User Verification and Creation:
- Checks if the user exists using id -u.
- If the user exists with the correct UID, it skips creation.
- If the UID differs, it logs a warning and updates self.user_id.
- If the user doesn't exist, it proceeds to create the user.
* Sudo Configuration:
- Writes '%sudo ALL=(ALL) NOPASSWD:ALL' to /etc/sudoers.d/99_sudo to grant
passwordless sudo access to the sudo group.
- Adds the user to the sudo group with the useradd command, handling
UID conflicts by incrementing the UID if necessary.
"""

# First create the working directory, independent of the user
logger.info(f'Client working directory: {self.initial_pwd}')
command = f'umask 002; mkdir -p {self.initial_pwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str = output.stdout.decode()

command = f'chown -R {username}:root {self.initial_pwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str += output.stdout.decode()

command = f'chmod g+rw {self.initial_pwd}'
output = subprocess.run(command, shell=True, capture_output=True)
out_str += output.stdout.decode()
logger.debug(f'Created working directory. Output: [{out_str}]')

# Skip root since it is already created
if username == 'root':
return

# Check if the username already exists
existing_user_id = -1
try:
subprocess.run(
result = subprocess.run(
f'id -u {username}', shell=True, check=True, capture_output=True
)
logger.debug(f'User {username} already exists. Skipping creation.')
existing_user_id = int(result.stdout.decode().strip())

# The user ID already exists, skip setup
if existing_user_id == user_id:
logger.debug(
f'User `{username}` already has the provided UID {user_id}. Skipping user setup.'
)
else:
logger.warning(
f'User `{username}` already exists with UID {existing_user_id}. Skipping user setup.'
)
self.user_id = existing_user_id
return
except subprocess.CalledProcessError:
pass # User does not exist, continue with creation
except subprocess.CalledProcessError as e:
# Return code 1 indicates that the user does not exist yet
if e.returncode == 1:
logger.debug(
f'User `{username}` does not exist. Proceeding with user creation.'
)
else:
logger.error(
f'Error checking user `{username}`:\n{e}\n'
)
raise

# Add sudoer
sudoer_line = r"echo '%sudo ALL=(ALL) NOPASSWD:ALL' >> /etc/sudoers"
output = subprocess.run(sudoer_line, shell=True, capture_output=True)
if output.returncode != 0:
raise RuntimeError(f'Failed to add sudoer: {output.stderr.decode()}')
logger.debug(f'Added sudoer successfully. Output: [{output.stdout.decode()}]')
sudoer_line = '%sudo ALL=(ALL) NOPASSWD:ALL\n'  # plain string so \n is a real newline
sudoers_path = '/etc/sudoers.d/99_sudo'
if not Path(sudoers_path).exists():
with open(sudoers_path, 'w') as f:
f.write(sudoer_line)
output = subprocess.run(['chmod', '0440', sudoers_path])
if output.returncode != 0:
logger.error('Failed to chmod 99_sudo file!')
else:
logger.debug('Added sudoer successfully.')

# Attempt to add the user, retrying with incremented user_id if necessary
while True:
command = (
f'useradd -rm -d /home/{username} -s /bin/bash '
f'-g root -G sudo -u {user_id} {username}'
)

if not os.path.exists(self.initial_pwd):
command += f' && mkdir -p {self.initial_pwd}'
command += f' && chown -R {username}:root {self.initial_pwd}'
command += f' && chmod g+s {self.initial_pwd}'

output = subprocess.run(command, shell=True, capture_output=True)
if output.returncode == 0:
logger.debug(
f'Added user {username} successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
f'Added user `{username}` successfully with UID {user_id}. Output: [{output.stdout.decode()}]'
)
break
elif f'UID {user_id} is not unique' in output.stderr.decode():
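On the sudoers side, the commit replaces the old `echo ... >> /etc/sudoers` append with a drop-in file under /etc/sudoers.d, written once and chmod'ed to 0440. A hedged sketch of that approach with an extra `visudo -cf` syntax check layered on top — the check is a precaution this commit does not perform, and it assumes visudo is available in the image:

```python
import subprocess
from pathlib import Path

SUDOERS_DROPIN = Path('/etc/sudoers.d/99_sudo')

def write_sudoers_dropin() -> None:
    """Write a passwordless-sudo drop-in for the sudo group and sanity-check it."""
    if not SUDOERS_DROPIN.exists():
        # A real trailing newline matters; sudo is strict about sudoers syntax
        SUDOERS_DROPIN.write_text('%sudo ALL=(ALL) NOPASSWD:ALL\n')
        SUDOERS_DROPIN.chmod(0o440)  # sudo ignores drop-ins that are group/world writable
    # visudo -cf parses the file without installing it; non-zero means a syntax error
    check = subprocess.run(['visudo', '-cf', str(SUDOERS_DROPIN)], capture_output=True)
    if check.returncode != 0:
        raise RuntimeError(f'Invalid sudoers drop-in: {check.stderr.decode()}')
```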
@@ -163,7 +214,7 @@ def _init_user(self, username: str, user_id: int) -> None:
user_id += 1
else:
raise RuntimeError(
f'Failed to create user {username}: {output.stderr.decode()}'
f'Failed to create user `{username}`! Output: [{output.stderr.decode()}]'
)

def _init_bash_shell(self, work_dir: str, username: str) -> None:
@@ -181,17 +232,20 @@ def _init_bash_shell(self, work_dir: str, username: str) -> None:

# This should NOT match "PS1=\u@\h:\w [PEXPECT]$" when `env` is executed
self.__bash_expect_regex = r'\[PEXPECT_BEGIN\]\s*(.*?)\s*([a-z0-9_-]*)@([a-zA-Z0-9.-]*):(.+)\s*\[PEXPECT_END\]'

self.shell.sendline(f'export PS1="{self.__bash_PS1}"; export PS2=""')
# Set umask to allow group write permissions
self.shell.sendline(f'umask 002; export PS1="{self.__bash_PS1}"; export PS2=""')
self.shell.expect(self.__bash_expect_regex)

self.shell.sendline(
f'if [ ! -d "{work_dir}" ]; then mkdir -p "{work_dir}"; fi && cd "{work_dir}"'
)
self.shell.expect(self.__bash_expect_regex)
logger.debug(
f'Bash initialized. Working directory: {work_dir}. Output: {self.shell.before}'
f'Bash initialized. Working directory: {work_dir}. Output: [{self.shell.before}]'
)
# Ensure the group has write permissions on the working directory
self.shell.sendline(f'chmod g+rw "{work_dir}"')
self.shell.expect(self.__bash_expect_regex)

async def _init_bash_commands(self):
logger.info(f'Initializing by running {len(INIT_COMMANDS)} bash commands...')
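The `umask 002` added during shell initialisation, the `chmod g+rw` on the working directory, and the 0o664 mode applied to newly written files later in this diff all serve the same goal: files created by one user stay group-writable for the other. A quick illustration of how a 002 umask maps onto default creation modes:

```python
# Illustration only: how a 002 umask affects default permission bits.
# Files start from a 0666 base, directories from 0777; the umask clears bits.
UMASK = 0o002

default_file_mode = 0o666 & ~UMASK   # 0o664 -> rw-rw-r--
default_dir_mode = 0o777 & ~UMASK    # 0o775 -> rwxrwxr-x

assert oct(default_file_mode) == '0o664'
assert oct(default_dir_mode) == '0o775'
```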
@@ -295,14 +349,14 @@ def _continue_bash(
bash_prompt = self._get_bash_prompt_and_update_pwd()
if keep_prompt:
output += '\r\n' + bash_prompt
logger.debug(f'Command output: {output}')
# logger.debug(f'Command output:\n{output}')
return output, exit_code

async def run_action(self, action) -> Observation:
action_type = action.action
logger.debug(f'Running action: {action}')
logger.debug(f'Running action:\n{action}')
observation = await getattr(self, action_type)(action)
logger.debug(f'Action output: {observation}')
logger.debug(f'Action output:\n{observation}')
return observation

async def run(self, action: CmdRunAction) -> CmdOutputObservation:
@@ -355,10 +409,9 @@ async def run_ipython(self, action: IPythonRunCellAction) -> Observation:
_jupyter_plugin: JupyterPlugin = self.plugins['jupyter'] # type: ignore
# This is used to make AgentSkills in Jupyter aware of the
# current working directory in Bash
if self.pwd != getattr(self, '_jupyter_pwd', None):
logger.debug(
f"{self.pwd} != {getattr(self, '_jupyter_pwd', None)} -> reset Jupyter PWD"
)
jupyter_pwd = getattr(self, '_jupyter_pwd', None)
if self.pwd != jupyter_pwd:
logger.debug(f'{self.pwd} != {jupyter_pwd} -> reset Jupyter PWD')
reset_jupyter_pwd_code = f'import os; os.chdir("{self.pwd}")'
_aux_action = IPythonRunCellAction(code=reset_jupyter_pwd_code)
_reset_obs = await _jupyter_plugin.run(_aux_action)
@@ -450,7 +503,7 @@ async def write(self, action: FileWriteAction) -> Observation:
os.chown(filepath, file_stat.st_uid, file_stat.st_gid)
else:
# set the new file permissions if the file is new
os.chmod(filepath, 0o644)
os.chmod(filepath, 0o664)
os.chown(filepath, self.user_id, self.user_id)

except FileNotFoundError:
