Explorar o código

Revert ssh box implemetation, fix multi-line command issues and add unit tests (#1460)

* update dev md for instruction of running unit tests

* add back unit tests

* revert back to the original sandbox implementation to fix testcases

* allow unit test workflow to find docker.sock

* make sandbox test working via patch

* fix arg parser that's broken for some reason

* fix integration test

* Revert "fix arg parser that's broken for some reason"

This reverts commit 6cc89611337bb74555fd16b4be78681fb7e36573.

* update Development.md
Xingyao Wang hai 1 ano
pai
achega
fe43aeb9b6

+ 6 - 0
.github/workflows/run-unit-tests.yml

@@ -34,11 +34,16 @@ jobs:
           brew install colima docker
           colima start
 
+          # For testcontainers to find the Colima socket
+          # https://github.com/abiosoft/colima/blob/main/docs/FAQ.md#cannot-connect-to-the-docker-daemon-at-unixvarrundockersock-is-the-docker-daemon-running
+          sudo ln -sf $HOME/.colima/default/docker.sock /var/run/docker.sock
+
       - name: Build Environment
         run: make build
 
       - name: Run Tests
         run: poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit
+
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4
         env:
@@ -70,6 +75,7 @@ jobs:
 
       - name: Run Tests
         run: poetry run pytest --cov=agenthub --cov=opendevin --cov-report=xml ./tests/unit
+
       - name: Upload coverage to Codecov
         uses: codecov/codecov-action@v4
         env:

+ 12 - 0
Development.md

@@ -82,3 +82,15 @@ If you encounter any issues with the Language Model (LM) or you're simply curiou
     ```bash
     make help
     ```
+
+### 8. Testing
+
+#### Unit tests
+
+```bash
+poetry run pytest ./tests/unit/test_sandbox.py
+```
+
+#### Integration tests
+
+Please refer to [this README](./tests/integration/README.md) for details.

+ 30 - 7
opendevin/sandbox/docker/ssh_box.py

@@ -171,7 +171,7 @@ class DockerSSHBox(Sandbox):
 
     def start_ssh_session(self):
         # start ssh session at the background
-        self.ssh = pxssh.pxssh(echo=False)
+        self.ssh = pxssh.pxssh()
         hostname = SSH_HOSTNAME
         if RUN_AS_DEVIN:
             username = 'opendevin'
@@ -215,14 +215,36 @@ class DockerSSHBox(Sandbox):
             # send a SIGINT to the process
             self.ssh.sendintr()
             self.ssh.prompt()
-            command_output = self.ssh.before.decode('utf-8').strip()
+            command_output = self.ssh.before.decode(
+                'utf-8').lstrip(cmd).strip()
             return -1, f'Command: "{cmd}" timed out. Sending SIGINT to the process: {command_output}'
         command_output = self.ssh.before.decode('utf-8').strip()
 
+        # once out, make sure that we have *every* output, we while loop until we get an empty output
+        while True:
+            logger.debug('WAITING FOR .prompt()')
+            self.ssh.sendline('\n')
+            timeout_not_reached = self.ssh.prompt(timeout=1)
+            if not timeout_not_reached:
+                logger.debug('TIMEOUT REACHED')
+                break
+            logger.debug('WAITING FOR .before')
+            output = self.ssh.before.decode('utf-8').strip()
+            logger.debug(f'WAITING FOR END OF command output ({bool(output)}): {output}')
+            if output == '':
+                break
+            command_output += output
+        command_output = command_output.lstrip(cmd).strip()
+
         # get the exit code
         self.ssh.sendline('echo $?')
-        self.ssh.prompt(timeout=10)
-        exit_code = int(self.ssh.before.decode('utf-8').strip())
+        self.ssh.prompt()
+        exit_code = self.ssh.before.decode('utf-8')
+        while not exit_code.startswith('echo $?'):
+            self.ssh.prompt()
+            exit_code = self.ssh.before.decode('utf-8')
+            logger.debug(f'WAITING FOR exit code: {exit_code}')
+        exit_code = int(exit_code.lstrip('echo $?').strip())
         return exit_code, command_output
 
     def copy_to(self, host_src: str, sandbox_dest: str, recursive: bool = False):
@@ -307,9 +329,10 @@ class DockerSSHBox(Sandbox):
             pass
 
     def get_working_directory(self):
-        self.ssh.sendline('pwd')
-        self.ssh.prompt(timeout=10)
-        return self.ssh.before.decode('utf-8').strip()
+        exit_code, result = self.execute('pwd')
+        if exit_code != 0:
+            raise Exception('Failed to get working directory')
+        return result.strip()
 
     def is_container_running(self):
         try:

+ 115 - 0
tests/unit/test_sandbox.py

@@ -0,0 +1,115 @@
+import pathlib
+import tempfile
+from unittest.mock import patch
+
+import pytest
+
+from opendevin import config
+from opendevin.sandbox.docker.ssh_box import DockerSSHBox
+
+
+@pytest.fixture
+def temp_dir():
+    # get a temporary directory
+    with tempfile.TemporaryDirectory() as temp_dir:
+        pathlib.Path().mkdir(parents=True, exist_ok=True)
+        yield temp_dir
+
+
+def test_ssh_box_run_as_devin(temp_dir):
+    # get a temporary directory
+    with patch.dict(
+        config.config,
+        {
+            config.ConfigType.WORKSPACE_BASE: temp_dir,
+            config.ConfigType.RUN_AS_DEVIN: 'true',
+            config.ConfigType.SANDBOX_TYPE: 'ssh',
+        },
+        clear=True
+    ):
+        ssh_box = DockerSSHBox()
+
+        # test the ssh box
+        exit_code, output = ssh_box.execute('ls -l')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert output.strip() == 'total 0'
+
+        exit_code, output = ssh_box.execute('mkdir test')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert output.strip() == ''
+
+        exit_code, output = ssh_box.execute('ls -l')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert 'opendevin' in output, "The output should contain username 'opendevin'"
+        assert 'test' in output, 'The output should contain the test directory'
+
+        exit_code, output = ssh_box.execute('touch test/foo.txt')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert output.strip() == ''
+
+        exit_code, output = ssh_box.execute('ls -l test')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert 'foo.txt' in output, 'The output should contain the foo.txt file'
+
+
+def test_ssh_box_multi_line_cmd_run_as_devin(temp_dir):
+    # get a temporary directory
+    with patch.dict(
+        config.config,
+        {
+            config.ConfigType.WORKSPACE_BASE: temp_dir,
+            config.ConfigType.RUN_AS_DEVIN: 'true',
+            config.ConfigType.SANDBOX_TYPE: 'ssh',
+        },
+        clear=True
+    ):
+        ssh_box = DockerSSHBox()
+
+        # test the ssh box
+        exit_code, output = ssh_box.execute('pwd\nls -l')
+        assert exit_code == 0, 'The exit code should be 0.'
+        expected_lines = ['/workspacels -l', 'total 0']
+        assert output.strip().splitlines() == expected_lines
+
+def test_ssh_box_stateful_cmd_run_as_devin(temp_dir):
+    # get a temporary directory
+    with patch.dict(
+        config.config,
+        {
+            config.ConfigType.WORKSPACE_BASE: temp_dir,
+            config.ConfigType.RUN_AS_DEVIN: 'true',
+            config.ConfigType.SANDBOX_TYPE: 'ssh',
+        },
+        clear=True
+    ):
+        ssh_box = DockerSSHBox()
+
+        # test the ssh box
+        exit_code, output = ssh_box.execute('mkdir test')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert output.strip() == ''
+
+        exit_code, output = ssh_box.execute('cd test')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert output.strip() == ''
+
+        exit_code, output = ssh_box.execute('pwd')
+        assert exit_code == 0, 'The exit code should be 0.'
+        assert output.strip() == '/workspace/test'
+
+def test_ssh_box_failed_cmd_run_as_devin(temp_dir):
+    # get a temporary directory
+    with patch.dict(
+        config.config,
+        {
+            config.ConfigType.WORKSPACE_BASE: temp_dir,
+            config.ConfigType.RUN_AS_DEVIN: 'true',
+            config.ConfigType.SANDBOX_TYPE: 'ssh',
+        },
+        clear=True
+    ):
+        ssh_box = DockerSSHBox()
+
+        # test the ssh box with a command that fails
+        exit_code, output = ssh_box.execute('non_existing_command')
+        assert exit_code != 0, 'The exit code should not be 0 for a failed command.'