test_runtime.py 7.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236
  1. """Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
  2. import asyncio
  3. import os
  4. import pathlib
  5. import tempfile
  6. import time
  7. from unittest.mock import patch
  8. import pytest
  9. from opendevin.core.config import SandboxConfig
  10. from opendevin.core.logger import opendevin_logger as logger
  11. from opendevin.events import EventStream
  12. from opendevin.events.action import (
  13. CmdRunAction,
  14. )
  15. from opendevin.events.observation import (
  16. CmdOutputObservation,
  17. )
  18. from opendevin.runtime.client.runtime import EventStreamRuntime
  19. from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
  20. from opendevin.runtime.server.runtime import ServerRuntime
  21. @pytest.fixture(autouse=True)
  22. def print_method_name(request):
  23. print('\n########################################################################')
  24. print(f'Running test: {request.node.name}')
  25. print('########################################################################')
  26. @pytest.fixture
  27. def temp_dir(monkeypatch):
  28. # get a temporary directory
  29. with tempfile.TemporaryDirectory() as temp_dir:
  30. pathlib.Path().mkdir(parents=True, exist_ok=True)
  31. yield temp_dir
  32. # This assures that all tests run together for each runtime, not alternating between them,
  33. # which caused them to fail previously.
  34. @pytest.fixture(scope='module', params=[EventStreamRuntime, ServerRuntime])
  35. def box_class(request):
  36. time.sleep(1)
  37. return request.param
  38. async def _load_runtime(box_class, event_stream):
  39. sid = 'test'
  40. plugins = [JupyterRequirement(), AgentSkillsRequirement()]
  41. sandbox_config = SandboxConfig(
  42. use_host_network=True,
  43. )
  44. container_image = sandbox_config.container_image
  45. # NOTE: we will use the default container image specified in the config.sandbox
  46. # if it is an official od_runtime image.
  47. if 'od_runtime' not in container_image:
  48. container_image = 'ubuntu:22.04'
  49. logger.warning(
  50. f'`{sandbox_config.container_image}` is not an od_runtime image. Will use `{container_image}` as the container image for testing.'
  51. )
  52. if box_class == EventStreamRuntime:
  53. runtime = EventStreamRuntime(
  54. sandbox_config=sandbox_config,
  55. event_stream=event_stream,
  56. sid=sid,
  57. # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime
  58. # Instead, we will pre-build a suite of container images with OD-runtime-cli installed.
  59. container_image=container_image,
  60. plugins=plugins,
  61. )
  62. await runtime.ainit()
  63. elif box_class == ServerRuntime:
  64. runtime = ServerRuntime(
  65. sandbox_config=sandbox_config, event_stream=event_stream, sid=sid
  66. )
  67. await runtime.ainit()
  68. runtime.init_sandbox_plugins(plugins)
  69. runtime.init_runtime_tools(
  70. [],
  71. is_async=False,
  72. runtime_tools_config={},
  73. )
  74. else:
  75. raise ValueError(f'Invalid box class: {box_class}')
  76. await asyncio.sleep(1)
  77. return runtime
  78. @pytest.mark.asyncio
  79. async def test_env_vars_os_environ(box_class):
  80. with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
  81. cli_session = 'main_test'
  82. event_stream = EventStream(cli_session)
  83. runtime = await _load_runtime(box_class, event_stream)
  84. obs: CmdOutputObservation = await runtime.run_action(
  85. CmdRunAction(command='env')
  86. )
  87. print(obs)
  88. obs: CmdOutputObservation = await runtime.run_action(
  89. CmdRunAction(command='echo $FOOBAR')
  90. )
  91. print(obs)
  92. assert obs.exit_code == 0, 'The exit code should be 0.'
  93. assert (
  94. obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
  95. ), f'Output: [{obs.content}] for {box_class}'
  96. await runtime.close()
  97. await asyncio.sleep(1)
  98. @pytest.mark.asyncio
  99. async def test_env_vars_runtime_add_env_vars(box_class):
  100. cli_session = 'main_test'
  101. event_stream = EventStream(cli_session)
  102. runtime = await _load_runtime(box_class, event_stream)
  103. await runtime.add_env_vars({'QUUX': 'abc"def'})
  104. obs: CmdOutputObservation = await runtime.run_action(
  105. CmdRunAction(command='echo $QUUX')
  106. )
  107. print(obs)
  108. assert obs.exit_code == 0, 'The exit code should be 0.'
  109. assert (
  110. obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
  111. ), f'Output: [{obs.content}] for {box_class}'
  112. await runtime.close()
  113. await asyncio.sleep(1)
  114. @pytest.mark.asyncio
  115. async def test_env_vars_runtime_add_empty_dict(box_class):
  116. cli_session = 'main_test'
  117. event_stream = EventStream(cli_session)
  118. runtime = await _load_runtime(box_class, event_stream)
  119. prev_obs = await runtime.run_action(CmdRunAction(command='env'))
  120. assert prev_obs.exit_code == 0, 'The exit code should be 0.'
  121. print(prev_obs)
  122. await runtime.add_env_vars({})
  123. obs = await runtime.run_action(CmdRunAction(command='env'))
  124. assert obs.exit_code == 0, 'The exit code should be 0.'
  125. print(obs)
  126. assert (
  127. obs.content == prev_obs.content
  128. ), 'The env var content should be the same after adding an empty dict.'
  129. await runtime.close()
  130. await asyncio.sleep(1)
  131. @pytest.mark.asyncio
  132. async def test_env_vars_runtime_add_multiple_env_vars(box_class):
  133. cli_session = 'main_test'
  134. event_stream = EventStream(cli_session)
  135. runtime = await _load_runtime(box_class, event_stream)
  136. await runtime.add_env_vars({'QUUX': 'abc"def', 'FOOBAR': 'xyz'})
  137. obs: CmdOutputObservation = await runtime.run_action(
  138. CmdRunAction(command='echo $QUUX $FOOBAR')
  139. )
  140. print(obs)
  141. assert obs.exit_code == 0, 'The exit code should be 0.'
  142. assert (
  143. obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
  144. ), f'Output: [{obs.content}] for {box_class}'
  145. await runtime.close()
  146. await asyncio.sleep(1)
  147. @pytest.mark.asyncio
  148. async def test_env_vars_runtime_add_env_vars_overwrite(box_class):
  149. cli_session = 'main_test'
  150. with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
  151. event_stream = EventStream(cli_session)
  152. runtime = await _load_runtime(box_class, event_stream)
  153. await runtime.add_env_vars({'FOOBAR': 'xyz'})
  154. obs: CmdOutputObservation = await runtime.run_action(
  155. CmdRunAction(command='echo $FOOBAR')
  156. )
  157. print(obs)
  158. assert obs.exit_code == 0, 'The exit code should be 0.'
  159. assert (
  160. obs.content.strip().split('\r\n')[0].strip() == 'xyz'
  161. ), f'Output: [{obs.content}] for {box_class}'
  162. await runtime.close()
  163. await asyncio.sleep(1)
  164. @pytest.mark.asyncio
  165. async def test_bash_command_pexcept(temp_dir, box_class):
  166. cli_session = 'main_test'
  167. event_stream = EventStream(cli_session)
  168. runtime = await _load_runtime(box_class, event_stream)
  169. # We set env var PS1="\u@\h:\w $"
  170. # and construct the PEXCEPT prompt base on it.
  171. # When run `env`, bad implementation of CmdRunAction will be pexcepted by this
  172. # and failed to pexcept the right content, causing it fail to get error code.
  173. obs = await runtime.run_action(CmdRunAction(command='env'))
  174. # For example:
  175. # 02:16:13 - opendevin:DEBUG: client.py:78 - Executing command: env
  176. # 02:16:13 - opendevin:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
  177. # CONDA_EXE=/opendevin/miniforge3/bin/conda
  178. # [...]
  179. # LC_CTYPE=C.UTF-8
  180. # PS1=\u@\h:\w $
  181. # 02:16:13 - opendevin:DEBUG: client.py:89 - Executing command for exit code: env
  182. # 02:16:13 - opendevin:DEBUG: client.py:92 - Exit code Output:
  183. # CONDA_DEFAULT_ENV=base
  184. # As long as the exit code is 0, the test will pass.
  185. assert isinstance(
  186. obs, CmdOutputObservation
  187. ), 'The observation should be a CmdOutputObservation.'
  188. assert obs.exit_code == 0, 'The exit code should be 0.'
  189. await runtime.close()
  190. await asyncio.sleep(1)