test_runtime.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451
  1. """Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
  2. import asyncio
  3. import os
  4. import pathlib
  5. import tempfile
  6. import time
  7. from unittest.mock import patch
  8. import pytest
  9. from opendevin.core.config import AppConfig, SandboxConfig, load_from_env
  10. from opendevin.core.logger import opendevin_logger as logger
  11. from opendevin.events import EventStream
  12. from opendevin.events.action import (
  13. BrowseURLAction,
  14. CmdRunAction,
  15. FileReadAction,
  16. FileWriteAction,
  17. IPythonRunCellAction,
  18. )
  19. from opendevin.events.observation import (
  20. BrowserOutputObservation,
  21. CmdOutputObservation,
  22. ErrorObservation,
  23. FileReadObservation,
  24. FileWriteObservation,
  25. IPythonRunCellObservation,
  26. )
  27. from opendevin.runtime.client.runtime import EventStreamRuntime
  28. from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
  29. from opendevin.runtime.server.runtime import ServerRuntime
  30. from opendevin.storage import get_file_store
  31. @pytest.fixture(autouse=True)
  32. def print_method_name(request):
  33. print('\n########################################################################')
  34. print(f'Running test: {request.node.name}')
  35. print('########################################################################')
  36. @pytest.fixture
  37. def temp_dir(monkeypatch):
  38. # get a temporary directory
  39. with tempfile.TemporaryDirectory() as temp_dir:
  40. pathlib.Path().mkdir(parents=True, exist_ok=True)
  41. yield temp_dir
  42. # This assures that all tests run together for each runtime, not alternating between them,
  43. # which caused them to fail previously.
  44. @pytest.fixture(scope='module', params=[EventStreamRuntime, ServerRuntime])
  45. def box_class(request):
  46. time.sleep(1)
  47. return request.param
  48. async def _load_runtime(temp_dir, box_class):
  49. sid = 'test'
  50. cli_session = 'main_test'
  51. plugins = [JupyterRequirement(), AgentSkillsRequirement()]
  52. config = AppConfig(
  53. workspace_base=temp_dir,
  54. workspace_mount_path=temp_dir,
  55. sandbox=SandboxConfig(
  56. use_host_network=True,
  57. ),
  58. )
  59. load_from_env(config, os.environ)
  60. file_store = get_file_store(config.file_store, config.file_store_path)
  61. event_stream = EventStream(cli_session, file_store)
  62. container_image = config.sandbox.container_image
  63. # NOTE: we will use the default container image specified in the config.sandbox
  64. # if it is an official od_runtime image.
  65. if 'od_runtime' not in container_image:
  66. container_image = 'ubuntu:22.04'
  67. logger.warning(
  68. f'`{config.sandbox.container_image}` is not an od_runtime image. Will use `{container_image}` as the container image for testing.'
  69. )
  70. if box_class == EventStreamRuntime:
  71. runtime = EventStreamRuntime(
  72. config=config,
  73. event_stream=event_stream,
  74. sid=sid,
  75. # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime
  76. # Instead, we will pre-build a suite of container images with OD-runtime-cli installed.
  77. container_image=container_image,
  78. plugins=plugins,
  79. )
  80. await runtime.ainit()
  81. elif box_class == ServerRuntime:
  82. runtime = ServerRuntime(config=config, event_stream=event_stream, sid=sid)
  83. await runtime.ainit()
  84. runtime.init_sandbox_plugins(plugins)
  85. runtime.init_runtime_tools(
  86. [],
  87. is_async=False,
  88. runtime_tools_config={},
  89. )
  90. else:
  91. raise ValueError(f'Invalid box class: {box_class}')
  92. await asyncio.sleep(1)
  93. return runtime
  94. @pytest.mark.asyncio
  95. async def test_env_vars_os_environ(temp_dir, box_class):
  96. with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
  97. runtime = await _load_runtime(temp_dir, box_class)
  98. obs: CmdOutputObservation = await runtime.run_action(
  99. CmdRunAction(command='env')
  100. )
  101. print(obs)
  102. obs: CmdOutputObservation = await runtime.run_action(
  103. CmdRunAction(command='echo $FOOBAR')
  104. )
  105. print(obs)
  106. assert obs.exit_code == 0, 'The exit code should be 0.'
  107. assert (
  108. obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
  109. ), f'Output: [{obs.content}] for {box_class}'
  110. await runtime.close()
  111. await asyncio.sleep(1)
  112. @pytest.mark.asyncio
  113. async def test_env_vars_runtime_add_env_vars(temp_dir, box_class):
  114. runtime = await _load_runtime(temp_dir, box_class)
  115. await runtime.add_env_vars({'QUUX': 'abc"def'})
  116. obs: CmdOutputObservation = await runtime.run_action(
  117. CmdRunAction(command='echo $QUUX')
  118. )
  119. print(obs)
  120. assert obs.exit_code == 0, 'The exit code should be 0.'
  121. assert (
  122. obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
  123. ), f'Output: [{obs.content}] for {box_class}'
  124. await runtime.close()
  125. await asyncio.sleep(1)
  126. @pytest.mark.asyncio
  127. async def test_env_vars_runtime_add_empty_dict(temp_dir, box_class):
  128. runtime = await _load_runtime(temp_dir, box_class)
  129. prev_obs = await runtime.run_action(CmdRunAction(command='env'))
  130. assert prev_obs.exit_code == 0, 'The exit code should be 0.'
  131. print(prev_obs)
  132. await runtime.add_env_vars({})
  133. obs = await runtime.run_action(CmdRunAction(command='env'))
  134. assert obs.exit_code == 0, 'The exit code should be 0.'
  135. print(obs)
  136. assert (
  137. obs.content == prev_obs.content
  138. ), 'The env var content should be the same after adding an empty dict.'
  139. await runtime.close()
  140. await asyncio.sleep(1)
  141. @pytest.mark.asyncio
  142. async def test_env_vars_runtime_add_multiple_env_vars(temp_dir, box_class):
  143. runtime = await _load_runtime(temp_dir, box_class)
  144. await runtime.add_env_vars({'QUUX': 'abc"def', 'FOOBAR': 'xyz'})
  145. obs: CmdOutputObservation = await runtime.run_action(
  146. CmdRunAction(command='echo $QUUX $FOOBAR')
  147. )
  148. print(obs)
  149. assert obs.exit_code == 0, 'The exit code should be 0.'
  150. assert (
  151. obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
  152. ), f'Output: [{obs.content}] for {box_class}'
  153. await runtime.close()
  154. await asyncio.sleep(1)
  155. @pytest.mark.asyncio
  156. async def test_env_vars_runtime_add_env_vars_overwrite(temp_dir, box_class):
  157. with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
  158. runtime = await _load_runtime(temp_dir, box_class)
  159. await runtime.add_env_vars({'FOOBAR': 'xyz'})
  160. obs: CmdOutputObservation = await runtime.run_action(
  161. CmdRunAction(command='echo $FOOBAR')
  162. )
  163. print(obs)
  164. assert obs.exit_code == 0, 'The exit code should be 0.'
  165. assert (
  166. obs.content.strip().split('\r\n')[0].strip() == 'xyz'
  167. ), f'Output: [{obs.content}] for {box_class}'
  168. await runtime.close()
  169. await asyncio.sleep(1)
  170. @pytest.mark.asyncio
  171. async def test_bash_command_pexcept(temp_dir, box_class):
  172. runtime = await _load_runtime(temp_dir, box_class)
  173. # We set env var PS1="\u@\h:\w $"
  174. # and construct the PEXCEPT prompt base on it.
  175. # When run `env`, bad implementation of CmdRunAction will be pexcepted by this
  176. # and failed to pexcept the right content, causing it fail to get error code.
  177. obs = await runtime.run_action(CmdRunAction(command='env'))
  178. # For example:
  179. # 02:16:13 - opendevin:DEBUG: client.py:78 - Executing command: env
  180. # 02:16:13 - opendevin:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
  181. # CONDA_EXE=/opendevin/miniforge3/bin/conda
  182. # [...]
  183. # LC_CTYPE=C.UTF-8
  184. # PS1=\u@\h:\w $
  185. # 02:16:13 - opendevin:DEBUG: client.py:89 - Executing command for exit code: env
  186. # 02:16:13 - opendevin:DEBUG: client.py:92 - Exit code Output:
  187. # CONDA_DEFAULT_ENV=base
  188. # As long as the exit code is 0, the test will pass.
  189. assert isinstance(
  190. obs, CmdOutputObservation
  191. ), 'The observation should be a CmdOutputObservation.'
  192. assert obs.exit_code == 0, 'The exit code should be 0.'
  193. await runtime.close()
  194. await asyncio.sleep(1)
  195. @pytest.mark.asyncio
  196. async def test_simple_cmd_ipython_and_fileop(temp_dir, box_class):
  197. runtime = await _load_runtime(temp_dir, box_class)
  198. # Test run command
  199. action_cmd = CmdRunAction(command='ls -l')
  200. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  201. obs = await runtime.run_action(action_cmd)
  202. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  203. assert isinstance(obs, CmdOutputObservation)
  204. assert obs.exit_code == 0
  205. assert 'total 0' in obs.content
  206. # Test run ipython
  207. test_code = "print('Hello, `World`!\\n')"
  208. action_ipython = IPythonRunCellAction(code=test_code)
  209. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  210. obs = await runtime.run_action(action_ipython)
  211. assert isinstance(obs, IPythonRunCellObservation)
  212. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  213. assert obs.content.strip() == 'Hello, `World`!'
  214. # Test read file (file should not exist)
  215. action_read = FileReadAction(path='hello.sh')
  216. logger.info(action_read, extra={'msg_type': 'ACTION'})
  217. obs = await runtime.run_action(action_read)
  218. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  219. assert isinstance(obs, ErrorObservation)
  220. assert 'File not found' in obs.content
  221. # Test write file
  222. action_write = FileWriteAction(content='echo "Hello, World!"', path='hello.sh')
  223. logger.info(action_write, extra={'msg_type': 'ACTION'})
  224. obs = await runtime.run_action(action_write)
  225. assert isinstance(obs, FileWriteObservation)
  226. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  227. assert obs.content == ''
  228. if box_class == ServerRuntime:
  229. assert obs.path == 'hello.sh'
  230. else:
  231. # event stream runtime will always use absolute path
  232. assert obs.path == '/workspace/hello.sh'
  233. # Test read file (file should exist)
  234. action_read = FileReadAction(path='hello.sh')
  235. logger.info(action_read, extra={'msg_type': 'ACTION'})
  236. obs = await runtime.run_action(action_read)
  237. assert isinstance(
  238. obs, FileReadObservation
  239. ), 'The observation should be a FileReadObservation.'
  240. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  241. assert obs.content == 'echo "Hello, World!"\n'
  242. if box_class == ServerRuntime:
  243. assert obs.path == 'hello.sh'
  244. else:
  245. assert obs.path == '/workspace/hello.sh'
  246. await runtime.close()
  247. await asyncio.sleep(1)
  248. @pytest.mark.asyncio
  249. async def test_simple_browse(temp_dir, box_class):
  250. runtime = await _load_runtime(temp_dir, box_class)
  251. # Test browse
  252. action_cmd = CmdRunAction(command='python -m http.server 8000 > server.log 2>&1 &')
  253. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  254. obs = await runtime.run_action(action_cmd)
  255. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  256. assert isinstance(obs, CmdOutputObservation)
  257. assert obs.exit_code == 0
  258. assert '[1]' in obs.content
  259. action_browse = BrowseURLAction(url='http://localhost:8000')
  260. logger.info(action_browse, extra={'msg_type': 'ACTION'})
  261. obs = await runtime.run_action(action_browse)
  262. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  263. assert isinstance(obs, BrowserOutputObservation)
  264. assert 'http://localhost:8000' in obs.url
  265. assert obs.status_code == 200
  266. assert not obs.error
  267. assert obs.open_pages_urls == ['http://localhost:8000/']
  268. assert obs.active_page_index == 0
  269. assert obs.last_browser_action == 'goto("http://localhost:8000")'
  270. assert obs.last_browser_action_error == ''
  271. assert 'Directory listing for /' in obs.content
  272. assert 'server.log' in obs.content
  273. await runtime.close()
  274. @pytest.mark.asyncio
  275. async def test_multiline_commands(temp_dir, box_class):
  276. cmds = [
  277. 'ls -l',
  278. 'echo -e "hello\nworld"',
  279. """
  280. echo -e "hello it\\'s me"
  281. """.strip(),
  282. """
  283. echo \\
  284. -e 'hello' \\
  285. -v
  286. """.strip(),
  287. """
  288. echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
  289. """.strip(),
  290. """
  291. echo -e 'hello
  292. world
  293. are
  294. you\\n
  295. there?'
  296. """.strip(),
  297. """
  298. echo -e 'hello
  299. world "
  300. '
  301. """.strip(),
  302. ]
  303. joined_cmds = '\n'.join(cmds)
  304. runtime = await _load_runtime(temp_dir, box_class)
  305. action = CmdRunAction(command=joined_cmds)
  306. logger.info(action, extra={'msg_type': 'ACTION'})
  307. obs = await runtime.run_action(action)
  308. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  309. assert isinstance(obs, CmdOutputObservation)
  310. assert obs.exit_code == 0, 'The exit code should be 0.'
  311. assert 'total 0' in obs.content
  312. assert 'hello\r\nworld' in obs.content
  313. assert "hello it\\'s me" in obs.content
  314. assert 'hello -v' in obs.content
  315. assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
  316. assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
  317. assert 'hello\r\nworld "\r\n' in obs.content
  318. await runtime.close()
  319. await asyncio.sleep(1)
  320. @pytest.mark.asyncio
  321. async def test_no_ps2_in_output(temp_dir, box_class):
  322. """Test that the PS2 sign is not added to the output of a multiline command."""
  323. runtime = await _load_runtime(temp_dir, box_class)
  324. action = CmdRunAction(command='echo -e "hello\nworld"')
  325. logger.info(action, extra={'msg_type': 'ACTION'})
  326. obs = await runtime.run_action(action)
  327. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  328. if box_class == ServerRuntime:
  329. # the extra PS2 '>' is NOT handled by the ServerRuntime
  330. assert 'hello\r\nworld' in obs.content
  331. assert '>' in obs.content
  332. assert obs.content.count('>') == 1
  333. else:
  334. assert 'hello\r\nworld' in obs.content
  335. assert '>' not in obs.content
  336. @pytest.mark.asyncio
  337. async def test_multiline_command_loop(temp_dir, box_class):
  338. # https://github.com/OpenDevin/OpenDevin/issues/3143
  339. runtime = await _load_runtime(temp_dir, box_class)
  340. init_cmd = """
  341. mkdir -p _modules && \
  342. for month in {01..04}; do
  343. for day in {01..05}; do
  344. touch "_modules/2024-${month}-${day}-sample.md"
  345. done
  346. done
  347. echo "created files"
  348. """
  349. action = CmdRunAction(command=init_cmd)
  350. logger.info(action, extra={'msg_type': 'ACTION'})
  351. obs = await runtime.run_action(action)
  352. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  353. assert isinstance(obs, CmdOutputObservation)
  354. assert obs.exit_code == 0, 'The exit code should be 0.'
  355. assert 'created files' in obs.content
  356. follow_up_cmd = """
  357. for file in _modules/*.md; do
  358. new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
  359. mv "$file" "$new_date"
  360. done
  361. echo "success"
  362. """
  363. action = CmdRunAction(command=follow_up_cmd)
  364. logger.info(action, extra={'msg_type': 'ACTION'})
  365. obs = await runtime.run_action(action)
  366. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  367. assert isinstance(obs, CmdOutputObservation)
  368. assert obs.exit_code == 0, 'The exit code should be 0.'
  369. assert 'success' in obs.content
  370. await runtime.close()
  371. await asyncio.sleep(1)