# Source: test_runtime.py (33 KB)
  1. """Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
  2. import asyncio
  3. import os
  4. import time
  5. from unittest.mock import patch
  6. import pytest
  7. from pytest import TempPathFactory
  8. from opendevin.core.config import AppConfig, SandboxConfig, load_from_env
  9. from opendevin.core.logger import opendevin_logger as logger
  10. from opendevin.events import EventStream
  11. from opendevin.events.action import (
  12. BrowseURLAction,
  13. CmdRunAction,
  14. FileReadAction,
  15. FileWriteAction,
  16. IPythonRunCellAction,
  17. )
  18. from opendevin.events.observation import (
  19. BrowserOutputObservation,
  20. CmdOutputObservation,
  21. ErrorObservation,
  22. FileReadObservation,
  23. FileWriteObservation,
  24. IPythonRunCellObservation,
  25. )
  26. from opendevin.runtime.client.runtime import EventStreamRuntime
  27. from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
  28. from opendevin.runtime.server.runtime import ServerRuntime
  29. from opendevin.storage import get_file_store
  30. @pytest.fixture(autouse=True)
  31. def print_method_name(request):
  32. print('\n########################################################################')
  33. print(f'Running test: {request.node.name}')
  34. print('########################################################################')
  35. yield
  36. @pytest.fixture
  37. def temp_dir(tmp_path_factory: TempPathFactory) -> str:
  38. return str(tmp_path_factory.mktemp('test_runtime'))
  39. TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'both')
  40. PY3_FOR_TESTING = '/opendevin/miniforge3/bin/mamba run -n base python3'
  41. # Depending on TEST_RUNTIME, feed the appropriate box class(es) to the test.
  42. def get_box_classes():
  43. runtime = TEST_RUNTIME
  44. if runtime.lower() == 'eventstream':
  45. return [EventStreamRuntime]
  46. elif runtime.lower() == 'server':
  47. return [ServerRuntime]
  48. else:
  49. return [EventStreamRuntime, ServerRuntime]
  50. # This assures that all tests run together per runtime, not alternating between them,
  51. # which cause errors (especially outside GitHub actions).
  52. @pytest.fixture(scope='module', params=get_box_classes())
  53. def box_class(request):
  54. time.sleep(2)
  55. return request.param
  56. # TODO: We will change this to `run_as_user` when `ServerRuntime` is deprecated.
  57. # since `EventStreamRuntime` supports running as an arbitrary user.
  58. @pytest.fixture(scope='module', params=[True, False])
  59. def run_as_devin(request):
  60. time.sleep(1)
  61. return request.param
  62. @pytest.fixture(scope='module', params=[True, False])
  63. def enable_auto_lint(request):
  64. time.sleep(1)
  65. return request.param
  66. @pytest.fixture(scope='module', params=['ubuntu:22.04', 'debian:11'])
  67. def container_image(request):
  68. time.sleep(1)
  69. return request.param
  70. async def _load_runtime(
  71. temp_dir,
  72. box_class,
  73. run_as_devin: bool = True,
  74. enable_auto_lint: bool = False,
  75. container_image: str | None = None,
  76. ):
  77. sid = 'test'
  78. cli_session = 'main_test'
  79. # AgentSkills need to be initialized **before** Jupyter
  80. # otherwise Jupyter will not access the proper dependencies installed by AgentSkills
  81. plugins = [AgentSkillsRequirement(), JupyterRequirement()]
  82. config = AppConfig(
  83. workspace_base=temp_dir,
  84. workspace_mount_path=temp_dir,
  85. sandbox=SandboxConfig(use_host_network=True),
  86. )
  87. load_from_env(config, os.environ)
  88. config.run_as_devin = run_as_devin
  89. config.sandbox.enable_auto_lint = enable_auto_lint
  90. file_store = get_file_store(config.file_store, config.file_store_path)
  91. event_stream = EventStream(cli_session, file_store)
  92. if container_image is not None:
  93. config.sandbox.container_image = container_image
  94. if box_class == EventStreamRuntime:
  95. # NOTE: we will use the default container image specified in the config.sandbox
  96. # if it is an official od_runtime image.
  97. cur_container_image = config.sandbox.container_image
  98. if 'od_runtime' not in cur_container_image:
  99. cur_container_image = 'ubuntu:22.04'
  100. logger.warning(
  101. f'`{config.sandbox.container_image}` is not an od_runtime image. Will use `{cur_container_image}` as the container image for testing.'
  102. )
  103. runtime = EventStreamRuntime(
  104. config=config,
  105. event_stream=event_stream,
  106. sid=sid,
  107. plugins=plugins,
  108. # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime
  109. # Instead, we will pre-build a suite of container images with OD-runtime-cli installed.
  110. container_image=cur_container_image,
  111. )
  112. await runtime.ainit()
  113. elif box_class == ServerRuntime:
  114. runtime = ServerRuntime(
  115. config=config, event_stream=event_stream, sid=sid, plugins=plugins
  116. )
  117. await runtime.ainit()
  118. from opendevin.runtime.tools import (
  119. RuntimeTool, # deprecate this after ServerRuntime is deprecated
  120. )
  121. runtime.init_runtime_tools(
  122. [RuntimeTool.BROWSER],
  123. is_async=False,
  124. runtime_tools_config={},
  125. )
  126. else:
  127. raise ValueError(f'Invalid box class: {box_class}')
  128. await asyncio.sleep(1)
  129. return runtime
  130. @pytest.mark.asyncio
  131. async def test_env_vars_os_environ(temp_dir, box_class, run_as_devin):
  132. with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
  133. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  134. obs: CmdOutputObservation = await runtime.run_action(
  135. CmdRunAction(command='env')
  136. )
  137. print(obs)
  138. obs: CmdOutputObservation = await runtime.run_action(
  139. CmdRunAction(command='echo $FOOBAR')
  140. )
  141. print(obs)
  142. assert obs.exit_code == 0, 'The exit code should be 0.'
  143. assert (
  144. obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
  145. ), f'Output: [{obs.content}] for {box_class}'
  146. await runtime.close()
  147. await asyncio.sleep(1)
  148. @pytest.mark.asyncio
  149. async def test_env_vars_runtime_add_env_vars(temp_dir, box_class):
  150. runtime = await _load_runtime(temp_dir, box_class)
  151. await runtime.add_env_vars({'QUUX': 'abc"def'})
  152. obs: CmdOutputObservation = await runtime.run_action(
  153. CmdRunAction(command='echo $QUUX')
  154. )
  155. print(obs)
  156. assert obs.exit_code == 0, 'The exit code should be 0.'
  157. assert (
  158. obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
  159. ), f'Output: [{obs.content}] for {box_class}'
  160. await runtime.close()
  161. await asyncio.sleep(1)
  162. @pytest.mark.asyncio
  163. async def test_env_vars_runtime_add_empty_dict(temp_dir, box_class):
  164. runtime = await _load_runtime(temp_dir, box_class)
  165. prev_obs = await runtime.run_action(CmdRunAction(command='env'))
  166. assert prev_obs.exit_code == 0, 'The exit code should be 0.'
  167. print(prev_obs)
  168. await runtime.add_env_vars({})
  169. obs = await runtime.run_action(CmdRunAction(command='env'))
  170. assert obs.exit_code == 0, 'The exit code should be 0.'
  171. print(obs)
  172. assert (
  173. obs.content == prev_obs.content
  174. ), 'The env var content should be the same after adding an empty dict.'
  175. await runtime.close()
  176. await asyncio.sleep(1)
  177. @pytest.mark.asyncio
  178. async def test_env_vars_runtime_add_multiple_env_vars(temp_dir, box_class):
  179. runtime = await _load_runtime(temp_dir, box_class)
  180. await runtime.add_env_vars({'QUUX': 'abc"def', 'FOOBAR': 'xyz'})
  181. obs: CmdOutputObservation = await runtime.run_action(
  182. CmdRunAction(command='echo $QUUX $FOOBAR')
  183. )
  184. print(obs)
  185. assert obs.exit_code == 0, 'The exit code should be 0.'
  186. assert (
  187. obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
  188. ), f'Output: [{obs.content}] for {box_class}'
  189. await runtime.close()
  190. await asyncio.sleep(1)
  191. @pytest.mark.asyncio
  192. async def test_env_vars_runtime_add_env_vars_overwrite(temp_dir, box_class):
  193. with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
  194. runtime = await _load_runtime(temp_dir, box_class)
  195. await runtime.add_env_vars({'FOOBAR': 'xyz'})
  196. obs: CmdOutputObservation = await runtime.run_action(
  197. CmdRunAction(command='echo $FOOBAR')
  198. )
  199. print(obs)
  200. assert obs.exit_code == 0, 'The exit code should be 0.'
  201. assert (
  202. obs.content.strip().split('\r\n')[0].strip() == 'xyz'
  203. ), f'Output: [{obs.content}] for {box_class}'
  204. await runtime.close()
  205. await asyncio.sleep(1)
  206. @pytest.mark.asyncio
  207. async def test_bash_command_pexcept(temp_dir, box_class, run_as_devin):
  208. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  209. # We set env var PS1="\u@\h:\w $"
  210. # and construct the PEXCEPT prompt base on it.
  211. # When run `env`, bad implementation of CmdRunAction will be pexcepted by this
  212. # and failed to pexcept the right content, causing it fail to get error code.
  213. obs = await runtime.run_action(CmdRunAction(command='env'))
  214. # For example:
  215. # 02:16:13 - opendevin:DEBUG: client.py:78 - Executing command: env
  216. # 02:16:13 - opendevin:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
  217. # CONDA_EXE=/opendevin/miniforge3/bin/conda
  218. # [...]
  219. # LC_CTYPE=C.UTF-8
  220. # PS1=\u@\h:\w $
  221. # 02:16:13 - opendevin:DEBUG: client.py:89 - Executing command for exit code: env
  222. # 02:16:13 - opendevin:DEBUG: client.py:92 - Exit code Output:
  223. # CONDA_DEFAULT_ENV=base
  224. # As long as the exit code is 0, the test will pass.
  225. assert isinstance(
  226. obs, CmdOutputObservation
  227. ), 'The observation should be a CmdOutputObservation.'
  228. assert obs.exit_code == 0, 'The exit code should be 0.'
  229. await runtime.close()
  230. await asyncio.sleep(1)
  231. @pytest.mark.asyncio
  232. async def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_devin):
  233. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  234. # Test run command
  235. action_cmd = CmdRunAction(command='ls -l')
  236. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  237. obs = await runtime.run_action(action_cmd)
  238. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  239. assert isinstance(obs, CmdOutputObservation)
  240. assert obs.exit_code == 0
  241. assert 'total 0' in obs.content
  242. # Test run ipython
  243. test_code = "print('Hello, `World`!\\n')"
  244. action_ipython = IPythonRunCellAction(code=test_code)
  245. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  246. obs = await runtime.run_action(action_ipython)
  247. assert isinstance(obs, IPythonRunCellObservation)
  248. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  249. assert obs.content.strip() == 'Hello, `World`!'
  250. # Test read file (file should not exist)
  251. action_read = FileReadAction(path='hello.sh')
  252. logger.info(action_read, extra={'msg_type': 'ACTION'})
  253. obs = await runtime.run_action(action_read)
  254. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  255. assert isinstance(obs, ErrorObservation)
  256. assert 'File not found' in obs.content
  257. # Test write file
  258. action_write = FileWriteAction(content='echo "Hello, World!"', path='hello.sh')
  259. logger.info(action_write, extra={'msg_type': 'ACTION'})
  260. obs = await runtime.run_action(action_write)
  261. assert isinstance(obs, FileWriteObservation)
  262. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  263. assert obs.content == ''
  264. if box_class == ServerRuntime:
  265. assert obs.path == 'hello.sh'
  266. else:
  267. # event stream runtime will always use absolute path
  268. assert obs.path == '/workspace/hello.sh'
  269. # Test read file (file should exist)
  270. action_read = FileReadAction(path='hello.sh')
  271. logger.info(action_read, extra={'msg_type': 'ACTION'})
  272. obs = await runtime.run_action(action_read)
  273. assert isinstance(
  274. obs, FileReadObservation
  275. ), 'The observation should be a FileReadObservation.'
  276. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  277. assert obs.content == 'echo "Hello, World!"\n'
  278. if box_class == ServerRuntime:
  279. assert obs.path == 'hello.sh'
  280. else:
  281. assert obs.path == '/workspace/hello.sh'
  282. # clean up
  283. action = CmdRunAction(command='rm -rf hello.sh')
  284. logger.info(action, extra={'msg_type': 'ACTION'})
  285. obs = await runtime.run_action(action)
  286. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  287. assert obs.exit_code == 0
  288. await runtime.close()
  289. await asyncio.sleep(1)
  290. @pytest.mark.asyncio
  291. async def test_simple_browse(temp_dir, box_class, run_as_devin):
  292. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  293. # Test browse
  294. action_cmd = CmdRunAction(
  295. command=f'{PY3_FOR_TESTING} -m http.server 8000 > server.log 2>&1 &'
  296. )
  297. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  298. obs = await runtime.run_action(action_cmd)
  299. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  300. assert isinstance(obs, CmdOutputObservation)
  301. assert obs.exit_code == 0
  302. assert '[1]' in obs.content
  303. action_cmd = CmdRunAction(command='sleep 5 && cat server.log')
  304. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  305. obs = await runtime.run_action(action_cmd)
  306. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  307. assert obs.exit_code == 0
  308. action_browse = BrowseURLAction(url='http://localhost:8000')
  309. logger.info(action_browse, extra={'msg_type': 'ACTION'})
  310. obs = await runtime.run_action(action_browse)
  311. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  312. assert isinstance(obs, BrowserOutputObservation)
  313. assert 'http://localhost:8000' in obs.url
  314. assert obs.status_code == 200
  315. assert not obs.error
  316. assert obs.open_pages_urls == ['http://localhost:8000/']
  317. assert obs.active_page_index == 0
  318. assert obs.last_browser_action == 'goto("http://localhost:8000")'
  319. assert obs.last_browser_action_error == ''
  320. assert 'Directory listing for /' in obs.content
  321. assert 'server.log' in obs.content
  322. # clean up
  323. action = CmdRunAction(command='rm -rf server.log')
  324. logger.info(action, extra={'msg_type': 'ACTION'})
  325. obs = await runtime.run_action(action)
  326. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  327. assert obs.exit_code == 0
  328. await runtime.close()
  329. await asyncio.sleep(1)
  330. @pytest.mark.asyncio
  331. async def test_single_multiline_command(temp_dir, box_class):
  332. runtime = await _load_runtime(temp_dir, box_class)
  333. action = CmdRunAction(command='echo \\\n -e "foo"')
  334. logger.info(action, extra={'msg_type': 'ACTION'})
  335. obs = await runtime.run_action(action)
  336. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  337. assert obs.exit_code == 0, 'The exit code should be 0.'
  338. assert 'foo' in obs.content
  339. await runtime.close()
  340. await asyncio.sleep(1)
  341. @pytest.mark.asyncio
  342. async def test_multiline_echo(temp_dir, box_class):
  343. runtime = await _load_runtime(temp_dir, box_class)
  344. action = CmdRunAction(command='echo -e "hello\nworld"')
  345. logger.info(action, extra={'msg_type': 'ACTION'})
  346. obs = await runtime.run_action(action)
  347. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  348. assert obs.exit_code == 0, 'The exit code should be 0.'
  349. assert 'hello\r\nworld' in obs.content
  350. await runtime.close()
  351. await asyncio.sleep(1)
  352. @pytest.mark.asyncio
  353. async def test_runtime_whitespace(temp_dir, box_class):
  354. runtime = await _load_runtime(temp_dir, box_class)
  355. action = CmdRunAction(command='echo -e "\\n\\n\\n"')
  356. logger.info(action, extra={'msg_type': 'ACTION'})
  357. obs = await runtime.run_action(action)
  358. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  359. assert obs.exit_code == 0, 'The exit code should be 0.'
  360. assert '\r\n\r\n\r\n' in obs.content
  361. await runtime.close()
  362. await asyncio.sleep(1)
  363. @pytest.mark.asyncio
  364. async def test_multiple_multiline_commands(temp_dir, box_class, run_as_devin):
  365. cmds = [
  366. 'ls -l',
  367. 'echo -e "hello\nworld"',
  368. """
  369. echo -e "hello it\\'s me"
  370. """.strip(),
  371. """
  372. echo \\
  373. -e 'hello' \\
  374. -v
  375. """.strip(),
  376. """
  377. echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
  378. """.strip(),
  379. """
  380. echo -e 'hello
  381. world
  382. are
  383. you\\n
  384. there?'
  385. """.strip(),
  386. """
  387. echo -e 'hello
  388. world "
  389. '
  390. """.strip(),
  391. ]
  392. joined_cmds = '\n'.join(cmds)
  393. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  394. action = CmdRunAction(command=joined_cmds)
  395. logger.info(action, extra={'msg_type': 'ACTION'})
  396. obs = await runtime.run_action(action)
  397. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  398. assert isinstance(obs, CmdOutputObservation)
  399. assert obs.exit_code == 0, 'The exit code should be 0.'
  400. assert 'total 0' in obs.content
  401. assert 'hello\r\nworld' in obs.content
  402. assert "hello it\\'s me" in obs.content
  403. assert 'hello -v' in obs.content
  404. assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
  405. assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
  406. assert 'hello\r\nworld "\r\n' in obs.content
  407. await runtime.close()
  408. await asyncio.sleep(1)
  409. @pytest.mark.asyncio
  410. async def test_no_ps2_in_output(temp_dir, box_class, run_as_devin):
  411. """Test that the PS2 sign is not added to the output of a multiline command."""
  412. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  413. action = CmdRunAction(command='echo -e "hello\nworld"')
  414. logger.info(action, extra={'msg_type': 'ACTION'})
  415. obs = await runtime.run_action(action)
  416. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  417. if box_class == ServerRuntime:
  418. # the extra PS2 '>' is NOT handled by the ServerRuntime
  419. assert 'hello\r\nworld' in obs.content
  420. assert '>' in obs.content
  421. assert obs.content.count('>') == 1
  422. else:
  423. assert 'hello\r\nworld' in obs.content
  424. assert '>' not in obs.content
  425. await runtime.close()
  426. await asyncio.sleep(1)
  427. @pytest.mark.asyncio
  428. async def test_multiline_command_loop(temp_dir, box_class):
  429. # https://github.com/OpenDevin/OpenDevin/issues/3143
  430. runtime = await _load_runtime(temp_dir, box_class)
  431. init_cmd = """
  432. mkdir -p _modules && \
  433. for month in {01..04}; do
  434. for day in {01..05}; do
  435. touch "_modules/2024-${month}-${day}-sample.md"
  436. done
  437. done
  438. echo "created files"
  439. """
  440. action = CmdRunAction(command=init_cmd)
  441. logger.info(action, extra={'msg_type': 'ACTION'})
  442. obs = await runtime.run_action(action)
  443. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  444. assert isinstance(obs, CmdOutputObservation)
  445. assert obs.exit_code == 0, 'The exit code should be 0.'
  446. assert 'created files' in obs.content
  447. follow_up_cmd = """
  448. for file in _modules/*.md; do
  449. new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
  450. mv "$file" "$new_date"
  451. done
  452. echo "success"
  453. """
  454. action = CmdRunAction(command=follow_up_cmd)
  455. logger.info(action, extra={'msg_type': 'ACTION'})
  456. obs = await runtime.run_action(action)
  457. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  458. assert isinstance(obs, CmdOutputObservation)
  459. assert obs.exit_code == 0, 'The exit code should be 0.'
  460. assert 'success' in obs.content
  461. await runtime.close()
  462. await asyncio.sleep(1)
  463. @pytest.mark.asyncio
  464. async def test_cmd_run(temp_dir, box_class, run_as_devin):
  465. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  466. action = CmdRunAction(command='ls -l')
  467. logger.info(action, extra={'msg_type': 'ACTION'})
  468. obs = await runtime.run_action(action)
  469. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  470. assert isinstance(obs, CmdOutputObservation)
  471. assert obs.exit_code == 0
  472. assert 'total 0' in obs.content
  473. action = CmdRunAction(command='mkdir test')
  474. logger.info(action, extra={'msg_type': 'ACTION'})
  475. obs = await runtime.run_action(action)
  476. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  477. assert isinstance(obs, CmdOutputObservation)
  478. assert obs.exit_code == 0
  479. action = CmdRunAction(command='ls -l')
  480. logger.info(action, extra={'msg_type': 'ACTION'})
  481. obs = await runtime.run_action(action)
  482. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  483. assert isinstance(obs, CmdOutputObservation)
  484. assert obs.exit_code == 0
  485. if run_as_devin:
  486. assert 'opendevin' in obs.content
  487. else:
  488. assert 'root' in obs.content
  489. assert 'test' in obs.content
  490. action = CmdRunAction(command='touch test/foo.txt')
  491. logger.info(action, extra={'msg_type': 'ACTION'})
  492. obs = await runtime.run_action(action)
  493. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  494. assert isinstance(obs, CmdOutputObservation)
  495. assert obs.exit_code == 0
  496. action = CmdRunAction(command='ls -l test')
  497. logger.info(action, extra={'msg_type': 'ACTION'})
  498. obs = await runtime.run_action(action)
  499. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  500. assert isinstance(obs, CmdOutputObservation)
  501. assert obs.exit_code == 0
  502. assert 'foo.txt' in obs.content
  503. # clean up: this is needed, since CI will not be
  504. # run as root, and this test may leave a file
  505. # owned by root
  506. action = CmdRunAction(command='rm -rf test')
  507. logger.info(action, extra={'msg_type': 'ACTION'})
  508. obs = await runtime.run_action(action)
  509. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  510. assert isinstance(obs, CmdOutputObservation)
  511. assert obs.exit_code == 0
  512. await runtime.close()
  513. await asyncio.sleep(1)
  514. @pytest.mark.asyncio
  515. async def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_devin):
  516. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  517. action = CmdRunAction(command='cd ~ && pwd')
  518. logger.info(action, extra={'msg_type': 'ACTION'})
  519. obs = await runtime.run_action(action)
  520. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  521. assert isinstance(obs, CmdOutputObservation)
  522. assert obs.exit_code == 0
  523. if run_as_devin:
  524. assert '/home/opendevin' in obs.content
  525. else:
  526. assert '/root' in obs.content
  527. await runtime.close()
  528. await asyncio.sleep(1)
  529. @pytest.mark.asyncio
  530. async def test_multi_cmd_run_in_single_line(temp_dir, box_class):
  531. runtime = await _load_runtime(temp_dir, box_class)
  532. action = CmdRunAction(command='pwd && ls -l')
  533. logger.info(action, extra={'msg_type': 'ACTION'})
  534. obs = await runtime.run_action(action)
  535. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  536. assert isinstance(obs, CmdOutputObservation)
  537. assert obs.exit_code == 0
  538. assert '/workspace' in obs.content
  539. assert 'total 0' in obs.content
  540. await runtime.close()
  541. await asyncio.sleep(1)
  542. @pytest.mark.asyncio
  543. async def test_stateful_cmd(temp_dir, box_class):
  544. runtime = await _load_runtime(temp_dir, box_class)
  545. action = CmdRunAction(command='mkdir test')
  546. logger.info(action, extra={'msg_type': 'ACTION'})
  547. obs = await runtime.run_action(action)
  548. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  549. assert isinstance(obs, CmdOutputObservation)
  550. assert obs.exit_code == 0, 'The exit code should be 0.'
  551. action = CmdRunAction(command='cd test')
  552. logger.info(action, extra={'msg_type': 'ACTION'})
  553. obs = await runtime.run_action(action)
  554. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  555. assert isinstance(obs, CmdOutputObservation)
  556. assert obs.exit_code == 0, 'The exit code should be 0.'
  557. action = CmdRunAction(command='pwd')
  558. logger.info(action, extra={'msg_type': 'ACTION'})
  559. obs = await runtime.run_action(action)
  560. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  561. assert isinstance(obs, CmdOutputObservation)
  562. assert obs.exit_code == 0, 'The exit code should be 0.'
  563. assert '/workspace/test' in obs.content
  564. await runtime.close()
  565. await asyncio.sleep(1)
  566. @pytest.mark.asyncio
  567. async def test_failed_cmd(temp_dir, box_class):
  568. runtime = await _load_runtime(temp_dir, box_class)
  569. action = CmdRunAction(command='non_existing_command')
  570. logger.info(action, extra={'msg_type': 'ACTION'})
  571. obs = await runtime.run_action(action)
  572. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  573. assert isinstance(obs, CmdOutputObservation)
  574. assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
  575. await runtime.close()
  576. await asyncio.sleep(1)
  577. @pytest.mark.asyncio
  578. async def test_ipython_multi_user(temp_dir, box_class, run_as_devin):
  579. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  580. # Test run ipython
  581. # get username
  582. test_code = "import os; print(os.environ['USER'])"
  583. action_ipython = IPythonRunCellAction(code=test_code)
  584. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  585. obs = await runtime.run_action(action_ipython)
  586. assert isinstance(obs, IPythonRunCellObservation)
  587. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  588. if run_as_devin:
  589. assert 'opendevin' in obs.content
  590. else:
  591. assert 'root' in obs.content
  592. # print pwd
  593. test_code = 'import os; print(os.getcwd())'
  594. action_ipython = IPythonRunCellAction(code=test_code)
  595. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  596. obs = await runtime.run_action(action_ipython)
  597. assert isinstance(obs, IPythonRunCellObservation)
  598. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  599. assert obs.content.strip() == '/workspace'
  600. # write a file
  601. test_code = "with open('test.txt', 'w') as f: f.write('Hello, world!')"
  602. action_ipython = IPythonRunCellAction(code=test_code)
  603. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  604. obs = await runtime.run_action(action_ipython)
  605. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  606. assert isinstance(obs, IPythonRunCellObservation)
  607. assert obs.content.strip() == '[Code executed successfully with no output]'
  608. # check file owner via bash
  609. action = CmdRunAction(command='ls -alh test.txt')
  610. logger.info(action, extra={'msg_type': 'ACTION'})
  611. obs = await runtime.run_action(action)
  612. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  613. assert obs.exit_code == 0
  614. if run_as_devin:
  615. # -rw-r--r-- 1 opendevin root 13 Jul 28 03:53 test.txt
  616. assert 'opendevin' in obs.content.split('\r\n')[0]
  617. assert 'root' in obs.content.split('\r\n')[0]
  618. else:
  619. # -rw-r--r-- 1 root root 13 Jul 28 03:53 test.txt
  620. assert 'root' in obs.content.split('\r\n')[0]
  621. # clean up
  622. action = CmdRunAction(command='rm -rf test')
  623. logger.info(action, extra={'msg_type': 'ACTION'})
  624. obs = await runtime.run_action(action)
  625. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  626. assert obs.exit_code == 0
  627. await runtime.close()
  628. await asyncio.sleep(1)
  629. @pytest.mark.asyncio
  630. async def test_ipython_simple(temp_dir, box_class):
  631. runtime = await _load_runtime(temp_dir, box_class)
  632. # Test run ipython
  633. # get username
  634. test_code = 'print(1)'
  635. action_ipython = IPythonRunCellAction(code=test_code)
  636. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  637. obs = await runtime.run_action(action_ipython)
  638. assert isinstance(obs, IPythonRunCellObservation)
  639. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  640. assert obs.content.strip() == '1'
  641. async def _test_ipython_agentskills_fileop_pwd_impl(
  642. runtime: ServerRuntime | EventStreamRuntime, enable_auto_lint: bool
  643. ):
  644. # remove everything in /workspace
  645. action = CmdRunAction(command='rm -rf /workspace/*')
  646. logger.info(action, extra={'msg_type': 'ACTION'})
  647. obs = await runtime.run_action(action)
  648. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  649. assert obs.exit_code == 0
  650. action = CmdRunAction(command='mkdir test')
  651. logger.info(action, extra={'msg_type': 'ACTION'})
  652. obs = await runtime.run_action(action)
  653. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  654. assert isinstance(obs, CmdOutputObservation)
  655. assert obs.exit_code == 0
  656. action = IPythonRunCellAction(code="create_file('hello.py')")
  657. logger.info(action, extra={'msg_type': 'ACTION'})
  658. obs = await runtime.run_action(action)
  659. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  660. assert isinstance(obs, IPythonRunCellObservation)
  661. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  662. '[File: /workspace/hello.py (1 lines total)]\n'
  663. '(this is the beginning of the file)\n'
  664. '1|\n'
  665. '(this is the end of the file)\n'
  666. '[File hello.py created.]\n'
  667. ).strip().split('\n')
  668. action = CmdRunAction(command='cd test')
  669. logger.info(action, extra={'msg_type': 'ACTION'})
  670. obs = await runtime.run_action(action)
  671. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  672. assert isinstance(obs, CmdOutputObservation)
  673. assert obs.exit_code == 0
  674. # This should create a file in the current working directory
  675. # i.e., /workspace/test/hello.py instead of /workspace/hello.py
  676. action = IPythonRunCellAction(code="create_file('hello.py')")
  677. logger.info(action, extra={'msg_type': 'ACTION'})
  678. obs = await runtime.run_action(action)
  679. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  680. assert isinstance(obs, IPythonRunCellObservation)
  681. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  682. '[File: /workspace/test/hello.py (1 lines total)]\n'
  683. '(this is the beginning of the file)\n'
  684. '1|\n'
  685. '(this is the end of the file)\n'
  686. '[File hello.py created.]\n'
  687. ).strip().split('\n')
  688. if enable_auto_lint:
  689. # edit file, but make a mistake in indentation
  690. action = IPythonRunCellAction(
  691. code="insert_content_at_line('hello.py', 1, ' print(\"hello world\")')"
  692. )
  693. logger.info(action, extra={'msg_type': 'ACTION'})
  694. obs = await runtime.run_action(action)
  695. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  696. assert isinstance(obs, IPythonRunCellObservation)
  697. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  698. """
  699. [Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
  700. ERRORS:
  701. /workspace/test/hello.py:1:3: E999 IndentationError: unexpected indent
  702. [This is how your edit would have looked if applied]
  703. -------------------------------------------------
  704. (this is the beginning of the file)
  705. 1| print("hello world")
  706. (this is the end of the file)
  707. -------------------------------------------------
  708. [This is the original code before your edit]
  709. -------------------------------------------------
  710. (this is the beginning of the file)
  711. 1|
  712. (this is the end of the file)
  713. -------------------------------------------------
  714. Your changes have NOT been applied. Please fix your edit command and try again.
  715. You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
  716. DO NOT re-run the same failed edit command. Running it again will lead to the same error.
  717. """
  718. ).strip().split('\n')
  719. # edit file with correct indentation
  720. action = IPythonRunCellAction(
  721. code="insert_content_at_line('hello.py', 1, 'print(\"hello world\")')"
  722. )
  723. logger.info(action, extra={'msg_type': 'ACTION'})
  724. obs = await runtime.run_action(action)
  725. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  726. assert isinstance(obs, IPythonRunCellObservation)
  727. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  728. """
  729. [File: /workspace/test/hello.py (1 lines total after edit)]
  730. (this is the beginning of the file)
  731. 1|print("hello world")
  732. (this is the end of the file)
  733. [File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
  734. """
  735. ).strip().split('\n')
  736. action = CmdRunAction(command='rm -rf /workspace/*')
  737. logger.info(action, extra={'msg_type': 'ACTION'})
  738. obs = await runtime.run_action(action)
  739. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  740. assert obs.exit_code == 0
  741. @pytest.mark.asyncio
  742. async def test_ipython_agentskills_fileop_pwd(temp_dir, box_class, enable_auto_lint):
  743. """Make sure that cd in bash also update the current working directory in ipython."""
  744. runtime = await _load_runtime(
  745. temp_dir, box_class, enable_auto_lint=enable_auto_lint
  746. )
  747. await _test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
  748. await runtime.close()
  749. await asyncio.sleep(1)
  750. @pytest.mark.skipif(
  751. TEST_RUNTIME.lower() == 'eventstream',
  752. reason='Skip this if we want to test EventStreamRuntime',
  753. )
  754. @pytest.mark.skipif(
  755. os.environ.get('TEST_IN_CI', 'false').lower() == 'true',
  756. # FIXME: There's some weird issue with the CI environment.
  757. reason='Skip this if in CI.',
  758. )
  759. @pytest.mark.asyncio
  760. async def test_ipython_agentskills_fileop_pwd_agnostic_sandbox(
  761. temp_dir, enable_auto_lint, container_image
  762. ):
  763. """Make sure that cd in bash also update the current working directory in ipython."""
  764. runtime = await _load_runtime(
  765. temp_dir,
  766. # NOTE: we only test for ServerRuntime, since EventStreamRuntime is image agnostic by design.
  767. ServerRuntime,
  768. enable_auto_lint=enable_auto_lint,
  769. container_image=container_image,
  770. )
  771. await _test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
  772. await runtime.close()
  773. await asyncio.sleep(1)