test_runtime.py 49 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640641642643644645646647648649650651652653654655656657658659660661662663664665666667668669670671672673674675676677678679680681682683684685686687688689690691692693694695696697698699700701702703704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857858859860861862863864865866867868869870871872873874875876877878879880881882883884885886887888889890891892893894895896897898899900901902903904905906907908909910911912913914915916917918919920921922923924925926927928929930931932933934935936937938939940941942943944945946947948949950951952953954955956957958959960961962963964965966967968969970971972973974975976977978979980981982983984985986987988989990991992993994995996997998999100010011002100310041005100610071008100910101011101210131014101510161017101810191020102110221023102410251026102710281029103010311032103310341035103610371038103910401041104210431044104510461047104810491050105110521053105410551056105710581059106010611062106310641065106610671068106910701071107210731074107510761077107810791080108110821083108410851086108710881089109010911092109310941095109610971098109911001101110211031104110511061107110811091110111111121113111411151116111711181119112011211122112311241125112611271128112911301131113211331134113511361137113811391140114111421143114411451146114711481149115011511152115311541155115611571158115911601161116211631164116511661167116811691170117111721173117411751176117711781179118011811182118311841185118611871188118911901191119211931194119511961197119811991200120112021203120412051206120712081209121012111212121312141215121612171218121912201221122212231224122512261227122812291230123112321233123412351236123712381239124012411242124312441245124612471248124912501251125212531254125512561257125812591260126112621263126412651266126712681269127012711272127312741275127612771278127912801281128212831284128512861287128812891290129112921293129412951296129712981299130013011302130313041305130613071308130913101311131213131314131513161317131813191320132113221323132413251326132713281329133013311332133313341335133613371338133913401341134213431344134513461347134813491350135113521353135413551356135713581359136013611362136313641365136613671368136913701371137213731374137513761377137813791380138113821383138413851386138713881389139013911392
  1. """Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
  2. import asyncio
  3. import json
  4. import os
  5. import tempfile
  6. import time
  7. from unittest.mock import patch
  8. import pytest
  9. from pytest import TempPathFactory
  10. from opendevin.core.config import AppConfig, SandboxConfig, load_from_env
  11. from opendevin.core.logger import opendevin_logger as logger
  12. from opendevin.events import EventStream
  13. from opendevin.events.action import (
  14. BrowseInteractiveAction,
  15. BrowseURLAction,
  16. CmdRunAction,
  17. FileReadAction,
  18. FileWriteAction,
  19. IPythonRunCellAction,
  20. )
  21. from opendevin.events.observation import (
  22. BrowserOutputObservation,
  23. CmdOutputObservation,
  24. ErrorObservation,
  25. FileReadObservation,
  26. FileWriteObservation,
  27. IPythonRunCellObservation,
  28. )
  29. from opendevin.runtime.client.runtime import EventStreamRuntime
  30. from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
  31. from opendevin.runtime.runtime import Runtime
  32. from opendevin.storage import get_file_store
  33. @pytest.fixture(autouse=True)
  34. def print_method_name(request):
  35. print('\n########################################################################')
  36. print(f'Running test: {request.node.name}')
  37. print('########################################################################')
  38. yield
  39. @pytest.fixture
  40. def temp_dir(tmp_path_factory: TempPathFactory) -> str:
  41. return str(tmp_path_factory.mktemp('test_runtime'))
  42. TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'eventstream')
  43. PY3_FOR_TESTING = '/opendevin/miniforge3/bin/mamba run -n base python3'
  44. # Depending on TEST_RUNTIME, feed the appropriate box class(es) to the test.
  45. def get_box_classes():
  46. runtime = TEST_RUNTIME
  47. if runtime.lower() == 'eventstream':
  48. return [EventStreamRuntime]
  49. else:
  50. raise ValueError(f'Invalid runtime: {runtime}')
  51. # This assures that all tests run together per runtime, not alternating between them,
  52. # which cause errors (especially outside GitHub actions).
  53. @pytest.fixture(scope='module', params=get_box_classes())
  54. def box_class(request):
  55. time.sleep(2)
  56. return request.param
  57. # TODO: We will change this to `run_as_user` when `ServerRuntime` is deprecated.
  58. # since `EventStreamRuntime` supports running as an arbitrary user.
  59. @pytest.fixture(scope='module', params=[True, False])
  60. def run_as_devin(request):
  61. time.sleep(1)
  62. return request.param
  63. @pytest.fixture(scope='module', params=[True, False])
  64. def enable_auto_lint(request):
  65. time.sleep(1)
  66. return request.param
  67. @pytest.fixture(scope='module')
  68. def container_image(request):
  69. time.sleep(1)
  70. env_image = os.environ.get('SANDBOX_CONTAINER_IMAGE')
  71. if env_image:
  72. return [env_image]
  73. return [
  74. 'nikolaik/python-nodejs:python3.11-nodejs22',
  75. 'python:3.11-bookworm',
  76. 'node:22-bookworm',
  77. ]
  78. async def _load_runtime(
  79. temp_dir,
  80. box_class,
  81. run_as_devin: bool = True,
  82. enable_auto_lint: bool = False,
  83. container_image: str | None = None,
  84. browsergym_eval_env: str | None = None,
  85. ) -> Runtime:
  86. sid = 'test'
  87. cli_session = 'main_test'
  88. # AgentSkills need to be initialized **before** Jupyter
  89. # otherwise Jupyter will not access the proper dependencies installed by AgentSkills
  90. plugins = [AgentSkillsRequirement(), JupyterRequirement()]
  91. config = AppConfig(
  92. workspace_base=temp_dir,
  93. workspace_mount_path=temp_dir,
  94. sandbox=SandboxConfig(
  95. use_host_network=True,
  96. browsergym_eval_env=browsergym_eval_env,
  97. ),
  98. )
  99. load_from_env(config, os.environ)
  100. config.run_as_devin = run_as_devin
  101. config.sandbox.enable_auto_lint = enable_auto_lint
  102. file_store = get_file_store(config.file_store, config.file_store_path)
  103. event_stream = EventStream(cli_session, file_store)
  104. if container_image is not None:
  105. config.sandbox.container_image = container_image
  106. runtime = box_class(
  107. config=config,
  108. event_stream=event_stream,
  109. sid=sid,
  110. plugins=plugins,
  111. container_image=container_image,
  112. )
  113. await runtime.ainit()
  114. await asyncio.sleep(1)
  115. return runtime
  116. @pytest.mark.asyncio
  117. async def test_env_vars_os_environ(temp_dir, box_class, run_as_devin):
  118. with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
  119. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  120. obs: CmdOutputObservation = await runtime.run_action(
  121. CmdRunAction(command='env')
  122. )
  123. print(obs)
  124. obs: CmdOutputObservation = await runtime.run_action(
  125. CmdRunAction(command='echo $FOOBAR')
  126. )
  127. print(obs)
  128. assert obs.exit_code == 0, 'The exit code should be 0.'
  129. assert (
  130. obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
  131. ), f'Output: [{obs.content}] for {box_class}'
  132. await runtime.close()
  133. await asyncio.sleep(1)
  134. @pytest.mark.asyncio
  135. async def test_env_vars_runtime_add_env_vars(temp_dir, box_class):
  136. runtime = await _load_runtime(temp_dir, box_class)
  137. await runtime.add_env_vars({'QUUX': 'abc"def'})
  138. obs: CmdOutputObservation = await runtime.run_action(
  139. CmdRunAction(command='echo $QUUX')
  140. )
  141. print(obs)
  142. assert obs.exit_code == 0, 'The exit code should be 0.'
  143. assert (
  144. obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
  145. ), f'Output: [{obs.content}] for {box_class}'
  146. await runtime.close()
  147. await asyncio.sleep(1)
  148. @pytest.mark.asyncio
  149. async def test_env_vars_runtime_add_empty_dict(temp_dir, box_class):
  150. runtime = await _load_runtime(temp_dir, box_class)
  151. prev_obs = await runtime.run_action(CmdRunAction(command='env'))
  152. assert prev_obs.exit_code == 0, 'The exit code should be 0.'
  153. print(prev_obs)
  154. await runtime.add_env_vars({})
  155. obs = await runtime.run_action(CmdRunAction(command='env'))
  156. assert obs.exit_code == 0, 'The exit code should be 0.'
  157. print(obs)
  158. assert (
  159. obs.content == prev_obs.content
  160. ), 'The env var content should be the same after adding an empty dict.'
  161. await runtime.close()
  162. await asyncio.sleep(1)
  163. @pytest.mark.asyncio
  164. async def test_env_vars_runtime_add_multiple_env_vars(temp_dir, box_class):
  165. runtime = await _load_runtime(temp_dir, box_class)
  166. await runtime.add_env_vars({'QUUX': 'abc"def', 'FOOBAR': 'xyz'})
  167. obs: CmdOutputObservation = await runtime.run_action(
  168. CmdRunAction(command='echo $QUUX $FOOBAR')
  169. )
  170. print(obs)
  171. assert obs.exit_code == 0, 'The exit code should be 0.'
  172. assert (
  173. obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
  174. ), f'Output: [{obs.content}] for {box_class}'
  175. await runtime.close()
  176. await asyncio.sleep(1)
  177. @pytest.mark.asyncio
  178. async def test_env_vars_runtime_add_env_vars_overwrite(temp_dir, box_class):
  179. with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
  180. runtime = await _load_runtime(temp_dir, box_class)
  181. await runtime.add_env_vars({'FOOBAR': 'xyz'})
  182. obs: CmdOutputObservation = await runtime.run_action(
  183. CmdRunAction(command='echo $FOOBAR')
  184. )
  185. print(obs)
  186. assert obs.exit_code == 0, 'The exit code should be 0.'
  187. assert (
  188. obs.content.strip().split('\r\n')[0].strip() == 'xyz'
  189. ), f'Output: [{obs.content}] for {box_class}'
  190. await runtime.close()
  191. await asyncio.sleep(1)
  192. @pytest.mark.asyncio
  193. async def test_bash_command_pexcept(temp_dir, box_class, run_as_devin):
  194. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  195. # We set env var PS1="\u@\h:\w $"
  196. # and construct the PEXCEPT prompt base on it.
  197. # When run `env`, bad implementation of CmdRunAction will be pexcepted by this
  198. # and failed to pexcept the right content, causing it fail to get error code.
  199. obs = await runtime.run_action(CmdRunAction(command='env'))
  200. # For example:
  201. # 02:16:13 - opendevin:DEBUG: client.py:78 - Executing command: env
  202. # 02:16:13 - opendevin:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
  203. # CONDA_EXE=/opendevin/miniforge3/bin/conda
  204. # [...]
  205. # LC_CTYPE=C.UTF-8
  206. # PS1=\u@\h:\w $
  207. # 02:16:13 - opendevin:DEBUG: client.py:89 - Executing command for exit code: env
  208. # 02:16:13 - opendevin:DEBUG: client.py:92 - Exit code Output:
  209. # CONDA_DEFAULT_ENV=base
  210. # As long as the exit code is 0, the test will pass.
  211. assert isinstance(
  212. obs, CmdOutputObservation
  213. ), 'The observation should be a CmdOutputObservation.'
  214. assert obs.exit_code == 0, 'The exit code should be 0.'
  215. await runtime.close()
  216. await asyncio.sleep(1)
  217. @pytest.mark.asyncio
  218. async def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_devin):
  219. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  220. # Test run command
  221. action_cmd = CmdRunAction(command='ls -l')
  222. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  223. obs = await runtime.run_action(action_cmd)
  224. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  225. assert isinstance(obs, CmdOutputObservation)
  226. assert obs.exit_code == 0
  227. assert 'total 0' in obs.content
  228. # Test run ipython
  229. test_code = "print('Hello, `World`!\\n')"
  230. action_ipython = IPythonRunCellAction(code=test_code)
  231. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  232. obs = await runtime.run_action(action_ipython)
  233. assert isinstance(obs, IPythonRunCellObservation)
  234. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  235. assert (
  236. obs.content.strip()
  237. == 'Hello, `World`!\n[Jupyter current working directory: /workspace]'
  238. )
  239. # Test read file (file should not exist)
  240. action_read = FileReadAction(path='hello.sh')
  241. logger.info(action_read, extra={'msg_type': 'ACTION'})
  242. obs = await runtime.run_action(action_read)
  243. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  244. assert isinstance(obs, ErrorObservation)
  245. assert 'File not found' in obs.content
  246. # Test write file
  247. action_write = FileWriteAction(content='echo "Hello, World!"', path='hello.sh')
  248. logger.info(action_write, extra={'msg_type': 'ACTION'})
  249. obs = await runtime.run_action(action_write)
  250. assert isinstance(obs, FileWriteObservation)
  251. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  252. assert obs.content == ''
  253. # event stream runtime will always use absolute path
  254. assert obs.path == '/workspace/hello.sh'
  255. # Test read file (file should exist)
  256. action_read = FileReadAction(path='hello.sh')
  257. logger.info(action_read, extra={'msg_type': 'ACTION'})
  258. obs = await runtime.run_action(action_read)
  259. assert isinstance(
  260. obs, FileReadObservation
  261. ), 'The observation should be a FileReadObservation.'
  262. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  263. assert obs.content == 'echo "Hello, World!"\n'
  264. assert obs.path == '/workspace/hello.sh'
  265. # clean up
  266. action = CmdRunAction(command='rm -rf hello.sh')
  267. logger.info(action, extra={'msg_type': 'ACTION'})
  268. obs = await runtime.run_action(action)
  269. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  270. assert obs.exit_code == 0
  271. await runtime.close()
  272. await asyncio.sleep(1)
  273. @pytest.mark.asyncio
  274. async def test_simple_browse(temp_dir, box_class, run_as_devin):
  275. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  276. # Test browse
  277. action_cmd = CmdRunAction(
  278. command=f'{PY3_FOR_TESTING} -m http.server 8000 > server.log 2>&1 &'
  279. )
  280. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  281. obs = await runtime.run_action(action_cmd)
  282. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  283. assert isinstance(obs, CmdOutputObservation)
  284. assert obs.exit_code == 0
  285. assert '[1]' in obs.content
  286. action_cmd = CmdRunAction(command='sleep 5 && cat server.log')
  287. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  288. obs = await runtime.run_action(action_cmd)
  289. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  290. assert obs.exit_code == 0
  291. action_browse = BrowseURLAction(url='http://localhost:8000')
  292. logger.info(action_browse, extra={'msg_type': 'ACTION'})
  293. obs = await runtime.run_action(action_browse)
  294. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  295. assert isinstance(obs, BrowserOutputObservation)
  296. assert 'http://localhost:8000' in obs.url
  297. assert not obs.error
  298. assert obs.open_pages_urls == ['http://localhost:8000/']
  299. assert obs.active_page_index == 0
  300. assert obs.last_browser_action == 'goto("http://localhost:8000")'
  301. assert obs.last_browser_action_error == ''
  302. assert 'Directory listing for /' in obs.content
  303. assert 'server.log' in obs.content
  304. # clean up
  305. action = CmdRunAction(command='rm -rf server.log')
  306. logger.info(action, extra={'msg_type': 'ACTION'})
  307. obs = await runtime.run_action(action)
  308. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  309. assert obs.exit_code == 0
  310. await runtime.close()
  311. await asyncio.sleep(1)
  312. @pytest.mark.asyncio
  313. async def test_browsergym_eval_env(temp_dir):
  314. runtime = await _load_runtime(
  315. temp_dir,
  316. # only supported in event stream runtime
  317. box_class=EventStreamRuntime,
  318. run_as_devin=False, # need root permission to access file
  319. container_image='xingyaoww/od-eval-miniwob:v1.0',
  320. browsergym_eval_env='browsergym/miniwob.choose-list',
  321. )
  322. from opendevin.runtime.browser.browser_env import (
  323. BROWSER_EVAL_GET_GOAL_ACTION,
  324. BROWSER_EVAL_GET_REWARDS_ACTION,
  325. )
  326. # Test browse
  327. action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
  328. logger.info(action, extra={'msg_type': 'ACTION'})
  329. obs = await runtime.run_action(action)
  330. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  331. assert isinstance(obs, BrowserOutputObservation)
  332. assert not obs.error
  333. assert 'Select' in obs.content
  334. assert 'from the list and click Submit' in obs.content
  335. # Make sure the browser can produce observation in eva[l
  336. action = BrowseInteractiveAction(browser_actions='noop()')
  337. logger.info(action, extra={'msg_type': 'ACTION'})
  338. obs = await runtime.run_action(action)
  339. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  340. assert (
  341. obs.url.strip()
  342. == 'file:///miniwob-plusplus/miniwob/html/miniwob/choose-list.html'
  343. )
  344. # Make sure the rewards are working
  345. action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
  346. logger.info(action, extra={'msg_type': 'ACTION'})
  347. obs = await runtime.run_action(action)
  348. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  349. assert json.loads(obs.content) == [0.0]
  350. await runtime.close()
  351. await asyncio.sleep(1)
  352. @pytest.mark.asyncio
  353. async def test_single_multiline_command(temp_dir, box_class):
  354. runtime = await _load_runtime(temp_dir, box_class)
  355. action = CmdRunAction(command='echo \\\n -e "foo"')
  356. logger.info(action, extra={'msg_type': 'ACTION'})
  357. obs = await runtime.run_action(action)
  358. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  359. assert obs.exit_code == 0, 'The exit code should be 0.'
  360. assert 'foo' in obs.content
  361. await runtime.close()
  362. await asyncio.sleep(1)
  363. @pytest.mark.asyncio
  364. async def test_multiline_echo(temp_dir, box_class):
  365. runtime = await _load_runtime(temp_dir, box_class)
  366. action = CmdRunAction(command='echo -e "hello\nworld"')
  367. logger.info(action, extra={'msg_type': 'ACTION'})
  368. obs = await runtime.run_action(action)
  369. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  370. assert obs.exit_code == 0, 'The exit code should be 0.'
  371. assert 'hello\r\nworld' in obs.content
  372. await runtime.close()
  373. await asyncio.sleep(1)
  374. @pytest.mark.asyncio
  375. async def test_runtime_whitespace(temp_dir, box_class):
  376. runtime = await _load_runtime(temp_dir, box_class)
  377. action = CmdRunAction(command='echo -e "\\n\\n\\n"')
  378. logger.info(action, extra={'msg_type': 'ACTION'})
  379. obs = await runtime.run_action(action)
  380. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  381. assert obs.exit_code == 0, 'The exit code should be 0.'
  382. assert '\r\n\r\n\r\n' in obs.content
  383. await runtime.close()
  384. await asyncio.sleep(1)
  385. @pytest.mark.asyncio
  386. async def test_multiple_multiline_commands(temp_dir, box_class, run_as_devin):
  387. cmds = [
  388. 'ls -l',
  389. 'echo -e "hello\nworld"',
  390. """
  391. echo -e "hello it\\'s me"
  392. """.strip(),
  393. """
  394. echo \\
  395. -e 'hello' \\
  396. -v
  397. """.strip(),
  398. """
  399. echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
  400. """.strip(),
  401. """
  402. echo -e 'hello
  403. world
  404. are
  405. you\\n
  406. there?'
  407. """.strip(),
  408. """
  409. echo -e 'hello
  410. world "
  411. '
  412. """.strip(),
  413. ]
  414. joined_cmds = '\n'.join(cmds)
  415. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  416. action = CmdRunAction(command=joined_cmds)
  417. logger.info(action, extra={'msg_type': 'ACTION'})
  418. obs = await runtime.run_action(action)
  419. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  420. assert isinstance(obs, CmdOutputObservation)
  421. assert obs.exit_code == 0, 'The exit code should be 0.'
  422. assert 'total 0' in obs.content
  423. assert 'hello\r\nworld' in obs.content
  424. assert "hello it\\'s me" in obs.content
  425. assert 'hello -v' in obs.content
  426. assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
  427. assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
  428. assert 'hello\r\nworld "\r\n' in obs.content
  429. await runtime.close()
  430. await asyncio.sleep(1)
  431. @pytest.mark.asyncio
  432. async def test_no_ps2_in_output(temp_dir, box_class, run_as_devin):
  433. """Test that the PS2 sign is not added to the output of a multiline command."""
  434. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  435. action = CmdRunAction(command='echo -e "hello\nworld"')
  436. logger.info(action, extra={'msg_type': 'ACTION'})
  437. obs = await runtime.run_action(action)
  438. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  439. assert 'hello\r\nworld' in obs.content
  440. assert '>' not in obs.content
  441. await runtime.close()
  442. await asyncio.sleep(1)
  443. @pytest.mark.asyncio
  444. async def test_multiline_command_loop(temp_dir, box_class):
  445. # https://github.com/OpenDevin/OpenDevin/issues/3143
  446. runtime = await _load_runtime(temp_dir, box_class)
  447. init_cmd = """
  448. mkdir -p _modules && \
  449. for month in {01..04}; do
  450. for day in {01..05}; do
  451. touch "_modules/2024-${month}-${day}-sample.md"
  452. done
  453. done
  454. echo "created files"
  455. """
  456. action = CmdRunAction(command=init_cmd)
  457. logger.info(action, extra={'msg_type': 'ACTION'})
  458. obs = await runtime.run_action(action)
  459. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  460. assert isinstance(obs, CmdOutputObservation)
  461. assert obs.exit_code == 0, 'The exit code should be 0.'
  462. assert 'created files' in obs.content
  463. follow_up_cmd = """
  464. for file in _modules/*.md; do
  465. new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
  466. mv "$file" "$new_date"
  467. done
  468. echo "success"
  469. """
  470. action = CmdRunAction(command=follow_up_cmd)
  471. logger.info(action, extra={'msg_type': 'ACTION'})
  472. obs = await runtime.run_action(action)
  473. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  474. assert isinstance(obs, CmdOutputObservation)
  475. assert obs.exit_code == 0, 'The exit code should be 0.'
  476. assert 'success' in obs.content
  477. await runtime.close()
  478. await asyncio.sleep(1)
  479. @pytest.mark.asyncio
  480. async def test_cmd_run(temp_dir, box_class, run_as_devin):
  481. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  482. action = CmdRunAction(command='ls -l')
  483. logger.info(action, extra={'msg_type': 'ACTION'})
  484. obs = await runtime.run_action(action)
  485. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  486. assert isinstance(obs, CmdOutputObservation)
  487. assert obs.exit_code == 0
  488. assert 'total 0' in obs.content
  489. action = CmdRunAction(command='mkdir test')
  490. logger.info(action, extra={'msg_type': 'ACTION'})
  491. obs = await runtime.run_action(action)
  492. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  493. assert isinstance(obs, CmdOutputObservation)
  494. assert obs.exit_code == 0
  495. action = CmdRunAction(command='ls -l')
  496. logger.info(action, extra={'msg_type': 'ACTION'})
  497. obs = await runtime.run_action(action)
  498. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  499. assert isinstance(obs, CmdOutputObservation)
  500. assert obs.exit_code == 0
  501. if run_as_devin:
  502. assert 'opendevin' in obs.content
  503. else:
  504. assert 'root' in obs.content
  505. assert 'test' in obs.content
  506. action = CmdRunAction(command='touch test/foo.txt')
  507. logger.info(action, extra={'msg_type': 'ACTION'})
  508. obs = await runtime.run_action(action)
  509. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  510. assert isinstance(obs, CmdOutputObservation)
  511. assert obs.exit_code == 0
  512. action = CmdRunAction(command='ls -l test')
  513. logger.info(action, extra={'msg_type': 'ACTION'})
  514. obs = await runtime.run_action(action)
  515. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  516. assert isinstance(obs, CmdOutputObservation)
  517. assert obs.exit_code == 0
  518. assert 'foo.txt' in obs.content
  519. # clean up: this is needed, since CI will not be
  520. # run as root, and this test may leave a file
  521. # owned by root
  522. action = CmdRunAction(command='rm -rf test')
  523. logger.info(action, extra={'msg_type': 'ACTION'})
  524. obs = await runtime.run_action(action)
  525. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  526. assert isinstance(obs, CmdOutputObservation)
  527. assert obs.exit_code == 0
  528. await runtime.close()
  529. await asyncio.sleep(1)
  530. @pytest.mark.asyncio
  531. async def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_devin):
  532. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  533. action = CmdRunAction(command='cd ~ && pwd')
  534. logger.info(action, extra={'msg_type': 'ACTION'})
  535. obs = await runtime.run_action(action)
  536. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  537. assert isinstance(obs, CmdOutputObservation)
  538. assert obs.exit_code == 0
  539. if run_as_devin:
  540. assert '/home/opendevin' in obs.content
  541. else:
  542. assert '/root' in obs.content
  543. await runtime.close()
  544. await asyncio.sleep(1)
  545. @pytest.mark.asyncio
  546. async def test_multi_cmd_run_in_single_line(temp_dir, box_class):
  547. runtime = await _load_runtime(temp_dir, box_class)
  548. action = CmdRunAction(command='pwd && ls -l')
  549. logger.info(action, extra={'msg_type': 'ACTION'})
  550. obs = await runtime.run_action(action)
  551. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  552. assert isinstance(obs, CmdOutputObservation)
  553. assert obs.exit_code == 0
  554. assert '/workspace' in obs.content
  555. assert 'total 0' in obs.content
  556. await runtime.close()
  557. await asyncio.sleep(1)
  558. @pytest.mark.asyncio
  559. async def test_stateful_cmd(temp_dir, box_class):
  560. runtime = await _load_runtime(temp_dir, box_class)
  561. action = CmdRunAction(command='mkdir test')
  562. logger.info(action, extra={'msg_type': 'ACTION'})
  563. obs = await runtime.run_action(action)
  564. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  565. assert isinstance(obs, CmdOutputObservation)
  566. assert obs.exit_code == 0, 'The exit code should be 0.'
  567. action = CmdRunAction(command='cd test')
  568. logger.info(action, extra={'msg_type': 'ACTION'})
  569. obs = await runtime.run_action(action)
  570. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  571. assert isinstance(obs, CmdOutputObservation)
  572. assert obs.exit_code == 0, 'The exit code should be 0.'
  573. action = CmdRunAction(command='pwd')
  574. logger.info(action, extra={'msg_type': 'ACTION'})
  575. obs = await runtime.run_action(action)
  576. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  577. assert isinstance(obs, CmdOutputObservation)
  578. assert obs.exit_code == 0, 'The exit code should be 0.'
  579. assert '/workspace/test' in obs.content
  580. await runtime.close()
  581. await asyncio.sleep(1)
  582. @pytest.mark.asyncio
  583. async def test_failed_cmd(temp_dir, box_class):
  584. runtime = await _load_runtime(temp_dir, box_class)
  585. action = CmdRunAction(command='non_existing_command')
  586. logger.info(action, extra={'msg_type': 'ACTION'})
  587. obs = await runtime.run_action(action)
  588. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  589. assert isinstance(obs, CmdOutputObservation)
  590. assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
  591. await runtime.close()
  592. await asyncio.sleep(1)
  593. @pytest.mark.asyncio
  594. async def test_ipython_multi_user(temp_dir, box_class, run_as_devin):
  595. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  596. # Test run ipython
  597. # get username
  598. test_code = "import os; print(os.environ['USER'])"
  599. action_ipython = IPythonRunCellAction(code=test_code)
  600. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  601. obs = await runtime.run_action(action_ipython)
  602. assert isinstance(obs, IPythonRunCellObservation)
  603. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  604. if run_as_devin:
  605. assert 'opendevin' in obs.content
  606. else:
  607. assert 'root' in obs.content
  608. # print pwd
  609. test_code = 'import os; print(os.getcwd())'
  610. action_ipython = IPythonRunCellAction(code=test_code)
  611. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  612. obs = await runtime.run_action(action_ipython)
  613. assert isinstance(obs, IPythonRunCellObservation)
  614. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  615. assert (
  616. obs.content.strip()
  617. == '/workspace\n[Jupyter current working directory: /workspace]'
  618. )
  619. # write a file
  620. test_code = "with open('test.txt', 'w') as f: f.write('Hello, world!')"
  621. action_ipython = IPythonRunCellAction(code=test_code)
  622. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  623. obs = await runtime.run_action(action_ipython)
  624. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  625. assert isinstance(obs, IPythonRunCellObservation)
  626. assert (
  627. obs.content.strip()
  628. == '[Code executed successfully with no output]\n[Jupyter current working directory: /workspace]'
  629. )
  630. # check file owner via bash
  631. action = CmdRunAction(command='ls -alh test.txt')
  632. logger.info(action, extra={'msg_type': 'ACTION'})
  633. obs = await runtime.run_action(action)
  634. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  635. assert obs.exit_code == 0
  636. if run_as_devin:
  637. # -rw-r--r-- 1 opendevin root 13 Jul 28 03:53 test.txt
  638. assert 'opendevin' in obs.content.split('\r\n')[0]
  639. assert 'root' in obs.content.split('\r\n')[0]
  640. else:
  641. # -rw-r--r-- 1 root root 13 Jul 28 03:53 test.txt
  642. assert 'root' in obs.content.split('\r\n')[0]
  643. # clean up
  644. action = CmdRunAction(command='rm -rf test')
  645. logger.info(action, extra={'msg_type': 'ACTION'})
  646. obs = await runtime.run_action(action)
  647. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  648. assert obs.exit_code == 0
  649. await runtime.close()
  650. await asyncio.sleep(1)
  651. @pytest.mark.asyncio
  652. async def test_ipython_simple(temp_dir, box_class):
  653. runtime = await _load_runtime(temp_dir, box_class)
  654. # Test run ipython
  655. # get username
  656. test_code = 'print(1)'
  657. action_ipython = IPythonRunCellAction(code=test_code)
  658. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  659. obs = await runtime.run_action(action_ipython)
  660. assert isinstance(obs, IPythonRunCellObservation)
  661. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  662. assert obs.content.strip() == '1\n[Jupyter current working directory: /workspace]'
  663. await runtime.close()
  664. await asyncio.sleep(1)
  665. async def _test_ipython_agentskills_fileop_pwd_impl(
  666. runtime: EventStreamRuntime, enable_auto_lint: bool
  667. ):
  668. # remove everything in /workspace
  669. action = CmdRunAction(command='rm -rf /workspace/*')
  670. logger.info(action, extra={'msg_type': 'ACTION'})
  671. obs = await runtime.run_action(action)
  672. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  673. assert obs.exit_code == 0
  674. action = CmdRunAction(command='mkdir test')
  675. logger.info(action, extra={'msg_type': 'ACTION'})
  676. obs = await runtime.run_action(action)
  677. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  678. assert isinstance(obs, CmdOutputObservation)
  679. assert obs.exit_code == 0
  680. action = IPythonRunCellAction(code="create_file('hello.py')")
  681. logger.info(action, extra={'msg_type': 'ACTION'})
  682. obs = await runtime.run_action(action)
  683. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  684. assert isinstance(obs, IPythonRunCellObservation)
  685. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  686. '[File: /workspace/hello.py (1 lines total)]\n'
  687. '(this is the beginning of the file)\n'
  688. '1|\n'
  689. '(this is the end of the file)\n'
  690. '[File hello.py created.]\n'
  691. '[Jupyter current working directory: /workspace]'
  692. ).strip().split('\n')
  693. action = CmdRunAction(command='cd test')
  694. logger.info(action, extra={'msg_type': 'ACTION'})
  695. obs = await runtime.run_action(action)
  696. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  697. assert isinstance(obs, CmdOutputObservation)
  698. assert obs.exit_code == 0
  699. # This should create a file in the current working directory
  700. # i.e., /workspace/test/hello.py instead of /workspace/hello.py
  701. action = IPythonRunCellAction(code="create_file('hello.py')")
  702. logger.info(action, extra={'msg_type': 'ACTION'})
  703. obs = await runtime.run_action(action)
  704. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  705. assert isinstance(obs, IPythonRunCellObservation)
  706. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  707. '[File: /workspace/test/hello.py (1 lines total)]\n'
  708. '(this is the beginning of the file)\n'
  709. '1|\n'
  710. '(this is the end of the file)\n'
  711. '[File hello.py created.]\n'
  712. '[Jupyter current working directory: /workspace/test]'
  713. ).strip().split('\n')
  714. if enable_auto_lint:
  715. # edit file, but make a mistake in indentation
  716. action = IPythonRunCellAction(
  717. code="insert_content_at_line('hello.py', 1, ' print(\"hello world\")')"
  718. )
  719. logger.info(action, extra={'msg_type': 'ACTION'})
  720. obs = await runtime.run_action(action)
  721. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  722. assert isinstance(obs, IPythonRunCellObservation)
  723. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  724. """
  725. [Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
  726. ERRORS:
  727. /workspace/test/hello.py:1:3: E999 IndentationError: unexpected indent
  728. [This is how your edit would have looked if applied]
  729. -------------------------------------------------
  730. (this is the beginning of the file)
  731. 1| print("hello world")
  732. (this is the end of the file)
  733. -------------------------------------------------
  734. [This is the original code before your edit]
  735. -------------------------------------------------
  736. (this is the beginning of the file)
  737. 1|
  738. (this is the end of the file)
  739. -------------------------------------------------
  740. Your changes have NOT been applied. Please fix your edit command and try again.
  741. You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
  742. DO NOT re-run the same failed edit command. Running it again will lead to the same error.
  743. [Jupyter current working directory: /workspace/test]
  744. """
  745. ).strip().split('\n')
  746. # edit file with correct indentation
  747. action = IPythonRunCellAction(
  748. code="insert_content_at_line('hello.py', 1, 'print(\"hello world\")')"
  749. )
  750. logger.info(action, extra={'msg_type': 'ACTION'})
  751. obs = await runtime.run_action(action)
  752. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  753. assert isinstance(obs, IPythonRunCellObservation)
  754. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  755. """
  756. [File: /workspace/test/hello.py (1 lines total after edit)]
  757. (this is the beginning of the file)
  758. 1|print("hello world")
  759. (this is the end of the file)
  760. [File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
  761. [Jupyter current working directory: /workspace/test]
  762. """
  763. ).strip().split('\n')
  764. action = CmdRunAction(command='rm -rf /workspace/*')
  765. logger.info(action, extra={'msg_type': 'ACTION'})
  766. obs = await runtime.run_action(action)
  767. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  768. assert obs.exit_code == 0
  769. await runtime.close()
  770. await asyncio.sleep(1)
  771. @pytest.mark.asyncio
  772. async def test_ipython_agentskills_fileop_pwd(
  773. temp_dir, box_class, run_as_devin, enable_auto_lint
  774. ):
  775. """Make sure that cd in bash also update the current working directory in ipython."""
  776. runtime = await _load_runtime(
  777. temp_dir, box_class, run_as_devin, enable_auto_lint=enable_auto_lint
  778. )
  779. await _test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
  780. await runtime.close()
  781. await asyncio.sleep(1)
  782. @pytest.mark.asyncio
  783. async def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
  784. """Make sure that cd in bash also update the current working directory in ipython.
  785. Handle special case where the pwd is provided as "~", which should be expanded using os.path.expanduser
  786. on the client side.
  787. """
  788. runtime = await _load_runtime(
  789. temp_dir,
  790. box_class,
  791. run_as_devin=False,
  792. )
  793. action = CmdRunAction(command='cd ~')
  794. logger.info(action, extra={'msg_type': 'ACTION'})
  795. obs = await runtime.run_action(action)
  796. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  797. assert obs.exit_code == 0
  798. action = CmdRunAction(command='mkdir test && ls -la')
  799. logger.info(action, extra={'msg_type': 'ACTION'})
  800. obs = await runtime.run_action(action)
  801. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  802. assert isinstance(obs, CmdOutputObservation)
  803. assert obs.exit_code == 0
  804. action = IPythonRunCellAction(code="create_file('hello.py')")
  805. logger.info(action, extra={'msg_type': 'ACTION'})
  806. obs = await runtime.run_action(action)
  807. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  808. assert isinstance(obs, IPythonRunCellObservation)
  809. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  810. '[File: /root/hello.py (1 lines total)]\n'
  811. '(this is the beginning of the file)\n'
  812. '1|\n'
  813. '(this is the end of the file)\n'
  814. '[File hello.py created.]\n'
  815. '[Jupyter current working directory: /root]'
  816. ).strip().split('\n')
  817. action = CmdRunAction(command='cd test')
  818. logger.info(action, extra={'msg_type': 'ACTION'})
  819. obs = await runtime.run_action(action)
  820. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  821. assert isinstance(obs, CmdOutputObservation)
  822. assert obs.exit_code == 0
  823. # This should create a file in the current working directory
  824. # i.e., /workspace/test/hello.py instead of /workspace/hello.py
  825. action = IPythonRunCellAction(code="create_file('hello.py')")
  826. logger.info(action, extra={'msg_type': 'ACTION'})
  827. obs = await runtime.run_action(action)
  828. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  829. assert isinstance(obs, IPythonRunCellObservation)
  830. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  831. '[File: /root/test/hello.py (1 lines total)]\n'
  832. '(this is the beginning of the file)\n'
  833. '1|\n'
  834. '(this is the end of the file)\n'
  835. '[File hello.py created.]\n'
  836. '[Jupyter current working directory: /root/test]'
  837. ).strip().split('\n')
  838. await runtime.close()
  839. await asyncio.sleep(1)
  840. @pytest.mark.asyncio
  841. async def test_ipython_package_install(temp_dir, box_class, run_as_devin):
  842. """Make sure that cd in bash also update the current working directory in ipython."""
  843. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  844. # It should error out since pymsgbox is not installed
  845. action = IPythonRunCellAction(code='import pymsgbox')
  846. logger.info(action, extra={'msg_type': 'ACTION'})
  847. obs = await runtime.run_action(action)
  848. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  849. assert "ModuleNotFoundError: No module named 'pymsgbox'" in obs.content
  850. # Install pymsgbox in Jupyter
  851. action = IPythonRunCellAction(code='%pip install pymsgbox==1.0.9')
  852. logger.info(action, extra={'msg_type': 'ACTION'})
  853. obs = await runtime.run_action(action)
  854. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  855. assert (
  856. 'Successfully installed pymsgbox-1.0.9' in obs.content
  857. or '[Package installed successfully]' in obs.content
  858. )
  859. action = IPythonRunCellAction(code='import pymsgbox')
  860. logger.info(action, extra={'msg_type': 'ACTION'})
  861. obs = await runtime.run_action(action)
  862. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  863. # import should not error out
  864. assert (
  865. obs.content.strip()
  866. == '[Code executed successfully with no output]\n[Jupyter current working directory: /workspace]'
  867. )
  868. await runtime.close()
  869. await asyncio.sleep(1)
  870. def _create_test_file(host_temp_dir):
  871. # Single file
  872. with open(os.path.join(host_temp_dir, 'test_file.txt'), 'w') as f:
  873. f.write('Hello, World!')
  874. @pytest.mark.asyncio
  875. async def test_copy_single_file(temp_dir, box_class):
  876. runtime = await _load_runtime(temp_dir, box_class)
  877. with tempfile.TemporaryDirectory() as host_temp_dir:
  878. _create_test_file(host_temp_dir)
  879. await runtime.copy_to(
  880. os.path.join(host_temp_dir, 'test_file.txt'), '/workspace'
  881. )
  882. action = CmdRunAction(command='ls -alh /workspace')
  883. logger.info(action, extra={'msg_type': 'ACTION'})
  884. obs = await runtime.run_action(action)
  885. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  886. assert isinstance(obs, CmdOutputObservation)
  887. assert obs.exit_code == 0
  888. assert 'test_file.txt' in obs.content
  889. action = CmdRunAction(command='cat /workspace/test_file.txt')
  890. logger.info(action, extra={'msg_type': 'ACTION'})
  891. obs = await runtime.run_action(action)
  892. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  893. assert isinstance(obs, CmdOutputObservation)
  894. assert obs.exit_code == 0
  895. assert 'Hello, World!' in obs.content
  896. await runtime.close()
  897. await asyncio.sleep(1)
  898. def _create_test_dir_with_files(host_temp_dir):
  899. os.mkdir(os.path.join(host_temp_dir, 'test_dir'))
  900. with open(os.path.join(host_temp_dir, 'test_dir', 'file1.txt'), 'w') as f:
  901. f.write('File 1 content')
  902. with open(os.path.join(host_temp_dir, 'test_dir', 'file2.txt'), 'w') as f:
  903. f.write('File 2 content')
  904. @pytest.mark.asyncio
  905. async def test_copy_directory_recursively(temp_dir, box_class):
  906. runtime = await _load_runtime(temp_dir, box_class)
  907. with tempfile.TemporaryDirectory() as host_temp_dir:
  908. # We need a separate directory, since temp_dir is mounted to /workspace
  909. _create_test_dir_with_files(host_temp_dir)
  910. await runtime.copy_to(
  911. os.path.join(host_temp_dir, 'test_dir'), '/workspace', recursive=True
  912. )
  913. action = CmdRunAction(command='ls -alh /workspace')
  914. logger.info(action, extra={'msg_type': 'ACTION'})
  915. obs = await runtime.run_action(action)
  916. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  917. assert isinstance(obs, CmdOutputObservation)
  918. assert obs.exit_code == 0
  919. assert 'test_dir' in obs.content
  920. assert 'file1.txt' not in obs.content
  921. assert 'file2.txt' not in obs.content
  922. action = CmdRunAction(command='ls -alh /workspace/test_dir')
  923. logger.info(action, extra={'msg_type': 'ACTION'})
  924. obs = await runtime.run_action(action)
  925. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  926. assert isinstance(obs, CmdOutputObservation)
  927. assert obs.exit_code == 0
  928. assert 'file1.txt' in obs.content
  929. assert 'file2.txt' in obs.content
  930. action = CmdRunAction(command='cat /workspace/test_dir/file1.txt')
  931. logger.info(action, extra={'msg_type': 'ACTION'})
  932. obs = await runtime.run_action(action)
  933. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  934. assert isinstance(obs, CmdOutputObservation)
  935. assert obs.exit_code == 0
  936. assert 'File 1 content' in obs.content
  937. await runtime.close()
  938. await asyncio.sleep(1)
  939. @pytest.mark.asyncio
  940. async def test_copy_to_non_existent_directory(temp_dir, box_class):
  941. runtime = await _load_runtime(temp_dir, box_class)
  942. with tempfile.TemporaryDirectory() as host_temp_dir:
  943. _create_test_file(host_temp_dir)
  944. await runtime.copy_to(
  945. os.path.join(host_temp_dir, 'test_file.txt'), '/workspace/new_dir'
  946. )
  947. action = CmdRunAction(command='cat /workspace/new_dir/test_file.txt')
  948. logger.info(action, extra={'msg_type': 'ACTION'})
  949. obs = await runtime.run_action(action)
  950. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  951. assert isinstance(obs, CmdOutputObservation)
  952. assert obs.exit_code == 0
  953. assert 'Hello, World!' in obs.content
  954. await runtime.close()
  955. await asyncio.sleep(1)
  956. @pytest.mark.asyncio
  957. async def test_overwrite_existing_file(temp_dir, box_class):
  958. runtime = await _load_runtime(temp_dir, box_class)
  959. # touch a file in /workspace
  960. action = CmdRunAction(command='touch /workspace/test_file.txt')
  961. logger.info(action, extra={'msg_type': 'ACTION'})
  962. obs = await runtime.run_action(action)
  963. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  964. assert isinstance(obs, CmdOutputObservation)
  965. assert obs.exit_code == 0
  966. action = CmdRunAction(command='cat /workspace/test_file.txt')
  967. logger.info(action, extra={'msg_type': 'ACTION'})
  968. obs = await runtime.run_action(action)
  969. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  970. assert isinstance(obs, CmdOutputObservation)
  971. assert obs.exit_code == 0
  972. assert 'Hello, World!' not in obs.content
  973. with tempfile.TemporaryDirectory() as host_temp_dir:
  974. _create_test_file(host_temp_dir)
  975. await runtime.copy_to(
  976. os.path.join(host_temp_dir, 'test_file.txt'), '/workspace'
  977. )
  978. action = CmdRunAction(command='cat /workspace/test_file.txt')
  979. logger.info(action, extra={'msg_type': 'ACTION'})
  980. obs = await runtime.run_action(action)
  981. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  982. assert isinstance(obs, CmdOutputObservation)
  983. assert obs.exit_code == 0
  984. assert 'Hello, World!' in obs.content
  985. await runtime.close()
  986. await asyncio.sleep(1)
  987. @pytest.mark.asyncio
  988. async def test_copy_non_existent_file(temp_dir, box_class):
  989. runtime = await _load_runtime(temp_dir, box_class)
  990. with pytest.raises(FileNotFoundError):
  991. await runtime.copy_to(
  992. os.path.join(temp_dir, 'non_existent_file.txt'),
  993. '/workspace/should_not_exist.txt',
  994. )
  995. action = CmdRunAction(command='ls /workspace/should_not_exist.txt')
  996. logger.info(action, extra={'msg_type': 'ACTION'})
  997. obs = await runtime.run_action(action)
  998. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  999. assert isinstance(obs, CmdOutputObservation)
  1000. assert obs.exit_code != 0 # File should not exist
  1001. await runtime.close()
  1002. await asyncio.sleep(1)
  1003. @pytest.mark.asyncio
  1004. async def test_keep_prompt(temp_dir):
  1005. # only EventStreamRuntime supports keep_prompt
  1006. runtime = await _load_runtime(
  1007. temp_dir, box_class=EventStreamRuntime, run_as_devin=False
  1008. )
  1009. action = CmdRunAction(command='touch /workspace/test_file.txt')
  1010. logger.info(action, extra={'msg_type': 'ACTION'})
  1011. obs = await runtime.run_action(action)
  1012. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1013. assert isinstance(obs, CmdOutputObservation)
  1014. assert obs.exit_code == 0
  1015. assert 'root@' in obs.content
  1016. action = CmdRunAction(command='cat /workspace/test_file.txt', keep_prompt=False)
  1017. logger.info(action, extra={'msg_type': 'ACTION'})
  1018. obs = await runtime.run_action(action)
  1019. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1020. assert isinstance(obs, CmdOutputObservation)
  1021. assert obs.exit_code == 0
  1022. assert 'root@' not in obs.content
  1023. await runtime.close()
  1024. await asyncio.sleep(1)
  1025. @pytest.mark.asyncio
  1026. async def test_git_operation(box_class):
  1027. # do not mount workspace, since workspace mount by tests will be owned by root
  1028. # while the user_id we get via os.getuid() is different from root
  1029. # which causes permission issues
  1030. runtime = await _load_runtime(
  1031. temp_dir=None,
  1032. box_class=box_class,
  1033. # Need to use non-root user to expose issues
  1034. run_as_devin=True,
  1035. )
  1036. # this will happen if permission of runtime is not properly configured
  1037. # fatal: detected dubious ownership in repository at '/workspace'
  1038. # check the ownership of the current directory
  1039. action = CmdRunAction(command='ls -alh .')
  1040. logger.info(action, extra={'msg_type': 'ACTION'})
  1041. obs = await runtime.run_action(action)
  1042. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1043. assert isinstance(obs, CmdOutputObservation)
  1044. assert obs.exit_code == 0
  1045. # drwx--S--- 2 opendevin root 64 Aug 7 23:32 .
  1046. # drwxr-xr-x 1 root root 4.0K Aug 7 23:33 ..
  1047. for line in obs.content.split('\r\n'):
  1048. if ' ..' in line:
  1049. # parent directory should be owned by root
  1050. assert 'root' in line
  1051. assert 'opendevin' not in line
  1052. elif ' .' in line:
  1053. # current directory should be owned by opendevin
  1054. # and its group should be root
  1055. assert 'opendevin' in line
  1056. assert 'root' in line
  1057. # make sure all git operations are allowed
  1058. action = CmdRunAction(command='git init')
  1059. logger.info(action, extra={'msg_type': 'ACTION'})
  1060. obs = await runtime.run_action(action)
  1061. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1062. assert isinstance(obs, CmdOutputObservation)
  1063. assert obs.exit_code == 0
  1064. # create a file
  1065. action = CmdRunAction(command='echo "hello" > test_file.txt')
  1066. logger.info(action, extra={'msg_type': 'ACTION'})
  1067. obs = await runtime.run_action(action)
  1068. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1069. assert isinstance(obs, CmdOutputObservation)
  1070. assert obs.exit_code == 0
  1071. # git add
  1072. action = CmdRunAction(command='git add test_file.txt')
  1073. logger.info(action, extra={'msg_type': 'ACTION'})
  1074. obs = await runtime.run_action(action)
  1075. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1076. assert isinstance(obs, CmdOutputObservation)
  1077. assert obs.exit_code == 0
  1078. # git diff
  1079. action = CmdRunAction(command='git diff')
  1080. logger.info(action, extra={'msg_type': 'ACTION'})
  1081. obs = await runtime.run_action(action)
  1082. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1083. assert isinstance(obs, CmdOutputObservation)
  1084. assert obs.exit_code == 0
  1085. # git commit
  1086. action = CmdRunAction(command='git commit -m "test commit"')
  1087. logger.info(action, extra={'msg_type': 'ACTION'})
  1088. obs = await runtime.run_action(action)
  1089. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1090. assert isinstance(obs, CmdOutputObservation)
  1091. assert obs.exit_code == 0
  1092. await runtime.close()
  1093. await runtime.close()
  1094. await asyncio.sleep(1)
  1095. # ============================================================================================================================
  1096. # Image-specific tests
  1097. # ============================================================================================================================
  1098. @pytest.mark.asyncio
  1099. async def test_bash_python_version(temp_dir, box_class, container_image):
  1100. """Make sure Python is available in bash."""
  1101. if container_image not in [
  1102. 'python:3.11-bookworm',
  1103. 'nikolaik/python-nodejs:python3.11-nodejs22',
  1104. ]:
  1105. pytest.skip('This test is only for python-related images')
  1106. runtime = await _load_runtime(temp_dir, box_class, container_image=container_image)
  1107. action = CmdRunAction(command='which python')
  1108. logger.info(action, extra={'msg_type': 'ACTION'})
  1109. obs = await runtime.run_action(action)
  1110. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1111. assert obs.exit_code == 0
  1112. action = CmdRunAction(command='python --version')
  1113. logger.info(action, extra={'msg_type': 'ACTION'})
  1114. obs = await runtime.run_action(action)
  1115. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1116. assert obs.exit_code == 0
  1117. assert 'Python 3.11' in obs.content # Check for specific version
  1118. action = CmdRunAction(command='pip --version')
  1119. logger.info(action, extra={'msg_type': 'ACTION'})
  1120. obs = await runtime.run_action(action)
  1121. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1122. assert obs.exit_code == 0
  1123. assert 'pip' in obs.content # Check that pip is available
  1124. await runtime.close()
  1125. await asyncio.sleep(1)
  1126. @pytest.mark.asyncio
  1127. async def test_nodejs_22_version(temp_dir, box_class, container_image):
  1128. """Make sure Node.js is available in bash."""
  1129. if container_image not in [
  1130. 'node:22-bookworm',
  1131. 'nikolaik/python-nodejs:python3.11-nodejs22',
  1132. ]:
  1133. pytest.skip('This test is only for nodejs-related images')
  1134. runtime = await _load_runtime(temp_dir, box_class, container_image=container_image)
  1135. action = CmdRunAction(command='node --version')
  1136. logger.info(action, extra={'msg_type': 'ACTION'})
  1137. obs = await runtime.run_action(action)
  1138. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1139. assert obs.exit_code == 0
  1140. assert 'v22' in obs.content # Check for specific version
  1141. await runtime.close()
  1142. await asyncio.sleep(1)