test_runtime.py 46 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115111611171118111911201121112211231124112511261127112811291130113111321133113411351136113711381139114011411142114311441145114611471148114911501151115211531154115511561157115811591160116111621163116411651166116711681169117011711172117311741175117611771178117911801181118211831184118511861187118811891190119111921193119411951196119711981199120012011202120312041205120612071208120912101211121212131214121512161217121812191220122112221223122412251226122712281229123012311232123312341235123612371238123912401241124212431244124512461247124812491250125112521253125412551256125712581259126012611262126312641265126612671268126912701271127212731274127512761277127812791280128112821283128412851286128712881289129012911292129312941295129612971298129913001301130213031304130513061307130813091310131113121313131413151316
  1. """Test the EventStreamRuntime, which connects to the RuntimeClient running in the sandbox."""
  2. import asyncio
  3. import json
  4. import os
  5. import tempfile
  6. import time
  7. from unittest.mock import patch
  8. import pytest
  9. from pytest import TempPathFactory
  10. from opendevin.core.config import AppConfig, SandboxConfig, load_from_env
  11. from opendevin.core.logger import opendevin_logger as logger
  12. from opendevin.events import EventStream
  13. from opendevin.events.action import (
  14. BrowseInteractiveAction,
  15. BrowseURLAction,
  16. CmdRunAction,
  17. FileReadAction,
  18. FileWriteAction,
  19. IPythonRunCellAction,
  20. )
  21. from opendevin.events.observation import (
  22. BrowserOutputObservation,
  23. CmdOutputObservation,
  24. ErrorObservation,
  25. FileReadObservation,
  26. FileWriteObservation,
  27. IPythonRunCellObservation,
  28. )
  29. from opendevin.runtime.client.runtime import EventStreamRuntime
  30. from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
  31. from opendevin.runtime.runtime import Runtime
  32. from opendevin.runtime.server.runtime import ServerRuntime
  33. from opendevin.storage import get_file_store
  34. @pytest.fixture(autouse=True)
  35. def print_method_name(request):
  36. print('\n########################################################################')
  37. print(f'Running test: {request.node.name}')
  38. print('########################################################################')
  39. yield
  40. @pytest.fixture
  41. def temp_dir(tmp_path_factory: TempPathFactory) -> str:
  42. return str(tmp_path_factory.mktemp('test_runtime'))
  43. TEST_RUNTIME = os.getenv('TEST_RUNTIME', 'both')
  44. PY3_FOR_TESTING = '/opendevin/miniforge3/bin/mamba run -n base python3'
  45. # Depending on TEST_RUNTIME, feed the appropriate box class(es) to the test.
  46. def get_box_classes():
  47. runtime = TEST_RUNTIME
  48. if runtime.lower() == 'eventstream':
  49. return [EventStreamRuntime]
  50. elif runtime.lower() == 'server':
  51. return [ServerRuntime]
  52. else:
  53. return [EventStreamRuntime, ServerRuntime]
  54. # This assures that all tests run together per runtime, not alternating between them,
  55. # which cause errors (especially outside GitHub actions).
  56. @pytest.fixture(scope='module', params=get_box_classes())
  57. def box_class(request):
  58. time.sleep(2)
  59. return request.param
  60. # TODO: We will change this to `run_as_user` when `ServerRuntime` is deprecated.
  61. # since `EventStreamRuntime` supports running as an arbitrary user.
  62. @pytest.fixture(scope='module', params=[True, False])
  63. def run_as_devin(request):
  64. time.sleep(1)
  65. return request.param
  66. @pytest.fixture(scope='module', params=[True, False])
  67. def enable_auto_lint(request):
  68. time.sleep(1)
  69. return request.param
  70. @pytest.fixture(scope='module', params=['ubuntu:22.04', 'debian:11'])
  71. def container_image(request):
  72. time.sleep(1)
  73. return request.param
  74. async def _load_runtime(
  75. temp_dir,
  76. box_class,
  77. run_as_devin: bool = True,
  78. enable_auto_lint: bool = False,
  79. container_image: str | None = None,
  80. browsergym_eval_env: str | None = None,
  81. ) -> Runtime:
  82. sid = 'test'
  83. cli_session = 'main_test'
  84. # AgentSkills need to be initialized **before** Jupyter
  85. # otherwise Jupyter will not access the proper dependencies installed by AgentSkills
  86. plugins = [AgentSkillsRequirement(), JupyterRequirement()]
  87. config = AppConfig(
  88. workspace_base=temp_dir,
  89. workspace_mount_path=temp_dir,
  90. sandbox=SandboxConfig(
  91. use_host_network=True,
  92. browsergym_eval_env=browsergym_eval_env,
  93. ),
  94. )
  95. load_from_env(config, os.environ)
  96. config.run_as_devin = run_as_devin
  97. config.sandbox.enable_auto_lint = enable_auto_lint
  98. file_store = get_file_store(config.file_store, config.file_store_path)
  99. event_stream = EventStream(cli_session, file_store)
  100. if container_image is not None:
  101. config.sandbox.container_image = container_image
  102. if box_class == EventStreamRuntime:
  103. # NOTE: we will use the default container image specified in the config.sandbox
  104. # if it is an official od_runtime image.
  105. cur_container_image = config.sandbox.container_image
  106. if 'od_runtime' not in cur_container_image and cur_container_image not in {
  107. 'xingyaoww/od-eval-miniwob:v1.0'
  108. }: # a special exception list
  109. cur_container_image = 'ubuntu:22.04'
  110. logger.warning(
  111. f'`{config.sandbox.container_image}` is not an od_runtime image. Will use `{cur_container_image}` as the container image for testing.'
  112. )
  113. runtime = EventStreamRuntime(
  114. config=config,
  115. event_stream=event_stream,
  116. sid=sid,
  117. plugins=plugins,
  118. # NOTE: we probably don't have a default container image `/sandbox` for the event stream runtime
  119. # Instead, we will pre-build a suite of container images with OD-runtime-cli installed.
  120. container_image=cur_container_image,
  121. )
  122. await runtime.ainit()
  123. elif box_class == ServerRuntime:
  124. runtime = ServerRuntime(
  125. config=config, event_stream=event_stream, sid=sid, plugins=plugins
  126. )
  127. await runtime.ainit()
  128. from opendevin.runtime.tools import (
  129. RuntimeTool, # deprecate this after ServerRuntime is deprecated
  130. )
  131. runtime.init_runtime_tools(
  132. [RuntimeTool.BROWSER],
  133. runtime_tools_config={},
  134. )
  135. else:
  136. raise ValueError(f'Invalid box class: {box_class}')
  137. await asyncio.sleep(1)
  138. return runtime
  139. @pytest.mark.asyncio
  140. async def test_env_vars_os_environ(temp_dir, box_class, run_as_devin):
  141. with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
  142. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  143. obs: CmdOutputObservation = await runtime.run_action(
  144. CmdRunAction(command='env')
  145. )
  146. print(obs)
  147. obs: CmdOutputObservation = await runtime.run_action(
  148. CmdRunAction(command='echo $FOOBAR')
  149. )
  150. print(obs)
  151. assert obs.exit_code == 0, 'The exit code should be 0.'
  152. assert (
  153. obs.content.strip().split('\n\r')[0].strip() == 'BAZ'
  154. ), f'Output: [{obs.content}] for {box_class}'
  155. await runtime.close()
  156. await asyncio.sleep(1)
  157. @pytest.mark.asyncio
  158. async def test_env_vars_runtime_add_env_vars(temp_dir, box_class):
  159. runtime = await _load_runtime(temp_dir, box_class)
  160. await runtime.add_env_vars({'QUUX': 'abc"def'})
  161. obs: CmdOutputObservation = await runtime.run_action(
  162. CmdRunAction(command='echo $QUUX')
  163. )
  164. print(obs)
  165. assert obs.exit_code == 0, 'The exit code should be 0.'
  166. assert (
  167. obs.content.strip().split('\r\n')[0].strip() == 'abc"def'
  168. ), f'Output: [{obs.content}] for {box_class}'
  169. await runtime.close()
  170. await asyncio.sleep(1)
  171. @pytest.mark.asyncio
  172. async def test_env_vars_runtime_add_empty_dict(temp_dir, box_class):
  173. runtime = await _load_runtime(temp_dir, box_class)
  174. prev_obs = await runtime.run_action(CmdRunAction(command='env'))
  175. assert prev_obs.exit_code == 0, 'The exit code should be 0.'
  176. print(prev_obs)
  177. await runtime.add_env_vars({})
  178. obs = await runtime.run_action(CmdRunAction(command='env'))
  179. assert obs.exit_code == 0, 'The exit code should be 0.'
  180. print(obs)
  181. assert (
  182. obs.content == prev_obs.content
  183. ), 'The env var content should be the same after adding an empty dict.'
  184. await runtime.close()
  185. await asyncio.sleep(1)
  186. @pytest.mark.asyncio
  187. async def test_env_vars_runtime_add_multiple_env_vars(temp_dir, box_class):
  188. runtime = await _load_runtime(temp_dir, box_class)
  189. await runtime.add_env_vars({'QUUX': 'abc"def', 'FOOBAR': 'xyz'})
  190. obs: CmdOutputObservation = await runtime.run_action(
  191. CmdRunAction(command='echo $QUUX $FOOBAR')
  192. )
  193. print(obs)
  194. assert obs.exit_code == 0, 'The exit code should be 0.'
  195. assert (
  196. obs.content.strip().split('\r\n')[0].strip() == 'abc"def xyz'
  197. ), f'Output: [{obs.content}] for {box_class}'
  198. await runtime.close()
  199. await asyncio.sleep(1)
  200. @pytest.mark.asyncio
  201. async def test_env_vars_runtime_add_env_vars_overwrite(temp_dir, box_class):
  202. with patch.dict(os.environ, {'SANDBOX_ENV_FOOBAR': 'BAZ'}):
  203. runtime = await _load_runtime(temp_dir, box_class)
  204. await runtime.add_env_vars({'FOOBAR': 'xyz'})
  205. obs: CmdOutputObservation = await runtime.run_action(
  206. CmdRunAction(command='echo $FOOBAR')
  207. )
  208. print(obs)
  209. assert obs.exit_code == 0, 'The exit code should be 0.'
  210. assert (
  211. obs.content.strip().split('\r\n')[0].strip() == 'xyz'
  212. ), f'Output: [{obs.content}] for {box_class}'
  213. await runtime.close()
  214. await asyncio.sleep(1)
  215. @pytest.mark.asyncio
  216. async def test_bash_command_pexcept(temp_dir, box_class, run_as_devin):
  217. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  218. # We set env var PS1="\u@\h:\w $"
  219. # and construct the PEXCEPT prompt base on it.
  220. # When run `env`, bad implementation of CmdRunAction will be pexcepted by this
  221. # and failed to pexcept the right content, causing it fail to get error code.
  222. obs = await runtime.run_action(CmdRunAction(command='env'))
  223. # For example:
  224. # 02:16:13 - opendevin:DEBUG: client.py:78 - Executing command: env
  225. # 02:16:13 - opendevin:DEBUG: client.py:82 - Command output: PYTHONUNBUFFERED=1
  226. # CONDA_EXE=/opendevin/miniforge3/bin/conda
  227. # [...]
  228. # LC_CTYPE=C.UTF-8
  229. # PS1=\u@\h:\w $
  230. # 02:16:13 - opendevin:DEBUG: client.py:89 - Executing command for exit code: env
  231. # 02:16:13 - opendevin:DEBUG: client.py:92 - Exit code Output:
  232. # CONDA_DEFAULT_ENV=base
  233. # As long as the exit code is 0, the test will pass.
  234. assert isinstance(
  235. obs, CmdOutputObservation
  236. ), 'The observation should be a CmdOutputObservation.'
  237. assert obs.exit_code == 0, 'The exit code should be 0.'
  238. await runtime.close()
  239. await asyncio.sleep(1)
  240. @pytest.mark.asyncio
  241. async def test_simple_cmd_ipython_and_fileop(temp_dir, box_class, run_as_devin):
  242. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  243. # Test run command
  244. action_cmd = CmdRunAction(command='ls -l')
  245. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  246. obs = await runtime.run_action(action_cmd)
  247. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  248. assert isinstance(obs, CmdOutputObservation)
  249. assert obs.exit_code == 0
  250. assert 'total 0' in obs.content
  251. # Test run ipython
  252. test_code = "print('Hello, `World`!\\n')"
  253. action_ipython = IPythonRunCellAction(code=test_code)
  254. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  255. obs = await runtime.run_action(action_ipython)
  256. assert isinstance(obs, IPythonRunCellObservation)
  257. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  258. assert obs.content.strip() == 'Hello, `World`!'
  259. # Test read file (file should not exist)
  260. action_read = FileReadAction(path='hello.sh')
  261. logger.info(action_read, extra={'msg_type': 'ACTION'})
  262. obs = await runtime.run_action(action_read)
  263. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  264. assert isinstance(obs, ErrorObservation)
  265. assert 'File not found' in obs.content
  266. # Test write file
  267. action_write = FileWriteAction(content='echo "Hello, World!"', path='hello.sh')
  268. logger.info(action_write, extra={'msg_type': 'ACTION'})
  269. obs = await runtime.run_action(action_write)
  270. assert isinstance(obs, FileWriteObservation)
  271. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  272. assert obs.content == ''
  273. if box_class == ServerRuntime:
  274. assert obs.path == 'hello.sh'
  275. else:
  276. # event stream runtime will always use absolute path
  277. assert obs.path == '/workspace/hello.sh'
  278. # Test read file (file should exist)
  279. action_read = FileReadAction(path='hello.sh')
  280. logger.info(action_read, extra={'msg_type': 'ACTION'})
  281. obs = await runtime.run_action(action_read)
  282. assert isinstance(
  283. obs, FileReadObservation
  284. ), 'The observation should be a FileReadObservation.'
  285. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  286. assert obs.content == 'echo "Hello, World!"\n'
  287. if box_class == ServerRuntime:
  288. assert obs.path == 'hello.sh'
  289. else:
  290. assert obs.path == '/workspace/hello.sh'
  291. # clean up
  292. action = CmdRunAction(command='rm -rf hello.sh')
  293. logger.info(action, extra={'msg_type': 'ACTION'})
  294. obs = await runtime.run_action(action)
  295. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  296. assert obs.exit_code == 0
  297. await runtime.close()
  298. await asyncio.sleep(1)
  299. @pytest.mark.asyncio
  300. async def test_simple_browse(temp_dir, box_class, run_as_devin):
  301. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  302. # Test browse
  303. action_cmd = CmdRunAction(
  304. command=f'{PY3_FOR_TESTING} -m http.server 8000 > server.log 2>&1 &'
  305. )
  306. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  307. obs = await runtime.run_action(action_cmd)
  308. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  309. assert isinstance(obs, CmdOutputObservation)
  310. assert obs.exit_code == 0
  311. assert '[1]' in obs.content
  312. action_cmd = CmdRunAction(command='sleep 5 && cat server.log')
  313. logger.info(action_cmd, extra={'msg_type': 'ACTION'})
  314. obs = await runtime.run_action(action_cmd)
  315. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  316. assert obs.exit_code == 0
  317. action_browse = BrowseURLAction(url='http://localhost:8000')
  318. logger.info(action_browse, extra={'msg_type': 'ACTION'})
  319. obs = await runtime.run_action(action_browse)
  320. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  321. assert isinstance(obs, BrowserOutputObservation)
  322. assert 'http://localhost:8000' in obs.url
  323. assert not obs.error
  324. assert obs.open_pages_urls == ['http://localhost:8000/']
  325. assert obs.active_page_index == 0
  326. assert obs.last_browser_action == 'goto("http://localhost:8000")'
  327. assert obs.last_browser_action_error == ''
  328. assert 'Directory listing for /' in obs.content
  329. assert 'server.log' in obs.content
  330. # clean up
  331. action = CmdRunAction(command='rm -rf server.log')
  332. logger.info(action, extra={'msg_type': 'ACTION'})
  333. obs = await runtime.run_action(action)
  334. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  335. assert obs.exit_code == 0
  336. await runtime.close()
  337. await asyncio.sleep(1)
  338. @pytest.mark.asyncio
  339. async def test_browsergym_eval_env(temp_dir):
  340. runtime = await _load_runtime(
  341. temp_dir,
  342. # only supported in event stream runtime
  343. box_class=EventStreamRuntime,
  344. run_as_devin=False, # need root permission to access file
  345. container_image='xingyaoww/od-eval-miniwob:v1.0',
  346. browsergym_eval_env='browsergym/miniwob.choose-list',
  347. )
  348. from opendevin.runtime.browser.browser_env import (
  349. BROWSER_EVAL_GET_GOAL_ACTION,
  350. BROWSER_EVAL_GET_REWARDS_ACTION,
  351. )
  352. # Test browse
  353. action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_GOAL_ACTION)
  354. logger.info(action, extra={'msg_type': 'ACTION'})
  355. obs = await runtime.run_action(action)
  356. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  357. assert isinstance(obs, BrowserOutputObservation)
  358. assert not obs.error
  359. assert 'Select' in obs.content
  360. assert 'from the list and click Submit' in obs.content
  361. # Make sure the browser can produce observation in eva[l
  362. action = BrowseInteractiveAction(browser_actions='noop()')
  363. logger.info(action, extra={'msg_type': 'ACTION'})
  364. obs = await runtime.run_action(action)
  365. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  366. assert (
  367. obs.url.strip()
  368. == 'file:///miniwob-plusplus/miniwob/html/miniwob/choose-list.html'
  369. )
  370. # Make sure the rewards are working
  371. action = BrowseInteractiveAction(browser_actions=BROWSER_EVAL_GET_REWARDS_ACTION)
  372. logger.info(action, extra={'msg_type': 'ACTION'})
  373. obs = await runtime.run_action(action)
  374. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  375. assert json.loads(obs.content) == [0.0]
  376. await runtime.close()
  377. await asyncio.sleep(1)
  378. @pytest.mark.asyncio
  379. async def test_single_multiline_command(temp_dir, box_class):
  380. runtime = await _load_runtime(temp_dir, box_class)
  381. action = CmdRunAction(command='echo \\\n -e "foo"')
  382. logger.info(action, extra={'msg_type': 'ACTION'})
  383. obs = await runtime.run_action(action)
  384. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  385. assert obs.exit_code == 0, 'The exit code should be 0.'
  386. assert 'foo' in obs.content
  387. await runtime.close()
  388. await asyncio.sleep(1)
  389. @pytest.mark.asyncio
  390. async def test_multiline_echo(temp_dir, box_class):
  391. runtime = await _load_runtime(temp_dir, box_class)
  392. action = CmdRunAction(command='echo -e "hello\nworld"')
  393. logger.info(action, extra={'msg_type': 'ACTION'})
  394. obs = await runtime.run_action(action)
  395. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  396. assert obs.exit_code == 0, 'The exit code should be 0.'
  397. assert 'hello\r\nworld' in obs.content
  398. await runtime.close()
  399. await asyncio.sleep(1)
  400. @pytest.mark.asyncio
  401. async def test_runtime_whitespace(temp_dir, box_class):
  402. runtime = await _load_runtime(temp_dir, box_class)
  403. action = CmdRunAction(command='echo -e "\\n\\n\\n"')
  404. logger.info(action, extra={'msg_type': 'ACTION'})
  405. obs = await runtime.run_action(action)
  406. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  407. assert obs.exit_code == 0, 'The exit code should be 0.'
  408. assert '\r\n\r\n\r\n' in obs.content
  409. await runtime.close()
  410. await asyncio.sleep(1)
  411. @pytest.mark.asyncio
  412. async def test_multiple_multiline_commands(temp_dir, box_class, run_as_devin):
  413. cmds = [
  414. 'ls -l',
  415. 'echo -e "hello\nworld"',
  416. """
  417. echo -e "hello it\\'s me"
  418. """.strip(),
  419. """
  420. echo \\
  421. -e 'hello' \\
  422. -v
  423. """.strip(),
  424. """
  425. echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
  426. """.strip(),
  427. """
  428. echo -e 'hello
  429. world
  430. are
  431. you\\n
  432. there?'
  433. """.strip(),
  434. """
  435. echo -e 'hello
  436. world "
  437. '
  438. """.strip(),
  439. ]
  440. joined_cmds = '\n'.join(cmds)
  441. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  442. action = CmdRunAction(command=joined_cmds)
  443. logger.info(action, extra={'msg_type': 'ACTION'})
  444. obs = await runtime.run_action(action)
  445. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  446. assert isinstance(obs, CmdOutputObservation)
  447. assert obs.exit_code == 0, 'The exit code should be 0.'
  448. assert 'total 0' in obs.content
  449. assert 'hello\r\nworld' in obs.content
  450. assert "hello it\\'s me" in obs.content
  451. assert 'hello -v' in obs.content
  452. assert 'hello\r\nworld\r\nare\r\nyou\r\nthere?' in obs.content
  453. assert 'hello\r\nworld\r\nare\r\nyou\r\n\r\nthere?' in obs.content
  454. assert 'hello\r\nworld "\r\n' in obs.content
  455. await runtime.close()
  456. await asyncio.sleep(1)
  457. @pytest.mark.asyncio
  458. async def test_no_ps2_in_output(temp_dir, box_class, run_as_devin):
  459. """Test that the PS2 sign is not added to the output of a multiline command."""
  460. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  461. action = CmdRunAction(command='echo -e "hello\nworld"')
  462. logger.info(action, extra={'msg_type': 'ACTION'})
  463. obs = await runtime.run_action(action)
  464. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  465. if box_class == ServerRuntime:
  466. # the extra PS2 '>' is NOT handled by the ServerRuntime
  467. assert 'hello\r\nworld' in obs.content
  468. assert '>' in obs.content
  469. assert obs.content.count('>') == 1
  470. else:
  471. assert 'hello\r\nworld' in obs.content
  472. assert '>' not in obs.content
  473. await runtime.close()
  474. await asyncio.sleep(1)
  475. @pytest.mark.asyncio
  476. async def test_multiline_command_loop(temp_dir, box_class):
  477. # https://github.com/OpenDevin/OpenDevin/issues/3143
  478. runtime = await _load_runtime(temp_dir, box_class)
  479. init_cmd = """
  480. mkdir -p _modules && \
  481. for month in {01..04}; do
  482. for day in {01..05}; do
  483. touch "_modules/2024-${month}-${day}-sample.md"
  484. done
  485. done
  486. echo "created files"
  487. """
  488. action = CmdRunAction(command=init_cmd)
  489. logger.info(action, extra={'msg_type': 'ACTION'})
  490. obs = await runtime.run_action(action)
  491. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  492. assert isinstance(obs, CmdOutputObservation)
  493. assert obs.exit_code == 0, 'The exit code should be 0.'
  494. assert 'created files' in obs.content
  495. follow_up_cmd = """
  496. for file in _modules/*.md; do
  497. new_date=$(echo $file | sed -E 's/2024-(01|02|03|04)-/2024-/;s/2024-01/2024-08/;s/2024-02/2024-09/;s/2024-03/2024-10/;s/2024-04/2024-11/')
  498. mv "$file" "$new_date"
  499. done
  500. echo "success"
  501. """
  502. action = CmdRunAction(command=follow_up_cmd)
  503. logger.info(action, extra={'msg_type': 'ACTION'})
  504. obs = await runtime.run_action(action)
  505. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  506. assert isinstance(obs, CmdOutputObservation)
  507. assert obs.exit_code == 0, 'The exit code should be 0.'
  508. assert 'success' in obs.content
  509. await runtime.close()
  510. await asyncio.sleep(1)
  511. @pytest.mark.asyncio
  512. async def test_cmd_run(temp_dir, box_class, run_as_devin):
  513. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  514. action = CmdRunAction(command='ls -l')
  515. logger.info(action, extra={'msg_type': 'ACTION'})
  516. obs = await runtime.run_action(action)
  517. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  518. assert isinstance(obs, CmdOutputObservation)
  519. assert obs.exit_code == 0
  520. assert 'total 0' in obs.content
  521. action = CmdRunAction(command='mkdir test')
  522. logger.info(action, extra={'msg_type': 'ACTION'})
  523. obs = await runtime.run_action(action)
  524. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  525. assert isinstance(obs, CmdOutputObservation)
  526. assert obs.exit_code == 0
  527. action = CmdRunAction(command='ls -l')
  528. logger.info(action, extra={'msg_type': 'ACTION'})
  529. obs = await runtime.run_action(action)
  530. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  531. assert isinstance(obs, CmdOutputObservation)
  532. assert obs.exit_code == 0
  533. if run_as_devin:
  534. assert 'opendevin' in obs.content
  535. else:
  536. assert 'root' in obs.content
  537. assert 'test' in obs.content
  538. action = CmdRunAction(command='touch test/foo.txt')
  539. logger.info(action, extra={'msg_type': 'ACTION'})
  540. obs = await runtime.run_action(action)
  541. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  542. assert isinstance(obs, CmdOutputObservation)
  543. assert obs.exit_code == 0
  544. action = CmdRunAction(command='ls -l test')
  545. logger.info(action, extra={'msg_type': 'ACTION'})
  546. obs = await runtime.run_action(action)
  547. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  548. assert isinstance(obs, CmdOutputObservation)
  549. assert obs.exit_code == 0
  550. assert 'foo.txt' in obs.content
  551. # clean up: this is needed, since CI will not be
  552. # run as root, and this test may leave a file
  553. # owned by root
  554. action = CmdRunAction(command='rm -rf test')
  555. logger.info(action, extra={'msg_type': 'ACTION'})
  556. obs = await runtime.run_action(action)
  557. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  558. assert isinstance(obs, CmdOutputObservation)
  559. assert obs.exit_code == 0
  560. await runtime.close()
  561. await asyncio.sleep(1)
  562. @pytest.mark.asyncio
  563. async def test_run_as_user_correct_home_dir(temp_dir, box_class, run_as_devin):
  564. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  565. action = CmdRunAction(command='cd ~ && pwd')
  566. logger.info(action, extra={'msg_type': 'ACTION'})
  567. obs = await runtime.run_action(action)
  568. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  569. assert isinstance(obs, CmdOutputObservation)
  570. assert obs.exit_code == 0
  571. if run_as_devin:
  572. assert '/home/opendevin' in obs.content
  573. else:
  574. assert '/root' in obs.content
  575. await runtime.close()
  576. await asyncio.sleep(1)
  577. @pytest.mark.asyncio
  578. async def test_multi_cmd_run_in_single_line(temp_dir, box_class):
  579. runtime = await _load_runtime(temp_dir, box_class)
  580. action = CmdRunAction(command='pwd && ls -l')
  581. logger.info(action, extra={'msg_type': 'ACTION'})
  582. obs = await runtime.run_action(action)
  583. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  584. assert isinstance(obs, CmdOutputObservation)
  585. assert obs.exit_code == 0
  586. assert '/workspace' in obs.content
  587. assert 'total 0' in obs.content
  588. await runtime.close()
  589. await asyncio.sleep(1)
  590. @pytest.mark.asyncio
  591. async def test_stateful_cmd(temp_dir, box_class):
  592. runtime = await _load_runtime(temp_dir, box_class)
  593. action = CmdRunAction(command='mkdir test')
  594. logger.info(action, extra={'msg_type': 'ACTION'})
  595. obs = await runtime.run_action(action)
  596. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  597. assert isinstance(obs, CmdOutputObservation)
  598. assert obs.exit_code == 0, 'The exit code should be 0.'
  599. action = CmdRunAction(command='cd test')
  600. logger.info(action, extra={'msg_type': 'ACTION'})
  601. obs = await runtime.run_action(action)
  602. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  603. assert isinstance(obs, CmdOutputObservation)
  604. assert obs.exit_code == 0, 'The exit code should be 0.'
  605. action = CmdRunAction(command='pwd')
  606. logger.info(action, extra={'msg_type': 'ACTION'})
  607. obs = await runtime.run_action(action)
  608. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  609. assert isinstance(obs, CmdOutputObservation)
  610. assert obs.exit_code == 0, 'The exit code should be 0.'
  611. assert '/workspace/test' in obs.content
  612. await runtime.close()
  613. await asyncio.sleep(1)
  614. @pytest.mark.asyncio
  615. async def test_failed_cmd(temp_dir, box_class):
  616. runtime = await _load_runtime(temp_dir, box_class)
  617. action = CmdRunAction(command='non_existing_command')
  618. logger.info(action, extra={'msg_type': 'ACTION'})
  619. obs = await runtime.run_action(action)
  620. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  621. assert isinstance(obs, CmdOutputObservation)
  622. assert obs.exit_code != 0, 'The exit code should not be 0 for a failed command.'
  623. await runtime.close()
  624. await asyncio.sleep(1)
  625. @pytest.mark.asyncio
  626. async def test_ipython_multi_user(temp_dir, box_class, run_as_devin):
  627. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  628. # Test run ipython
  629. # get username
  630. test_code = "import os; print(os.environ['USER'])"
  631. action_ipython = IPythonRunCellAction(code=test_code)
  632. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  633. obs = await runtime.run_action(action_ipython)
  634. assert isinstance(obs, IPythonRunCellObservation)
  635. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  636. if run_as_devin:
  637. assert 'opendevin' in obs.content
  638. else:
  639. assert 'root' in obs.content
  640. # print pwd
  641. test_code = 'import os; print(os.getcwd())'
  642. action_ipython = IPythonRunCellAction(code=test_code)
  643. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  644. obs = await runtime.run_action(action_ipython)
  645. assert isinstance(obs, IPythonRunCellObservation)
  646. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  647. assert obs.content.strip() == '/workspace'
  648. # write a file
  649. test_code = "with open('test.txt', 'w') as f: f.write('Hello, world!')"
  650. action_ipython = IPythonRunCellAction(code=test_code)
  651. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  652. obs = await runtime.run_action(action_ipython)
  653. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  654. assert isinstance(obs, IPythonRunCellObservation)
  655. assert obs.content.strip() == '[Code executed successfully with no output]'
  656. # check file owner via bash
  657. action = CmdRunAction(command='ls -alh test.txt')
  658. logger.info(action, extra={'msg_type': 'ACTION'})
  659. obs = await runtime.run_action(action)
  660. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  661. assert obs.exit_code == 0
  662. if run_as_devin:
  663. # -rw-r--r-- 1 opendevin root 13 Jul 28 03:53 test.txt
  664. assert 'opendevin' in obs.content.split('\r\n')[0]
  665. assert 'root' in obs.content.split('\r\n')[0]
  666. else:
  667. # -rw-r--r-- 1 root root 13 Jul 28 03:53 test.txt
  668. assert 'root' in obs.content.split('\r\n')[0]
  669. # clean up
  670. action = CmdRunAction(command='rm -rf test')
  671. logger.info(action, extra={'msg_type': 'ACTION'})
  672. obs = await runtime.run_action(action)
  673. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  674. assert obs.exit_code == 0
  675. await runtime.close()
  676. await asyncio.sleep(1)
  677. @pytest.mark.asyncio
  678. async def test_ipython_simple(temp_dir, box_class):
  679. runtime = await _load_runtime(temp_dir, box_class)
  680. # Test run ipython
  681. # get username
  682. test_code = 'print(1)'
  683. action_ipython = IPythonRunCellAction(code=test_code)
  684. logger.info(action_ipython, extra={'msg_type': 'ACTION'})
  685. obs = await runtime.run_action(action_ipython)
  686. assert isinstance(obs, IPythonRunCellObservation)
  687. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  688. assert obs.content.strip() == '1'
  689. await runtime.close()
  690. await asyncio.sleep(1)
  691. async def _test_ipython_agentskills_fileop_pwd_impl(
  692. runtime: ServerRuntime | EventStreamRuntime, enable_auto_lint: bool
  693. ):
  694. # remove everything in /workspace
  695. action = CmdRunAction(command='rm -rf /workspace/*')
  696. logger.info(action, extra={'msg_type': 'ACTION'})
  697. obs = await runtime.run_action(action)
  698. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  699. assert obs.exit_code == 0
  700. action = CmdRunAction(command='mkdir test')
  701. logger.info(action, extra={'msg_type': 'ACTION'})
  702. obs = await runtime.run_action(action)
  703. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  704. assert isinstance(obs, CmdOutputObservation)
  705. assert obs.exit_code == 0
  706. action = IPythonRunCellAction(code="create_file('hello.py')")
  707. logger.info(action, extra={'msg_type': 'ACTION'})
  708. obs = await runtime.run_action(action)
  709. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  710. assert isinstance(obs, IPythonRunCellObservation)
  711. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  712. '[File: /workspace/hello.py (1 lines total)]\n'
  713. '(this is the beginning of the file)\n'
  714. '1|\n'
  715. '(this is the end of the file)\n'
  716. '[File hello.py created.]\n'
  717. ).strip().split('\n')
  718. action = CmdRunAction(command='cd test')
  719. logger.info(action, extra={'msg_type': 'ACTION'})
  720. obs = await runtime.run_action(action)
  721. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  722. assert isinstance(obs, CmdOutputObservation)
  723. assert obs.exit_code == 0
  724. # This should create a file in the current working directory
  725. # i.e., /workspace/test/hello.py instead of /workspace/hello.py
  726. action = IPythonRunCellAction(code="create_file('hello.py')")
  727. logger.info(action, extra={'msg_type': 'ACTION'})
  728. obs = await runtime.run_action(action)
  729. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  730. assert isinstance(obs, IPythonRunCellObservation)
  731. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  732. '[File: /workspace/test/hello.py (1 lines total)]\n'
  733. '(this is the beginning of the file)\n'
  734. '1|\n'
  735. '(this is the end of the file)\n'
  736. '[File hello.py created.]\n'
  737. ).strip().split('\n')
  738. if enable_auto_lint:
  739. # edit file, but make a mistake in indentation
  740. action = IPythonRunCellAction(
  741. code="insert_content_at_line('hello.py', 1, ' print(\"hello world\")')"
  742. )
  743. logger.info(action, extra={'msg_type': 'ACTION'})
  744. obs = await runtime.run_action(action)
  745. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  746. assert isinstance(obs, IPythonRunCellObservation)
  747. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  748. """
  749. [Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
  750. ERRORS:
  751. /workspace/test/hello.py:1:3: E999 IndentationError: unexpected indent
  752. [This is how your edit would have looked if applied]
  753. -------------------------------------------------
  754. (this is the beginning of the file)
  755. 1| print("hello world")
  756. (this is the end of the file)
  757. -------------------------------------------------
  758. [This is the original code before your edit]
  759. -------------------------------------------------
  760. (this is the beginning of the file)
  761. 1|
  762. (this is the end of the file)
  763. -------------------------------------------------
  764. Your changes have NOT been applied. Please fix your edit command and try again.
  765. You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
  766. DO NOT re-run the same failed edit command. Running it again will lead to the same error.
  767. """
  768. ).strip().split('\n')
  769. # edit file with correct indentation
  770. action = IPythonRunCellAction(
  771. code="insert_content_at_line('hello.py', 1, 'print(\"hello world\")')"
  772. )
  773. logger.info(action, extra={'msg_type': 'ACTION'})
  774. obs = await runtime.run_action(action)
  775. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  776. assert isinstance(obs, IPythonRunCellObservation)
  777. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  778. """
  779. [File: /workspace/test/hello.py (1 lines total after edit)]
  780. (this is the beginning of the file)
  781. 1|print("hello world")
  782. (this is the end of the file)
  783. [File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
  784. """
  785. ).strip().split('\n')
  786. action = CmdRunAction(command='rm -rf /workspace/*')
  787. logger.info(action, extra={'msg_type': 'ACTION'})
  788. obs = await runtime.run_action(action)
  789. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  790. assert obs.exit_code == 0
  791. await runtime.close()
  792. await asyncio.sleep(1)
  793. @pytest.mark.asyncio
  794. async def test_ipython_agentskills_fileop_pwd(temp_dir, box_class, enable_auto_lint):
  795. """Make sure that cd in bash also update the current working directory in ipython."""
  796. runtime = await _load_runtime(
  797. temp_dir, box_class, enable_auto_lint=enable_auto_lint
  798. )
  799. await _test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
  800. await runtime.close()
  801. await asyncio.sleep(1)
  802. @pytest.mark.asyncio
  803. async def test_ipython_agentskills_fileop_pwd_with_userdir(temp_dir, box_class):
  804. """Make sure that cd in bash also update the current working directory in ipython.
  805. Handle special case where the pwd is provided as "~", which should be expanded using os.path.expanduser
  806. on the client side.
  807. """
  808. runtime = await _load_runtime(
  809. temp_dir,
  810. box_class,
  811. run_as_devin=False,
  812. )
  813. action = CmdRunAction(command='cd ~')
  814. logger.info(action, extra={'msg_type': 'ACTION'})
  815. obs = await runtime.run_action(action)
  816. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  817. assert obs.exit_code == 0
  818. action = CmdRunAction(command='mkdir test && ls -la')
  819. logger.info(action, extra={'msg_type': 'ACTION'})
  820. obs = await runtime.run_action(action)
  821. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  822. assert isinstance(obs, CmdOutputObservation)
  823. assert obs.exit_code == 0
  824. action = IPythonRunCellAction(code="create_file('hello.py')")
  825. logger.info(action, extra={'msg_type': 'ACTION'})
  826. obs = await runtime.run_action(action)
  827. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  828. assert isinstance(obs, IPythonRunCellObservation)
  829. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  830. '[File: /root/hello.py (1 lines total)]\n'
  831. '(this is the beginning of the file)\n'
  832. '1|\n'
  833. '(this is the end of the file)\n'
  834. '[File hello.py created.]\n'
  835. ).strip().split('\n')
  836. action = CmdRunAction(command='cd test')
  837. logger.info(action, extra={'msg_type': 'ACTION'})
  838. obs = await runtime.run_action(action)
  839. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  840. assert isinstance(obs, CmdOutputObservation)
  841. assert obs.exit_code == 0
  842. # This should create a file in the current working directory
  843. # i.e., /workspace/test/hello.py instead of /workspace/hello.py
  844. action = IPythonRunCellAction(code="create_file('hello.py')")
  845. logger.info(action, extra={'msg_type': 'ACTION'})
  846. obs = await runtime.run_action(action)
  847. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  848. assert isinstance(obs, IPythonRunCellObservation)
  849. assert obs.content.replace('\r\n', '\n').strip().split('\n') == (
  850. '[File: /root/test/hello.py (1 lines total)]\n'
  851. '(this is the beginning of the file)\n'
  852. '1|\n'
  853. '(this is the end of the file)\n'
  854. '[File hello.py created.]\n'
  855. ).strip().split('\n')
  856. await runtime.close()
  857. await asyncio.sleep(1)
  858. @pytest.mark.skipif(
  859. TEST_RUNTIME.lower() == 'eventstream',
  860. reason='Skip this if we want to test EventStreamRuntime',
  861. )
  862. @pytest.mark.skipif(
  863. os.environ.get('TEST_IN_CI', 'false').lower() == 'true',
  864. # FIXME: There's some weird issue with the CI environment.
  865. reason='Skip this if in CI.',
  866. )
  867. @pytest.mark.asyncio
  868. async def test_ipython_agentskills_fileop_pwd_agnostic_sandbox(
  869. temp_dir, enable_auto_lint, container_image
  870. ):
  871. """Make sure that cd in bash also update the current working directory in ipython."""
  872. runtime = await _load_runtime(
  873. temp_dir,
  874. # NOTE: we only test for ServerRuntime, since EventStreamRuntime is image agnostic by design.
  875. ServerRuntime,
  876. enable_auto_lint=enable_auto_lint,
  877. container_image=container_image,
  878. )
  879. await _test_ipython_agentskills_fileop_pwd_impl(runtime, enable_auto_lint)
  880. await runtime.close()
  881. await asyncio.sleep(1)
  882. @pytest.mark.asyncio
  883. async def test_bash_python_version(temp_dir, box_class):
  884. """Make sure Python is available in bash."""
  885. runtime = await _load_runtime(temp_dir, box_class)
  886. action = CmdRunAction(command='which python')
  887. logger.info(action, extra={'msg_type': 'ACTION'})
  888. obs = await runtime.run_action(action)
  889. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  890. assert obs.exit_code == 0
  891. action = CmdRunAction(command='python --version')
  892. logger.info(action, extra={'msg_type': 'ACTION'})
  893. obs = await runtime.run_action(action)
  894. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  895. assert obs.exit_code == 0
  896. # Should not error out
  897. await runtime.close()
  898. await asyncio.sleep(1)
  899. @pytest.mark.asyncio
  900. async def test_ipython_package_install(temp_dir, box_class, run_as_devin):
  901. """Make sure that cd in bash also update the current working directory in ipython."""
  902. runtime = await _load_runtime(temp_dir, box_class, run_as_devin)
  903. # It should error out since pymsgbox is not installed
  904. action = IPythonRunCellAction(code='import pymsgbox')
  905. logger.info(action, extra={'msg_type': 'ACTION'})
  906. obs = await runtime.run_action(action)
  907. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  908. assert "ModuleNotFoundError: No module named 'pymsgbox'" in obs.content
  909. # Install pymsgbox in Jupyter
  910. action = IPythonRunCellAction(code='%pip install pymsgbox==1.0.9')
  911. logger.info(action, extra={'msg_type': 'ACTION'})
  912. obs = await runtime.run_action(action)
  913. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  914. assert (
  915. 'Successfully installed pymsgbox-1.0.9' in obs.content
  916. or '[Package installed successfully]' in obs.content
  917. )
  918. action = IPythonRunCellAction(code='import pymsgbox')
  919. logger.info(action, extra={'msg_type': 'ACTION'})
  920. obs = await runtime.run_action(action)
  921. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  922. # import should not error out
  923. assert obs.content.strip() == '[Code executed successfully with no output]'
  924. await runtime.close()
  925. await asyncio.sleep(1)
  926. def _create_test_file(host_temp_dir):
  927. # Single file
  928. with open(os.path.join(host_temp_dir, 'test_file.txt'), 'w') as f:
  929. f.write('Hello, World!')
  930. @pytest.mark.asyncio
  931. async def test_copy_single_file(temp_dir, box_class):
  932. runtime = await _load_runtime(temp_dir, box_class)
  933. with tempfile.TemporaryDirectory() as host_temp_dir:
  934. _create_test_file(host_temp_dir)
  935. await runtime.copy_to(
  936. os.path.join(host_temp_dir, 'test_file.txt'), '/workspace'
  937. )
  938. action = CmdRunAction(command='ls -alh /workspace')
  939. logger.info(action, extra={'msg_type': 'ACTION'})
  940. obs = await runtime.run_action(action)
  941. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  942. assert isinstance(obs, CmdOutputObservation)
  943. assert obs.exit_code == 0
  944. assert 'test_file.txt' in obs.content
  945. action = CmdRunAction(command='cat /workspace/test_file.txt')
  946. logger.info(action, extra={'msg_type': 'ACTION'})
  947. obs = await runtime.run_action(action)
  948. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  949. assert isinstance(obs, CmdOutputObservation)
  950. assert obs.exit_code == 0
  951. assert 'Hello, World!' in obs.content
  952. await runtime.close()
  953. await asyncio.sleep(1)
  954. def _create_test_dir_with_files(host_temp_dir):
  955. os.mkdir(os.path.join(host_temp_dir, 'test_dir'))
  956. with open(os.path.join(host_temp_dir, 'test_dir', 'file1.txt'), 'w') as f:
  957. f.write('File 1 content')
  958. with open(os.path.join(host_temp_dir, 'test_dir', 'file2.txt'), 'w') as f:
  959. f.write('File 2 content')
  960. @pytest.mark.asyncio
  961. async def test_copy_directory_recursively(temp_dir, box_class):
  962. runtime = await _load_runtime(temp_dir, box_class)
  963. with tempfile.TemporaryDirectory() as host_temp_dir:
  964. # We need a separate directory, since temp_dir is mounted to /workspace
  965. _create_test_dir_with_files(host_temp_dir)
  966. await runtime.copy_to(
  967. os.path.join(host_temp_dir, 'test_dir'), '/workspace', recursive=True
  968. )
  969. action = CmdRunAction(command='ls -alh /workspace')
  970. logger.info(action, extra={'msg_type': 'ACTION'})
  971. obs = await runtime.run_action(action)
  972. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  973. assert isinstance(obs, CmdOutputObservation)
  974. assert obs.exit_code == 0
  975. assert 'test_dir' in obs.content
  976. assert 'file1.txt' not in obs.content
  977. assert 'file2.txt' not in obs.content
  978. action = CmdRunAction(command='ls -alh /workspace/test_dir')
  979. logger.info(action, extra={'msg_type': 'ACTION'})
  980. obs = await runtime.run_action(action)
  981. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  982. assert isinstance(obs, CmdOutputObservation)
  983. assert obs.exit_code == 0
  984. assert 'file1.txt' in obs.content
  985. assert 'file2.txt' in obs.content
  986. action = CmdRunAction(command='cat /workspace/test_dir/file1.txt')
  987. logger.info(action, extra={'msg_type': 'ACTION'})
  988. obs = await runtime.run_action(action)
  989. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  990. assert isinstance(obs, CmdOutputObservation)
  991. assert obs.exit_code == 0
  992. assert 'File 1 content' in obs.content
  993. await runtime.close()
  994. await asyncio.sleep(1)
  995. @pytest.mark.asyncio
  996. async def test_copy_to_non_existent_directory(temp_dir, box_class):
  997. runtime = await _load_runtime(temp_dir, box_class)
  998. with tempfile.TemporaryDirectory() as host_temp_dir:
  999. _create_test_file(host_temp_dir)
  1000. await runtime.copy_to(
  1001. os.path.join(host_temp_dir, 'test_file.txt'), '/workspace/new_dir'
  1002. )
  1003. action = CmdRunAction(command='cat /workspace/new_dir/test_file.txt')
  1004. logger.info(action, extra={'msg_type': 'ACTION'})
  1005. obs = await runtime.run_action(action)
  1006. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1007. assert isinstance(obs, CmdOutputObservation)
  1008. assert obs.exit_code == 0
  1009. assert 'Hello, World!' in obs.content
  1010. await runtime.close()
  1011. await asyncio.sleep(1)
  1012. @pytest.mark.asyncio
  1013. async def test_overwrite_existing_file(temp_dir, box_class):
  1014. runtime = await _load_runtime(temp_dir, box_class)
  1015. # touch a file in /workspace
  1016. action = CmdRunAction(command='touch /workspace/test_file.txt')
  1017. logger.info(action, extra={'msg_type': 'ACTION'})
  1018. obs = await runtime.run_action(action)
  1019. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1020. assert isinstance(obs, CmdOutputObservation)
  1021. assert obs.exit_code == 0
  1022. action = CmdRunAction(command='cat /workspace/test_file.txt')
  1023. logger.info(action, extra={'msg_type': 'ACTION'})
  1024. obs = await runtime.run_action(action)
  1025. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1026. assert isinstance(obs, CmdOutputObservation)
  1027. assert obs.exit_code == 0
  1028. assert 'Hello, World!' not in obs.content
  1029. with tempfile.TemporaryDirectory() as host_temp_dir:
  1030. _create_test_file(host_temp_dir)
  1031. await runtime.copy_to(
  1032. os.path.join(host_temp_dir, 'test_file.txt'), '/workspace'
  1033. )
  1034. action = CmdRunAction(command='cat /workspace/test_file.txt')
  1035. logger.info(action, extra={'msg_type': 'ACTION'})
  1036. obs = await runtime.run_action(action)
  1037. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1038. assert isinstance(obs, CmdOutputObservation)
  1039. assert obs.exit_code == 0
  1040. assert 'Hello, World!' in obs.content
  1041. await runtime.close()
  1042. await asyncio.sleep(1)
  1043. @pytest.mark.asyncio
  1044. async def test_copy_non_existent_file(temp_dir, box_class):
  1045. runtime = await _load_runtime(temp_dir, box_class)
  1046. with pytest.raises(FileNotFoundError):
  1047. await runtime.copy_to(
  1048. os.path.join(temp_dir, 'non_existent_file.txt'),
  1049. '/workspace/should_not_exist.txt',
  1050. )
  1051. action = CmdRunAction(command='ls /workspace/should_not_exist.txt')
  1052. logger.info(action, extra={'msg_type': 'ACTION'})
  1053. obs = await runtime.run_action(action)
  1054. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1055. assert isinstance(obs, CmdOutputObservation)
  1056. assert obs.exit_code != 0 # File should not exist
  1057. await runtime.close()
  1058. await asyncio.sleep(1)
  1059. @pytest.mark.asyncio
  1060. async def test_keep_prompt(temp_dir):
  1061. # only EventStreamRuntime supports keep_prompt
  1062. runtime = await _load_runtime(
  1063. temp_dir, box_class=EventStreamRuntime, run_as_devin=False
  1064. )
  1065. action = CmdRunAction(command='touch /workspace/test_file.txt')
  1066. logger.info(action, extra={'msg_type': 'ACTION'})
  1067. obs = await runtime.run_action(action)
  1068. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1069. assert isinstance(obs, CmdOutputObservation)
  1070. assert obs.exit_code == 0
  1071. assert 'root@' in obs.content
  1072. action = CmdRunAction(command='cat /workspace/test_file.txt', keep_prompt=False)
  1073. logger.info(action, extra={'msg_type': 'ACTION'})
  1074. obs = await runtime.run_action(action)
  1075. logger.info(obs, extra={'msg_type': 'OBSERVATION'})
  1076. assert isinstance(obs, CmdOutputObservation)
  1077. assert obs.exit_code == 0
  1078. assert 'root@' not in obs.content
  1079. await runtime.close()
  1080. await asyncio.sleep(1)