test_sandbox.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358
  1. import os
  2. import pathlib
  3. import tempfile
  4. import pytest
  5. from opendevin.core.config import AppConfig, SandboxConfig
  6. from opendevin.runtime.docker.ssh_box import DockerSSHBox
  7. from opendevin.runtime.plugins import AgentSkillsRequirement, JupyterRequirement
  8. from opendevin.runtime.utils import split_bash_commands
  9. def create_docker_box_from_app_config(
  10. path: str, config: AppConfig = None
  11. ) -> DockerSSHBox:
  12. if config is None:
  13. config = AppConfig(
  14. sandbox=SandboxConfig(
  15. box_type='ssh',
  16. persist_sandbox=False,
  17. )
  18. )
  19. return DockerSSHBox(
  20. config=config.sandbox,
  21. persist_sandbox=config.persist_sandbox,
  22. workspace_mount_path=path,
  23. sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
  24. cache_dir=config.cache_dir,
  25. run_as_devin=True,
  26. ssh_hostname=config.ssh_hostname,
  27. ssh_password=config.ssh_password,
  28. ssh_port=config.ssh_port,
  29. )
  30. @pytest.fixture
  31. def temp_dir(monkeypatch):
  32. # get a temporary directory
  33. with tempfile.TemporaryDirectory() as temp_dir:
  34. pathlib.Path().mkdir(parents=True, exist_ok=True)
  35. yield temp_dir
  36. def test_split_commands():
  37. cmds = [
  38. 'ls -l',
  39. 'echo -e "hello\nworld"',
  40. """
  41. echo -e 'hello it\\'s me'
  42. """.strip(),
  43. """
  44. echo \\
  45. -e 'hello' \\
  46. -v
  47. """.strip(),
  48. """
  49. echo -e 'hello\\nworld\\nare\\nyou\\nthere?'
  50. """.strip(),
  51. """
  52. echo -e 'hello
  53. world
  54. are
  55. you\\n
  56. there?'
  57. """.strip(),
  58. """
  59. echo -e 'hello
  60. world "
  61. '
  62. """.strip(),
  63. """
  64. kubectl apply -f - <<EOF
  65. apiVersion: v1
  66. kind: Pod
  67. metadata:
  68. name: busybox-sleep
  69. spec:
  70. containers:
  71. - name: busybox
  72. image: busybox:1.28
  73. args:
  74. - sleep
  75. - "1000000"
  76. EOF
  77. """.strip(),
  78. ]
  79. joined_cmds = '\n'.join(cmds)
  80. split_cmds = split_bash_commands(joined_cmds)
  81. for s in split_cmds:
  82. print('\nCMD')
  83. print(s)
  84. cmds = [
  85. c.replace('\\\n', '') for c in cmds
  86. ] # The function strips escaped newlines, but this shouldn't matter
  87. assert (
  88. split_cmds == cmds
  89. ), 'The split commands should be the same as the input commands.'
  90. def test_ssh_box_run_as_devin(temp_dir):
  91. # get a temporary directory
  92. for box in [
  93. create_docker_box_from_app_config(temp_dir),
  94. ]: # FIXME: permission error on mkdir test for exec box
  95. exit_code, output = box.execute('ls -l')
  96. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  97. assert output.strip() == 'total 0'
  98. assert box.workspace_mount_path == temp_dir
  99. exit_code, output = box.execute('ls -l')
  100. assert exit_code == 0, 'The exit code should be 0.'
  101. assert output.strip() == 'total 0'
  102. exit_code, output = box.execute('mkdir test')
  103. assert exit_code == 0, 'The exit code should be 0.'
  104. assert output.strip() == ''
  105. exit_code, output = box.execute('ls -l')
  106. assert exit_code == 0, 'The exit code should be 0.'
  107. assert 'opendevin' in output, "The output should contain username 'opendevin'"
  108. assert 'test' in output, 'The output should contain the test directory'
  109. exit_code, output = box.execute('touch test/foo.txt')
  110. assert exit_code == 0, 'The exit code should be 0.'
  111. assert output.strip() == ''
  112. exit_code, output = box.execute('ls -l test')
  113. assert exit_code == 0, 'The exit code should be 0.'
  114. assert 'foo.txt' in output, 'The output should contain the foo.txt file'
  115. box.close()
  116. def test_ssh_box_multi_line_cmd_run_as_devin(temp_dir):
  117. box = create_docker_box_from_app_config(temp_dir)
  118. exit_code, output = box.execute('pwd && ls -l')
  119. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  120. expected_lines = ['/workspace', 'total 0']
  121. line_sep = '\r\n' if isinstance(box, DockerSSHBox) else '\n'
  122. assert output == line_sep.join(expected_lines), (
  123. 'The output should be the same as the input for ' + box.__class__.__name__
  124. )
  125. box.close()
  126. def test_ssh_box_stateful_cmd_run_as_devin(temp_dir):
  127. box = create_docker_box_from_app_config(temp_dir)
  128. exit_code, output = box.execute('mkdir test')
  129. assert exit_code == 0, 'The exit code should be 0.'
  130. assert output.strip() == ''
  131. exit_code, output = box.execute('cd test')
  132. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  133. assert output.strip() == '', (
  134. 'The output should be empty for ' + box.__class__.__name__
  135. )
  136. exit_code, output = box.execute('pwd')
  137. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  138. assert output.strip() == '/workspace/test', (
  139. 'The output should be /workspace for ' + box.__class__.__name__
  140. )
  141. box.close()
  142. def test_ssh_box_failed_cmd_run_as_devin(temp_dir):
  143. box = create_docker_box_from_app_config(temp_dir)
  144. exit_code, output = box.execute('non_existing_command')
  145. assert exit_code != 0, (
  146. 'The exit code should not be 0 for a failed command for '
  147. + box.__class__.__name__
  148. )
  149. box.close()
  150. def test_single_multiline_command(temp_dir):
  151. box = create_docker_box_from_app_config(temp_dir)
  152. exit_code, output = box.execute('echo \\\n -e "foo"')
  153. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  154. # FIXME: why is there a `>` in the output? Probably PS2?
  155. assert output == '> foo', (
  156. 'The output should be the same as the input for ' + box.__class__.__name__
  157. )
  158. box.close()
  159. def test_multiline_echo(temp_dir):
  160. box = create_docker_box_from_app_config(temp_dir)
  161. exit_code, output = box.execute('echo -e "hello\nworld"')
  162. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  163. # FIXME: why is there a `>` in the output?
  164. assert output == '> hello\r\nworld', (
  165. 'The output should be the same as the input for ' + box.__class__.__name__
  166. )
  167. box.close()
  168. def test_sandbox_whitespace(temp_dir):
  169. box = create_docker_box_from_app_config(temp_dir)
  170. exit_code, output = box.execute('echo -e "\\n\\n\\n"')
  171. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  172. assert output == '\r\n\r\n\r\n', (
  173. 'The output should be the same as the input for ' + box.__class__.__name__
  174. )
  175. box.close()
  176. def test_sandbox_jupyter_plugin(temp_dir):
  177. box = create_docker_box_from_app_config(temp_dir)
  178. box.init_plugins([JupyterRequirement])
  179. exit_code, output = box.execute('echo "print(1)" | execute_cli')
  180. print(output)
  181. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  182. assert output == '1\r\n', (
  183. 'The output should be the same as the input for ' + box.__class__.__name__
  184. )
  185. box.close()
  186. def _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config: AppConfig):
  187. box.init_plugins([AgentSkillsRequirement, JupyterRequirement])
  188. exit_code, output = box.execute('mkdir test')
  189. print(output)
  190. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  191. exit_code, output = box.execute('echo "create_file(\'hello.py\')" | execute_cli')
  192. print(output)
  193. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  194. assert output.strip().split('\r\n') == (
  195. '[File: /workspace/hello.py (1 lines total)]\r\n'
  196. '(this is the beginning of the file)\r\n'
  197. '1|\r\n'
  198. '(this is the end of the file)\r\n'
  199. '[File hello.py created.]\r\n'
  200. ).strip().split('\r\n')
  201. exit_code, output = box.execute('cd test')
  202. print(output)
  203. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  204. exit_code, output = box.execute('echo "create_file(\'hello.py\')" | execute_cli')
  205. print(output)
  206. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  207. assert output.strip().split('\r\n') == (
  208. '[File: /workspace/test/hello.py (1 lines total)]\r\n'
  209. '(this is the beginning of the file)\r\n'
  210. '1|\r\n'
  211. '(this is the end of the file)\r\n'
  212. '[File hello.py created.]\r\n'
  213. ).strip().split('\r\n')
  214. if config.sandbox.enable_auto_lint:
  215. # edit file, but make a mistake in indentation
  216. exit_code, output = box.execute(
  217. 'echo "insert_content_at_line(\'hello.py\', 1, \' print(\\"hello world\\")\')" | execute_cli'
  218. )
  219. print(output)
  220. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  221. assert output.strip().split('\r\n') == (
  222. """
  223. [Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]
  224. ERRORS:
  225. /workspace/test/hello.py:1:3: E999 IndentationError: unexpected indent
  226. [This is how your edit would have looked if applied]
  227. -------------------------------------------------
  228. (this is the beginning of the file)
  229. 1| print("hello world")
  230. (this is the end of the file)
  231. -------------------------------------------------
  232. [This is the original code before your edit]
  233. -------------------------------------------------
  234. (this is the beginning of the file)
  235. 1|
  236. (this is the end of the file)
  237. -------------------------------------------------
  238. Your changes have NOT been applied. Please fix your edit command and try again.
  239. You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.
  240. DO NOT re-run the same failed edit command. Running it again will lead to the same error.
  241. """
  242. ).strip().split('\n')
  243. # edit file with correct indentation
  244. exit_code, output = box.execute(
  245. 'echo "insert_content_at_line(\'hello.py\', 1, \'print(\\"hello world\\")\')" | execute_cli'
  246. )
  247. print(output)
  248. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  249. assert output.strip().split('\r\n') == (
  250. """
  251. [File: /workspace/test/hello.py (1 lines total after edit)]
  252. (this is the beginning of the file)
  253. 1|print("hello world")
  254. (this is the end of the file)
  255. [File updated (edited at line 1). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]
  256. """
  257. ).strip().split('\n')
  258. exit_code, output = box.execute('rm -rf /workspace/*')
  259. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  260. box.close()
  261. def test_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
  262. # get a temporary directory
  263. config = AppConfig(
  264. sandbox=SandboxConfig(
  265. box_type='ssh',
  266. persist_sandbox=False,
  267. enable_auto_lint=False,
  268. )
  269. )
  270. assert not config.sandbox.enable_auto_lint
  271. box = create_docker_box_from_app_config(temp_dir, config)
  272. _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config)
  273. @pytest.mark.skipif(
  274. os.getenv('TEST_IN_CI') != 'true',
  275. reason='The unittest need to download image, so only run on CI',
  276. )
  277. def test_agnostic_sandbox_jupyter_agentskills_fileop_pwd(temp_dir):
  278. for base_sandbox_image in ['ubuntu:22.04', 'debian:11']:
  279. config = AppConfig(
  280. sandbox=SandboxConfig(
  281. box_type='ssh',
  282. container_image=base_sandbox_image,
  283. persist_sandbox=False,
  284. enable_auto_lint=False,
  285. )
  286. )
  287. assert not config.sandbox.enable_auto_lint
  288. box = create_docker_box_from_app_config(temp_dir, config)
  289. _test_sandbox_jupyter_agentskills_fileop_pwd_impl(box, config)
  290. def test_sandbox_jupyter_plugin_backticks(temp_dir):
  291. config = AppConfig(
  292. sandbox=SandboxConfig(
  293. box_type='ssh',
  294. persist_sandbox=False,
  295. enable_auto_lint=False,
  296. )
  297. )
  298. box = create_docker_box_from_app_config(temp_dir, config)
  299. box.init_plugins([JupyterRequirement])
  300. test_code = "print('Hello, `World`!')"
  301. expected_write_command = (
  302. "cat > /tmp/opendevin_jupyter_temp.py <<'EOL'\n" f'{test_code}\n' 'EOL'
  303. )
  304. expected_execute_command = 'cat /tmp/opendevin_jupyter_temp.py | execute_cli'
  305. exit_code, output = box.execute(expected_write_command)
  306. exit_code, output = box.execute(expected_execute_command)
  307. print(output)
  308. assert exit_code == 0, 'The exit code should be 0 for ' + box.__class__.__name__
  309. assert output.strip() == 'Hello, `World`!', (
  310. 'The output should be the same as the input for ' + box.__class__.__name__
  311. )
  312. box.close()