test_agent.py 7.0 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186
  1. import asyncio
  2. import os
  3. import shutil
  4. import subprocess
  5. import pytest
  6. from opendevin.controller.state.state import State
  7. from opendevin.core.config import AppConfig, load_from_toml
  8. from opendevin.core.main import main
  9. from opendevin.core.schema import AgentState
  10. from opendevin.events.action import (
  11. AgentFinishAction,
  12. AgentRejectAction,
  13. )
  14. workspace_base = os.getenv('WORKSPACE_BASE')
  15. # make sure we're testing in the same folder of an existing config.toml
  16. if os.path.exists('config.toml'):
  17. config = AppConfig()
  18. load_from_toml(config, 'config.toml')
  19. if config and config.workspace_base and config.workspace_base != workspace_base:
  20. if os.path.exists(config.workspace_base) and os.access(
  21. config.workspace_base, os.W_OK
  22. ):
  23. print(f'Setting workspace_base to {config.workspace_base}')
  24. workspace_base = config.workspace_base
  25. @pytest.mark.skipif(
  26. os.getenv('AGENT') == 'BrowsingAgent',
  27. reason='BrowsingAgent is a specialized agent',
  28. )
  29. @pytest.mark.skipif(
  30. os.getenv('AGENT') == 'CodeActAgent' and os.getenv('SANDBOX_TYPE').lower() != 'ssh',
  31. reason='CodeActAgent only supports ssh sandbox which is stateful',
  32. )
  33. @pytest.mark.skipif(
  34. os.getenv('AGENT') == 'ManagerAgent',
  35. reason='Manager agent is not capable of finishing this in reasonable steps yet',
  36. )
  37. def test_write_simple_script():
  38. task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
  39. final_state: State = asyncio.run(main(task, exit_on_message=True))
  40. assert final_state.agent_state == AgentState.STOPPED
  41. # Verify the script file exists
  42. script_path = os.path.join(workspace_base, 'hello.sh')
  43. assert os.path.exists(script_path), 'The file "hello.sh" does not exist'
  44. # Run the script and capture the output
  45. result = subprocess.run(['bash', script_path], capture_output=True, text=True)
  46. # Verify the output from the script
  47. assert (
  48. result.stdout.strip() == 'hello'
  49. ), f'Expected output "hello", but got "{result.stdout.strip()}"'
  50. @pytest.mark.skipif(
  51. os.getenv('AGENT') == 'BrowsingAgent',
  52. reason='BrowsingAgent is a specialized agent',
  53. )
  54. @pytest.mark.skipif(
  55. os.getenv('AGENT') == 'CodeActAgent' and os.getenv('SANDBOX_TYPE').lower() != 'ssh',
  56. reason='CodeActAgent only supports ssh sandbox which is stateful',
  57. )
  58. @pytest.mark.skipif(
  59. os.getenv('AGENT') == 'MonologueAgent' or os.getenv('AGENT') == 'PlannerAgent',
  60. reason='We only keep basic tests for MonologueAgent and PlannerAgent',
  61. )
  62. @pytest.mark.skipif(
  63. os.getenv('SANDBOX_TYPE') == 'local',
  64. reason='local sandbox shows environment-dependent absolute path for pwd command',
  65. )
  66. def test_edits():
  67. # Copy workspace artifacts to workspace_base location
  68. source_dir = os.path.join(os.path.dirname(__file__), 'workspace/test_edits/')
  69. files = os.listdir(source_dir)
  70. for file in files:
  71. dest_file = os.path.join(workspace_base, file)
  72. if os.path.exists(dest_file):
  73. os.remove(dest_file)
  74. shutil.copy(os.path.join(source_dir, file), dest_file)
  75. # Execute the task
  76. task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
  77. final_state: State = asyncio.run(main(task, exit_on_message=True))
  78. assert final_state.agent_state == AgentState.STOPPED
  79. # Verify bad.txt has been fixed
  80. text = """This is a stupid typo.
  81. Really?
  82. No more typos!
  83. Enjoy!
  84. """
  85. with open(os.path.join(workspace_base, 'bad.txt'), 'r') as f:
  86. content = f.read()
  87. assert content.strip() == text.strip()
  88. @pytest.mark.skipif(
  89. os.getenv('AGENT') != 'CodeActAgent',
  90. reason='currently only CodeActAgent defaults to have IPython (Jupyter) execution',
  91. )
  92. @pytest.mark.skipif(
  93. os.getenv('SANDBOX_TYPE') != 'ssh',
  94. reason='Currently, only ssh sandbox supports stateful tasks',
  95. )
  96. def test_ipython():
  97. # Execute the task
  98. task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
  99. final_state: State = asyncio.run(main(task, exit_on_message=True))
  100. assert final_state.agent_state == AgentState.STOPPED
  101. # Verify the file exists
  102. file_path = os.path.join(workspace_base, 'test.txt')
  103. assert os.path.exists(file_path), 'The file "test.txt" does not exist'
  104. # Verify the file contains the expected content
  105. with open(file_path, 'r') as f:
  106. content = f.read()
  107. assert (
  108. content.strip() == 'hello world'
  109. ), f'Expected content "hello world", but got "{content.strip()}"'
  110. @pytest.mark.skipif(
  111. os.getenv('AGENT') != 'ManagerAgent',
  112. reason='Currently, only ManagerAgent supports task rejection',
  113. )
  114. @pytest.mark.skipif(
  115. os.getenv('SANDBOX_TYPE') == 'local',
  116. reason='FIXME: local sandbox does not capture stderr',
  117. )
  118. def test_simple_task_rejection():
  119. # Give an impossible task to do: cannot write a commit message because
  120. # the workspace is not a git repo
  121. task = 'Write a git commit message for the current staging area. Do not ask me for confirmation at any point.'
  122. final_state: State = asyncio.run(main(task))
  123. assert final_state.agent_state == AgentState.STOPPED
  124. assert isinstance(final_state.history[-1][0], AgentRejectAction)
  125. @pytest.mark.skipif(
  126. os.getenv('AGENT') != 'CodeActAgent',
  127. reason='currently only CodeActAgent defaults to have IPython (Jupyter) execution',
  128. )
  129. @pytest.mark.skipif(
  130. os.getenv('SANDBOX_TYPE') != 'ssh',
  131. reason='Currently, only ssh sandbox supports stateful tasks',
  132. )
  133. def test_ipython_module():
  134. # Execute the task
  135. task = "Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point."
  136. final_state: State = asyncio.run(main(task, exit_on_message=True))
  137. assert final_state.agent_state == AgentState.STOPPED
  138. # Verify the file exists
  139. file_path = os.path.join(workspace_base, 'test.txt')
  140. assert os.path.exists(file_path), 'The file "test.txt" does not exist'
  141. # Verify the file contains the expected content
  142. with open(file_path, 'r') as f:
  143. content = f.read()
  144. assert (
  145. content.strip() == '1.0.9'
  146. ), f'Expected content "1.0.9", but got "{content.strip()}"'
  147. @pytest.mark.skipif(
  148. os.getenv('AGENT') != 'BrowsingAgent' and os.getenv('AGENT') != 'CodeActAgent',
  149. reason='currently only BrowsingAgent and CodeActAgent are capable of searching the internet',
  150. )
  151. @pytest.mark.skipif(
  152. os.getenv('AGENT') == 'CodeActAgent' and os.getenv('SANDBOX_TYPE').lower() != 'ssh',
  153. reason='CodeActAgent only supports ssh sandbox which is stateful',
  154. )
  155. def test_browse_internet(http_server):
  156. # Execute the task
  157. task = 'Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.'
  158. final_state: State = asyncio.run(main(task, exit_on_message=True))
  159. assert final_state.agent_state == AgentState.STOPPED
  160. assert isinstance(final_state.history[-1][0], AgentFinishAction)
  161. assert 'OpenDevin is all you need!' in str(final_state.history)