test_agent.py 9.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260
  1. import asyncio
  2. import os
  3. import shutil
  4. import subprocess
  5. import pytest
  6. from opendevin.controller.agent import Agent
  7. from opendevin.controller.state.state import State
  8. from opendevin.core.config import parse_arguments
  9. from opendevin.core.main import run_agent_controller
  10. from opendevin.core.schema import AgentState
  11. from opendevin.events.action import (
  12. AgentFinishAction,
  13. AgentRejectAction,
  14. )
  15. from opendevin.events.observation.browse import BrowserOutputObservation
  16. from opendevin.events.observation.delegate import AgentDelegateObservation
  17. from opendevin.llm.llm import LLM
  18. workspace_base = os.getenv('WORKSPACE_BASE')
  19. workspace_mount_path = os.getenv('WORKSPACE_MOUNT_PATH')
  20. workspace_mount_path_in_sandbox = os.getenv('WORKSPACE_MOUNT_PATH_IN_SANDBOX')
  21. print('\nPaths used:')
  22. print(f'workspace_base: {workspace_base}')
  23. print(f'workspace_mount_path: {workspace_mount_path}')
  24. print(f'workspace_mount_path_in_sandbox: {workspace_mount_path_in_sandbox}')
  25. @pytest.mark.skipif(
  26. os.getenv('DEFAULT_AGENT') == 'BrowsingAgent',
  27. reason='BrowsingAgent is a specialized agent',
  28. )
  29. @pytest.mark.skipif(
  30. (
  31. os.getenv('DEFAULT_AGENT') == 'CodeActAgent'
  32. or os.getenv('DEFAULT_AGENT') == 'CodeActSWEAgent'
  33. )
  34. and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
  35. reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
  36. )
  37. @pytest.mark.skipif(
  38. os.getenv('DEFAULT_AGENT') == 'ManagerAgent',
  39. reason='Manager agent is not capable of finishing this in reasonable steps yet',
  40. )
  41. def test_write_simple_script():
  42. task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
  43. args = parse_arguments()
  44. # Create the agent
  45. agent = Agent.get_cls(args.agent_cls)(llm=LLM())
  46. final_state: State | None = asyncio.run(
  47. run_agent_controller(agent, task, exit_on_message=True)
  48. )
  49. assert final_state.agent_state == AgentState.STOPPED
  50. assert final_state.last_error is None
  51. # Verify the script file exists
  52. script_path = os.path.join(workspace_base, 'hello.sh')
  53. assert os.path.exists(script_path), 'The file "hello.sh" does not exist'
  54. # Run the script and capture the output
  55. result = subprocess.run(['bash', script_path], capture_output=True, text=True)
  56. # Verify the output from the script
  57. assert (
  58. result.stdout.strip() == 'hello'
  59. ), f'Expected output "hello", but got "{result.stdout.strip()}"'
  60. @pytest.mark.skipif(
  61. os.getenv('DEFAULT_AGENT') == 'BrowsingAgent',
  62. reason='BrowsingAgent is a specialized agent',
  63. )
  64. @pytest.mark.skipif(
  65. (
  66. os.getenv('DEFAULT_AGENT') == 'CodeActAgent'
  67. or os.getenv('DEFAULT_AGENT') == 'CodeActSWEAgent'
  68. )
  69. and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
  70. reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
  71. )
  72. @pytest.mark.skipif(
  73. os.getenv('DEFAULT_AGENT') == 'MonologueAgent'
  74. or os.getenv('DEFAULT_AGENT') == 'PlannerAgent',
  75. reason='We only keep basic tests for MonologueAgent and PlannerAgent',
  76. )
  77. @pytest.mark.skipif(
  78. os.getenv('SANDBOX_BOX_TYPE') == 'local',
  79. reason='local sandbox shows environment-dependent absolute path for pwd command',
  80. )
  81. def test_edits():
  82. args = parse_arguments()
  83. # Copy workspace artifacts to workspace_base location
  84. source_dir = os.path.join(os.path.dirname(__file__), 'workspace/test_edits/')
  85. files = os.listdir(source_dir)
  86. for file in files:
  87. dest_file = os.path.join(workspace_base, file)
  88. if os.path.exists(dest_file):
  89. os.remove(dest_file)
  90. shutil.copy(os.path.join(source_dir, file), dest_file)
  91. # Create the agent
  92. agent = Agent.get_cls(args.agent_cls)(llm=LLM())
  93. # Execute the task
  94. task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
  95. final_state: State | None = asyncio.run(
  96. run_agent_controller(agent, task, exit_on_message=True)
  97. )
  98. assert final_state.agent_state == AgentState.STOPPED
  99. assert final_state.last_error is None
  100. # Verify bad.txt has been fixed
  101. text = """This is a stupid typo.
  102. Really?
  103. No more typos!
  104. Enjoy!
  105. """
  106. with open(os.path.join(workspace_base, 'bad.txt'), 'r') as f:
  107. content = f.read()
  108. assert content.strip() == text.strip()
  109. @pytest.mark.skipif(
  110. os.getenv('DEFAULT_AGENT') != 'CodeActAgent'
  111. and os.getenv('DEFAULT_AGENT') != 'CodeActSWEAgent',
  112. reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
  113. )
  114. @pytest.mark.skipif(
  115. os.getenv('SANDBOX_BOX_TYPE') != 'ssh',
  116. reason='Currently, only ssh sandbox supports stateful tasks',
  117. )
  118. def test_ipython():
  119. args = parse_arguments()
  120. # Create the agent
  121. agent = Agent.get_cls(args.agent_cls)(llm=LLM())
  122. # Execute the task
  123. task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
  124. final_state: State | None = asyncio.run(
  125. run_agent_controller(agent, task, exit_on_message=True)
  126. )
  127. assert final_state.agent_state == AgentState.STOPPED
  128. assert final_state.last_error is None
  129. # Verify the file exists
  130. file_path = os.path.join(workspace_base, 'test.txt')
  131. assert os.path.exists(file_path), 'The file "test.txt" does not exist'
  132. # Verify the file contains the expected content
  133. with open(file_path, 'r') as f:
  134. content = f.read()
  135. assert (
  136. content.strip() == 'hello world'
  137. ), f'Expected content "hello world", but got "{content.strip()}"'
  138. @pytest.mark.skipif(
  139. os.getenv('DEFAULT_AGENT') != 'ManagerAgent',
  140. reason='Currently, only ManagerAgent supports task rejection',
  141. )
  142. @pytest.mark.skipif(
  143. os.getenv('SANDBOX_BOX_TYPE') == 'local',
  144. reason='FIXME: local sandbox does not capture stderr',
  145. )
  146. def test_simple_task_rejection():
  147. args = parse_arguments()
  148. # Create the agent
  149. agent = Agent.get_cls(args.agent_cls)(llm=LLM())
  150. # Give an impossible task to do: cannot write a commit message because
  151. # the workspace is not a git repo
  152. task = 'Write a git commit message for the current staging area. Do not ask me for confirmation at any point.'
  153. final_state: State | None = asyncio.run(run_agent_controller(agent, task))
  154. assert final_state.agent_state == AgentState.STOPPED
  155. assert final_state.last_error is None
  156. assert isinstance(final_state.history.get_last_action(), AgentRejectAction)
  157. @pytest.mark.skipif(
  158. os.getenv('DEFAULT_AGENT') != 'CodeActAgent'
  159. and os.getenv('DEFAULT_AGENT') != 'CodeActSWEAgent',
  160. reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
  161. )
  162. @pytest.mark.skipif(
  163. os.getenv('SANDBOX_BOX_TYPE') != 'ssh',
  164. reason='Currently, only ssh sandbox supports stateful tasks',
  165. )
  166. def test_ipython_module():
  167. args = parse_arguments()
  168. # Create the agent
  169. agent = Agent.get_cls(args.agent_cls)(llm=LLM())
  170. # Execute the task
  171. task = "Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point."
  172. final_state: State | None = asyncio.run(
  173. run_agent_controller(agent, task, exit_on_message=True)
  174. )
  175. assert final_state.agent_state == AgentState.STOPPED
  176. assert final_state.last_error is None
  177. # Verify the file exists
  178. file_path = os.path.join(workspace_base, 'test.txt')
  179. assert os.path.exists(file_path), 'The file "test.txt" does not exist'
  180. # Verify the file contains the expected content
  181. with open(file_path, 'r') as f:
  182. content = f.read()
  183. print(content)
  184. assert (
  185. content.strip().split(' ')[-1] == '1.0.9'
  186. ), f'Expected content "1.0.9", but got "{content.strip()}"'
  187. @pytest.mark.skipif(
  188. os.getenv('DEFAULT_AGENT') != 'BrowsingAgent'
  189. and os.getenv('DEFAULT_AGENT') != 'CodeActAgent',
  190. reason='currently only BrowsingAgent and CodeActAgent are capable of searching the internet',
  191. )
  192. @pytest.mark.skipif(
  193. (
  194. os.getenv('DEFAULT_AGENT') == 'CodeActAgent'
  195. or os.getenv('DEFAULT_AGENT') == 'CodeActSWEAgent'
  196. )
  197. and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
  198. reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
  199. )
  200. def test_browse_internet(http_server):
  201. args = parse_arguments()
  202. # Create the agent
  203. agent = Agent.get_cls(args.agent_cls)(llm=LLM())
  204. # Execute the task
  205. task = 'Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.'
  206. final_state: State | None = asyncio.run(
  207. run_agent_controller(agent, task, exit_on_message=True)
  208. )
  209. assert final_state.agent_state == AgentState.STOPPED
  210. assert final_state.last_error is None
  211. # last action
  212. last_action = final_state.history.get_last_action()
  213. assert isinstance(last_action, AgentFinishAction)
  214. # last observation
  215. last_observation = final_state.history.get_last_observation()
  216. assert isinstance(
  217. last_observation, (BrowserOutputObservation, AgentDelegateObservation)
  218. )
  219. if isinstance(last_observation, BrowserOutputObservation):
  220. assert 'OpenDevin is all you need!' in last_observation.content
  221. elif isinstance(last_observation, AgentDelegateObservation):
  222. assert 'OpenDevin is all you need!' in last_observation.outputs['content']