test_agent.py 8.5 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233
  1. import asyncio
  2. import os
  3. import shutil
  4. import subprocess
  5. import pytest
  6. from opendevin.controller.agent import Agent
  7. from opendevin.controller.state.state import State
  8. from opendevin.core.config import parse_arguments
  9. from opendevin.core.main import run_agent_controller
  10. from opendevin.core.schema import AgentState
  11. from opendevin.events.action import (
  12. AgentFinishAction,
  13. AgentRejectAction,
  14. )
  15. from opendevin.llm.llm import LLM
  16. workspace_base = os.getenv('WORKSPACE_BASE')
  17. workspace_mount_path = os.getenv('WORKSPACE_MOUNT_PATH')
  18. workspace_mount_path_in_sandbox = os.getenv('WORKSPACE_MOUNT_PATH_IN_SANDBOX')
  19. print('\nPaths used:')
  20. print(f'workspace_base: {workspace_base}')
  21. print(f'workspace_mount_path: {workspace_mount_path}')
  22. print(f'workspace_mount_path_in_sandbox: {workspace_mount_path_in_sandbox}')
  23. @pytest.mark.skipif(
  24. os.getenv('AGENT') == 'BrowsingAgent',
  25. reason='BrowsingAgent is a specialized agent',
  26. )
  27. @pytest.mark.skipif(
  28. (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
  29. and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
  30. reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
  31. )
  32. @pytest.mark.skipif(
  33. os.getenv('AGENT') == 'ManagerAgent',
  34. reason='Manager agent is not capable of finishing this in reasonable steps yet',
  35. )
  36. def test_write_simple_script():
  37. task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
  38. args = parse_arguments()
  39. # Create the agent
  40. agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
  41. final_state: State | None = asyncio.run(
  42. run_agent_controller(agent, task, exit_on_message=True)
  43. )
  44. assert final_state.agent_state == AgentState.STOPPED
  45. assert final_state.last_error is None
  46. # Verify the script file exists
  47. script_path = os.path.join(workspace_base, 'hello.sh')
  48. assert os.path.exists(script_path), 'The file "hello.sh" does not exist'
  49. # Run the script and capture the output
  50. result = subprocess.run(['bash', script_path], capture_output=True, text=True)
  51. # Verify the output from the script
  52. assert (
  53. result.stdout.strip() == 'hello'
  54. ), f'Expected output "hello", but got "{result.stdout.strip()}"'
  55. @pytest.mark.skipif(
  56. os.getenv('AGENT') == 'BrowsingAgent',
  57. reason='BrowsingAgent is a specialized agent',
  58. )
  59. @pytest.mark.skipif(
  60. (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
  61. and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
  62. reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
  63. )
  64. @pytest.mark.skipif(
  65. os.getenv('AGENT') == 'MonologueAgent' or os.getenv('AGENT') == 'PlannerAgent',
  66. reason='We only keep basic tests for MonologueAgent and PlannerAgent',
  67. )
  68. @pytest.mark.skipif(
  69. os.getenv('SANDBOX_BOX_TYPE') == 'local',
  70. reason='local sandbox shows environment-dependent absolute path for pwd command',
  71. )
  72. def test_edits():
  73. args = parse_arguments()
  74. # Copy workspace artifacts to workspace_base location
  75. source_dir = os.path.join(os.path.dirname(__file__), 'workspace/test_edits/')
  76. files = os.listdir(source_dir)
  77. for file in files:
  78. dest_file = os.path.join(workspace_base, file)
  79. if os.path.exists(dest_file):
  80. os.remove(dest_file)
  81. shutil.copy(os.path.join(source_dir, file), dest_file)
  82. # Create the agent
  83. agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
  84. # Execute the task
  85. task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
  86. final_state: State | None = asyncio.run(
  87. run_agent_controller(agent, task, exit_on_message=True)
  88. )
  89. assert final_state.agent_state == AgentState.STOPPED
  90. assert final_state.last_error is None
  91. # Verify bad.txt has been fixed
  92. text = """This is a stupid typo.
  93. Really?
  94. No more typos!
  95. Enjoy!
  96. """
  97. with open(os.path.join(workspace_base, 'bad.txt'), 'r') as f:
  98. content = f.read()
  99. assert content.strip() == text.strip()
  100. @pytest.mark.skipif(
  101. os.getenv('AGENT') != 'CodeActAgent' and os.getenv('AGENT') != 'CodeActSWEAgent',
  102. reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
  103. )
  104. @pytest.mark.skipif(
  105. os.getenv('SANDBOX_BOX_TYPE') != 'ssh',
  106. reason='Currently, only ssh sandbox supports stateful tasks',
  107. )
  108. def test_ipython():
  109. args = parse_arguments()
  110. # Create the agent
  111. agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
  112. # Execute the task
  113. task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
  114. final_state: State | None = asyncio.run(
  115. run_agent_controller(agent, task, exit_on_message=True)
  116. )
  117. assert final_state.agent_state == AgentState.STOPPED
  118. assert final_state.last_error is None
  119. # Verify the file exists
  120. file_path = os.path.join(workspace_base, 'test.txt')
  121. assert os.path.exists(file_path), 'The file "test.txt" does not exist'
  122. # Verify the file contains the expected content
  123. with open(file_path, 'r') as f:
  124. content = f.read()
  125. assert (
  126. content.strip() == 'hello world'
  127. ), f'Expected content "hello world", but got "{content.strip()}"'
  128. @pytest.mark.skipif(
  129. os.getenv('AGENT') != 'ManagerAgent',
  130. reason='Currently, only ManagerAgent supports task rejection',
  131. )
  132. @pytest.mark.skipif(
  133. os.getenv('SANDBOX_BOX_TYPE') == 'local',
  134. reason='FIXME: local sandbox does not capture stderr',
  135. )
  136. def test_simple_task_rejection():
  137. args = parse_arguments()
  138. # Create the agent
  139. agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
  140. # Give an impossible task to do: cannot write a commit message because
  141. # the workspace is not a git repo
  142. task = 'Write a git commit message for the current staging area. Do not ask me for confirmation at any point.'
  143. final_state: State | None = asyncio.run(run_agent_controller(agent, task))
  144. assert final_state.agent_state == AgentState.STOPPED
  145. assert final_state.last_error is None
  146. assert isinstance(final_state.history[-1][0], AgentRejectAction)
  147. @pytest.mark.skipif(
  148. os.getenv('AGENT') != 'CodeActAgent' and os.getenv('AGENT') != 'CodeActSWEAgent',
  149. reason='currently only CodeActAgent and CodeActSWEAgent have IPython (Jupyter) execution by default',
  150. )
  151. @pytest.mark.skipif(
  152. os.getenv('SANDBOX_BOX_TYPE') != 'ssh',
  153. reason='Currently, only ssh sandbox supports stateful tasks',
  154. )
  155. def test_ipython_module():
  156. args = parse_arguments()
  157. # Create the agent
  158. agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
  159. # Execute the task
  160. task = "Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point."
  161. final_state: State | None = asyncio.run(
  162. run_agent_controller(agent, task, exit_on_message=True)
  163. )
  164. assert final_state.agent_state == AgentState.STOPPED
  165. assert final_state.last_error is None
  166. # Verify the file exists
  167. file_path = os.path.join(workspace_base, 'test.txt')
  168. assert os.path.exists(file_path), 'The file "test.txt" does not exist'
  169. # Verify the file contains the expected content
  170. with open(file_path, 'r') as f:
  171. content = f.read()
  172. print(content)
  173. assert (
  174. content.strip().split(' ')[-1] == '1.0.9'
  175. ), f'Expected content "1.0.9", but got "{content.strip()}"'
  176. @pytest.mark.skipif(
  177. os.getenv('AGENT') != 'BrowsingAgent' and os.getenv('AGENT') != 'CodeActAgent',
  178. reason='currently only BrowsingAgent and CodeActAgent are capable of searching the internet',
  179. )
  180. @pytest.mark.skipif(
  181. (os.getenv('AGENT') == 'CodeActAgent' or os.getenv('AGENT') == 'CodeActSWEAgent')
  182. and os.getenv('SANDBOX_BOX_TYPE', '').lower() != 'ssh',
  183. reason='CodeActAgent/CodeActSWEAgent only supports ssh sandbox which is stateful',
  184. )
  185. def test_browse_internet(http_server):
  186. args = parse_arguments()
  187. # Create the agent
  188. agent = Agent.get_cls(args.agent_cls)(llm=LLM(args.model_name))
  189. # Execute the task
  190. task = 'Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.'
  191. final_state: State | None = asyncio.run(
  192. run_agent_controller(agent, task, exit_on_message=True)
  193. )
  194. assert final_state.agent_state == AgentState.STOPPED
  195. assert final_state.last_error is None
  196. assert isinstance(final_state.history[-1][0], AgentFinishAction)
  197. assert 'OpenDevin is all you need!' in str(final_state.history)