test_agent.py 5.6 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152
  1. import asyncio
  2. import os
  3. import shutil
  4. import subprocess
  5. import pytest
  6. from opendevin.controller.state.state import State
  7. from opendevin.core.main import main
  8. from opendevin.core.schema import AgentState
  9. from opendevin.events.action import (
  10. AgentFinishAction,
  11. )
  12. workspace_base = os.getenv('WORKSPACE_BASE')
  13. @pytest.mark.skipif(
  14. os.getenv('AGENT') == 'BrowsingAgent',
  15. reason='BrowsingAgent is a specialized agent',
  16. )
  17. @pytest.mark.skipif(
  18. os.getenv('AGENT') == 'CodeActAgent' and os.getenv('SANDBOX_TYPE').lower() != 'ssh',
  19. reason='CodeActAgent only supports ssh sandbox which is stateful',
  20. )
  21. def test_write_simple_script():
  22. task = "Write a shell script 'hello.sh' that prints 'hello'. Do not ask me for confirmation at any point."
  23. final_state: State = asyncio.run(main(task, exit_on_message=True))
  24. assert final_state.agent_state == AgentState.STOPPED
  25. # Verify the script file exists
  26. script_path = os.path.join(workspace_base, 'hello.sh')
  27. assert os.path.exists(script_path), 'The file "hello.sh" does not exist'
  28. # Run the script and capture the output
  29. result = subprocess.run(['bash', script_path], capture_output=True, text=True)
  30. # Verify the output from the script
  31. assert (
  32. result.stdout.strip() == 'hello'
  33. ), f'Expected output "hello", but got "{result.stdout.strip()}"'
  34. @pytest.mark.skipif(
  35. os.getenv('AGENT') == 'BrowsingAgent',
  36. reason='BrowsingAgent is a specialized agent',
  37. )
  38. @pytest.mark.skipif(
  39. os.getenv('AGENT') == 'CodeActAgent' and os.getenv('SANDBOX_TYPE').lower() != 'ssh',
  40. reason='CodeActAgent only supports ssh sandbox which is stateful',
  41. )
  42. @pytest.mark.skipif(
  43. os.getenv('AGENT') == 'MonologueAgent' or os.getenv('AGENT') == 'PlannerAgent',
  44. reason='We only keep basic tests for MonologueAgent and PlannerAgent',
  45. )
  46. @pytest.mark.skipif(
  47. os.getenv('SANDBOX_TYPE') == 'local',
  48. reason='local sandbox shows environment-dependent absolute path for pwd command',
  49. )
  50. def test_edits():
  51. # Move workspace artifacts to workspace_base location
  52. source_dir = os.path.join(os.path.dirname(__file__), 'workspace/test_edits/')
  53. files = os.listdir(source_dir)
  54. for file in files:
  55. dest_file = os.path.join(workspace_base, file)
  56. if os.path.exists(dest_file):
  57. os.remove(dest_file)
  58. shutil.copy(os.path.join(source_dir, file), dest_file)
  59. # Execute the task
  60. task = 'Fix typos in bad.txt. Do not ask me for confirmation at any point.'
  61. final_state: State = asyncio.run(main(task, exit_on_message=True))
  62. assert final_state.agent_state == AgentState.STOPPED
  63. # Verify bad.txt has been fixed
  64. text = """This is a stupid typo.
  65. Really?
  66. No more typos!
  67. Enjoy!
  68. """
  69. with open(os.path.join(workspace_base, 'bad.txt'), 'r') as f:
  70. content = f.read()
  71. assert content.strip() == text.strip()
  72. @pytest.mark.skipif(
  73. os.getenv('AGENT') != 'CodeActAgent',
  74. reason='currently only CodeActAgent defaults to have IPython (Jupyter) execution',
  75. )
  76. @pytest.mark.skipif(
  77. os.getenv('SANDBOX_TYPE') != 'ssh',
  78. reason='Currently, only ssh sandbox supports stateful tasks',
  79. )
  80. def test_ipython():
  81. # Execute the task
  82. task = "Use Jupyter IPython to write a text file containing 'hello world' to '/workspace/test.txt'. Do not ask me for confirmation at any point."
  83. final_state: State = asyncio.run(main(task, exit_on_message=True))
  84. assert final_state.agent_state == AgentState.STOPPED
  85. # Verify the file exists
  86. file_path = os.path.join(workspace_base, 'test.txt')
  87. assert os.path.exists(file_path), 'The file "test.txt" does not exist'
  88. # Verify the file contains the expected content
  89. with open(file_path, 'r') as f:
  90. content = f.read()
  91. assert (
  92. content.strip() == 'hello world'
  93. ), f'Expected content "hello world", but got "{content.strip()}"'
  94. @pytest.mark.skipif(
  95. os.getenv('AGENT') != 'CodeActAgent',
  96. reason='currently only CodeActAgent defaults to have IPython (Jupyter) execution',
  97. )
  98. @pytest.mark.skipif(
  99. os.getenv('SANDBOX_TYPE') != 'ssh',
  100. reason='Currently, only ssh sandbox supports stateful tasks',
  101. )
  102. def test_ipython_module():
  103. # Execute the task
  104. task = "Install and import pymsgbox==1.0.9 and print it's version in /workspace/test.txt. Do not ask me for confirmation at any point."
  105. final_state: State = asyncio.run(main(task, exit_on_message=True))
  106. assert final_state.agent_state == AgentState.STOPPED
  107. # Verify the file exists
  108. file_path = os.path.join(workspace_base, 'test.txt')
  109. assert os.path.exists(file_path), 'The file "test.txt" does not exist'
  110. # Verify the file contains the expected content
  111. with open(file_path, 'r') as f:
  112. content = f.read()
  113. assert (
  114. content.strip() == '1.0.9'
  115. ), f'Expected content "1.0.9", but got "{content.strip()}"'
  116. @pytest.mark.skipif(
  117. os.getenv('AGENT') != 'BrowsingAgent' and os.getenv('AGENT') != 'CodeActAgent',
  118. reason='currently only BrowsingAgent and CodeActAgent are capable of searching the internet',
  119. )
  120. @pytest.mark.skipif(
  121. os.getenv('AGENT') == 'CodeActAgent' and os.getenv('SANDBOX_TYPE').lower() != 'ssh',
  122. reason='CodeActAgent only supports ssh sandbox which is stateful',
  123. )
  124. def test_browse_internet(http_server):
  125. # Execute the task
  126. task = 'Browse localhost:8000, and tell me the ultimate answer to life. Do not ask me for confirmation at any point.'
  127. final_state: State = asyncio.run(main(task, exit_on_message=True))
  128. assert final_state.agent_state == AgentState.STOPPED
  129. assert isinstance(final_state.history[-1][0], AgentFinishAction)
  130. assert 'OpenDevin is all you need!' in str(final_state.history)