test_is_stuck.py 8.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224
  1. from unittest.mock import Mock, patch
  2. import pytest
  3. from opendevin.controller.agent_controller import AgentController
  4. from opendevin.events.action import CmdRunAction, FileReadAction, MessageAction
  5. from opendevin.events.observation import (
  6. CmdOutputObservation,
  7. FileReadObservation,
  8. Observation,
  9. )
  10. from opendevin.events.observation.empty import NullObservation
  11. from opendevin.events.observation.error import ErrorObservation
  12. from opendevin.events.stream import EventSource
  13. class TestAgentController:
  14. @pytest.fixture
  15. def controller(self):
  16. controller = Mock(spec=AgentController)
  17. controller._is_stuck = AgentController._is_stuck.__get__(
  18. controller, AgentController
  19. )
  20. controller.delegate = None
  21. controller.state = Mock()
  22. controller.state.history = []
  23. return controller
  24. def test_history_too_short(self, controller):
  25. controller.state.history = [
  26. (
  27. MessageAction(content='Hello', wait_for_response=False),
  28. Observation(content='Response 1'),
  29. ),
  30. (
  31. CmdRunAction(command='ls'),
  32. CmdOutputObservation(
  33. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  34. ),
  35. ),
  36. ]
  37. assert controller._is_stuck() is False
  38. def test_is_stuck_repeating_action_null_observation(self, controller):
  39. # message actions with source USER are not considered in the stuck check
  40. message_action = MessageAction(content='Done', wait_for_response=False)
  41. message_action._source = EventSource.USER
  42. controller.state.history = [
  43. (
  44. MessageAction(content='Hello', wait_for_response=False),
  45. Observation(content='Response 1'),
  46. ),
  47. (CmdRunAction(command='ls'), NullObservation(content='')),
  48. (CmdRunAction(command='ls'), NullObservation(content='')),
  49. # user message shouldn't break detection
  50. (message_action, NullObservation(content='')),
  51. (CmdRunAction(command='ls'), NullObservation(content='')),
  52. (CmdRunAction(command='ls'), NullObservation(content='')),
  53. ]
  54. with patch('logging.Logger.warning') as mock_warning:
  55. assert controller._is_stuck() is True
  56. mock_warning.assert_called_once_with('Action, Observation loop detected')
  57. def test_is_stuck_repeating_action_error_observation(self, controller):
  58. message_action = MessageAction(content='Done', wait_for_response=False)
  59. message_action._source = EventSource.USER
  60. controller.state.history = [
  61. (
  62. MessageAction(content='Hello', wait_for_response=False),
  63. Observation(content='Response 1'),
  64. ),
  65. (
  66. CmdRunAction(command='invalid_command'),
  67. ErrorObservation(content='Command not found'),
  68. ),
  69. (
  70. CmdRunAction(command='invalid_command'),
  71. ErrorObservation(content='Command not found'),
  72. ),
  73. # user message shouldn't break detection
  74. (message_action, NullObservation(content='')),
  75. (
  76. CmdRunAction(command='invalid_command'),
  77. ErrorObservation(content='Different error'),
  78. ),
  79. (
  80. CmdRunAction(command='invalid_command'),
  81. ErrorObservation(content='Command not found'),
  82. ),
  83. ]
  84. with patch('logging.Logger.warning') as mock_warning:
  85. assert controller._is_stuck() is True
  86. mock_warning.assert_called_once_with(
  87. 'Action, ErrorObservation loop detected'
  88. )
  89. def test_is_stuck_repeating_action_observation_pattern(self, controller):
  90. # six tuples of action, observation
  91. message_action = MessageAction(content='Come on', wait_for_response=False)
  92. message_action._source = EventSource.USER
  93. controller.state.history = [
  94. (
  95. message_action,
  96. Observation(content=''),
  97. ),
  98. (
  99. CmdRunAction(command='ls'),
  100. CmdOutputObservation(
  101. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  102. ),
  103. ),
  104. (
  105. FileReadAction(path='file1.txt'),
  106. FileReadObservation(content='File content', path='file1.txt'),
  107. ),
  108. (
  109. CmdRunAction(command='ls'),
  110. CmdOutputObservation(
  111. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  112. ),
  113. ),
  114. (
  115. FileReadAction(path='file1.txt'),
  116. FileReadObservation(content='File content', path='file1.txt'),
  117. ),
  118. # insert a message just because they can, it shouldn't break detection
  119. (message_action, NullObservation(content='')),
  120. (
  121. CmdRunAction(command='ls'),
  122. CmdOutputObservation(
  123. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  124. ),
  125. ),
  126. (
  127. FileReadAction(path='file1.txt'),
  128. FileReadObservation(content='File content', path='file1.txt'),
  129. ),
  130. ]
  131. with patch('logging.Logger.warning') as mock_warning:
  132. assert controller._is_stuck() is True
  133. mock_warning.assert_called_once_with('Action, Observation pattern detected')
  134. def test_is_stuck_not_stuck(self, controller):
  135. message_action = MessageAction(content='Done', wait_for_response=False)
  136. message_action._source = EventSource.USER
  137. controller.state.history = [
  138. (
  139. MessageAction(content='Hello', wait_for_response=False),
  140. Observation(content='Response 1'),
  141. ),
  142. (
  143. CmdRunAction(command='ls'),
  144. CmdOutputObservation(
  145. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  146. ),
  147. ),
  148. (
  149. FileReadAction(path='file1.txt'),
  150. FileReadObservation(content='File content', path='file1.txt'),
  151. ),
  152. (
  153. CmdRunAction(command='pwd'),
  154. CmdOutputObservation(command_id=1, command='pwd', content='/home/user'),
  155. ),
  156. (
  157. FileReadAction(path='file2.txt'),
  158. Observation(content='Another file content'),
  159. ),
  160. # insert a message from the user
  161. (message_action, NullObservation(content='')),
  162. (
  163. CmdRunAction(command='pwd'),
  164. CmdOutputObservation(command_id=1, command='pwd', content='/home/user'),
  165. ),
  166. (
  167. FileReadAction(path='file2.txt'),
  168. Observation(content='Another file content'),
  169. ),
  170. ]
  171. assert controller._is_stuck() is False
  172. def test_is_stuck_four_identical_tuples(self, controller):
  173. message_action = MessageAction(content='Done', wait_for_response=False)
  174. message_action._source = EventSource.USER
  175. controller.state.history = [
  176. (
  177. MessageAction(content='Hello', wait_for_response=False),
  178. Observation(content='Response 1'),
  179. ),
  180. (
  181. CmdRunAction(command='ls'),
  182. CmdOutputObservation(
  183. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  184. ),
  185. ),
  186. (
  187. CmdRunAction(command='ls'),
  188. CmdOutputObservation(
  189. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  190. ),
  191. ),
  192. # message from the user
  193. (message_action, NullObservation(content='')),
  194. (
  195. CmdRunAction(command='ls'),
  196. CmdOutputObservation(
  197. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  198. ),
  199. ),
  200. (
  201. CmdRunAction(command='ls'),
  202. CmdOutputObservation(
  203. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  204. ),
  205. ),
  206. ]
  207. with patch('logging.Logger.warning') as mock_warning:
  208. assert controller._is_stuck() is True
  209. mock_warning.assert_called_once_with('Action, Observation loop detected')
  210. def test_is_stuck_delegate_stuck(self, controller):
  211. controller.delegate = Mock()
  212. controller.delegate._is_stuck.return_value = True
  213. assert controller._is_stuck() is True