# test_is_stuck.py (12 KB)
  1. from unittest.mock import Mock, patch
  2. import pytest
  3. from opendevin.controller.agent_controller import AgentController
  4. from opendevin.events.action import CmdRunAction, FileReadAction, MessageAction
  5. from opendevin.events.action.commands import CmdKillAction
  6. from opendevin.events.observation import (
  7. CmdOutputObservation,
  8. FileReadObservation,
  9. Observation,
  10. )
  11. from opendevin.events.observation.empty import NullObservation
  12. from opendevin.events.observation.error import ErrorObservation
  13. from opendevin.events.stream import EventSource
  14. class TestAgentController:
  15. @pytest.fixture
  16. def controller(self):
  17. controller = Mock(spec=AgentController)
  18. controller._is_stuck = AgentController._is_stuck.__get__(
  19. controller, AgentController
  20. )
  21. controller._eq_no_pid = AgentController._eq_no_pid.__get__(
  22. controller, AgentController
  23. )
  24. controller.delegate = None
  25. controller.state = Mock()
  26. controller.state.history = []
  27. return controller
  28. def test_history_too_short(self, controller):
  29. controller.state.history = [
  30. (
  31. MessageAction(content='Hello', wait_for_response=False),
  32. Observation(content='Response 1'),
  33. ),
  34. (
  35. CmdRunAction(command='ls'),
  36. CmdOutputObservation(
  37. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  38. ),
  39. ),
  40. ]
  41. assert controller._is_stuck() is False
  42. def test_is_stuck_repeating_action_null_observation(self, controller):
  43. # message actions with source USER are not considered in the stuck check
  44. message_action = MessageAction(content='Done', wait_for_response=False)
  45. message_action._source = EventSource.USER
  46. controller.state.history = [
  47. (
  48. MessageAction(content='Hello', wait_for_response=False),
  49. Observation(content='Response 1'),
  50. ),
  51. (CmdRunAction(command='ls'), NullObservation(content='')),
  52. (CmdRunAction(command='ls'), NullObservation(content='')),
  53. # user message shouldn't break detection
  54. (message_action, NullObservation(content='')),
  55. (CmdRunAction(command='ls'), NullObservation(content='')),
  56. (CmdRunAction(command='ls'), NullObservation(content='')),
  57. ]
  58. with patch('logging.Logger.warning') as mock_warning:
  59. assert controller._is_stuck() is True
  60. mock_warning.assert_called_once_with('Action, Observation loop detected')
  61. def test_is_stuck_repeating_action_error_observation(self, controller):
  62. # (action, error_observation), not necessarily the same error
  63. message_action = MessageAction(content='Done', wait_for_response=False)
  64. message_action._source = EventSource.USER
  65. controller.state.history = [
  66. (
  67. MessageAction(content='Hello', wait_for_response=False),
  68. Observation(content='Response 1'),
  69. ),
  70. (
  71. CmdRunAction(command='invalid_command'),
  72. ErrorObservation(content='Command not found'),
  73. ),
  74. (
  75. CmdRunAction(command='invalid_command'),
  76. ErrorObservation(content='Command still not found or another error'),
  77. ),
  78. # user message shouldn't break detection
  79. (message_action, NullObservation(content='')),
  80. (
  81. CmdRunAction(command='invalid_command'),
  82. ErrorObservation(content='Different error'),
  83. ),
  84. (
  85. CmdRunAction(command='invalid_command'),
  86. ErrorObservation(content='Command not found'),
  87. ),
  88. ]
  89. with patch('logging.Logger.warning') as mock_warning:
  90. assert controller._is_stuck() is True
  91. mock_warning.assert_called_once_with(
  92. 'Action, ErrorObservation loop detected'
  93. )
  94. def test_is_stuck_repeating_action_observation_pattern(self, controller):
  95. # six tuples of action, observation
  96. message_action = MessageAction(content='Come on', wait_for_response=False)
  97. message_action._source = EventSource.USER
  98. controller.state.history = [
  99. (
  100. message_action,
  101. Observation(content=''),
  102. ),
  103. (
  104. CmdRunAction(command='ls'),
  105. CmdOutputObservation(
  106. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  107. ),
  108. ),
  109. (
  110. FileReadAction(path='file1.txt'),
  111. FileReadObservation(content='File content', path='file1.txt'),
  112. ),
  113. (
  114. CmdRunAction(command='ls'),
  115. # command_id is ignored for the eq check, it's a pid
  116. CmdOutputObservation(
  117. command_id=2, command='ls', content='file1.txt\nfile2.txt'
  118. ),
  119. ),
  120. (
  121. FileReadAction(path='file1.txt'),
  122. FileReadObservation(content='File content', path='file1.txt'),
  123. ),
  124. # insert a message just because they can, it shouldn't break detection
  125. (message_action, NullObservation(content='')),
  126. (
  127. CmdRunAction(command='ls'),
  128. CmdOutputObservation(
  129. command_id=3, command='ls', content='file1.txt\nfile2.txt'
  130. ),
  131. ),
  132. (
  133. FileReadAction(path='file1.txt'),
  134. FileReadObservation(content='File content', path='file1.txt'),
  135. ),
  136. ]
  137. with patch('logging.Logger.warning') as mock_warning:
  138. assert controller._is_stuck() is True
  139. mock_warning.assert_called_once_with('Action, Observation pattern detected')
  140. def test_is_stuck_not_stuck(self, controller):
  141. message_action = MessageAction(content='Done', wait_for_response=False)
  142. message_action._source = EventSource.USER
  143. controller.state.history = [
  144. (
  145. MessageAction(content='Hello', wait_for_response=False),
  146. Observation(content='Response 1'),
  147. ),
  148. (
  149. CmdRunAction(command='ls'),
  150. CmdOutputObservation(
  151. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  152. ),
  153. ),
  154. (
  155. FileReadAction(path='file1.txt'),
  156. FileReadObservation(content='File content', path='file1.txt'),
  157. ),
  158. (
  159. CmdRunAction(command='pwd'),
  160. # command_id is ignored for the eq check, it's the pid
  161. CmdOutputObservation(command_id=2, command='pwd', content='/home/user'),
  162. ),
  163. (
  164. FileReadAction(path='file2.txt'),
  165. Observation(content='Another file content'),
  166. ),
  167. # insert a message from the user
  168. (message_action, NullObservation(content='')),
  169. (
  170. CmdRunAction(command='pwd'),
  171. CmdOutputObservation(command_id=3, command='pwd', content='/home/user'),
  172. ),
  173. (
  174. FileReadAction(path='file2.txt'),
  175. Observation(content='Another file content'),
  176. ),
  177. ]
  178. assert controller._is_stuck() is False
  179. def test_is_stuck_three_identical_tuples(self, controller):
  180. # the same (action, observation), three times
  181. # prepare messages to interrupt things
  182. message_action = MessageAction(content='Done', wait_for_response=False)
  183. message_action._source = EventSource.USER
  184. message_action2 = MessageAction(content='Or not done?', wait_for_response=False)
  185. message_action2._source = EventSource.USER
  186. controller.state.history = [
  187. (
  188. MessageAction(content='Hello', wait_for_response=False),
  189. Observation(content='Response 1'),
  190. ),
  191. # message from the user shouldn't interfere with the detection
  192. (message_action, NullObservation(content='')),
  193. (
  194. CmdRunAction(command='ls'),
  195. CmdOutputObservation(
  196. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  197. ),
  198. ),
  199. (
  200. CmdRunAction(command='ls'),
  201. # command_id is ignored for the eq check, it's just the pid
  202. CmdOutputObservation(
  203. command_id=2, command='ls', content='file1.txt\nfile2.txt'
  204. ),
  205. ),
  206. # message from the user shouldn't interfere with the detection
  207. (message_action2, NullObservation(content='')),
  208. (
  209. CmdRunAction(command='ls'),
  210. CmdOutputObservation(
  211. command_id=3, command='ls', content='file1.txt\nfile2.txt'
  212. ),
  213. ),
  214. ]
  215. with patch('logging.Logger.warning') as mock_warning:
  216. assert controller._is_stuck() is True
  217. mock_warning.assert_called_once_with('Action, Observation loop detected')
  218. def test_is_stuck_four_tuples_cmd_kill_and_output(self, controller):
  219. message_action = MessageAction(content='Done', wait_for_response=False)
  220. message_action._source = EventSource.USER
  221. controller.state.history = [
  222. (
  223. MessageAction(content='Hello', wait_for_response=False),
  224. Observation(content='Response 1'),
  225. ),
  226. (
  227. CmdKillAction(
  228. command_id=1,
  229. thought='It looks like storybook is stuck, lets kill it',
  230. ),
  231. CmdOutputObservation(
  232. content='Background command storybook has been killed.',
  233. command_id=1,
  234. command='storybook',
  235. exit_code=0,
  236. ),
  237. ),
  238. (
  239. # command_id is ignored for the eq check, it's the pid
  240. CmdKillAction(
  241. command_id=2,
  242. thought='It looks like storybook is stuck, lets kill it',
  243. ),
  244. # command_id here too
  245. CmdOutputObservation(
  246. content='Background command storybook has been killed.',
  247. command_id=2,
  248. command='storybook',
  249. exit_code=0,
  250. ),
  251. ),
  252. # message from the user, shouldn't be counted
  253. (message_action, NullObservation(content='')),
  254. (
  255. CmdKillAction(
  256. command_id=3,
  257. thought='It looks like storybook is stuck, lets kill it',
  258. ),
  259. CmdOutputObservation(
  260. content='Background command storybook has been killed.',
  261. command_id=3,
  262. command='storybook',
  263. exit_code=0,
  264. ),
  265. ),
  266. (
  267. CmdKillAction(
  268. command_id=4,
  269. thought='It looks like storybook is stuck, lets kill it',
  270. ),
  271. CmdOutputObservation(
  272. content='Background command storybook has been killed.',
  273. command_id=4,
  274. command='storybook',
  275. exit_code=0,
  276. ),
  277. ),
  278. ]
  279. with patch('logging.Logger.warning') as mock_warning:
  280. assert controller._is_stuck() is True
  281. mock_warning.assert_called_once_with('Action, Observation loop detected')
  282. def test_is_stuck_delegate_stuck(self, controller):
  283. controller.delegate = Mock()
  284. controller.delegate._is_stuck.return_value = True
  285. assert controller._is_stuck() is True