test_is_stuck.py 12 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296
  1. from unittest.mock import Mock, patch
  2. import pytest
  3. from opendevin.controller.agent_controller import AgentController
  4. from opendevin.events.action import CmdRunAction, FileReadAction, MessageAction
  5. from opendevin.events.action.commands import CmdKillAction
  6. from opendevin.events.observation import (
  7. CmdOutputObservation,
  8. FileReadObservation,
  9. Observation,
  10. )
  11. from opendevin.events.observation.empty import NullObservation
  12. from opendevin.events.observation.error import ErrorObservation
  13. from opendevin.events.stream import EventSource
  14. class TestAgentController:
  15. @pytest.fixture
  16. def controller(self):
  17. controller = Mock(spec=AgentController)
  18. controller._is_stuck = AgentController._is_stuck.__get__(
  19. controller, AgentController
  20. )
  21. controller._eq_no_pid = AgentController._eq_no_pid.__get__(
  22. controller, AgentController
  23. )
  24. controller.delegate = None
  25. controller.state = Mock()
  26. controller.state.history = []
  27. return controller
  28. def test_history_too_short(self, controller):
  29. controller.state.history = [
  30. (
  31. MessageAction(content='Hello', wait_for_response=False),
  32. Observation(content='Response 1'),
  33. ),
  34. (
  35. CmdRunAction(command='ls'),
  36. CmdOutputObservation(
  37. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  38. ),
  39. ),
  40. ]
  41. assert controller._is_stuck() is False
  42. def test_is_stuck_repeating_action_null_observation(self, controller):
  43. # message actions with source USER are not considered in the stuck check
  44. message_action = MessageAction(content='Done', wait_for_response=False)
  45. message_action._source = EventSource.USER
  46. controller.state.history = [
  47. (
  48. MessageAction(content='Hello', wait_for_response=False),
  49. Observation(content='Response 1'),
  50. ),
  51. (CmdRunAction(command='ls'), NullObservation(content='')),
  52. (CmdRunAction(command='ls'), NullObservation(content='')),
  53. # user message shouldn't break detection
  54. (message_action, NullObservation(content='')),
  55. (CmdRunAction(command='ls'), NullObservation(content='')),
  56. (CmdRunAction(command='ls'), NullObservation(content='')),
  57. ]
  58. with patch('logging.Logger.warning') as mock_warning:
  59. assert controller._is_stuck() is True
  60. mock_warning.assert_called_once_with('Action, Observation loop detected')
  61. def test_is_stuck_repeating_action_error_observation(self, controller):
  62. message_action = MessageAction(content='Done', wait_for_response=False)
  63. message_action._source = EventSource.USER
  64. controller.state.history = [
  65. (
  66. MessageAction(content='Hello', wait_for_response=False),
  67. Observation(content='Response 1'),
  68. ),
  69. (
  70. CmdRunAction(command='invalid_command'),
  71. ErrorObservation(content='Command not found'),
  72. ),
  73. (
  74. CmdRunAction(command='invalid_command'),
  75. ErrorObservation(content='Command still not found or another error'),
  76. ),
  77. # user message shouldn't break detection
  78. (message_action, NullObservation(content='')),
  79. (
  80. CmdRunAction(command='invalid_command'),
  81. ErrorObservation(content='Different error'),
  82. ),
  83. (
  84. CmdRunAction(command='invalid_command'),
  85. ErrorObservation(content='Command not found'),
  86. ),
  87. ]
  88. with patch('logging.Logger.warning') as mock_warning:
  89. assert controller._is_stuck() is True
  90. mock_warning.assert_called_once_with(
  91. 'Action, ErrorObservation loop detected'
  92. )
  93. def test_is_stuck_repeating_action_observation_pattern(self, controller):
  94. # six tuples of action, observation
  95. message_action = MessageAction(content='Come on', wait_for_response=False)
  96. message_action._source = EventSource.USER
  97. controller.state.history = [
  98. (
  99. message_action,
  100. Observation(content=''),
  101. ),
  102. (
  103. CmdRunAction(command='ls'),
  104. CmdOutputObservation(
  105. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  106. ),
  107. ),
  108. (
  109. FileReadAction(path='file1.txt'),
  110. FileReadObservation(content='File content', path='file1.txt'),
  111. ),
  112. (
  113. CmdRunAction(command='ls'),
  114. # command_id is ignored for the eq check, it's a pid
  115. CmdOutputObservation(
  116. command_id=2, command='ls', content='file1.txt\nfile2.txt'
  117. ),
  118. ),
  119. (
  120. FileReadAction(path='file1.txt'),
  121. FileReadObservation(content='File content', path='file1.txt'),
  122. ),
  123. # insert a message just because they can, it shouldn't break detection
  124. (message_action, NullObservation(content='')),
  125. (
  126. CmdRunAction(command='ls'),
  127. CmdOutputObservation(
  128. command_id=3, command='ls', content='file1.txt\nfile2.txt'
  129. ),
  130. ),
  131. (
  132. FileReadAction(path='file1.txt'),
  133. FileReadObservation(content='File content', path='file1.txt'),
  134. ),
  135. ]
  136. with patch('logging.Logger.warning') as mock_warning:
  137. assert controller._is_stuck() is True
  138. mock_warning.assert_called_once_with('Action, Observation pattern detected')
  139. def test_is_stuck_not_stuck(self, controller):
  140. message_action = MessageAction(content='Done', wait_for_response=False)
  141. message_action._source = EventSource.USER
  142. controller.state.history = [
  143. (
  144. MessageAction(content='Hello', wait_for_response=False),
  145. Observation(content='Response 1'),
  146. ),
  147. (
  148. CmdRunAction(command='ls'),
  149. CmdOutputObservation(
  150. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  151. ),
  152. ),
  153. (
  154. FileReadAction(path='file1.txt'),
  155. FileReadObservation(content='File content', path='file1.txt'),
  156. ),
  157. (
  158. CmdRunAction(command='pwd'),
  159. # command_id is ignored for the eq check, it's the pid
  160. CmdOutputObservation(command_id=2, command='pwd', content='/home/user'),
  161. ),
  162. (
  163. FileReadAction(path='file2.txt'),
  164. Observation(content='Another file content'),
  165. ),
  166. # insert a message from the user
  167. (message_action, NullObservation(content='')),
  168. (
  169. CmdRunAction(command='pwd'),
  170. CmdOutputObservation(command_id=3, command='pwd', content='/home/user'),
  171. ),
  172. (
  173. FileReadAction(path='file2.txt'),
  174. Observation(content='Another file content'),
  175. ),
  176. ]
  177. assert controller._is_stuck() is False
  178. def test_is_stuck_four_identical_tuples(self, controller):
  179. message_action = MessageAction(content='Done', wait_for_response=False)
  180. message_action._source = EventSource.USER
  181. controller.state.history = [
  182. (
  183. MessageAction(content='Hello', wait_for_response=False),
  184. Observation(content='Response 1'),
  185. ),
  186. (
  187. CmdRunAction(command='ls'),
  188. CmdOutputObservation(
  189. command_id=1, command='ls', content='file1.txt\nfile2.txt'
  190. ),
  191. ),
  192. (
  193. CmdRunAction(command='ls'),
  194. # command_id is ignored for the eq check, it's just the pid
  195. CmdOutputObservation(
  196. command_id=2, command='ls', content='file1.txt\nfile2.txt'
  197. ),
  198. ),
  199. # message from the user shouldn't interfere with the detection
  200. (message_action, NullObservation(content='')),
  201. (
  202. CmdRunAction(command='ls'),
  203. CmdOutputObservation(
  204. command_id=3, command='ls', content='file1.txt\nfile2.txt'
  205. ),
  206. ),
  207. (
  208. CmdRunAction(command='ls'),
  209. CmdOutputObservation(
  210. command_id=4, command='ls', content='file1.txt\nfile2.txt'
  211. ),
  212. ),
  213. ]
  214. with patch('logging.Logger.warning') as mock_warning:
  215. assert controller._is_stuck() is True
  216. mock_warning.assert_called_once_with('Action, Observation loop detected')
  217. def test_is_stuck_four_tuples_cmd_kill_and_output(self, controller):
  218. message_action = MessageAction(content='Done', wait_for_response=False)
  219. message_action._source = EventSource.USER
  220. controller.state.history = [
  221. (
  222. MessageAction(content='Hello', wait_for_response=False),
  223. Observation(content='Response 1'),
  224. ),
  225. (
  226. CmdKillAction(
  227. command_id=1,
  228. thought='It looks like storybook is stuck, lets kill it',
  229. ),
  230. CmdOutputObservation(
  231. content='Background command storybook has been killed.',
  232. command_id=1,
  233. command='storybook',
  234. exit_code=0,
  235. ),
  236. ),
  237. (
  238. # command_id is ignored for the eq check, it's the pid
  239. CmdKillAction(
  240. command_id=2,
  241. thought='It looks like storybook is stuck, lets kill it',
  242. ),
  243. # command_id here too
  244. CmdOutputObservation(
  245. content='Background command storybook has been killed.',
  246. command_id=2,
  247. command='storybook',
  248. exit_code=0,
  249. ),
  250. ),
  251. # message from the user, shouldn't be counted
  252. (message_action, NullObservation(content='')),
  253. (
  254. CmdKillAction(
  255. command_id=3,
  256. thought='It looks like storybook is stuck, lets kill it',
  257. ),
  258. CmdOutputObservation(
  259. content='Background command storybook has been killed.',
  260. command_id=3,
  261. command='storybook',
  262. exit_code=0,
  263. ),
  264. ),
  265. (
  266. CmdKillAction(
  267. command_id=4,
  268. thought='It looks like storybook is stuck, lets kill it',
  269. ),
  270. CmdOutputObservation(
  271. content='Background command storybook has been killed.',
  272. command_id=4,
  273. command='storybook',
  274. exit_code=0,
  275. ),
  276. ),
  277. ]
  278. with patch('logging.Logger.warning') as mock_warning:
  279. assert controller._is_stuck() is True
  280. mock_warning.assert_called_once_with('Action, Observation loop detected')
  281. def test_is_stuck_delegate_stuck(self, controller):
  282. controller.delegate = Mock()
  283. controller.delegate._is_stuck.return_value = True
  284. assert controller._is_stuck() is True