# test_pr_handler_guess_success.py
import json
from unittest.mock import MagicMock, patch

from openhands.core.config import LLMConfig
from openhands.events.action.message import MessageAction
from openhands.llm.llm import LLM
from openhands.resolver.github_issue import GithubIssue, ReviewThread
from openhands.resolver.issue_definitions import PRHandler
  8. def mock_llm_response(content):
  9. """Helper function to create a mock LLM response."""
  10. mock_response = MagicMock()
  11. mock_response.choices = [MagicMock(message=MagicMock(content=content))]
  12. return mock_response
  13. def test_guess_success_review_threads_litellm_call():
  14. """Test that the completion() call for review threads contains the expected content."""
  15. # Create a PR handler instance
  16. llm_config = LLMConfig(model='test', api_key='test')
  17. handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
  18. # Create a mock issue with review threads
  19. issue = GithubIssue(
  20. owner='test-owner',
  21. repo='test-repo',
  22. number=1,
  23. title='Test PR',
  24. body='Test Body',
  25. thread_comments=None,
  26. closing_issues=['Issue 1 description', 'Issue 2 description'],
  27. review_comments=None,
  28. review_threads=[
  29. ReviewThread(
  30. comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
  31. files=['/src/file1.py', '/src/file2.py'],
  32. ),
  33. ReviewThread(
  34. comment='Add more tests\n---\nlatest feedback:\nAdd test cases',
  35. files=['/tests/test_file.py'],
  36. ),
  37. ],
  38. thread_ids=['1', '2'],
  39. head_branch='test-branch',
  40. )
  41. # Create mock history with a detailed response
  42. history = [
  43. MessageAction(
  44. content="""I have made the following changes:
  45. 1. Fixed formatting in file1.py and file2.py
  46. 2. Added docstrings to all functions
  47. 3. Added test cases in test_file.py"""
  48. )
  49. ]
  50. # Create mock LLM config
  51. llm_config = LLMConfig(model='test-model', api_key='test-key')
  52. # Mock the LLM response
  53. mock_response = MagicMock()
  54. mock_response.choices = [
  55. MagicMock(
  56. message=MagicMock(
  57. content="""--- success
  58. true
  59. --- explanation
  60. The changes successfully address the feedback."""
  61. )
  62. )
  63. ]
  64. # Test the guess_success method
  65. with patch.object(LLM, 'completion') as mock_completion:
  66. mock_completion.return_value = mock_response
  67. success, success_list, explanation = handler.guess_success(issue, history)
  68. # Verify the completion() calls
  69. assert mock_completion.call_count == 2 # One call per review thread
  70. # Check first call
  71. first_call = mock_completion.call_args_list[0]
  72. first_prompt = first_call[1]['messages'][0]['content']
  73. assert (
  74. 'Issue descriptions:\n'
  75. + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
  76. in first_prompt
  77. )
  78. assert (
  79. 'Feedback:\nPlease fix the formatting\n---\nlatest feedback:\nAdd docstrings'
  80. in first_prompt
  81. )
  82. assert (
  83. 'Files locations:\n'
  84. + json.dumps(['/src/file1.py', '/src/file2.py'], indent=4)
  85. in first_prompt
  86. )
  87. assert 'Last message from AI agent:\n' + history[0].content in first_prompt
  88. # Check second call
  89. second_call = mock_completion.call_args_list[1]
  90. second_prompt = second_call[1]['messages'][0]['content']
  91. assert (
  92. 'Issue descriptions:\n'
  93. + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
  94. in second_prompt
  95. )
  96. assert (
  97. 'Feedback:\nAdd more tests\n---\nlatest feedback:\nAdd test cases'
  98. in second_prompt
  99. )
  100. assert (
  101. 'Files locations:\n' + json.dumps(['/tests/test_file.py'], indent=4)
  102. in second_prompt
  103. )
  104. assert 'Last message from AI agent:\n' + history[0].content in second_prompt
  105. assert len(json.loads(explanation)) == 2
  106. def test_guess_success_thread_comments_litellm_call():
  107. """Test that the completion() call for thread comments contains the expected content."""
  108. # Create a PR handler instance
  109. llm_config = LLMConfig(model='test', api_key='test')
  110. handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
  111. # Create a mock issue with thread comments
  112. issue = GithubIssue(
  113. owner='test-owner',
  114. repo='test-repo',
  115. number=1,
  116. title='Test PR',
  117. body='Test Body',
  118. thread_comments=[
  119. 'Please improve error handling',
  120. 'Add input validation',
  121. 'latest feedback:\nHandle edge cases',
  122. ],
  123. closing_issues=['Issue 1 description', 'Issue 2 description'],
  124. review_comments=None,
  125. thread_ids=None,
  126. head_branch='test-branch',
  127. )
  128. # Create mock history with a detailed response
  129. history = [
  130. MessageAction(
  131. content="""I have made the following changes:
  132. 1. Added try/catch blocks for error handling
  133. 2. Added input validation checks
  134. 3. Added handling for edge cases"""
  135. )
  136. ]
  137. # Create mock LLM config
  138. llm_config = LLMConfig(model='test-model', api_key='test-key')
  139. # Mock the LLM response
  140. mock_response = MagicMock()
  141. mock_response.choices = [
  142. MagicMock(
  143. message=MagicMock(
  144. content="""--- success
  145. true
  146. --- explanation
  147. The changes successfully address the feedback."""
  148. )
  149. )
  150. ]
  151. # Test the guess_success method
  152. with patch.object(LLM, 'completion') as mock_completion:
  153. mock_completion.return_value = mock_response
  154. success, success_list, explanation = handler.guess_success(issue, history)
  155. # Verify the completion() call
  156. mock_completion.assert_called_once()
  157. call_args = mock_completion.call_args
  158. prompt = call_args[1]['messages'][0]['content']
  159. # Check prompt content
  160. assert (
  161. 'Issue descriptions:\n'
  162. + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
  163. in prompt
  164. )
  165. assert 'PR Thread Comments:\n' + '\n---\n'.join(issue.thread_comments) in prompt
  166. assert 'Last message from AI agent:\n' + history[0].content in prompt
  167. assert len(json.loads(explanation)) == 1
  168. def test_check_feedback_with_llm():
  169. """Test the _check_feedback_with_llm helper function."""
  170. # Create a PR handler instance
  171. llm_config = LLMConfig(model='test', api_key='test')
  172. handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
  173. # Test cases for different LLM responses
  174. test_cases = [
  175. {
  176. 'response': '--- success\ntrue\n--- explanation\nChanges look good',
  177. 'expected': (True, 'Changes look good'),
  178. },
  179. {
  180. 'response': '--- success\nfalse\n--- explanation\nNot all issues fixed',
  181. 'expected': (False, 'Not all issues fixed'),
  182. },
  183. {
  184. 'response': 'Invalid response format',
  185. 'expected': (
  186. False,
  187. 'Failed to decode answer from LLM response: Invalid response format',
  188. ),
  189. },
  190. {
  191. 'response': '--- success\ntrue\n--- explanation\nMultiline\nexplanation\nhere',
  192. 'expected': (True, 'Multiline\nexplanation\nhere'),
  193. },
  194. ]
  195. for case in test_cases:
  196. # Mock the LLM response
  197. mock_response = MagicMock()
  198. mock_response.choices = [MagicMock(message=MagicMock(content=case['response']))]
  199. # Test the function
  200. with patch.object(LLM, 'completion', return_value=mock_response):
  201. success, explanation = handler._check_feedback_with_llm('test prompt')
  202. assert (success, explanation) == case['expected']
  203. def test_check_review_thread():
  204. """Test the _check_review_thread helper function."""
  205. # Create a PR handler instance
  206. llm_config = LLMConfig(model='test', api_key='test')
  207. handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
  208. # Create test data
  209. review_thread = ReviewThread(
  210. comment='Please fix the formatting\n---\nlatest feedback:\nAdd docstrings',
  211. files=['/src/file1.py', '/src/file2.py'],
  212. )
  213. issues_context = json.dumps(
  214. ['Issue 1 description', 'Issue 2 description'], indent=4
  215. )
  216. last_message = 'I have fixed the formatting and added docstrings'
  217. # Mock the LLM response
  218. mock_response = MagicMock()
  219. mock_response.choices = [
  220. MagicMock(
  221. message=MagicMock(
  222. content="""--- success
  223. true
  224. --- explanation
  225. Changes look good"""
  226. )
  227. )
  228. ]
  229. # Test the function
  230. with patch.object(LLM, 'completion') as mock_completion:
  231. mock_completion.return_value = mock_response
  232. success, explanation = handler._check_review_thread(
  233. review_thread, issues_context, last_message
  234. )
  235. # Verify the completion() call
  236. mock_completion.assert_called_once()
  237. call_args = mock_completion.call_args
  238. prompt = call_args[1]['messages'][0]['content']
  239. # Check prompt content
  240. assert 'Issue descriptions:\n' + issues_context in prompt
  241. assert 'Feedback:\n' + review_thread.comment in prompt
  242. assert (
  243. 'Files locations:\n' + json.dumps(review_thread.files, indent=4) in prompt
  244. )
  245. assert 'Last message from AI agent:\n' + last_message in prompt
  246. # Check result
  247. assert success is True
  248. assert explanation == 'Changes look good'
  249. def test_check_thread_comments():
  250. """Test the _check_thread_comments helper function."""
  251. # Create a PR handler instance
  252. llm_config = LLMConfig(model='test', api_key='test')
  253. handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
  254. # Create test data
  255. thread_comments = [
  256. 'Please improve error handling',
  257. 'Add input validation',
  258. 'latest feedback:\nHandle edge cases',
  259. ]
  260. issues_context = json.dumps(
  261. ['Issue 1 description', 'Issue 2 description'], indent=4
  262. )
  263. last_message = 'I have added error handling and input validation'
  264. # Mock the LLM response
  265. mock_response = MagicMock()
  266. mock_response.choices = [
  267. MagicMock(
  268. message=MagicMock(
  269. content="""--- success
  270. true
  271. --- explanation
  272. Changes look good"""
  273. )
  274. )
  275. ]
  276. # Test the function
  277. with patch.object(LLM, 'completion') as mock_completion:
  278. mock_completion.return_value = mock_response
  279. success, explanation = handler._check_thread_comments(
  280. thread_comments, issues_context, last_message
  281. )
  282. # Verify the completion() call
  283. mock_completion.assert_called_once()
  284. call_args = mock_completion.call_args
  285. prompt = call_args[1]['messages'][0]['content']
  286. # Check prompt content
  287. assert 'Issue descriptions:\n' + issues_context in prompt
  288. assert 'PR Thread Comments:\n' + '\n---\n'.join(thread_comments) in prompt
  289. assert 'Last message from AI agent:\n' + last_message in prompt
  290. # Check result
  291. assert success is True
  292. assert explanation == 'Changes look good'
  293. def test_check_review_comments():
  294. """Test the _check_review_comments helper function."""
  295. # Create a PR handler instance
  296. llm_config = LLMConfig(model='test', api_key='test')
  297. handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
  298. # Create test data
  299. review_comments = [
  300. 'Please improve code readability',
  301. 'Add comments to complex functions',
  302. 'Follow PEP 8 style guide',
  303. ]
  304. issues_context = json.dumps(
  305. ['Issue 1 description', 'Issue 2 description'], indent=4
  306. )
  307. last_message = 'I have improved code readability and added comments'
  308. # Mock the LLM response
  309. mock_response = MagicMock()
  310. mock_response.choices = [
  311. MagicMock(
  312. message=MagicMock(
  313. content="""--- success
  314. true
  315. --- explanation
  316. Changes look good"""
  317. )
  318. )
  319. ]
  320. # Test the function
  321. with patch.object(LLM, 'completion') as mock_completion:
  322. mock_completion.return_value = mock_response
  323. success, explanation = handler._check_review_comments(
  324. review_comments, issues_context, last_message
  325. )
  326. # Verify the completion() call
  327. mock_completion.assert_called_once()
  328. call_args = mock_completion.call_args
  329. prompt = call_args[1]['messages'][0]['content']
  330. # Check prompt content
  331. assert 'Issue descriptions:\n' + issues_context in prompt
  332. assert 'PR Review Comments:\n' + '\n---\n'.join(review_comments) in prompt
  333. assert 'Last message from AI agent:\n' + last_message in prompt
  334. # Check result
  335. assert success is True
  336. assert explanation == 'Changes look good'
  337. def test_guess_success_review_comments_litellm_call():
  338. """Test that the completion() call for review comments contains the expected content."""
  339. # Create a PR handler instance
  340. llm_config = LLMConfig(model='test', api_key='test')
  341. handler = PRHandler('test-owner', 'test-repo', 'test-token', llm_config)
  342. # Create a mock issue with review comments
  343. issue = GithubIssue(
  344. owner='test-owner',
  345. repo='test-repo',
  346. number=1,
  347. title='Test PR',
  348. body='Test Body',
  349. thread_comments=None,
  350. closing_issues=['Issue 1 description', 'Issue 2 description'],
  351. review_comments=[
  352. 'Please improve code readability',
  353. 'Add comments to complex functions',
  354. 'Follow PEP 8 style guide',
  355. ],
  356. thread_ids=None,
  357. head_branch='test-branch',
  358. )
  359. # Create mock history with a detailed response
  360. history = [
  361. MessageAction(
  362. content="""I have made the following changes:
  363. 1. Improved code readability by breaking down complex functions
  364. 2. Added detailed comments to all complex functions
  365. 3. Fixed code style to follow PEP 8"""
  366. )
  367. ]
  368. # Mock the LLM response
  369. mock_response = MagicMock()
  370. mock_response.choices = [
  371. MagicMock(
  372. message=MagicMock(
  373. content="""--- success
  374. true
  375. --- explanation
  376. The changes successfully address the feedback."""
  377. )
  378. )
  379. ]
  380. # Test the guess_success method
  381. with patch.object(LLM, 'completion') as mock_completion:
  382. mock_completion.return_value = mock_response
  383. success, success_list, explanation = handler.guess_success(issue, history)
  384. # Verify the completion() call
  385. mock_completion.assert_called_once()
  386. call_args = mock_completion.call_args
  387. prompt = call_args[1]['messages'][0]['content']
  388. # Check prompt content
  389. assert (
  390. 'Issue descriptions:\n'
  391. + json.dumps(['Issue 1 description', 'Issue 2 description'], indent=4)
  392. in prompt
  393. )
  394. assert 'PR Review Comments:\n' + '\n---\n'.join(issue.review_comments) in prompt
  395. assert 'Last message from AI agent:\n' + history[0].content in prompt
  396. assert len(json.loads(explanation)) == 1