# test_pr_handler_guess_success.py
# Tests for PRHandler.guess_success and its LLM-feedback helper methods.
  1. import json
  2. from unittest.mock import patch, MagicMock
  3. from openhands.resolver.issue_definitions import PRHandler
  4. from openhands.resolver.github_issue import GithubIssue, ReviewThread
  5. from openhands.events.action.message import MessageAction
  6. from openhands.core.config import LLMConfig
  7. def test_guess_success_review_threads_litellm_call():
  8. """Test that the litellm.completion() call for review threads contains the expected content."""
  9. # Create a PR handler instance
  10. handler = PRHandler("test-owner", "test-repo", "test-token")
  11. # Create a mock issue with review threads
  12. issue = GithubIssue(
  13. owner="test-owner",
  14. repo="test-repo",
  15. number=1,
  16. title="Test PR",
  17. body="Test Body",
  18. thread_comments=None,
  19. closing_issues=["Issue 1 description", "Issue 2 description"],
  20. review_comments=None,
  21. review_threads=[
  22. ReviewThread(
  23. comment="Please fix the formatting\n---\nlatest feedback:\nAdd docstrings",
  24. files=["/src/file1.py", "/src/file2.py"],
  25. ),
  26. ReviewThread(
  27. comment="Add more tests\n---\nlatest feedback:\nAdd test cases",
  28. files=["/tests/test_file.py"],
  29. ),
  30. ],
  31. thread_ids=["1", "2"],
  32. head_branch="test-branch",
  33. )
  34. # Create mock history with a detailed response
  35. history = [
  36. MessageAction(
  37. content="""I have made the following changes:
  38. 1. Fixed formatting in file1.py and file2.py
  39. 2. Added docstrings to all functions
  40. 3. Added test cases in test_file.py"""
  41. )
  42. ]
  43. # Create mock LLM config
  44. llm_config = LLMConfig(model="test-model", api_key="test-key")
  45. # Mock the LLM response
  46. mock_response = MagicMock()
  47. mock_response.choices = [
  48. MagicMock(
  49. message=MagicMock(
  50. content="""--- success
  51. true
  52. --- explanation
  53. The changes successfully address the feedback."""
  54. )
  55. )
  56. ]
  57. # Test the guess_success method
  58. with patch("litellm.completion") as mock_completion:
  59. mock_completion.return_value = mock_response
  60. success, success_list, explanation = handler.guess_success(
  61. issue, history, llm_config
  62. )
  63. # Verify the litellm.completion() calls
  64. assert mock_completion.call_count == 2 # One call per review thread
  65. # Check first call
  66. first_call = mock_completion.call_args_list[0]
  67. first_prompt = first_call[1]["messages"][0]["content"]
  68. assert (
  69. "Issue descriptions:\n"
  70. + json.dumps(["Issue 1 description", "Issue 2 description"], indent=4)
  71. in first_prompt
  72. )
  73. assert (
  74. "Feedback:\nPlease fix the formatting\n---\nlatest feedback:\nAdd docstrings"
  75. in first_prompt
  76. )
  77. assert (
  78. "Files locations:\n"
  79. + json.dumps(["/src/file1.py", "/src/file2.py"], indent=4)
  80. in first_prompt
  81. )
  82. assert "Last message from AI agent:\n" + history[0].content in first_prompt
  83. # Check second call
  84. second_call = mock_completion.call_args_list[1]
  85. second_prompt = second_call[1]["messages"][0]["content"]
  86. assert (
  87. "Issue descriptions:\n"
  88. + json.dumps(["Issue 1 description", "Issue 2 description"], indent=4)
  89. in second_prompt
  90. )
  91. assert (
  92. "Feedback:\nAdd more tests\n---\nlatest feedback:\nAdd test cases"
  93. in second_prompt
  94. )
  95. assert (
  96. "Files locations:\n" + json.dumps(["/tests/test_file.py"], indent=4)
  97. in second_prompt
  98. )
  99. assert "Last message from AI agent:\n" + history[0].content in second_prompt
  100. def test_guess_success_thread_comments_litellm_call():
  101. """Test that the litellm.completion() call for thread comments contains the expected content."""
  102. # Create a PR handler instance
  103. handler = PRHandler("test-owner", "test-repo", "test-token")
  104. # Create a mock issue with thread comments
  105. issue = GithubIssue(
  106. owner="test-owner",
  107. repo="test-repo",
  108. number=1,
  109. title="Test PR",
  110. body="Test Body",
  111. thread_comments=[
  112. "Please improve error handling",
  113. "Add input validation",
  114. "latest feedback:\nHandle edge cases",
  115. ],
  116. closing_issues=["Issue 1 description", "Issue 2 description"],
  117. review_comments=None,
  118. thread_ids=None,
  119. head_branch="test-branch",
  120. )
  121. # Create mock history with a detailed response
  122. history = [
  123. MessageAction(
  124. content="""I have made the following changes:
  125. 1. Added try/catch blocks for error handling
  126. 2. Added input validation checks
  127. 3. Added handling for edge cases"""
  128. )
  129. ]
  130. # Create mock LLM config
  131. llm_config = LLMConfig(model="test-model", api_key="test-key")
  132. # Mock the LLM response
  133. mock_response = MagicMock()
  134. mock_response.choices = [
  135. MagicMock(
  136. message=MagicMock(
  137. content="""--- success
  138. true
  139. --- explanation
  140. The changes successfully address the feedback."""
  141. )
  142. )
  143. ]
  144. # Test the guess_success method
  145. with patch("litellm.completion") as mock_completion:
  146. mock_completion.return_value = mock_response
  147. success, success_list, explanation = handler.guess_success(
  148. issue, history, llm_config
  149. )
  150. # Verify the litellm.completion() call
  151. mock_completion.assert_called_once()
  152. call_args = mock_completion.call_args
  153. prompt = call_args[1]["messages"][0]["content"]
  154. # Check prompt content
  155. assert (
  156. "Issue descriptions:\n"
  157. + json.dumps(["Issue 1 description", "Issue 2 description"], indent=4)
  158. in prompt
  159. )
  160. assert "PR Thread Comments:\n" + "\n---\n".join(issue.thread_comments) in prompt
  161. assert "Last message from AI agent:\n" + history[0].content in prompt
  162. def test_check_feedback_with_llm():
  163. """Test the _check_feedback_with_llm helper function."""
  164. # Create a PR handler instance
  165. handler = PRHandler("test-owner", "test-repo", "test-token")
  166. # Create mock LLM config
  167. llm_config = LLMConfig(model="test-model", api_key="test-key")
  168. # Test cases for different LLM responses
  169. test_cases = [
  170. {
  171. "response": "--- success\ntrue\n--- explanation\nChanges look good",
  172. "expected": (True, "Changes look good"),
  173. },
  174. {
  175. "response": "--- success\nfalse\n--- explanation\nNot all issues fixed",
  176. "expected": (False, "Not all issues fixed"),
  177. },
  178. {
  179. "response": "Invalid response format",
  180. "expected": (
  181. False,
  182. "Failed to decode answer from LLM response: Invalid response format",
  183. ),
  184. },
  185. {
  186. "response": "--- success\ntrue\n--- explanation\nMultiline\nexplanation\nhere",
  187. "expected": (True, "Multiline\nexplanation\nhere"),
  188. },
  189. ]
  190. for case in test_cases:
  191. # Mock the LLM response
  192. mock_response = MagicMock()
  193. mock_response.choices = [MagicMock(message=MagicMock(content=case["response"]))]
  194. # Test the function
  195. with patch("litellm.completion", return_value=mock_response):
  196. success, explanation = handler._check_feedback_with_llm(
  197. "test prompt", llm_config
  198. )
  199. assert (success, explanation) == case["expected"]
  200. def test_check_review_thread():
  201. """Test the _check_review_thread helper function."""
  202. # Create a PR handler instance
  203. handler = PRHandler("test-owner", "test-repo", "test-token")
  204. # Create test data
  205. review_thread = ReviewThread(
  206. comment="Please fix the formatting\n---\nlatest feedback:\nAdd docstrings",
  207. files=["/src/file1.py", "/src/file2.py"],
  208. )
  209. issues_context = json.dumps(
  210. ["Issue 1 description", "Issue 2 description"], indent=4
  211. )
  212. last_message = "I have fixed the formatting and added docstrings"
  213. llm_config = LLMConfig(model="test-model", api_key="test-key")
  214. # Mock the LLM response
  215. mock_response = MagicMock()
  216. mock_response.choices = [
  217. MagicMock(
  218. message=MagicMock(
  219. content="""--- success
  220. true
  221. --- explanation
  222. Changes look good"""
  223. )
  224. )
  225. ]
  226. # Test the function
  227. with patch("litellm.completion") as mock_completion:
  228. mock_completion.return_value = mock_response
  229. success, explanation = handler._check_review_thread(
  230. review_thread, issues_context, last_message, llm_config
  231. )
  232. # Verify the litellm.completion() call
  233. mock_completion.assert_called_once()
  234. call_args = mock_completion.call_args
  235. prompt = call_args[1]["messages"][0]["content"]
  236. # Check prompt content
  237. assert "Issue descriptions:\n" + issues_context in prompt
  238. assert "Feedback:\n" + review_thread.comment in prompt
  239. assert (
  240. "Files locations:\n" + json.dumps(review_thread.files, indent=4) in prompt
  241. )
  242. assert "Last message from AI agent:\n" + last_message in prompt
  243. # Check result
  244. assert success is True
  245. assert explanation == "Changes look good"
  246. def test_check_thread_comments():
  247. """Test the _check_thread_comments helper function."""
  248. # Create a PR handler instance
  249. handler = PRHandler("test-owner", "test-repo", "test-token")
  250. # Create test data
  251. thread_comments = [
  252. "Please improve error handling",
  253. "Add input validation",
  254. "latest feedback:\nHandle edge cases",
  255. ]
  256. issues_context = json.dumps(
  257. ["Issue 1 description", "Issue 2 description"], indent=4
  258. )
  259. last_message = "I have added error handling and input validation"
  260. llm_config = LLMConfig(model="test-model", api_key="test-key")
  261. # Mock the LLM response
  262. mock_response = MagicMock()
  263. mock_response.choices = [
  264. MagicMock(
  265. message=MagicMock(
  266. content="""--- success
  267. true
  268. --- explanation
  269. Changes look good"""
  270. )
  271. )
  272. ]
  273. # Test the function
  274. with patch("litellm.completion") as mock_completion:
  275. mock_completion.return_value = mock_response
  276. success, explanation = handler._check_thread_comments(
  277. thread_comments, issues_context, last_message, llm_config
  278. )
  279. # Verify the litellm.completion() call
  280. mock_completion.assert_called_once()
  281. call_args = mock_completion.call_args
  282. prompt = call_args[1]["messages"][0]["content"]
  283. # Check prompt content
  284. assert "Issue descriptions:\n" + issues_context in prompt
  285. assert "PR Thread Comments:\n" + "\n---\n".join(thread_comments) in prompt
  286. assert "Last message from AI agent:\n" + last_message in prompt
  287. # Check result
  288. assert success is True
  289. assert explanation == "Changes look good"
  290. def test_check_review_comments():
  291. """Test the _check_review_comments helper function."""
  292. # Create a PR handler instance
  293. handler = PRHandler("test-owner", "test-repo", "test-token")
  294. # Create test data
  295. review_comments = [
  296. "Please improve code readability",
  297. "Add comments to complex functions",
  298. "Follow PEP 8 style guide",
  299. ]
  300. issues_context = json.dumps(
  301. ["Issue 1 description", "Issue 2 description"], indent=4
  302. )
  303. last_message = "I have improved code readability and added comments"
  304. llm_config = LLMConfig(model="test-model", api_key="test-key")
  305. # Mock the LLM response
  306. mock_response = MagicMock()
  307. mock_response.choices = [
  308. MagicMock(
  309. message=MagicMock(
  310. content="""--- success
  311. true
  312. --- explanation
  313. Changes look good"""
  314. )
  315. )
  316. ]
  317. # Test the function
  318. with patch("litellm.completion") as mock_completion:
  319. mock_completion.return_value = mock_response
  320. success, explanation = handler._check_review_comments(
  321. review_comments, issues_context, last_message, llm_config
  322. )
  323. # Verify the litellm.completion() call
  324. mock_completion.assert_called_once()
  325. call_args = mock_completion.call_args
  326. prompt = call_args[1]["messages"][0]["content"]
  327. # Check prompt content
  328. assert "Issue descriptions:\n" + issues_context in prompt
  329. assert "PR Review Comments:\n" + "\n---\n".join(review_comments) in prompt
  330. assert "Last message from AI agent:\n" + last_message in prompt
  331. # Check result
  332. assert success is True
  333. assert explanation == "Changes look good"
  334. def test_guess_success_review_comments_litellm_call():
  335. """Test that the litellm.completion() call for review comments contains the expected content."""
  336. # Create a PR handler instance
  337. handler = PRHandler("test-owner", "test-repo", "test-token")
  338. # Create a mock issue with review comments
  339. issue = GithubIssue(
  340. owner="test-owner",
  341. repo="test-repo",
  342. number=1,
  343. title="Test PR",
  344. body="Test Body",
  345. thread_comments=None,
  346. closing_issues=["Issue 1 description", "Issue 2 description"],
  347. review_comments=[
  348. "Please improve code readability",
  349. "Add comments to complex functions",
  350. "Follow PEP 8 style guide",
  351. ],
  352. thread_ids=None,
  353. head_branch="test-branch",
  354. )
  355. # Create mock history with a detailed response
  356. history = [
  357. MessageAction(
  358. content="""I have made the following changes:
  359. 1. Improved code readability by breaking down complex functions
  360. 2. Added detailed comments to all complex functions
  361. 3. Fixed code style to follow PEP 8"""
  362. )
  363. ]
  364. # Create mock LLM config
  365. llm_config = LLMConfig(model="test-model", api_key="test-key")
  366. # Mock the LLM response
  367. mock_response = MagicMock()
  368. mock_response.choices = [
  369. MagicMock(
  370. message=MagicMock(
  371. content="""--- success
  372. true
  373. --- explanation
  374. The changes successfully address the feedback."""
  375. )
  376. )
  377. ]
  378. # Test the guess_success method
  379. with patch("litellm.completion") as mock_completion:
  380. mock_completion.return_value = mock_response
  381. success, success_list, explanation = handler.guess_success(
  382. issue, history, llm_config
  383. )
  384. # Verify the litellm.completion() call
  385. mock_completion.assert_called_once()
  386. call_args = mock_completion.call_args
  387. prompt = call_args[1]["messages"][0]["content"]
  388. # Check prompt content
  389. assert (
  390. "Issue descriptions:\n"
  391. + json.dumps(["Issue 1 description", "Issue 2 description"], indent=4)
  392. in prompt
  393. )
  394. assert "PR Review Comments:\n" + "\n---\n".join(issue.review_comments) in prompt
  395. assert "Last message from AI agent:\n" + history[0].content in prompt