summarize_outputs.py 2.3 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576
  1. #!/usr/bin/env python3
  2. import argparse
  3. import json
  4. from collections import Counter
  5. ERROR_KEYWORDS = [
  6. 'Agent encountered an error while processing the last action',
  7. 'APIError',
  8. 'Action execution failed',
  9. ]
  10. if __name__ == '__main__':
  11. parser = argparse.ArgumentParser()
  12. parser.add_argument('output_file', type=str, help='The file to summarize')
  13. args = parser.parse_args()
  14. with open(args.output_file, 'r') as file:
  15. lines = file.readlines()
  16. num_lines = len(lines)
  17. num_error_lines = 0
  18. num_agent_stuck_in_loop = 0
  19. num_resolved = 0
  20. num_empty_patch = 0
  21. error_counter = Counter()
  22. for line in lines:
  23. _d = json.loads(line)
  24. patch = _d.get('test_result', {}).get('git_patch', '')
  25. if patch == '':
  26. num_empty_patch += 1
  27. continue
  28. report = _d.get('report', {}) or {}
  29. resolved = report.get('resolved', False)
  30. if resolved:
  31. num_resolved += 1
  32. error = _d.get('error', None)
  33. if error is not None and isinstance(error, str):
  34. agent_stuck_in_loop = 'Agent got stuck in a loop' in error
  35. contains_error = bool(error) and not agent_stuck_in_loop
  36. if agent_stuck_in_loop:
  37. error_counter['Agent got stuck in a loop'] += 1
  38. num_agent_stuck_in_loop += 1
  39. elif contains_error:
  40. error_counter[error] += 1
  41. continue
  42. for keyword in ERROR_KEYWORDS:
  43. if keyword in line:
  44. error_counter[keyword] += 1
  45. num_error_lines += 1
  46. break
  47. # print the error counter (with percentage)
  48. print('-' * 100)
  49. print(
  50. f'# of resolved: {num_resolved} / {num_lines} ({num_resolved / num_lines * 100:.2f}%)'
  51. )
  52. print(
  53. f'# of empty patch: {num_empty_patch} / {num_lines} ({num_empty_patch / num_lines * 100:.2f}%)'
  54. )
  55. print(
  56. f'# of error lines: {num_error_lines} / {num_lines} ({num_error_lines / num_lines * 100:.2f}%)'
  57. )
  58. print(
  59. f'# of loop: {num_agent_stuck_in_loop} / {num_lines} ({num_agent_stuck_in_loop / num_lines * 100:.2f}%)'
  60. )
  61. print('-' * 100)
  62. print('Detailed error breakdown:')
  63. for error, count in error_counter.items():
  64. print(f'{error}: {count} ({count / num_lines * 100:.2f}%)')