prepare_devin_outputs_for_evaluation.py 2.8 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879
  1. '''
  2. Script used to convert devin's output into the desired json format for evaluation on SWE-bench
  3. Usage:
  4. python prepare_devin_outputs_for_evaluation.py <setting>
  5. <setting> can be "passed", "failed", "all"
  6. Outputs:
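  7. json files under ../data/devin/ (relative to this script): devin_swe_passed.json and/or devin_swe_failed.json, plus the merged devin_swe_outputs.json when <setting> is "all"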
  8. '''
  9. #fetch devin's outputs into a json file for evaluation
  10. import os
  11. import sys
  12. import json
  13. import requests
  14. from tqdm import tqdm
  15. def get_devin_eval_output(setting):
  16. repo_url = 'CognitionAI/devin-swebench-results'
  17. folder_path = 'output_diffs'
  18. base_url = 'https://api.github.com/repos/'
  19. pass_api_url = f'{base_url}{repo_url}/contents/{folder_path}/pass'
  20. failed_api_url = f'{base_url}{repo_url}/contents/{folder_path}/fail'
  21. pass_files_info = []
  22. failed_files_info = []
  23. def get_files(api_url, subfolder_name, files_info):
  24. response = requests.get(api_url)
  25. if response.status_code == 200:
  26. contents = response.json()
  27. for item in tqdm(contents):
  28. if item['type'] == 'file':
  29. file_url = f"https://raw.githubusercontent.com/{repo_url}/main/{folder_path}/{subfolder_name}/{item['name']}"
  30. file_content = requests.get(file_url).text
  31. instance_id = item['name'][:-9]
  32. model_name = 'Devin' # Update with actual model name
  33. files_info.append({
  34. 'instance_id': instance_id,
  35. 'model_patch': file_content,
  36. 'model_name_or_path': model_name,
  37. 'pass_or_fail': subfolder_name
  38. })
  39. if setting == 'passed' or setting == 'all':
  40. get_files(pass_api_url, 'pass', pass_files_info)
  41. if setting == 'failed' or setting == 'all':
  42. get_files(failed_api_url, 'fail', failed_files_info)
  43. script_dir = os.path.dirname(os.path.abspath(__file__))
  44. output_dir = os.path.join(script_dir, '../data/devin/')
  45. if not os.path.exists(output_dir):
  46. os.makedirs(output_dir)
  47. if setting == 'passed' or setting == 'all':
  48. with open(os.path.join(output_dir, 'devin_swe_passed.json'), 'w') as pass_file:
  49. json.dump(pass_files_info, pass_file, indent=4)
  50. if setting == 'failed' or setting == 'all':
  51. with open(os.path.join(output_dir, 'devin_swe_failed.json'), 'w') as fail_file:
  52. json.dump(failed_files_info, fail_file, indent=4)
  53. if setting == 'all':
  54. merged_output = pass_files_info + failed_files_info
  55. with open(os.path.join(output_dir, 'devin_swe_outputs.json'), 'w') as merge_file:
  56. json.dump(merged_output, merge_file, indent=4)
  57. if __name__ == '__main__':
  58. if len(sys.argv) != 2:
  59. print('Usage: python script_name.py <setting>')
  60. sys.exit(1)
  61. setting = sys.argv[1]
  62. get_devin_eval_output(setting)