convert_oh_output_to_md.py 2.7 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788
  1. #!/usr/bin/env python3
  2. """Convert OpenHands output to a readable markdown format for visualization."""
  3. import argparse
  4. import json
  5. import os
  6. import pandas as pd
  7. from tqdm import tqdm
  8. from evaluation.swe_bench.eval_infer import process_git_patch
  9. from openhands.events.serialization import event_from_dict
  10. tqdm.pandas()
  11. parser = argparse.ArgumentParser()
  12. parser.add_argument('oh_output_file', type=str)
  13. args = parser.parse_args()
  14. output_md_folder = args.oh_output_file.replace('.jsonl', '.viz')
  15. print(f'Converting {args.oh_output_file} to markdown files in {output_md_folder}')
  16. oh_format = pd.read_json(args.oh_output_file, orient='records', lines=True)
  17. # model name is the folder name of oh_output_file
  18. model_name = os.path.basename(os.path.dirname(args.oh_output_file))
  19. def convert_history_to_str(history):
  20. ret = ''
  21. separator = '\n\n' + '-' * 100 + '\n'
  22. for i, event in enumerate(history):
  23. if i != 0:
  24. ret += separator
  25. if isinstance(event, list):
  26. # "event" is a legacy pair of (action, observation)
  27. event_obj = event_from_dict(event[0])
  28. ret += f'## {i+1}| {event_obj.__class__.__name__}\n\n'
  29. ret += str(event_obj)
  30. ret += separator
  31. event_obj = event_from_dict(event[1])
  32. ret += f'## {i+1}| {event_obj.__class__.__name__}\n\n'
  33. ret += str(event_obj)
  34. else:
  35. # "event" is a single event
  36. event_obj = event_from_dict(event)
  37. ret += f'## {i+1}| {event_obj.__class__.__name__}\n\n'
  38. ret += str(event_obj)
  39. return ret
  40. def write_row_to_md_file(row):
  41. if 'git_patch' in row:
  42. model_patch = row['git_patch']
  43. elif 'test_result' in row and 'git_patch' in row['test_result']:
  44. model_patch = row['test_result']['git_patch']
  45. else:
  46. raise ValueError(f'Row {row} does not have a git_patch')
  47. if 'report' in row:
  48. resolved = row['report'].get('resolved', False)
  49. else:
  50. resolved = None
  51. instance_id = row['instance_id']
  52. filename = f'{str(resolved).lower()}.{instance_id}.md'
  53. os.makedirs(output_md_folder, exist_ok=True)
  54. filepath = os.path.join(output_md_folder, filename)
  55. with open(filepath, 'w') as f:
  56. f.write(f'# {instance_id} (resolved: {resolved})\n')
  57. # MetaData
  58. f.write('## MetaData\n')
  59. f.write('```json\n')
  60. f.write(json.dumps(row['metadata'], indent=2))
  61. f.write('\n```\n')
  62. # Trajectory
  63. f.write('## History\n')
  64. f.write(convert_history_to_str(row['history']))
  65. f.write('## Model Patch\n')
  66. f.write(f'{process_git_patch(model_patch)}\n')
# Script driver: call write_row_to_md_file once per row (axis=1) of the loaded
# table, using the tqdm-provided `progress_apply` to show a progress bar.
oh_format.progress_apply(write_row_to_md_file, axis=1)