helper.py 2.3 KB

1234567891011121314151617181920212223242526272829303132333435363738394041424344454647484950515253545556575859606162636465666768697071727374757677
  1. import os
  2. import re
  3. from functools import partial
  4. from evaluation.utils.shared import codeact_user_response
  5. from openhands.events.action import CmdRunAction, MessageAction
  6. def try_parse_answer(act) -> str | None:
  7. raw_ans = ''
  8. if isinstance(act, MessageAction) and act.source == 'agent':
  9. raw_ans = act.content
  10. elif isinstance(act, CmdRunAction) and act.source == 'agent':
  11. raw_ans = act.thought
  12. else:
  13. return None
  14. agent_answer = re.findall(r'<solution>(.*?)</solution>', raw_ans, re.DOTALL)
  15. if not agent_answer:
  16. return None
  17. return agent_answer[0].strip()
  18. FAKE_RESPONSES = {
  19. 'CodeActAgent': partial(
  20. codeact_user_response, encapsulate_solution=True, try_parse=try_parse_answer
  21. ),
  22. }
  23. INST_SUFFIXES: dict[str, str] = {
  24. 'CodeActAgent': (
  25. 'When you think you have solved the question, '
  26. 'please first send your answer to user through message and then exit.\n'
  27. )
  28. }
  29. def analysis_size(size_str):
  30. size_str = size_str.strip()
  31. avails = {
  32. 'B': 1,
  33. 'Byte': 1,
  34. 'K': 1024,
  35. 'KB': 1024,
  36. 'M': 1024 * 1024,
  37. 'MB': 1024 * 1024,
  38. 'G': 1024 * 1024 * 1024,
  39. 'GB': 1024 * 1024 * 1024,
  40. 'T': 1024 * 1024 * 1024 * 1024,
  41. 'TB': 1024 * 1024 * 1024 * 1024,
  42. 'P': 1024 * 1024 * 1024 * 1024 * 1024,
  43. 'PB': 1024 * 1024 * 1024 * 1024 * 1024,
  44. }
  45. for size_unit in avails:
  46. if size_str.endswith(size_unit):
  47. return int(size_str[: -len(size_unit)]) * avails[size_unit]
  48. return int(size_str)
  49. def compare_results(check_method: str, model_answer: str, final_ans: str) -> bool:
  50. try:
  51. match check_method:
  52. case 'check/integer-match.py':
  53. return int(model_answer) == int(final_ans)
  54. case 'check/size-match.py':
  55. return analysis_size(model_answer) == analysis_size(final_ans)
  56. return (
  57. model_answer.replace('\r\n', '\n').replace('\r', '\n').strip()
  58. == final_ans.replace('\r\n', '\n').replace('\r', '\n').strip()
  59. )
  60. except Exception:
  61. return False
  62. def create_sh_file(filename: str, cmds: str) -> None:
  63. with open(filename, 'w', encoding='utf-8') as file:
  64. file.write(cmds.replace('\r\n', '\n'))
  65. os.chmod(filename, 0o755)