1 ano atrás · 040d6bd806
--- a/evaluation/agent_bench/helper.py
+++ b/evaluation/agent_bench/helper.py
@@ -1,4 +1,7 @@
 
				 import os
			
 
				+import re
			
 
				+
			
 
				+from opendevin.events.action import CmdRunAction, MessageAction
			
 
				 
			
 
				 
			
 
				 def analysis_size(size_str):
			
@@ -42,3 +45,17 @@ def create_sh_file(filename: str, cmds: str) -> None:
 
				     with open(filename, 'w', encoding='utf-8') as file:
			
 
				         file.write(cmds.replace('\r\n', '\n'))
			
 
				     os.chmod(filename, 0o755)
			
 
				+
			
 
				+
			
 
				+def try_parse_answer(act) -> str | None:
			
 
				+    raw_ans = ''
			
 
				+    if isinstance(act, MessageAction) and act.source == 'agent':
			
 
				+        raw_ans = act.content
			
 
				+    elif isinstance(act, CmdRunAction) and act.source == 'agent':
			
 
				+        raw_ans = act.thought
			
 
				+    else:
			
 
				+        return None
			
 
				+    agent_answer = re.findall(r'<solution>(.*?)</solution>', raw_ans)
			
 
				+    if not agent_answer:
			
 
				+        return None
			
 
				+    return agent_answer[0].strip()
			
--- a/evaluation/agent_bench/run_infer.py
+++ b/evaluation/agent_bench/run_infer.py
@@ -14,7 +14,11 @@ import docker
 
				 from datasets import load_dataset
			
 
				 from tqdm import tqdm
			
 
				 
			
 
				-from evaluation.agent_bench.helper import compare_results, create_sh_file
			
 
				+from evaluation.agent_bench.helper import (
			
 
				+    compare_results,
			
 
				+    create_sh_file,
			
 
				+    try_parse_answer,
			
 
				+)
			
 
				 from opendevin.controller.state.state import State
			
 
				 from opendevin.core.config import args, config, get_llm_config_arg
			
 
				 from opendevin.core.logger import get_console_handler
			
@@ -43,6 +47,12 @@ def codeact_user_response(state: State) -> str:
 
				         'IMPORTANT: YOU SHOULD NEVER ASK FOR HUMAN HELP.\n'
			
 
				     )
			
 
				     if state.history:
			
 
				+        # check if the last action is an answer, if so, return exit for early exit
			
 
				+        last_action, _ = state.history[-1]
			
 
				+        ans = try_parse_answer(last_action)
			
 
				+        if ans is not None:
			
 
				+            return '/exit'
			
 
				+
			
 
				         user_msgs = [
			
 
				             action
			
 
				             for action, _ in state.history