Ver código fonte

fix: add an early exit check for agent answers in agent bench. (#2257)

Signed-off-by: ifuryst <ifuryst@gmail.com>
Leo 1 ano atrás
pai
commit
040d6bd806
2 arquivos alterados com 28 adições e 1 exclusão
  1. 17 0
      evaluation/agent_bench/helper.py
  2. 11 1
      evaluation/agent_bench/run_infer.py

+ 17 - 0
evaluation/agent_bench/helper.py

@@ -1,4 +1,7 @@
 import os
+import re
+
+from opendevin.events.action import CmdRunAction, MessageAction
 
 
 def analysis_size(size_str):
@@ -42,3 +45,17 @@ def create_sh_file(filename: str, cmds: str) -> None:
     with open(filename, 'w', encoding='utf-8') as file:
         file.write(cmds.replace('\r\n', '\n'))
     os.chmod(filename, 0o755)
+
+
+def try_parse_answer(act) -> str | None:
+    raw_ans = ''
+    if isinstance(act, MessageAction) and act.source == 'agent':
+        raw_ans = act.content
+    elif isinstance(act, CmdRunAction) and act.source == 'agent':
+        raw_ans = act.thought
+    else:
+        return None
+    agent_answer = re.findall(r'<solution>(.*?)</solution>', raw_ans)
+    if not agent_answer:
+        return None
+    return agent_answer[0].strip()

+ 11 - 1
evaluation/agent_bench/run_infer.py

@@ -14,7 +14,11 @@ import docker
 from datasets import load_dataset
 from tqdm import tqdm
 
-from evaluation.agent_bench.helper import compare_results, create_sh_file
+from evaluation.agent_bench.helper import (
+    compare_results,
+    create_sh_file,
+    try_parse_answer,
+)
 from opendevin.controller.state.state import State
 from opendevin.core.config import args, config, get_llm_config_arg
 from opendevin.core.logger import get_console_handler
@@ -43,6 +47,12 @@ def codeact_user_response(state: State) -> str:
         'IMPORTANT: YOU SHOULD NEVER ASK FOR HUMAN HELP.\n'
     )
     if state.history:
+        # check if the last action is an answer, if so, return exit for early exit
+        last_action, _ = state.history[-1]
+        ans = try_parse_answer(last_action)
+        if ans is not None:
+            return '/exit'
+
         user_msgs = [
             action
             for action, _ in state.history