Просмотр исходного кода

fix: make max_budget_per_task optional in `run_agent_controller` (#3071)

* fix: make max_budget_per_task optional in `run_agent_controller`

* pass max_budget_per_task in each evaluation run_infer.py
Xingyao Wang 1 год назад
Родитель
Commit
da17665cab

+ 1 - 0
evaluation/EDA/run_infer.py

@@ -122,6 +122,7 @@ def process_instance(
             agent,
             instruction,
             max_iterations=metadata.max_iterations,
+            max_budget_per_task=config.max_budget_per_task,
             fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
                 agent.__class__.__name__
             ],

+ 1 - 0
evaluation/agent_bench/run_infer.py

@@ -116,6 +116,7 @@ def process_instance(
             agent,
             instruction,
             max_iterations=metadata.max_iterations,
+            max_budget_per_task=config.max_budget_per_task,
             fake_user_response_fn=FAKE_RESPONSES[agent.__class__.__name__],
             sandbox=sandbox,
             sid=inst_id,

+ 1 - 0
evaluation/biocoder/run_infer.py

@@ -169,6 +169,7 @@ def process_instance(
             agent,
             instruction,
             max_iterations=metadata.max_iterations,
+            max_budget_per_task=config.max_budget_per_task,
             fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
                 agent.__class__.__name__
             ],

+ 1 - 0
evaluation/bird/run_infer.py

@@ -213,6 +213,7 @@ def process_instance(
             agent,
             instruction,
             max_iterations=metadata.max_iterations,
+            max_budget_per_task=config.max_budget_per_task,
             fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
                 agent.__class__.__name__
             ],

+ 5 - 1
evaluation/browsing_delegation/run_infer.py

@@ -67,7 +67,11 @@ def process_instance(
 
     state: State | None = asyncio.run(
         run_agent_controller(
-            agent, instruction, max_iterations=metadata.max_iterations, sid=env_id
+            agent,
+            instruction,
+            max_iterations=metadata.max_iterations,
+            max_budget_per_task=config.max_budget_per_task,
+            sid=env_id,
         )
     )
 

+ 1 - 0
evaluation/gaia/run_infer.py

@@ -119,6 +119,7 @@ def process_instance(
                 agent,
                 instruction,
                 max_iterations=metadata.max_iterations,
+                max_budget_per_task=config.max_budget_per_task,
                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
                     agent.__class__.__name__
                 ],

+ 1 - 0
evaluation/gorilla/run_infer.py

@@ -113,6 +113,7 @@ def process_instance(agent, question_id, question, metadata, reset_logger: bool
                 agent,
                 instruction,
                 max_iterations=metadata.max_iterations,
+                max_budget_per_task=config.max_budget_per_task,
                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
                     agent.__class__.__name__
                 ),

+ 1 - 0
evaluation/gpqa/run_infer.py

@@ -229,6 +229,7 @@ Ok now its time to start solving the question. Good luck!
                 agent,
                 instruction,
                 max_iterations=metadata.max_iterations,
+                max_budget_per_task=config.max_budget_per_task,
                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
                     agent.__class__.__name__
                 ),

+ 1 - 0
evaluation/humanevalfix/run_infer.py

@@ -182,6 +182,7 @@ def process_instance(
                 agent,
                 instruction,
                 max_iterations=metadata.max_iterations,
+                max_budget_per_task=config.max_budget_per_task,
                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
                     agent.__class__.__name__
                 ),

+ 1 - 0
evaluation/logic_reasoning/run_infer.py

@@ -180,6 +180,7 @@ def process_instance(
                 agent,
                 instruction,
                 max_iterations=metadata.max_iterations,
+                max_budget_per_task=config.max_budget_per_task,
                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
                     agent.__class__.__name__
                 ),

+ 1 - 0
evaluation/miniwob/run_infer.py

@@ -81,6 +81,7 @@ def process_instance(
             agent,
             'PLACEHOLDER_GOAL',
             max_iterations=metadata.max_iterations,
+            max_budget_per_task=config.max_budget_per_task,
             runtime_tools_config=runtime_tools_config,
             sandbox=get_sandbox(),
             sid=env_id,

+ 1 - 0
evaluation/mint/run_infer.py

@@ -143,6 +143,7 @@ def process_instance(
             agent,
             instruction,
             max_iterations=metadata.max_iterations,
+            max_budget_per_task=config.max_budget_per_task,
             fake_user_response_fn=fake_user_response_fn,
             sandbox=sandbox,
             sid=sid,

+ 1 - 0
evaluation/ml_bench/run_infer.py

@@ -150,6 +150,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
                 agent,
                 instruction,
                 max_iterations=metadata.max_iterations,
+                max_budget_per_task=config.max_budget_per_task,
                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
                     agent.__class__.__name__
                 ),

+ 1 - 0
evaluation/swe_bench/run_infer.py

@@ -282,6 +282,7 @@ IMPORTANT TIPS:
             agent,
             instruction,
             max_iterations=metadata.max_iterations,
+            max_budget_per_task=config.max_budget_per_task,
             fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
                 agent.__class__.__name__
             ],

+ 1 - 0
evaluation/toolqa/run_infer.py

@@ -79,6 +79,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
             agent,
             instruction,
             max_iterations=metadata.max_iterations,
+            max_budget_per_task=config.max_budget_per_task,
             fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
                 agent.__class__.__name__
             ],

+ 1 - 0
evaluation/webarena/run_infer.py

@@ -82,6 +82,7 @@ def process_instance(
             agent,
             'PLACEHOLDER_GOAL',
             max_iterations=metadata.max_iterations,
+            max_budget_per_task=config.max_budget_per_task,
             runtime_tools_config=runtime_tools_config,
             sandbox=get_sandbox(),
             sid=env_id,

+ 1 - 1
opendevin/core/main.py

@@ -34,7 +34,7 @@ async def run_agent_controller(
     agent: Agent,
     task_str: str,
     max_iterations: int,
-    max_budget_per_task: float,
+    max_budget_per_task: float | None = None,
     exit_on_message: bool = False,
     fake_user_response_fn: Callable[[State | None], str] | None = None,
     sandbox: Sandbox | None = None,