1 vuosi sitten · da17665cab
--- a/evaluation/EDA/run_infer.py
+++ b/evaluation/EDA/run_infer.py
@@ -122,6 +122,7 @@ def process_instance(
 
				             agent,
			
 
				             instruction,
			
 
				             max_iterations=metadata.max_iterations,
			
 
				+            max_budget_per_task=config.max_budget_per_task,
			
 
				             fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
			
 
				                 agent.__class__.__name__
			
 
				             ],
			
--- a/evaluation/agent_bench/run_infer.py
+++ b/evaluation/agent_bench/run_infer.py
@@ -116,6 +116,7 @@ def process_instance(
 
				             agent,
			
 
				             instruction,
			
 
				             max_iterations=metadata.max_iterations,
			
 
				+            max_budget_per_task=config.max_budget_per_task,
			
 
				             fake_user_response_fn=FAKE_RESPONSES[agent.__class__.__name__],
			
 
				             sandbox=sandbox,
			
 
				             sid=inst_id,
			
--- a/evaluation/biocoder/run_infer.py
+++ b/evaluation/biocoder/run_infer.py
@@ -169,6 +169,7 @@ def process_instance(
 
				             agent,
			
 
				             instruction,
			
 
				             max_iterations=metadata.max_iterations,
			
 
				+            max_budget_per_task=config.max_budget_per_task,
			
 
				             fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
			
 
				                 agent.__class__.__name__
			
 
				             ],
			
--- a/evaluation/bird/run_infer.py
+++ b/evaluation/bird/run_infer.py
@@ -213,6 +213,7 @@ def process_instance(
 
				             agent,
			
 
				             instruction,
			
 
				             max_iterations=metadata.max_iterations,
			
 
				+            max_budget_per_task=config.max_budget_per_task,
			
 
				             fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
			
 
				                 agent.__class__.__name__
			
 
				             ],
			
--- a/evaluation/browsing_delegation/run_infer.py
+++ b/evaluation/browsing_delegation/run_infer.py
@@ -67,7 +67,11 @@ def process_instance(
 
				 
			
 
				     state: State | None = asyncio.run(
			
 
				         run_agent_controller(
			
 
				-            agent, instruction, max_iterations=metadata.max_iterations, sid=env_id
			
 
				+            agent,
			
 
				+            instruction,
			
 
				+            max_iterations=metadata.max_iterations,
			
 
				+            max_budget_per_task=config.max_budget_per_task,
			
 
				+            sid=env_id,
			
 
				         )
			
 
				     )
			
 
				 
			
--- a/evaluation/gaia/run_infer.py
+++ b/evaluation/gaia/run_infer.py
@@ -119,6 +119,7 @@ def process_instance(
 
				                 agent,
			
 
				                 instruction,
			
 
				                 max_iterations=metadata.max_iterations,
			
 
				+                max_budget_per_task=config.max_budget_per_task,
			
 
				                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
			
 
				                     agent.__class__.__name__
			
 
				                 ],
			
--- a/evaluation/gorilla/run_infer.py
+++ b/evaluation/gorilla/run_infer.py
@@ -113,6 +113,7 @@ def process_instance(agent, question_id, question, metadata, reset_logger: bool
 
				                 agent,
			
 
				                 instruction,
			
 
				                 max_iterations=metadata.max_iterations,
			
 
				+                max_budget_per_task=config.max_budget_per_task,
			
 
				                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
			
 
				                     agent.__class__.__name__
			
 
				                 ),
			
--- a/evaluation/gpqa/run_infer.py
+++ b/evaluation/gpqa/run_infer.py
@@ -229,6 +229,7 @@ Ok now its time to start solving the question. Good luck!
 
				                 agent,
			
 
				                 instruction,
			
 
				                 max_iterations=metadata.max_iterations,
			
 
				+                max_budget_per_task=config.max_budget_per_task,
			
 
				                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
			
 
				                     agent.__class__.__name__
			
 
				                 ),
			
--- a/evaluation/humanevalfix/run_infer.py
+++ b/evaluation/humanevalfix/run_infer.py
@@ -182,6 +182,7 @@ def process_instance(
 
				                 agent,
			
 
				                 instruction,
			
 
				                 max_iterations=metadata.max_iterations,
			
 
				+                max_budget_per_task=config.max_budget_per_task,
			
 
				                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
			
 
				                     agent.__class__.__name__
			
 
				                 ),
			
--- a/evaluation/logic_reasoning/run_infer.py
+++ b/evaluation/logic_reasoning/run_infer.py
@@ -180,6 +180,7 @@ def process_instance(
 
				                 agent,
			
 
				                 instruction,
			
 
				                 max_iterations=metadata.max_iterations,
			
 
				+                max_budget_per_task=config.max_budget_per_task,
			
 
				                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
			
 
				                     agent.__class__.__name__
			
 
				                 ),
			
--- a/evaluation/miniwob/run_infer.py
+++ b/evaluation/miniwob/run_infer.py
@@ -81,6 +81,7 @@ def process_instance(
 
				             agent,
			
 
				             'PLACEHOLDER_GOAL',
			
 
				             max_iterations=metadata.max_iterations,
			
 
				+            max_budget_per_task=config.max_budget_per_task,
			
 
				             runtime_tools_config=runtime_tools_config,
			
 
				             sandbox=get_sandbox(),
			
 
				             sid=env_id,
			
--- a/evaluation/mint/run_infer.py
+++ b/evaluation/mint/run_infer.py
@@ -143,6 +143,7 @@ def process_instance(
 
				             agent,
			
 
				             instruction,
			
 
				             max_iterations=metadata.max_iterations,
			
 
				+            max_budget_per_task=config.max_budget_per_task,
			
 
				             fake_user_response_fn=fake_user_response_fn,
			
 
				             sandbox=sandbox,
			
 
				             sid=sid,
			
--- a/evaluation/ml_bench/run_infer.py
+++ b/evaluation/ml_bench/run_infer.py
@@ -150,6 +150,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
 
				                 agent,
			
 
				                 instruction,
			
 
				                 max_iterations=metadata.max_iterations,
			
 
				+                max_budget_per_task=config.max_budget_per_task,
			
 
				                 fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN.get(
			
 
				                     agent.__class__.__name__
			
 
				                 ),
			
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -282,6 +282,7 @@ IMPORTANT TIPS:
 
				             agent,
			
 
				             instruction,
			
 
				             max_iterations=metadata.max_iterations,
			
 
				+            max_budget_per_task=config.max_budget_per_task,
			
 
				             fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
			
 
				                 agent.__class__.__name__
			
 
				             ],
			
--- a/evaluation/toolqa/run_infer.py
+++ b/evaluation/toolqa/run_infer.py
@@ -79,6 +79,7 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
 
				             agent,
			
 
				             instruction,
			
 
				             max_iterations=metadata.max_iterations,
			
 
				+            max_budget_per_task=config.max_budget_per_task,
			
 
				             fake_user_response_fn=AGENT_CLS_TO_FAKE_USER_RESPONSE_FN[
			
 
				                 agent.__class__.__name__
			
 
				             ],
			
--- a/evaluation/webarena/run_infer.py
+++ b/evaluation/webarena/run_infer.py
@@ -82,6 +82,7 @@ def process_instance(
 
				             agent,
			
 
				             'PLACEHOLDER_GOAL',
			
 
				             max_iterations=metadata.max_iterations,
			
 
				+            max_budget_per_task=config.max_budget_per_task,
			
 
				             runtime_tools_config=runtime_tools_config,
			
 
				             sandbox=get_sandbox(),
			
 
				             sid=env_id,
			
--- a/opendevin/core/main.py
+++ b/opendevin/core/main.py
@@ -34,7 +34,7 @@ async def run_agent_controller(
 
				     agent: Agent,
			
 
				     task_str: str,
			
 
				     max_iterations: int,
			
 
				-    max_budget_per_task: float,
			
 
				+    max_budget_per_task: float | None = None,
			
 
				     exit_on_message: bool = False,
			
 
				     fake_user_response_fn: Callable[[State | None], str] | None = None,
			
 
				     sandbox: Sandbox | None = None,