преди 1 година · 563ebd406d
--- a/evaluation/agent_bench/run_infer.py
+++ b/evaluation/agent_bench/run_infer.py
@@ -99,7 +99,14 @@ def process_instance(
 
				     # create sandbox and run the agent
			
 
				     # =============================================
			
 
				 
			
 
				-    sandbox = DockerSSHBox()
			
 
				+    sandbox = DockerSSHBox(
			
 
				+        config=config.sandbox,
			
 
				+        persist_sandbox=False,
			
 
				+        workspace_mount_path=config.workspace_mount_path,
			
 
				+        sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
			
 
				+        cache_dir=config.cache_dir,
			
 
				+        run_as_devin=config.run_as_devin,
			
 
				+    )
			
 
				     sandbox.execute(f'cd {inst_id}')
			
 
				 
			
 
				     init_cmd = instance.init
			
--- a/evaluation/logic_reasoning/run_infer.py
+++ b/evaluation/logic_reasoning/run_infer.py
@@ -173,7 +173,15 @@ def process_instance(
 
				 
			
 
				         # use a session id for concurrent evaluation
			
 
				         sid = instance['id'] + '_' + str(os.getpid())
			
 
				-        sandbox = DockerSSHBox(sid=sid)
			
 
				+        sandbox = DockerSSHBox(
			
 
				+            config=config.sandbox,
			
 
				+            persist_sandbox=False,
			
 
				+            workspace_mount_path=config.workspace_mount_path,
			
 
				+            sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
			
 
				+            cache_dir=config.cache_dir,
			
 
				+            run_as_devin=config.run_as_devin,
			
 
				+            sid=sid,
			
 
				+        )
			
 
				         exit_code, command_output = sandbox.execute('pip install scitools-pyke')
			
 
				 
			
 
				         # Here's how you can run the agent (similar to the `main` function) and get the final task state
			
--- a/evaluation/mint/run_infer.py
+++ b/evaluation/mint/run_infer.py
@@ -101,7 +101,15 @@ def process_instance(
 
				 
			
 
				     # use a session id for concurrent processing
			
 
				     sid = instance.task_id + '_' + str(os.getpid())
			
 
				-    sandbox = DockerSSHBox(sid=sid)
			
 
				+    sandbox = DockerSSHBox(
			
 
				+        config=config.sandbox,
			
 
				+        persist_sandbox=False,
			
 
				+        workspace_mount_path=config.workspace_mount_path,
			
 
				+        sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
			
 
				+        cache_dir=config.cache_dir,
			
 
				+        run_as_devin=config.run_as_devin,
			
 
				+        sid=sid,
			
 
				+    )
			
 
				 
			
 
				     requirements_host_src = 'evaluation/mint/requirements.txt'
			
 
				     requirements_sandbox_dest = '/opendevin/plugins/mint/requirements.txt'
			
--- a/evaluation/ml_bench/run_infer.py
+++ b/evaluation/ml_bench/run_infer.py
@@ -112,7 +112,15 @@ def process_instance(instance: Any, metadata: EvalMetadata, reset_logger: bool =
 
				 
			
 
				         # Create a sandbox, using the instance ID and PID as the session ID to avoid conflicts
			
 
				         sid = str(instance['id']) + '_' + str(os.getpid())
			
 
				-        sandbox = DockerSSHBox(sid=sid)
			
 
				+        sandbox = DockerSSHBox(
			
 
				+            config=config.sandbox,
			
 
				+            persist_sandbox=False,
			
 
				+            workspace_mount_path=config.workspace_mount_path,
			
 
				+            sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
			
 
				+            cache_dir=config.cache_dir,
			
 
				+            run_as_devin=config.run_as_devin,
			
 
				+            sid=sid,
			
 
				+        )
			
 
				 
			
 
				         # Set up the task environment
			
 
				         sandbox.execute(f'conda activate {ID2CONDA[instance["github_id"]]}')
			
--- a/evaluation/webarena/run_infer.py
+++ b/evaluation/webarena/run_infer.py
@@ -34,7 +34,14 @@ docker_ssh_box: DockerSSHBox | None = None
 
				 def get_sandbox():
			
 
				     global docker_ssh_box
			
 
				     if docker_ssh_box is None:
			
 
				-        docker_ssh_box = DockerSSHBox()
			
 
				+        docker_ssh_box = DockerSSHBox(
			
 
				+            config=config.sandbox,
			
 
				+            persist_sandbox=False,
			
 
				+            workspace_mount_path=config.workspace_mount_path,
			
 
				+            sandbox_workspace_dir=config.workspace_mount_path_in_sandbox,
			
 
				+            cache_dir=config.cache_dir,
			
 
				+            run_as_devin=config.run_as_devin,
			
 
				+        )
			
 
				     return docker_ssh_box