1 rok pred · f9088766e8
--- a/.github/workflows/ghcr_runtime.yml
+++ b/.github/workflows/ghcr_runtime.yml
@@ -113,7 +113,7 @@ jobs:
 
				 
			
 
				           TEST_RUNTIME=eventstream \
			
 
				           SANDBOX_USER_ID=$(id -u) \
			
 
				-          SANDBOX_CONTAINER_IMAGE=$image_name \
			
 
				+          SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
			
 
				           TEST_IN_CI=true \
			
 
				           poetry run pytest --cov=agenthub --cov=openhands --cov-report=xml -s ./tests/runtime
			
 
				       - name: Upload coverage to Codecov
			
@@ -149,7 +149,7 @@ jobs:
 
				 
			
 
				           TEST_RUNTIME=eventstream \
			
 
				           SANDBOX_USER_ID=$(id -u) \
			
 
				-          SANDBOX_CONTAINER_IMAGE=$image_name \
			
 
				+          SANDBOX_BASE_CONTAINER_IMAGE=$image_name \
			
 
				           TEST_IN_CI=true \
			
 
				           TEST_ONLY=true \
			
 
				           ./tests/integration/regenerate.sh
			
--- a/config.template.toml
+++ b/config.template.toml
@@ -174,7 +174,7 @@ llm_config = 'gpt3'
 
				 #user_id = 1000
			
 
				 
			
 
				 # Container image to use for the sandbox
			
 
				-#container_image = "nikolaik/python-nodejs:python3.11-nodejs22"
			
 
				+#base_container_image = "nikolaik/python-nodejs:python3.11-nodejs22"
			
 
				 
			
 
				 # Use host network
			
 
				 #use_host_network = false
			
--- a/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
+++ b/docs/i18n/fr/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
@@ -42,10 +42,10 @@ Créez un fichier ```config.toml``` dans le répertoire OpenHands et entrez ces
 
				 [core]
			
 
				 workspace_base="./workspace"
			
 
				 run_as_openhands=true
			
 
				-sandbox_container_image="image_personnalisée"
			
 
				+sandbox_base_container_image="image_personnalisée"
			
 
				 ```
			
 
				 
			
 
				-> Assurez-vous que ```sandbox_container_image``` est défini sur le nom de votre image personnalisée précédente.
			
 
				+> Assurez-vous que ```sandbox_base_container_image``` est défini sur le nom de votre image personnalisée précédente.
			
 
				 
			
 
				 ## Exécution
			
 
				 
			
@@ -61,7 +61,7 @@ Félicitations !
 
				 
			
 
				 Le code pertinent est défini dans [ssh_box.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/ssh_box.py) et [image_agnostic_util.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py).
			
 
				 
			
 
				-En particulier, ssh_box.py vérifie l'objet config pour ```config.sandbox_container_image``` et ensuite tente de récupérer l'image à l'aide de [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72), qui est défini dans image_agnostic_util.py.
			
 
				+En particulier, ssh_box.py vérifie l'objet config pour ```config.sandbox.base_container_image``` et ensuite tente de récupérer l'image à l'aide de [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72), qui est défini dans image_agnostic_util.py.
			
 
				 
			
 
				 Lorsqu'une image personnalisée est utilisée pour la première fois, elle ne sera pas trouvée et donc elle sera construite (à l'exécution ultérieure, l'image construite sera trouvée et renvoyée).
			
 
				 
			
@@ -92,7 +92,7 @@ Si vous voyez cette erreur dans la sortie de la console, il s'agit du fait que O
 
				 [core]
			
 
				 workspace_base="./workspace"
			
 
				 run_as_openhands=true
			
 
				-sandbox_container_image="image_personnalisée"
			
 
				+sandbox_base_container_image="image_personnalisée"
			
 
				 sandbox_user_id="1001"
			
 
				 ```
			
 
				 
			
--- a/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
+++ b/docs/i18n/zh-Hans/docusaurus-plugin-content-docs/current/usage/custom_sandbox_guide.md
@@ -41,10 +41,10 @@ docker build -t custom_image .
 
				 [core]
			
 
				 workspace_base="./workspace"
			
 
				 run_as_openhands=true
			
 
				-sandbox_container_image="custom_image"
			
 
				+sandbox_base_container_image="custom_image"
			
 
				 ```
			
 
				 
			
 
				-> 确保 `sandbox_container_image` 设置为您前一步中自定义映像的名称。
			
 
				+> 确保 `sandbox_base_container_image` 设置为您前一步中自定义映像的名称。
			
 
				 
			
 
				 ## 运行
			
 
				 
			
@@ -60,7 +60,7 @@ sandbox_container_image="custom_image"
 
				 
			
 
				 相关代码定义在 [ssh_box.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/ssh_box.py) 和 [image_agnostic_util.py](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py) 中。
			
 
				 
			
 
				-特别是 ssh_box.py 检查配置对象中的 ```config.sandbox_container_image```，然后尝试使用 [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72)，在 image_agnostic_util.py 定义中进行检索。
			
 
				+特别是 ssh_box.py 检查配置对象中的 ```config.sandbox.base_container_image```，然后尝试使用 [get_od_sandbox_image](https://github.com/All-Hands-AI/OpenHands/blob/main/openhands/runtime/docker/image_agnostic_util.py#L72)，在 image_agnostic_util.py 定义中进行检索。
			
 
				 
			
 
				 初次使用自定义映像时，该映像将不会被找到，因此将被构建（在后续运行中已构建的映像将被查找并返回）。
			
 
				 
			
@@ -92,7 +92,7 @@ dockerfile_content = (
 
				 [core]
			
 
				 workspace_base="./workspace"
			
 
				 run_as_openhands=true
			
 
				-sandbox_container_image="custom_image"
			
 
				+sandbox_base_container_image="custom_image"
			
 
				 sandbox_user_id="1001"
			
 
				 ```
			
 
				 
			
--- a/docs/modules/usage/how-to/custom-sandbox-guide.md
+++ b/docs/modules/usage/how-to/custom-sandbox-guide.md
@@ -67,10 +67,10 @@ Create a `config.toml` file in the OpenHands directory and enter these contents:
 
				 [core]
			
 
				 workspace_base="./workspace"
			
 
				 run_as_openhands=true
			
 
				-sandbox_container_image="custom_image"
			
 
				+sandbox_base_container_image="custom_image"
			
 
				 ```
			
 
				 
			
 
				-For `sandbox_container_image`, you can specify either:
			
 
				+For `sandbox_base_container_image`, you can specify either:
			
 
				 
			
 
				 1. The name of your custom image that you built in the previous step (e.g., `"custom_image"`)
			
 
				 2. A pre-existing image from Docker Hub (e.g., `"node:20"` if you want a sandbox with Node.js pre-installed)
			
@@ -98,7 +98,7 @@ If you see this error in the console output it is because OpenHands is trying to
 
				 [core]
			
 
				 workspace_base="./workspace"
			
 
				 run_as_openhands=true
			
 
				-sandbox_container_image="custom_image"
			
 
				+sandbox_base_container_image="custom_image"
			
 
				 sandbox_user_id="1001"
			
 
				 ```
			
 
				 
			
--- a/docs/modules/usage/how-to/evaluation-harness.md
+++ b/docs/modules/usage/how-to/evaluation-harness.md
@@ -116,7 +116,7 @@ To create an evaluation workflow for your benchmark, follow these steps:
 
				            runtime='eventstream',
			
 
				            max_iterations=metadata.max_iterations,
			
 
				            sandbox=SandboxConfig(
			
 
				-               container_image='your_container_image',
			
 
				+               base_container_image='your_container_image',
			
 
				                enable_auto_lint=True,
			
 
				                timeout=300,
			
 
				            ),
			
--- a/docs/static/img/backend_architecture.puml
+++ b/docs/static/img/backend_architecture.puml
@@ -135,7 +135,7 @@ class openhands.sandbox.sandbox.DockerInteractive {
 
				   workspace_dir: None
			
 
				   workspace_dir: None
			
 
				   timeout: int
			
 
				-  container_image: None
			
 
				+  base_container_image: None
			
 
				   container_name: None
			
 
				 }
			
 
				 class openhands.observation.UserMessageObservation {
			
--- a/docs/static/img/backend_architecture.svg
+++ b/docs/static/img/backend_architecture.svg
--- a/evaluation/EDA/run_infer.py
+++ b/evaluation/EDA/run_infer.py
@@ -62,7 +62,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=False,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/agent_bench/run_infer.py
+++ b/evaluation/agent_bench/run_infer.py
@@ -44,7 +44,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/aider_bench/run_infer.py
+++ b/evaluation/aider_bench/run_infer.py
@@ -42,7 +42,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				             timeout=100,
			
--- a/evaluation/biocoder/run_infer.py
+++ b/evaluation/biocoder/run_infer.py
@@ -62,7 +62,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
			
 
				+            base_container_image=BIOCODER_BENCH_CONTAINER_IMAGE,
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/bird/run_infer.py
+++ b/evaluation/bird/run_infer.py
@@ -75,7 +75,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/browsing_delegation/run_infer.py
+++ b/evaluation/browsing_delegation/run_infer.py
@@ -40,7 +40,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=False,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/gaia/run_infer.py
+++ b/evaluation/gaia/run_infer.py
@@ -51,7 +51,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/gorilla/run_infer.py
+++ b/evaluation/gorilla/run_infer.py
@@ -43,7 +43,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/gpqa/run_infer.py
+++ b/evaluation/gpqa/run_infer.py
@@ -65,7 +65,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/humanevalfix/run_infer.py
+++ b/evaluation/humanevalfix/run_infer.py
@@ -86,7 +86,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/logic_reasoning/run_infer.py
+++ b/evaluation/logic_reasoning/run_infer.py
@@ -49,7 +49,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='xingyaoww/od-eval-logic-reasoning:v1.0',
			
 
				+            base_container_image='xingyaoww/od-eval-logic-reasoning:v1.0',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				             runtime_extra_deps='$OD_INTERPRETER_PATH -m pip install scitools-pyke',
			
--- a/evaluation/miniwob/run_infer.py
+++ b/evaluation/miniwob/run_infer.py
@@ -49,7 +49,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='xingyaoww/od-eval-miniwob:v1.0',
			
 
				+            base_container_image='xingyaoww/od-eval-miniwob:v1.0',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				             browsergym_eval_env=env_id,
			
--- a/evaluation/mint/run_infer.py
+++ b/evaluation/mint/run_infer.py
@@ -101,7 +101,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='xingyaoww/od-eval-mint:v1.0',
			
 
				+            base_container_image='xingyaoww/od-eval-mint:v1.0',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				             runtime_extra_deps=f'$OD_INTERPRETER_PATH -m pip install {" ".join(MINT_DEPENDENCIES)}',
			
--- a/evaluation/ml_bench/run_infer.py
+++ b/evaluation/ml_bench/run_infer.py
@@ -80,7 +80,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='public.ecr.aws/i5g0m1f6/ml-bench',
			
 
				+            base_container_image='public.ecr.aws/i5g0m1f6/ml-bench',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/swe_bench/run_infer.py
+++ b/evaluation/swe_bench/run_infer.py
@@ -93,9 +93,9 @@ def get_config(
 
				     SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2.1'
			
 
				     if USE_INSTANCE_IMAGE:
			
 
				         # We use a different instance image for the each instance of swe-bench eval
			
 
				-        container_image = 'sweb.eval.x86_64.' + instance['instance_id']
			
 
				+        base_container_image = 'sweb.eval.x86_64.' + instance['instance_id']
			
 
				     else:
			
 
				-        container_image = SWE_BENCH_CONTAINER_IMAGE
			
 
				+        base_container_image = SWE_BENCH_CONTAINER_IMAGE
			
 
				 
			
 
				     config = AppConfig(
			
 
				         default_agent=metadata.agent_class,
			
@@ -104,7 +104,7 @@ def get_config(
 
				         max_budget_per_task=4,
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image=container_image,
			
 
				+            base_container_image=base_container_image,
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				             # large enough timeout, since some testcases take very long to run
			
--- a/evaluation/toolqa/run_infer.py
+++ b/evaluation/toolqa/run_infer.py
@@ -45,7 +45,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				         ),
			
--- a/evaluation/webarena/run_infer.py
+++ b/evaluation/webarena/run_infer.py
@@ -54,7 +54,7 @@ def get_config(
 
				         runtime='eventstream',
			
 
				         max_iterations=metadata.max_iterations,
			
 
				         sandbox=SandboxConfig(
			
 
				-            container_image='python:3.11-bookworm',
			
 
				+            base_container_image='python:3.11-bookworm',
			
 
				             enable_auto_lint=True,
			
 
				             use_host_network=False,
			
 
				             browsergym_eval_env=env_id,
			
--- a/openhands/core/config.py
+++ b/openhands/core/config.py
@@ -179,7 +179,8 @@ class SandboxConfig(metaclass=Singleton):
 
				 
			
 
				     Attributes:
			
 
				         api_hostname: The hostname for the EventStream Runtime API.
			
 
				-        container_image: The container image to use for the sandbox.
			
 
				+        base_container_image: The base container image from which to build the runtime image.
			
 
				+        runtime_container_image: The runtime container image to use.
			
 
				         user_id: The user ID for the sandbox.
			
 
				         timeout: The timeout for the sandbox.
			
 
				         enable_auto_lint: Whether to enable auto-lint.
			
@@ -199,7 +200,10 @@ class SandboxConfig(metaclass=Singleton):
 
				     """
			
 
				 
			
 
				     api_hostname: str = 'localhost'
			
 
				-    container_image: str = 'nikolaik/python-nodejs:python3.11-nodejs22'  # default to nikolaik/python-nodejs:python3.11-nodejs22 for eventstream runtime
			
 
				+    base_container_image: str | None = (
			
 
				+        'nikolaik/python-nodejs:python3.11-nodejs22'  # default to nikolaik/python-nodejs:python3.11-nodejs22 for eventstream runtime
			
 
				+    )
			
 
				+    runtime_container_image: str | None = None
			
 
				     user_id: int = os.getuid() if hasattr(os, 'getuid') else 1000
			
 
				     timeout: int = 120
			
 
				     enable_auto_lint: bool = (
			
--- a/openhands/core/schema/config.py
+++ b/openhands/core/schema/config.py
@@ -22,7 +22,7 @@ class ConfigType(str, Enum):
 
				     CACHE_DIR = 'CACHE_DIR'
			
 
				     LLM_MODEL = 'LLM_MODEL'
			
 
				     CONFIRMATION_MODE = 'CONFIRMATION_MODE'
			
 
				-    SANDBOX_CONTAINER_IMAGE = 'SANDBOX_CONTAINER_IMAGE'
			
 
				+    BASE_CONTAINER_IMAGE = 'BASE_CONTAINER_IMAGE'
			
 
				     RUN_AS_OPENHANDS = 'RUN_AS_OPENHANDS'
			
 
				     LLM_EMBEDDING_MODEL = 'LLM_EMBEDDING_MODEL'
			
 
				     LLM_EMBEDDING_BASE_URL = 'LLM_EMBEDDING_BASE_URL'
			
--- a/openhands/runtime/client/runtime.py
+++ b/openhands/runtime/client/runtime.py
@@ -104,7 +104,6 @@ class EventStreamRuntime(Runtime):
 
				         event_stream: EventStream,
			
 
				         sid: str = 'default',
			
 
				         plugins: list[PluginRequirement] | None = None,
			
 
				-        container_image: str | None = None,
			
 
				     ):
			
 
				         super().__init__(
			
 
				             config, event_stream, sid, plugins
			
@@ -118,11 +117,8 @@ class EventStreamRuntime(Runtime):
 
				         )
			
 
				         # TODO: We can switch to aiodocker when `get_od_sandbox_image` is updated to use aiodocker
			
 
				         self.docker_client: docker.DockerClient = self._init_docker_client()
			
 
				-        self.container_image = (
			
 
				-            self.config.sandbox.container_image
			
 
				-            if container_image is None
			
 
				-            else container_image
			
 
				-        )
			
 
				+        self.base_container_image = self.config.sandbox.base_container_image
			
 
				+        self.runtime_container_image = self.config.sandbox.runtime_container_image
			
 
				         self.container_name = self.container_name_prefix + self.instance_id
			
 
				 
			
 
				         self.container = None
			
@@ -140,11 +136,16 @@ class EventStreamRuntime(Runtime):
 
				                 f'Installing extra user-provided dependencies in the runtime image: {self.config.sandbox.runtime_extra_deps}'
			
 
				             )
			
 
				 
			
 
				-        self.container_image = build_runtime_image(
			
 
				-            self.container_image,
			
 
				-            self.runtime_builder,
			
 
				-            extra_deps=self.config.sandbox.runtime_extra_deps,
			
 
				-        )
			
 
				+        if self.runtime_container_image is None:
			
 
				+            if self.base_container_image is None:
			
 
				+                raise ValueError(
			
 
				+                    'Neither runtime container image nor base container image is set'
			
 
				+                )
			
 
				+            self.runtime_container_image = build_runtime_image(
			
 
				+                self.base_container_image,
			
 
				+                self.runtime_builder,
			
 
				+                extra_deps=self.config.sandbox.runtime_extra_deps,
			
 
				+            )
			
 
				         self.container = await self._init_container(
			
 
				             self.sandbox_workspace_dir,
			
 
				             mount_dir=self.config.workspace_mount_path,
			
@@ -181,7 +182,7 @@ class EventStreamRuntime(Runtime):
 
				     ):
			
 
				         try:
			
 
				             logger.info(
			
 
				-                f'Starting container with image: {self.container_image} and name: {self.container_name}'
			
 
				+                f'Starting container with image: {self.runtime_container_image} and name: {self.container_name}'
			
 
				             )
			
 
				             plugin_arg = ''
			
 
				             if plugins is not None and len(plugins) > 0:
			
@@ -215,7 +216,7 @@ class EventStreamRuntime(Runtime):
 
				             else:
			
 
				                 browsergym_arg = ''
			
 
				             container = self.docker_client.containers.run(
			
 
				-                self.container_image,
			
 
				+                self.runtime_container_image,
			
 
				                 command=(
			
 
				                     f'/openhands/miniforge3/bin/mamba run --no-capture-output -n base '
			
 
				                     'PYTHONUNBUFFERED=1 poetry run '
			
--- a/tests/integration/regenerate.sh
+++ b/tests/integration/regenerate.sh
@@ -57,8 +57,8 @@ mkdir -p $WORKSPACE_BASE
 
				 
			
 
				 # use environmental variable if exists
			
 
				 TEST_RUNTIME="${TEST_RUNTIME:-eventstream}"
			
 
				-if [ -z "$SANDBOX_CONTAINER_IMAGE" ]; then
			
 
				-  SANDBOX_CONTAINER_IMAGE="nikolaik/python-nodejs:python3.11-nodejs22"
			
 
				+if [ -z "$SANDBOX_BASE_CONTAINER_IMAGE" ]; then
			
 
				+  SANDBOX_BASE_CONTAINER_IMAGE="nikolaik/python-nodejs:python3.11-nodejs22"
			
 
				 fi
			
 
				 
			
 
				 MAX_ITERATIONS=15
			
@@ -114,7 +114,7 @@ run_test() {
 
				     MAX_ITERATIONS=$MAX_ITERATIONS \
			
 
				     DEFAULT_AGENT=$agent \
			
 
				     TEST_RUNTIME="$TEST_RUNTIME" \
			
 
				-    SANDBOX_CONTAINER_IMAGE="$SANDBOX_CONTAINER_IMAGE" \
			
 
				+    SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \
			
 
				     $pytest_cmd 2>&1 | tee $TMP_FILE
			
 
				 
			
 
				   # Capture the exit code of pytest
			
@@ -185,7 +185,7 @@ regenerate_without_llm() {
 
				       FORCE_APPLY_PROMPTS=true \
			
 
				       DEFAULT_AGENT=$agent \
			
 
				       TEST_RUNTIME="$TEST_RUNTIME" \
			
 
				-      SANDBOX_CONTAINER_IMAGE="$SANDBOX_CONTAINER_IMAGE" \
			
 
				+      SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \
			
 
				       poetry run pytest -s $SCRIPT_DIR/test_agent.py::$test_name
			
 
				   set +x
			
 
				 }
			
@@ -212,7 +212,7 @@ regenerate_with_llm() {
 
				       WORKSPACE_MOUNT_PATH=$WORKSPACE_MOUNT_PATH \
			
 
				       DEFAULT_AGENT=$agent \
			
 
				       RUNTIME="$TEST_RUNTIME" \
			
 
				-      SANDBOX_CONTAINER_IMAGE="$SANDBOX_CONTAINER_IMAGE" \
			
 
				+      SANDBOX_BASE_CONTAINER_IMAGE="$SANDBOX_BASE_CONTAINER_IMAGE" \
			
 
				       poetry run python "$PROJECT_ROOT/openhands/core/main.py" \
			
 
				       -i $MAX_ITERATIONS \
			
 
				       -t "$task Do not ask me for confirmation at any point." \
			
--- a/tests/runtime/conftest.py
+++ b/tests/runtime/conftest.py
@@ -61,9 +61,9 @@ def enable_auto_lint(request):
 
				 
			
 
				 
			
 
				 @pytest.fixture(scope='module', params=None)
			
 
				-def container_image(request):
			
 
				+def base_container_image(request):
			
 
				     time.sleep(1)
			
 
				-    env_image = os.environ.get('SANDBOX_CONTAINER_IMAGE')
			
 
				+    env_image = os.environ.get('BASE_CONTAINER_IMAGE')
			
 
				     if env_image:
			
 
				         request.param = env_image
			
 
				     else:
			
@@ -95,11 +95,12 @@ async def _load_runtime(
 
				     box_class,
			
 
				     run_as_openhands: bool = True,
			
 
				     enable_auto_lint: bool = False,
			
 
				-    container_image: str | None = None,
			
 
				+    base_container_image: str | None = None,
			
 
				     browsergym_eval_env: str | None = None,
			
 
				 ) -> Runtime:
			
 
				     sid = 'test'
			
 
				     cli_session = 'main_test'
			
 
				+
			
 
				     # AgentSkills need to be initialized **before** Jupyter
			
 
				     # otherwise Jupyter will not access the proper dependencies installed by AgentSkills
			
 
				     plugins = [AgentSkillsRequirement(), JupyterRequirement()]
			
@@ -114,19 +115,17 @@ async def _load_runtime(
 
				     load_from_env(config, os.environ)
			
 
				     config.run_as_openhands = run_as_openhands
			
 
				     config.sandbox.enable_auto_lint = enable_auto_lint
			
 
				+    if base_container_image is not None:
			
 
				+        config.sandbox.base_container_image = base_container_image
			
 
				 
			
 
				     file_store = get_file_store(config.file_store, config.file_store_path)
			
 
				     event_stream = EventStream(cli_session, file_store)
			
 
				 
			
 
				-    if container_image is not None:
			
 
				-        config.sandbox.container_image = container_image
			
 
				-
			
 
				     runtime = box_class(
			
 
				         config=config,
			
 
				         event_stream=event_stream,
			
 
				         sid=sid,
			
 
				         plugins=plugins,
			
 
				-        container_image=container_image,
			
 
				     )
			
 
				     await runtime.ainit()
			
 
				     await asyncio.sleep(1)
			
--- a/tests/runtime/test_browsing.py
+++ b/tests/runtime/test_browsing.py
@@ -80,7 +80,7 @@ async def test_browsergym_eval_env(temp_dir):
 
				         # only supported in event stream runtime
			
 
				         box_class=EventStreamRuntime,
			
 
				         run_as_openhands=False,  # need root permission to access file
			
 
				-        container_image='xingyaoww/od-eval-miniwob:v1.0',
			
 
				+        base_container_image='xingyaoww/od-eval-miniwob:v1.0',
			
 
				         browsergym_eval_env='browsergym/miniwob.choose-list',
			
 
				     )
			
 
				     from openhands.runtime.browser.browser_env import (
			
--- a/tests/runtime/test_images.py
+++ b/tests/runtime/test_images.py
@@ -14,15 +14,17 @@ from openhands.events.action import CmdRunAction
 
				 
			
 
				 
			
 
				 @pytest.mark.asyncio
			
 
				-async def test_bash_python_version(temp_dir, box_class, container_image):
			
 
				+async def test_bash_python_version(temp_dir, box_class, base_container_image):
			
 
				     """Make sure Python is available in bash."""
			
 
				-    if container_image not in [
			
 
				+    if base_container_image not in [
			
 
				         'python:3.11-bookworm',
			
 
				         'nikolaik/python-nodejs:python3.11-nodejs22',
			
 
				     ]:
			
 
				         pytest.skip('This test is only for python-related images')
			
 
				 
			
 
				-    runtime = await _load_runtime(temp_dir, box_class, container_image=container_image)
			
 
				+    runtime = await _load_runtime(
			
 
				+        temp_dir, box_class, base_container_image=base_container_image
			
 
				+    )
			
 
				 
			
 
				     action = CmdRunAction(command='which python')
			
 
				     logger.info(action, extra={'msg_type': 'ACTION'})
			
@@ -49,15 +51,17 @@ async def test_bash_python_version(temp_dir, box_class, container_image):
 
				 
			
 
				 
			
 
				 @pytest.mark.asyncio
			
 
				-async def test_nodejs_22_version(temp_dir, box_class, container_image):
			
 
				+async def test_nodejs_22_version(temp_dir, box_class, base_container_image):
			
 
				     """Make sure Node.js is available in bash."""
			
 
				-    if container_image not in [
			
 
				+    if base_container_image not in [
			
 
				         'node:22-bookworm',
			
 
				         'nikolaik/python-nodejs:python3.11-nodejs22',
			
 
				     ]:
			
 
				         pytest.skip('This test is only for nodejs-related images')
			
 
				 
			
 
				-    runtime = await _load_runtime(temp_dir, box_class, container_image=container_image)
			
 
				+    runtime = await _load_runtime(
			
 
				+        temp_dir, box_class, base_container_image=base_container_image
			
 
				+    )
			
 
				 
			
 
				     action = CmdRunAction(command='node --version')
			
 
				     logger.info(action, extra={'msg_type': 'ACTION'})
			
@@ -71,14 +75,16 @@ async def test_nodejs_22_version(temp_dir, box_class, container_image):
 
				 
			
 
				 
			
 
				 @pytest.mark.asyncio
			
 
				-async def test_go_version(temp_dir, box_class, container_image):
			
 
				+async def test_go_version(temp_dir, box_class, base_container_image):
			
 
				     """Make sure Go is available in bash."""
			
 
				-    if container_image not in [
			
 
				+    if base_container_image not in [
			
 
				         'golang:1.23-bookworm',
			
 
				     ]:
			
 
				         pytest.skip('This test is only for go-related images')
			
 
				 
			
 
				-    runtime = await _load_runtime(temp_dir, box_class, container_image=container_image)
			
 
				+    runtime = await _load_runtime(
			
 
				+        temp_dir, box_class, base_container_image=base_container_image
			
 
				+    )
			
 
				 
			
 
				     action = CmdRunAction(command='go version')
			
 
				     logger.info(action, extra={'msg_type': 'ACTION'})
			
--- a/tests/unit/test_config.py
+++ b/tests/unit/test_config.py
@@ -75,7 +75,7 @@ def test_load_from_old_style_env(monkeypatch, default_config):
 
				     monkeypatch.setenv('AGENT_MEMORY_ENABLED', 'True')
			
 
				     monkeypatch.setenv('DEFAULT_AGENT', 'PlannerAgent')
			
 
				     monkeypatch.setenv('WORKSPACE_BASE', '/opt/files/workspace')
			
 
				-    monkeypatch.setenv('SANDBOX_CONTAINER_IMAGE', 'custom_image')
			
 
				+    monkeypatch.setenv('SANDBOX_BASE_CONTAINER_IMAGE', 'custom_image')
			
 
				 
			
 
				     load_from_env(default_config, os.environ)
			
 
				 
			
@@ -89,7 +89,7 @@ def test_load_from_old_style_env(monkeypatch, default_config):
 
				     assert (
			
 
				         default_config.workspace_mount_path_in_sandbox is not UndefinedString.UNDEFINED
			
 
				     )
			
 
				-    assert default_config.sandbox.container_image == 'custom_image'
			
 
				+    assert default_config.sandbox.base_container_image == 'custom_image'
			
 
				 
			
 
				 
			
 
				 def test_load_from_new_style_toml(default_config, temp_toml_file):
			
@@ -178,7 +178,7 @@ memory_enabled = true
 
				 [core]
			
 
				 workspace_base = "/opt/files2/workspace"
			
 
				 sandbox_timeout = 500
			
 
				-sandbox_container_image = "node:14"
			
 
				+sandbox_base_container_image = "node:14"
			
 
				 sandbox_user_id = 1001
			
 
				 default_agent = "TestAgent"
			
 
				 """
			
@@ -192,7 +192,7 @@ default_agent = "TestAgent"
 
				     assert default_config.get_agent_config().memory_enabled is True
			
 
				     assert default_config.workspace_base == '/opt/files2/workspace'
			
 
				     assert default_config.sandbox.timeout == 500
			
 
				-    assert default_config.sandbox.container_image == 'node:14'
			
 
				+    assert default_config.sandbox.base_container_image == 'node:14'
			
 
				     assert default_config.sandbox.user_id == 1001
			
 
				     assert default_config.workspace_mount_path_in_sandbox == '/workspace'
			
 
				 
			
@@ -200,7 +200,7 @@ default_agent = "TestAgent"
 
				 
			
 
				     # app config doesn't have fields sandbox_*
			
 
				     assert not hasattr(default_config, 'sandbox_timeout')
			
 
				-    assert not hasattr(default_config, 'sandbox_container_image')
			
 
				+    assert not hasattr(default_config, 'sandbox_base_container_image')
			
 
				     assert not hasattr(default_config, 'sandbox_user_id')
			
 
				 
			
 
				     # after finalize_config, workspace_mount_path is set to the absolute path of workspace_base
			
@@ -319,7 +319,7 @@ model = "test-model"
 
				 
			
 
				 [sandbox]
			
 
				 timeout = 1
			
 
				-container_image = "custom_image"
			
 
				+base_container_image = "custom_image"
			
 
				 user_id = 1001
			
 
				 """
			
 
				         )
			
@@ -330,7 +330,7 @@ user_id = 1001
 
				 
			
 
				     assert default_config.get_llm_config().model == 'test-model'
			
 
				     assert default_config.sandbox.timeout == 1
			
 
				-    assert default_config.sandbox.container_image == 'custom_image'
			
 
				+    assert default_config.sandbox.base_container_image == 'custom_image'
			
 
				     assert default_config.sandbox.user_id == 1001
			
 
				 
			
 
				 
			
@@ -357,7 +357,7 @@ def test_defaults_dict_after_updates(default_config):
 
				     )
			
 
				     assert defaults_after_updates['sandbox']['timeout']['default'] == 120
			
 
				     assert (
			
 
				-        defaults_after_updates['sandbox']['container_image']['default']
			
 
				+        defaults_after_updates['sandbox']['base_container_image']['default']
			
 
				         == 'nikolaik/python-nodejs:python3.11-nodejs22'
			
 
				     )
			
 
				     assert defaults_after_updates == initial_defaults