Browse Source

Use :main instead of :latest (#2539)

Co-authored-by: Boxuan Li <liboxuan@connect.hku.hk>
மனோஜ்குமார் பழனிச்சாமி 1 year ago
parent
commit
41564c2eac

+ 1 - 2
evaluation/TUTORIAL.md

@@ -31,7 +31,6 @@ workspace_base = "/path/to/your/workspace"
 workspace_mount_path = "/path/to/your/workspace"
 # ==========================
 
-sandbox_container_image = "ghcr.io/opendevin/sandbox:latest"
 sandbox_type = "ssh"
 sandbox_timeout = 120
 ssh_hostname = "localhost"
@@ -179,4 +178,4 @@ model="gpt-4o-2024-05-13"
 api_key="sk-xxx"
 ```
 
-If `[eval_gpt_4o_2024_05_13]` is not present, it will default to using the model configured in `[llm]`.
+If `[eval_gpt_4o_2024_05_13]` is not present, it will default to using the model configured in `[llm]`.

+ 0 - 1
evaluation/agent_bench/README.md

@@ -18,7 +18,6 @@ cache_dir = "/path/to/cache"
 workspace_base = "/path/to/workspace"
 workspace_mount_path = "/path/to/workspace"
 
-sandbox_container_image = "ghcr.io/opendevin/sandbox:latest"
 sandbox_type = "ssh"
 sandbox_timeout = 120
 ssh_hostname = "localhost"

+ 0 - 1
evaluation/miniwob/README.md

@@ -16,7 +16,6 @@ Add the following configurations:
 [core]
 max_iterations = 100
 cache_dir = "/tmp/cache"
-sandbox_container_image = "ghcr.io/opendevin/sandbox:latest"
 sandbox_type = "ssh"
 ssh_hostname = "localhost"
 sandbox_timeout = 120

+ 0 - 1
evaluation/swe_bench/README.md

@@ -42,7 +42,6 @@ Add the following configurations:
 [core]
 max_iterations = 100
 cache_dir = "/tmp/cache"
-sandbox_container_image = "ghcr.io/opendevin/sandbox:latest"
 sandbox_type = "ssh"
 ssh_hostname = "localhost"
 sandbox_timeout = 120

+ 1 - 1
evaluation/swe_bench/scripts/docker/Dockerfile.builder

@@ -1,4 +1,4 @@
-FROM ghcr.io/opendevin/sandbox:latest
+FROM ghcr.io/opendevin/sandbox:main
 
 RUN apt-get update && \
     apt-get install -y libffi-dev bash gcc git jq wget pkg-config libfreetype-dev libfreetype6 libfreetype6-dev rsync && \

+ 0 - 1
evaluation/webarena/README.md

@@ -16,7 +16,6 @@ Add the following configurations:
 [core]
 max_iterations = 100
 cache_dir = "/tmp/cache"
-sandbox_container_image = "ghcr.io/opendevin/sandbox:latest"
 sandbox_type = "ssh"
 ssh_hostname = "localhost"
 sandbox_timeout = 120

+ 24 - 16
tests/unit/test_image_agnostic_util.py

@@ -1,42 +1,50 @@
 from unittest.mock import MagicMock, patch
+
 from opendevin.runtime.docker.image_agnostic_util import (
-    generate_dockerfile_content,
     _get_new_image_name,
+    generate_dockerfile_content,
     get_od_sandbox_image,
 )
 
 
 def test_generate_dockerfile_content():
-    base_image = "debian:11"
+    base_image = 'debian:11'
     dockerfile_content = generate_dockerfile_content(base_image)
     assert base_image in dockerfile_content
-    assert "RUN apt update && apt install -y openssh-server wget sudo" in dockerfile_content
+    assert (
+        'RUN apt update && apt install -y openssh-server wget sudo'
+        in dockerfile_content
+    )
 
 
 def test_get_new_image_name():
-    base_image = "debian:11"
+    base_image = 'debian:11'
     new_image_name = _get_new_image_name(base_image)
-    assert new_image_name == "od_sandbox:debian__11"
+    assert new_image_name == 'od_sandbox:debian__11'
 
-    base_image = "ubuntu:22.04"
+    base_image = 'ubuntu:22.04'
     new_image_name = _get_new_image_name(base_image)
-    assert new_image_name == "od_sandbox:ubuntu__22.04"
+    assert new_image_name == 'od_sandbox:ubuntu__22.04'
 
-    base_image = "ubuntu"
+    base_image = 'ubuntu'
     new_image_name = _get_new_image_name(base_image)
-    assert new_image_name == "od_sandbox:ubuntu__latest"
+    assert new_image_name == 'od_sandbox:ubuntu__latest'
 
 
-@patch("opendevin.runtime.docker.image_agnostic_util._build_sandbox_image")
-@patch("opendevin.runtime.docker.image_agnostic_util.docker.DockerClient")
+@patch('opendevin.runtime.docker.image_agnostic_util._build_sandbox_image')
+@patch('opendevin.runtime.docker.image_agnostic_util.docker.DockerClient')
 def test_get_od_sandbox_image(mock_docker_client, mock_build_sandbox_image):
-    base_image = "debian:11"
-    mock_docker_client.images.list.return_value = [MagicMock(tags=["od_sandbox:debian__11"])]
+    base_image = 'debian:11'
+    mock_docker_client.images.list.return_value = [
+        MagicMock(tags=['od_sandbox:debian__11'])
+    ]
 
     image_name = get_od_sandbox_image(base_image, mock_docker_client)
-    assert image_name == "od_sandbox:debian__11"
+    assert image_name == 'od_sandbox:debian__11'
 
     mock_docker_client.images.list.return_value = []
     image_name = get_od_sandbox_image(base_image, mock_docker_client)
-    assert image_name == "od_sandbox:debian__11"
-    mock_build_sandbox_image.assert_called_once_with(base_image, "od_sandbox:debian__11", mock_docker_client)
+    assert image_name == 'od_sandbox:debian__11'
+    mock_build_sandbox_image.assert_called_once_with(
+        base_image, 'od_sandbox:debian__11', mock_docker_client
+    )