Browse Source

fix yet another swe_bench issue (#2069)

Xingyao Wang 1 year ago
parent
commit
2c0a2dbc61

+ 1 - 1
.gitignore

@@ -204,7 +204,7 @@ cache
 # configuration
 config.toml
 config.toml.bak
-evaluation/swe_bench/eval_workspace
+evaluation/swe_bench/eval_workspace*
 evaluation/outputs
 evaluation/evaluation_outputs
 test_results*

+ 2 - 2
evaluation/swe_bench/BUILD_TESTBED_AND_ENV.md

@@ -34,6 +34,6 @@ Run the following command to do the above two steps. The results will be saved t
 
 ```bash
 pushd evaluation/swe_bench
-docker build -t ghcr.io/opendevin/eval-swe-bench:full-v1.2 -f ./scripts/docker/Dockerfile.full.v1.1 .
-docker push ghcr.io/opendevin/eval-swe-bench:full-v1.2
+docker build -t ghcr.io/opendevin/eval-swe-bench:full-v1.2.1 -f ./scripts/docker/Dockerfile.full.v1.1 .
+docker push ghcr.io/opendevin/eval-swe-bench:full-v1.2.1
 ```

+ 1 - 1
evaluation/swe_bench/EVAL_PATCH.md

@@ -117,7 +117,7 @@ Before evaluating generated patches, you need to set up the Docker environment.
 ```shell
 docker run -it \
 -v DIR_TO_YOUR_PATCH_FILES_ON_HOST:/swe_bench_output \
-ghcr.io/opendevin/eval-swe-bench:full-v1.2 /bin/bash
+ghcr.io/opendevin/eval-swe-bench:full-v1.2.1 /bin/bash
 ```
 
 ### Evaluate Model Generated Patches

+ 1 - 1
evaluation/swe_bench/README.md

@@ -15,7 +15,7 @@ In [OpenDevin-SWE-Bench fork](https://github.com/OpenDevin/OD-SWE-bench.git) (mo
 **We pack everything you need for SWE-Bench evaluation into one, gigantic, docker image.** To use it:
 
 ```bash
-docker pull ghcr.io/opendevin/eval-swe-bench:full-v1.2
+docker pull ghcr.io/opendevin/eval-swe-bench:full-v1.2.1
 ```
 
 The Docker image contains several important directories:

+ 10 - 0
evaluation/swe_bench/scripts/docker/Dockerfile.full.v1.2.1

@@ -0,0 +1,10 @@
+FROM ghcr.io/opendevin/eval-swe-bench:full-v1.2
+
+# ================== Update OD-SWE-Bench ==================
+# sync only the `OD-SWE-bench` folder from the local eval_workspace into the image;
+# `eval_data` and `miniforge3` are left as they are in the base image
+# typically, this should be the OD codebase
+RUN --mount=type=bind,source=./eval_workspace,target=/eval_workspace \
+    rsync -ar --progress /eval_workspace/OD-SWE-bench/ /swe_util/OD-SWE-bench
+
+# pushd evaluation/swe_bench
+# docker build -t ghcr.io/opendevin/eval-swe-bench:full-v1.2.1 -f ./scripts/docker/Dockerfile.full.v1.2.1 .

+ 1 - 1
evaluation/swe_bench/scripts/eval_infer.sh

@@ -26,7 +26,7 @@ docker run --rm \
     -e OD_SWE_BENCH=/swe_util/OD-SWE-bench \
     -e EVAL_DATA_DIR=/swe_util/eval_data \
     -w /swe_util \
-    ghcr.io/opendevin/eval-swe-bench:full-v1.2 \
+    ghcr.io/opendevin/eval-swe-bench:full-v1.2.1 \
     bash -c "./get_agent_report.sh --output-file /swe_bench_output/$FILE_NAME \
     --agent-name CodeActAgent \
     --dataset swe-bench-test-lite \

+ 1 - 1
evaluation/swe_bench/swe_env_box.py

@@ -12,7 +12,7 @@ from opendevin.runtime.plugins import (
     PluginRequirement,
 )
 
-SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2'
+SWE_BENCH_CONTAINER_IMAGE = 'ghcr.io/opendevin/eval-swe-bench:full-v1.2.1'
 
 
 class SWEBenchSSHBox(DockerSSHBox):