1 anno fa · 0fdbe1ee93
--- a/evaluation/swe_bench/README.md
+++ b/evaluation/swe_bench/README.md
@@ -28,17 +28,9 @@ To reproduce how we pack the image, check [this doc](./BUILD_TESTBED_AND_ENV.md)
 
				 
			
 
				 NOTE: We only support SWE-Bench Lite for now, but modifying our existing scripts for full SWE-Bench should be quite straightforward.
			
 
				 
			
 
				-## Test if your environment works
			
 
				-
			
 
				-```bash
			
 
				-python3 evaluation/swe_bench/swe_env_box.py
			
 
				-```
			
 
				-
			
 
				-If you get to the interactive shell successfully, it means success!
			
 
				-
			
 
				-## Configure your LLM
			
 
				+## Configure OpenDevin and your LLM
			
 
				 
			
 
				-Create a `config.toml` file if not exists at the root of workspace.
			
 
				+Create a `config.toml` file if it does not exist at the root of the workspace.
			
 
				 
			
 
				 Add the following configurations:
			
 
				 
			
@@ -51,7 +43,7 @@ sandbox_type = "ssh"
 
				 use_host_network = true
			
 
				 ssh_hostname = "localhost"
			
 
				 sandbox_timeout = 120
			
 
				-# eval specific
			
 
				+# SWE-Bench eval specific
			
 
				 run_as_devin = false
			
 
				 
			
 
				 # TODO: Change these to the model you want to evaluate
			
@@ -67,6 +59,14 @@ api_key = "XXX"
 
				 temperature = 0.0
			
 
				 ```
			
 
				 
			
 
				+## Test if your environment works
			
 
				+
			
 
				+```bash
			
 
				+python3 evaluation/swe_bench/swe_env_box.py
			
 
				+```
			
 
				+
			
 
				+If you reach the interactive shell successfully, your environment is set up correctly!
			
 
				+
			
 
				 ## Run Inference on SWE-Bench Instances
			
 
				 
			
 
				 ```bash