Просмотр исходного кода

Turn off auto linting by default, and on for swe_bench (#1861)

Disable Python linting by default, and turn it on for SWE Bench.

It is turned off by default because this behavior is surprising and somewhat annoying to end users.
It is turned on for SWE Bench because linting Python files after each edit gives the LLM a chance to fix indentation errors.
Boxuan Li 1 год назад
Родитель
Commit
a57a213c7c

+ 2 - 0
evaluation/swe_bench/README.md

@@ -43,8 +43,10 @@ sandbox_type = "ssh"
 use_host_network = true
 use_host_network = true
 ssh_hostname = "localhost"
 ssh_hostname = "localhost"
 sandbox_timeout = 120
 sandbox_timeout = 120
+
 # SWEBench eval specific
 # SWEBench eval specific
 run_as_devin = false
 run_as_devin = false
+enable_auto_lint = true
 
 
 # TODO: Change these to the model you want to evaluate
 # TODO: Change these to the model you want to evaluate
 [eval_gpt4_1106_preview]
 [eval_gpt4_1106_preview]

+ 4 - 0
evaluation/swe_bench/swe_env_box.py

@@ -73,6 +73,10 @@ class SWEBenchSSHBox(DockerSSHBox):
             )
             )
         config.workspace_base = workspace_mount_path
         config.workspace_base = workspace_mount_path
         config.workspace_mount_path = workspace_mount_path
         config.workspace_mount_path = workspace_mount_path
+
+        # linting python after editing helps LLM fix indentations
+        config.enable_auto_lint = True
+
         sandbox = cls(
         sandbox = cls(
             container_image=SWE_BENCH_CONTAINER_IMAGE,
             container_image=SWE_BENCH_CONTAINER_IMAGE,
             swe_instance_id=instance['instance_id'],
             swe_instance_id=instance['instance_id'],

+ 3 - 0
opendevin/core/config.py

@@ -94,6 +94,9 @@ class AppConfig(metaclass=Singleton):
     sandbox_timeout: int = 120
     sandbox_timeout: int = 120
     github_token: str | None = None
     github_token: str | None = None
     debug: bool = False
     debug: bool = False
+    enable_auto_lint: bool = (
+        False  # once enabled, OpenDevin would lint files after editing
+    )
 
 
     defaults_dict: ClassVar[dict] = {}
     defaults_dict: ClassVar[dict] = {}
 
 

+ 0 - 3
opendevin/runtime/docker/ssh_box.py

@@ -1,5 +1,4 @@
 import atexit
 import atexit
-import json
 import os
 import os
 import re
 import re
 import sys
 import sys
@@ -265,8 +264,6 @@ class DockerSSHBox(Sandbox):
 
 
     def add_to_env(self, key: str, value: str):
     def add_to_env(self, key: str, value: str):
         super().add_to_env(key, value)
         super().add_to_env(key, value)
-        # Note: json.dumps gives us nice escaping for free
-        self.execute(f'export {key}={json.dumps(value)}')
 
 
     def setup_user(self):
     def setup_user(self):
         # Make users sudoers passwordless
         # Make users sudoers passwordless

+ 1 - 1
opendevin/runtime/plugins/swe_agent_commands/cursors_edit_linting.sh

@@ -36,7 +36,7 @@ edit() {
     # Write the new stuff directly back into the original file
     # Write the new stuff directly back into the original file
     printf "%s\n" "${new_lines[@]}" >| "$CURRENT_FILE"
     printf "%s\n" "${new_lines[@]}" >| "$CURRENT_FILE"
     # Run linter if enabled
     # Run linter if enabled
-    if [[ $CURRENT_FILE == *.py && -n "$RUN_LINT_AFTER_EDIT" ]]; then
+    if [[ $CURRENT_FILE == *.py && -n "$ENABLE_AUTO_LINT" ]]; then
         lint_output=$(flake8 --isolated --select=F821,F822,F831,E111,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1)
         lint_output=$(flake8 --isolated --select=F821,F822,F831,E111,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1)
     else
     else
         # do nothing
         # do nothing

+ 1 - 1
opendevin/runtime/plugins/swe_agent_commands/edit_linting.sh

@@ -68,7 +68,7 @@ edit() {
     printf "%s\n" "${new_lines[@]}" >| "$CURRENT_FILE"
     printf "%s\n" "${new_lines[@]}" >| "$CURRENT_FILE"
 
 
     # Run linter if enabled
     # Run linter if enabled
-    if [[ $CURRENT_FILE == *.py && -n "$RUN_LINT_AFTER_EDIT" ]]; then
+    if [[ $CURRENT_FILE == *.py && -n "$ENABLE_AUTO_LINT" ]]; then
         lint_output=$(flake8 --isolated --select=F821,F822,F831,E111,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1)
         lint_output=$(flake8 --isolated --select=F821,F822,F831,E111,E112,E113,E999,E902 "$CURRENT_FILE" 2>&1)
     else
     else
         # do nothing
         # do nothing

+ 6 - 0
opendevin/runtime/sandbox.py

@@ -1,6 +1,8 @@
+import json
 import os
 import os
 from abc import ABC, abstractmethod
 from abc import ABC, abstractmethod
 
 
+from opendevin.core.config import config
 from opendevin.core.schema import CancellableStream
 from opendevin.core.schema import CancellableStream
 from opendevin.runtime.docker.process import Process
 from opendevin.runtime.docker.process import Process
 from opendevin.runtime.plugins.mixin import PluginMixin
 from opendevin.runtime.plugins.mixin import PluginMixin
@@ -15,9 +17,13 @@ class Sandbox(ABC, PluginMixin):
             if key.startswith('SANDBOX_ENV_'):
             if key.startswith('SANDBOX_ENV_'):
                 sandbox_key = key.removeprefix('SANDBOX_ENV_')
                 sandbox_key = key.removeprefix('SANDBOX_ENV_')
                 self.add_to_env(sandbox_key, os.environ[key])
                 self.add_to_env(sandbox_key, os.environ[key])
+        if config.enable_auto_lint:
+            self.add_to_env('ENABLE_AUTO_LINT', 'true')
 
 
     def add_to_env(self, key: str, value: str):
     def add_to_env(self, key: str, value: str):
         self._env[key] = value
         self._env[key] = value
+        # Note: json.dumps gives us nice escaping for free
+        self.execute(f'export {key}={json.dumps(value)}')
 
 
     @abstractmethod
     @abstractmethod
     def execute(
     def execute(