Bläddra i källkod

feat: add commands for swebench (#682)

* feat: add commands for swebench

* restructure
Alex Bäuerle 1 år sedan
förälder
incheckning
a82e065f56

+ 30 - 14
agenthub/codeact_agent/codeact_agent.py

@@ -1,23 +1,32 @@
 import re
 from typing import List, Mapping
 
-
-from opendevin.agent import Agent
-from opendevin.state import State
 from opendevin.action import (
     Action,
-    CmdRunAction,
     AgentEchoAction,
     AgentFinishAction,
+    CmdRunAction,
 )
+from opendevin.agent import Agent
+from opendevin.llm.llm import LLM
 from opendevin.observation import (
-    CmdOutputObservation,
     AgentMessageObservation,
+    CmdOutputObservation,
 )
+from opendevin.parse_commands import parse_command_file
+from opendevin.state import State
 
-from opendevin.llm.llm import LLM
+COMMAND_DOCS = parse_command_file()
+COMMAND_SEGMENT = (
+    f"""
 
-SYSTEM_MESSAGE = """You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
+Apart from the standard bash commands, you can also use the following special commands:
+{COMMAND_DOCS}
+"""
+    if COMMAND_DOCS is not None
+    else ""
+)
+SYSTEM_MESSAGE = f"""You are a helpful assistant. You will be provided access (as root) to a bash shell to complete user-provided tasks.
 You will be able to execute commands in the bash shell, interact with the file system, install packages, and receive the output of your commands.
 
 DO NOT provide code in ```triple backticks```. Instead, you should execute bash command on behalf of the user by wrapping them with <execute> and </execute>.
@@ -34,6 +43,7 @@ You can also write a block of code to a file:
 echo "import math
 print(math.pi)" > math.py
 </execute>
+{COMMAND_SEGMENT}
 
 When you are done, execute "exit" to close the shell and end the conversation.
 """
@@ -77,15 +87,21 @@ class CodeActAgent(Agent):
         updated_info = state.updated_info
         if updated_info:
             for prev_action, obs in updated_info:
-                assert isinstance(prev_action, (CmdRunAction, AgentEchoAction)), "Expecting CmdRunAction or AgentEchoAction for Action"
-                if isinstance(obs, AgentMessageObservation):  # warning message from itself
+                assert isinstance(
+                    prev_action, (CmdRunAction, AgentEchoAction)
+                ), "Expecting CmdRunAction or AgentEchoAction for Action"
+                if isinstance(
+                    obs, AgentMessageObservation
+                ):  # warning message from itself
                     self.messages.append({"role": "user", "content": obs.content})
                 elif isinstance(obs, CmdOutputObservation):
                     content = "OBSERVATION:\n" + obs.content
                     content += f"\n[Command {obs.command_id} finished with exit code {obs.exit_code}]]"
                     self.messages.append({"role": "user", "content": content})
                 else:
-                    raise NotImplementedError(f"Unknown observation type: {obs.__class__}")
+                    raise NotImplementedError(
+                        f"Unknown observation type: {obs.__class__}"
+                    )
         response = self.llm.completion(
             messages=self.messages,
             stop=["</execute>"],
@@ -101,7 +117,7 @@ class CodeActAgent(Agent):
             command_group = command.group(1)
             if command_group.strip() == "exit":
                 return AgentFinishAction()
-            return CmdRunAction(command = command_group)
+            return CmdRunAction(command=command_group)
             # # execute the code
             # # TODO: does exit_code get loaded into Message?
             # exit_code, observation = self.env.execute(command_group)
@@ -111,9 +127,9 @@ class CodeActAgent(Agent):
             # https://github.com/xingyaoww/mint-bench/blob/main/mint/envs/general_env.py#L18-L23
             # observation = INVALID_INPUT_MESSAGE
             # self._history.append(Message(Role.ASSISTANT, observation))
-            return AgentEchoAction(content=INVALID_INPUT_MESSAGE)  # warning message to itself
-
+            return AgentEchoAction(
+                content=INVALID_INPUT_MESSAGE
+            )  # warning message to itself
 
     def search_memory(self, query: str) -> List[str]:
         raise NotImplementedError("Implement this abstract method")
-

+ 4 - 0
evaluation/SWE-bench/Dockerfile

@@ -29,6 +29,10 @@ RUN conda --version
 COPY environment.yml .
 RUN conda env create -f environment.yml
 
+# Add commands
+COPY ./commands.sh .
+RUN source commands.sh
+
 # Some missing packages
 RUN pip install datasets python-dotenv gitpython
 

+ 155 - 0
evaluation/SWE-bench/commands.sh

@@ -0,0 +1,155 @@
+# @yaml
+# signature: search_dir <search_term> [<dir>]
+# docstring: searches for search_term in all files in dir. If dir is not provided, searches in the current directory
+# arguments:
+#   search_term:
+#     type: string
+#     description: the term to search for
+#     required: true
+#   dir:
+#     type: string
+#     description: the directory to search in (if not provided, searches in the current directory)
+#     required: false
search_dir() {
    # Count the lines matching <search_term> in every regular, non-hidden,
    # non-binary file below <dir> (default: current directory) and print one
    # "<path> (<n> matches)" line per matching file.
    # Prints a usage/error message and returns early on bad arguments.
    if [ $# -eq 1 ]; then
        local search_term="$1"
        local dir="./"
    elif [ $# -eq 2 ]; then
        local search_term="$1"
        if [ -d "$2" ]; then
            local dir="$2"
        else
            echo "Directory $2 not found"
            return
        fi
    else
        echo "Usage: search_dir <search_term> [<dir>]"
        return
    fi
    dir=$(realpath "$dir")
    # -e keeps a term that starts with "-" from being parsed as a grep option.
    # -I skips binary files; -H forces the file name prefix even when find
    # hands grep a single file. The result is "<count> <path>" per file.
    local matches=$(find "$dir" -type f ! -path '*/.*' -exec grep -nIH -e "$search_term" {} + | cut -d: -f1 | sort | uniq -c)
    # if no matches, return
    if [ -z "$matches" ]; then
        echo "No matches found for \"$search_term\" in $dir"
        return
    fi
    # Total number of matching lines across all files
    local num_matches=$(echo "$matches" | awk '{sum+=$1} END {print sum}')
    # Number of files with at least one match (awk trims wc's padding)
    local num_files=$(echo "$matches" | wc -l | awk '{$1=$1; print $0}')
    # Refuse to flood the output when too many files match
    if [ "$num_files" -gt 100 ]; then
        echo "More than $num_files files matched for \"$search_term\" in $dir. Please narrow your search."
        return
    fi

    echo "Found $num_matches matches for \"$search_term\" in $dir:"
    # Strip the leading count from the record instead of printing field 2, so
    # paths that contain spaces are reported in full.
    echo "$matches" | awk '{count = $1; sub(/^[ \t]*[0-9]+[ \t]+/, ""); sub(/^\.+\/+/, "./"); print $0 " (" count " matches)"}'
    echo "End of matches for \"$search_term\" in $dir"
}
+
+# @yaml
+# signature: search_file <search_term> [<file>]
+# docstring: searches for search_term in file. If file is not provided, searches in the current open file
+# arguments:
+#   search_term:
+#     type: string
+#     description: the term to search for
+#     required: true
+#   file:
+#     type: string
+#     description: the file to search in (if not provided, searches in the current open file)
+#     required: false
search_file() {
    # Print every line of <file> that matches <search_term>, formatted as
    # "Line <n>:<content>". When <file> is omitted, the file named by
    # $CURRENT_FILE is searched instead.
    # Prints a usage/error message and returns early on bad arguments.
    if [ -z "$1" ]; then
        echo "Usage: search_file <search_term> [<file>]"
        return
    fi
    if [ -n "$2" ]; then
        # An explicit file argument must name an existing regular file
        if [ -f "$2" ]; then
            local file="$2"
        else
            echo "Usage: search_file <search_term> [<file>]"
            echo "Error: File name $2 not found. Please provide a valid file name."
            return
        fi
    else
        # Fall back to the currently open file, if any
        if [ -z "$CURRENT_FILE" ]; then
            echo "No file open. Use the open command first."
            return
        fi
        local file="$CURRENT_FILE"
    fi
    local search_term="$1"
    file=$(realpath "$file")
    # grep -n emits "<line>:<content>" once per matching line. The file name
    # prefix (-H) is deliberately omitted: with it, the first ":"-separated
    # field is the path, which made the matched-line count always 1.
    # -e protects terms starting with "-"; -I treats binary files as
    # non-matching, consistent with search_dir.
    local matches=$(grep -nI -e "$search_term" "$file")
    if [ -z "$matches" ]; then
        echo "No matches found for \"$search_term\" in $file"
        return
    fi
    # One output line per matching source line, so this is both the number of
    # matches and the number of matched lines (awk trims wc's padding).
    local num_matches=$(printf '%s\n' "$matches" | wc -l | awk '{$1=$1; print $0}')
    # Refuse to flood the output when too many lines match
    if [ "$num_matches" -gt 100 ]; then
        echo "More than $num_matches lines matched for \"$search_term\" in $file. Please narrow your search."
        return
    fi

    echo "Found $num_matches matches for \"$search_term\" in $file:"
    # grep output is already ordered by line number; just add the label
    printf '%s\n' "$matches" | sed 's/^/Line /'
    echo "End of matches for \"$search_term\" in $file"
}
+
+# @yaml
+# signature: find_file <file_name> [<dir>]
+# docstring: finds all files with the given name in dir. If dir is not provided, searches in the current directory
+# arguments:
+#   file_name:
+#     type: string
+#     description: the name of the file to search for
+#     required: true
+#   dir:
+#     type: string
+#     description: the directory to search in (if not provided, searches in the current directory)
+#     required: false
find_file() {
    # List every regular file called <file_name> below <dir> (default: the
    # current directory). <file_name> is passed to find's -name test.
    # Prints a usage/error message and returns early on bad arguments.
    if [ $# -lt 1 ] || [ $# -gt 2 ]; then
        echo "Usage: find_file <file_name> [<dir>]"
        return
    fi
    local file_name="$1"
    local dir="./"
    if [ $# -eq 2 ]; then
        # An explicit directory must exist
        if [ ! -d "$2" ]; then
            echo "Directory $2 not found"
            return
        fi
        dir="$2"
    fi

    # Report absolute paths regardless of how the directory was given
    dir=$(realpath "$dir")
    local matches=$(find "$dir" -type f -name "$file_name")
    if [ -z "$matches" ]; then
        echo "No matches found for \"$file_name\" in $dir"
        return
    fi
    # One path per line; awk trims the padding some wc implementations add
    local num_matches=$(printf '%s\n' "$matches" | wc -l | awk '{$1=$1; print $0}')
    echo "Found $num_matches matches for \"$file_name\" in $dir:"
    printf '%s\n' "$matches"
}

+ 52 - 0
opendevin/parse_commands.py

@@ -0,0 +1,52 @@
+import os
+from dataclasses import dataclass
+
+import yaml
+
+
+@dataclass()
+class Command:
+    name: str
+    docstring: str | None = None
+    signature: str | None = None
+
+
+def parse_command_file() -> str | None:
+    if not os.path.exists("commands.sh"):
+        return None
+    content = open("commands.sh", "r").read()
+    lines = content.split("\n")
+    commands: list[Command] = []
+    idx = 0
+    docs: list[str] = []
+    while idx < len(lines):
+        line = lines[idx]
+        idx += 1
+        if line.startswith("# "):
+            docs.append(line[2:])
+        elif line.strip().endswith("() {"):
+            name = line.split()[0][:-2]
+            while lines[idx].strip() != "}":
+                idx += 1
+            docstring, signature = None, name
+            docs_dict = yaml.safe_load("\n".join(docs).replace("@yaml", ""))
+            if docs_dict is not None:
+                docstring = docs_dict.get("docstring")
+                arguments = docs_dict.get("arguments", None)
+                if "signature" in docs_dict:
+                    signature = docs_dict["signature"]
+                else:
+                    if arguments is not None:
+                        for param, settings in arguments.items():
+                            if "required" in settings:
+                                signature += f" <{param}>"
+                            else:
+                                signature += f" [<{param}>]"
+            command = Command(name, docstring, signature)
+            commands.append(command)
+            docs = []
+    function_docs = ""
+    for cmd in commands:
+        if cmd.docstring is not None:
+            function_docs += f"{cmd.signature or cmd.name} - {cmd.docstring}\n"
+    return function_docs