Просмотр исходного кода

Refactor agent interface a bit (#74)

* start moving files

* initial refactor

* factor out command management

* fix command runner

* add workspace to gitignore

* factor out command manager

* remove dupe add_event

* update docs

* fix init

* fix langchain agent after merge
Robert Brennan 2 лет назад
Родитель
Сommit
b84463f512

+ 1 - 1
.gitignore

@@ -14,7 +14,7 @@ dist/
 downloads/
 downloads/
 eggs/
 eggs/
 .eggs/
 .eggs/
-lib/
+./lib/
 lib64/
 lib64/
 parts/
 parts/
 sdist/
 sdist/

+ 51 - 0
agenthub/README.md

@@ -4,3 +4,54 @@ In this folder, there may exist multiple implementations of `Agent` that will be
 
 
 For example, `agenthub/langchain_agent`, `agenthub/metagpt_agent`, `agenthub/codeact_agent`, etc.
 For example, `agenthub/langchain_agent`, `agenthub/metagpt_agent`, `agenthub/codeact_agent`, etc.
 Contributors from different backgrounds and interests can choose to contribute to any (or all!) of these directions.
 Contributors from different backgrounds and interests can choose to contribute to any (or all!) of these directions.
+
+## Constructing an Agent
+Your agent must implement the following methods:
+
+### `step`
+```
+def step(self, cmd_mgr: CommandManager) -> Event:
+```
+`step` moves the agent forward one step towards its goal. This probably means
+sending a prompt to the LLM, then parsing the response into an action `Event`.
+
+Each Event has an `action` and a dict of `args`. Supported Events include:
+* `read` - reads the contents of a file. Arguments:
+  * `path` - the path of the file to read
+* `write` - writes the contents to a file. Arguments:
+  * `path` - the path of the file to write
+  * `contents` - the contents to write to the file
+* `run` - runs a command. Arguments:
+  * `command` - the command to run
+  * `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
+* `kill` - kills a background command. Arguments:
+  * `id` - the ID of the background command to kill
+* `browse` - opens a web page. Arguments:
+  * `url` - the URL to open
+* `recall` - recalls a past memory. Arguments:
+  * `query` - the query to search for
+* `think` - make a plan, set a goal, or record your thoughts. Arguments:
+  * `thought` - the thought to record
+* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
+
+For Events like `read` and `run`, a follow-up event will be added via `add_event` with the output.
+
+### `add_event`
+```
+def add_event(self, event: Event) -> None:
+```
+`add_event` adds an event to the agent's history. This could be a user message,
+an action taken by the agent, log output, file contents, or anything else.
+
+You'll probably want to keep a history of events, and use them in your prompts
+so that the agent knows what it did recently. You may also want to keep events
+in a vector database so the agent can refer back to them.
+
+The output of `step` will automatically be passed to this method.
+
+### `search_memory`
+```
+def search_memory(self, query: str) -> List[str]:
+```
+`search_memory` should return a list of events that match the query. This will be used
+for the `recall` action.

+ 17 - 31
agenthub/langchains_agent/__init__.py

@@ -1,10 +1,11 @@
 import os
 import os
 import argparse
 import argparse
+from typing import List, Dict, Type
 
 
 from opendevin.agent import Agent, Message
 from opendevin.agent import Agent, Message
 
 
 from agenthub.langchains_agent.utils.agent import Agent as LangchainsAgentImpl
 from agenthub.langchains_agent.utils.agent import Agent as LangchainsAgentImpl
-from agenthub.langchains_agent.utils.event import Event
+from opendevin.lib.event import Event
 
 
 INITIAL_THOUGHTS = [
 INITIAL_THOUGHTS = [
     "I exist!",
     "I exist!",
@@ -46,33 +47,12 @@ INITIAL_THOUGHTS = [
 
 
 
 
 class LangchainsAgent(Agent):
 class LangchainsAgent(Agent):
+    _initialized = False
 
 
-    def _run_loop(self, agent: LangchainsAgentImpl, max_iterations=100):
-        # TODO: make it add a Message to the history for each turn / event
-        for i in range(max_iterations):
-            print("STEP", i, flush=True)
-            log_events = agent.get_background_logs()
-            for event in log_events:
-                print(event, flush=True)
-            action = agent.get_next_action()
-            if action.action == "finish":
-                print("Done!", flush=True)
-                break
-            print(action, flush=True)
-            print("---", flush=True)
-            out = agent.maybe_perform_latest_action()
-            print(out, flush=True)
-            print("==============", flush=True)
-
-    def run(self) -> None:
-        """
-        Starts the execution of the assigned instruction. This method should
-        be implemented by subclasses to define the specific execution logic.
-        """
-        print("Working in directory:", self.workspace_dir)
-        os.chdir(self.workspace_dir)
-
-        agent = LangchainsAgentImpl(self.instruction)
+    def _initialize(self):
+        if self._initialized:
+            return
+        self.agent = LangchainsAgentImpl(self.instruction)
         next_is_output = False
         next_is_output = False
         for thought in INITIAL_THOUGHTS:
         for thought in INITIAL_THOUGHTS:
             thought = thought.replace("$TASK", self.instruction)
             thought = thought.replace("$TASK", self.instruction)
@@ -94,12 +74,18 @@ class LangchainsAgent(Agent):
                     next_is_output = True
                     next_is_output = True
                 else:
                 else:
                     event = Event("think", {"thought": thought})
                     event = Event("think", {"thought": thought})
+            self.agent.add_event(event)
+        self._initialized = True
+
+    def add_event(self, event: Event) -> None:
+        self.agent.add_event(event)
 
 
-            agent.add_event(event)
-        self._run_loop(agent, self.max_steps)
+    def step(self, cmd_mgr) -> Event:
+        self._initialize()
+        return self.agent.get_next_action(cmd_mgr)
 
 
-        # Set the agent's completion status to True
-        self._complete = True
+    def search_memory(self, query: str) -> List[str]:
+        return self.agent.memory.search(query)
 
 
     def chat(self, message: str) -> None:
     def chat(self, message: str) -> None:
         """
         """

+ 0 - 7
agenthub/langchains_agent/utils/actions/kill.py

@@ -1,7 +0,0 @@
-def kill(id, agent):
-    if id < 0 or id >= len(agent.background_commands):
-        raise ValueError('Invalid command id to kill')
-    agent.background_commands[id].kill()
-    agent.background_commands.pop(id)
-    return "Background command %d killed" % id
-

+ 0 - 18
agenthub/langchains_agent/utils/actions/run.py

@@ -1,18 +0,0 @@
-import subprocess
-import os
-
-def run(cmd, agent, background=False):
-    if background:
-        return run_background(cmd, agent)
-    result = subprocess.run(["/bin/bash", "-c", cmd], capture_output=True, text=True)
-    output = result.stdout + result.stderr
-    exit_code = result.returncode
-    if exit_code != 0:
-        raise ValueError('Command failed with exit code ' + str(exit_code) + ': ' + output)
-    return output
-
-def run_background(cmd, agent):
-    process = subprocess.Popen(cmd, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
-    agent.background_commands.append(process)
-    return "Background command started. To stop it, send a `kill` action with id " + str(len(agent.background_commands) - 1)
-

+ 3 - 62
agenthub/langchains_agent/utils/agent.py

@@ -1,8 +1,6 @@
-import select
-
 from agenthub.langchains_agent.utils.monologue import Monologue
 from agenthub.langchains_agent.utils.monologue import Monologue
 from agenthub.langchains_agent.utils.memory import LongTermMemory
 from agenthub.langchains_agent.utils.memory import LongTermMemory
-from agenthub.langchains_agent.utils.event import Event
+from opendevin.lib.event import Event
 import agenthub.langchains_agent.utils.llm as llm
 import agenthub.langchains_agent.utils.llm as llm
 
 
 MAX_OUTPUT_LENGTH = 5000
 MAX_OUTPUT_LENGTH = 5000
@@ -13,7 +11,6 @@ class Agent:
         self.task = task
         self.task = task
         self.monologue = Monologue()
         self.monologue = Monologue()
         self.memory = LongTermMemory()
         self.memory = LongTermMemory()
-        self.background_commands = []
 
 
     def add_event(self, event):
     def add_event(self, event):
         self.monologue.add_event(event)
         self.monologue.add_event(event)
@@ -21,65 +18,9 @@ class Agent:
         if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
         if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
             self.monologue.condense()
             self.monologue.condense()
 
 
-    def get_next_action(self):
-        bg_commands = [cmd.args for cmd in self.background_commands]
-        action_dict = llm.request_action(self.task, self.monologue.get_thoughts(), bg_commands)
+    def get_next_action(self, cmd_mgr):
+        action_dict = llm.request_action(self.task, self.monologue.get_thoughts(), cmd_mgr.background_commands)
         event = Event(action_dict['action'], action_dict['args'])
         event = Event(action_dict['action'], action_dict['args'])
         self.latest_action = event
         self.latest_action = event
-        self.add_event(event)
         return event
         return event
 
 
-    def maybe_perform_latest_action(self):
-        if not (self.latest_action and self.latest_action.is_runnable()):
-            return
-        action = 'output'
-        try:
-            output = self.latest_action.run(self)
-        except Exception as e:
-            output = 'Error: ' + str(e)
-            action = 'error'
-        if len(output) > MAX_OUTPUT_LENGTH:
-            output = output[:MAX_OUTPUT_LENGTH] + '...'
-        out_event = Event(action, {'output': output})
-        self.add_event(out_event)
-        return out_event
-
-    def get_background_log(self, idx, cmd, stream, name):
-        logs = ""
-        while True:
-            readable, _, _ = select.select([stream], [], [], .1)
-            if not readable:
-                break
-            next = stream.readline()
-            if next == '':
-                break
-            logs += next
-        if logs == "": return
-
-        event = Event('output', {
-            'output': logs,
-            'stream':name,
-            'id': idx,
-            'command': cmd.args,
-        })
-        self.add_event(event)
-        return event
-
-    def get_background_logs(self):
-        all_events = []
-        for idx, cmd in enumerate(self.background_commands):
-            stdout_event = self.get_background_log(idx, cmd, cmd.stdout, 'stdout')
-            if stdout_event:
-                all_events.append(stdout_event)
-            stderr_event = self.get_background_log(idx, cmd, cmd.stderr, 'stderr')
-            if stderr_event:
-                all_events.append(stderr_event)
-
-            exit_code = cmd.poll()
-            if exit_code is not None:
-                event = Event('output', {'output': 'Background command %d exited with code %d' % (idx, exit_code)})
-                all_events.append(event)
-                self.add_event(event)
-
-        self.background_commands = [cmd for cmd in self.background_commands if cmd.poll() is None]
-        return all_events

+ 2 - 3
agenthub/langchains_agent/utils/llm.py

@@ -127,9 +127,8 @@ def request_action(task, thoughts, background_commands=[]):
     bg_commands_message = ""
     bg_commands_message = ""
     if len(background_commands) > 0:
     if len(background_commands) > 0:
         bg_commands_message = "The following commands are running in the background:"
         bg_commands_message = "The following commands are running in the background:"
-        for idx, command in enumerate(background_commands):
-            # TODO: make command IDs long-lived, instead of the index
-            bg_commands_message += f"\n* {idx}: {command}"
+        for id, command in background_commands.items():
+            bg_commands_message += f"\n`{id}`: {command.command}"
         bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
         bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
 
 
     latest_thought = thoughts[-1]
     latest_thought = thoughts[-1]

+ 1 - 1
agenthub/langchains_agent/utils/monologue.py

@@ -1,5 +1,5 @@
 import agenthub.langchains_agent.utils.json as json
 import agenthub.langchains_agent.utils.json as json
-from agenthub.langchains_agent.utils.event import Event
+from opendevin.lib.event import Event
 
 
 import agenthub.langchains_agent.utils.llm as llm
 import agenthub.langchains_agent.utils.llm as llm
 
 

+ 20 - 5
opendevin/agent.py

@@ -3,6 +3,9 @@ from typing import List, Dict, Type
 from dataclasses import dataclass
 from dataclasses import dataclass
 from enum import Enum
 from enum import Enum
 
 
+from .lib.event import Event
+from .lib.command_manager import CommandManager
+from .controller import AgentController
 
 
 class Role(Enum):
 class Role(Enum):
     SYSTEM = "system"  # system message for LLM
     SYSTEM = "system"  # system message for LLM
@@ -86,7 +89,17 @@ class Agent(ABC):
         return self._history
         return self._history
 
 
     @abstractmethod
     @abstractmethod
-    def run(self) -> None:
+    def add_event(self, event: Event) -> None:
+        """
+        Adds an event to the agent's history.
+
+        Parameters:
+        - event (Event): The event to add to the history.
+        """
+        pass
+
+    @abstractmethod
+    def step(self, cmd_mgr: CommandManager) -> Event:
         """
         """
         Starts the execution of the assigned instruction. This method should
         Starts the execution of the assigned instruction. This method should
         be implemented by subclasses to define the specific execution logic.
         be implemented by subclasses to define the specific execution logic.
@@ -94,13 +107,15 @@ class Agent(ABC):
         pass
         pass
 
 
     @abstractmethod
     @abstractmethod
-    def chat(self, message: str) -> None:
+    def search_memory(self, query: str) -> List[str]:
         """
         """
-        Optional method for interactive communication with the agent during its execution. Implementations
-        can use this method to modify the agent's behavior or state based on chat inputs.
+        Searches the agent's memory for information relevant to the given query.
 
 
         Parameters:
         Parameters:
-        - message (str): The chat message or command.
+        - query (str): The query to search for in the agent's memory.
+
+        Returns:
+        - results (List[str]): The memory entries that match the query.
         """
         """
         pass
         pass
 
 

+ 49 - 0
opendevin/controller.py

@@ -0,0 +1,49 @@
+from opendevin.lib.command_manager import CommandManager
+from opendevin.lib.event import Event
+
+def print_callback(event):
+    """Print `event` to standard output, flushing immediately."""
+    print(str(event), flush=True)
+
+class AgentController:
+    """Runs an agent's step loop and executes the actions it emits.
+
+    Each iteration collects events from background commands, asks the agent
+    for its next action, runs that action when it is runnable, and hands every
+    resulting event to all registered callbacks.
+    """
+
+    def __init__(self, agent, max_iterations=100, callbacks=None):
+        """
+        Parameters:
+        - agent: object providing `step(cmd_mgr)` and `add_event(event)`.
+        - max_iterations (int): upper bound on iterations of `start_loop`.
+        - callbacks (list | None): extra callables invoked with every event.
+          `agent.add_event` and `print_callback` are always appended.
+        """
+        self.agent = agent
+        self.max_iterations = max_iterations
+        # Not used by this class; kept so existing readers of the attribute
+        # keep working.
+        self.background_commands = []
+        self.command_manager = CommandManager()
+        # Copy rather than append to the argument: a shared default list (or
+        # the caller's own list) would otherwise accumulate the callbacks of
+        # every controller ever constructed.
+        self.callbacks = list(callbacks) if callbacks is not None else []
+        self.callbacks.append(self.agent.add_event)
+        self.callbacks.append(print_callback)
+
+    def _notify(self, event):
+        """Pass `event` to every registered callback, in registration order."""
+        for callback in self.callbacks:
+            callback(event)
+
+    def maybe_perform_action(self, event):
+        """Run `event` if it is runnable and wrap the result in a new Event.
+
+        Returns None when `event` is falsy or not runnable. Otherwise returns
+        an 'output' Event holding the result, or an 'error' Event holding the
+        exception message if running the event raised.
+        """
+        if not (event and event.is_runnable()):
+            return
+        action = 'output'
+        try:
+            output = event.run(self)
+        except Exception as e:
+            # Failures are reported back as events rather than aborting the loop.
+            output = 'Error: ' + str(e)
+            action = 'error'
+        out_event = Event(action, {'output': output})
+        return out_event
+
+    def start_loop(self):
+        """Step the agent until it emits `finish` or `max_iterations` is reached."""
+        for i in range(self.max_iterations):
+            print("STEP", i, flush=True)
+            # Surface background command output before asking for the next action.
+            for event in self.command_manager.get_background_events():
+                self._notify(event)
+
+            action_event = self.agent.step(self.command_manager)
+            self._notify(action_event)
+            if action_event.action == 'finish':
+                break
+            print("---", flush=True)
+
+            output_event = self.maybe_perform_action(action_event)
+            if output_event is not None:
+                self._notify(output_event)
+            print("==============", flush=True)

+ 0 - 2
agenthub/langchains_agent/utils/actions/__init__.py → opendevin/lib/actions/__init__.py

@@ -1,5 +1,3 @@
-from .run import run
-from .kill import kill
 from .browse import browse
 from .browse import browse
 from .write import write
 from .write import write
 from .read import read
 from .read import read

+ 0 - 0
agenthub/langchains_agent/utils/actions/browse.py → opendevin/lib/actions/browse.py


+ 0 - 0
agenthub/langchains_agent/utils/actions/read.py → opendevin/lib/actions/read.py


+ 0 - 0
agenthub/langchains_agent/utils/actions/write.py → opendevin/lib/actions/write.py


+ 89 - 0
opendevin/lib/command_manager.py

@@ -0,0 +1,89 @@
+import subprocess
+import select
+from typing import List
+
+from opendevin.lib.event import Event
+
+class BackgroundCommand:
+    """A command running in the background, paired with its process handle."""
+
+    def __init__(self, id: int, command: str, process: subprocess.Popen):
+        self.id = id
+        self.process = process
+        self.command = command
+
+    def _get_log_from_stream(self, stream):
+        """Read the lines currently available on `stream`.
+
+        Reading stops when `select` reports nothing readable within 0.1
+        seconds or when `readline` returns an empty string. Returns the
+        concatenated lines, or None if nothing was read.
+        """
+        chunks = []
+        while select.select([stream], [], [], .1)[0]:
+            line = stream.readline()
+            if line == '':
+                break
+            chunks.append(line)
+        collected = "".join(chunks)
+        return collected if collected != "" else None
+
+    def get_logs(self):
+        """Return `(stdout, stderr, exit_code)` for the process.
+
+        The two stream entries come from `_get_log_from_stream`; `exit_code`
+        is the result of `poll()` on the process.
+        """
+        out_text = self._get_log_from_stream(self.process.stdout)
+        err_text = self._get_log_from_stream(self.process.stderr)
+        return out_text, err_text, self.process.poll()
+
+class CommandManager:
+    """Runs shell commands and tracks background ones by a long-lived ID."""
+
+    def __init__(self):
+        # Next ID to hand out; IDs are never reused within one manager.
+        self.cur_id = 0
+        # Maps command ID -> BackgroundCommand for commands still tracked.
+        self.background_commands = {}
+
+    def run_command(self, command: str, background=False) -> str:
+        """Run `command`, in the background if `background` is true."""
+        if background:
+            return self.run_background(command)
+        else:
+            return self.run_immediately(command)
+
+    def run_immediately(self, command: str) -> str:
+        """Run `command` under /bin/bash and return stdout followed by stderr.
+
+        Raises:
+        - ValueError: if the command exits with a non-zero code; the message
+          includes the exit code and the captured output.
+        """
+        result = subprocess.run(["/bin/bash", "-c", command], capture_output=True, text=True)
+        output = result.stdout + result.stderr
+        exit_code = result.returncode
+        if exit_code != 0:
+            raise ValueError('Command failed with exit code ' + str(exit_code) + ': ' + output)
+        return output
+
+    def run_background(self, command: str) -> str:
+        """Start `command` without waiting for it and register it under a new ID.
+
+        Returns a message telling the agent which ID to use with `kill`.
+        """
+        process = subprocess.Popen(command, stdout=subprocess.PIPE, stderr=subprocess.PIPE, text=True, shell=True)
+        bg_cmd = BackgroundCommand(self.cur_id, command, process)
+        self.cur_id += 1
+        self.background_commands[bg_cmd.id] = bg_cmd
+        return "Background command started. To stop it, send a `kill` action with id " + str(bg_cmd.id)
+
+    def kill_command(self, id: int) -> str:
+        """Kill the background command registered under `id` and stop tracking it.
+
+        Returns a confirmation message.
+
+        Raises:
+        - ValueError: if no background command is registered under `id`.
+        """
+        # TODO: get log events before killing
+        if id not in self.background_commands:
+            raise ValueError('Invalid command id to kill')
+        self.background_commands[id].process.kill()
+        del self.background_commands[id]
+        return "Background command " + str(id) + " killed"
+
+    def get_background_events(self) -> List[Event]:
+        """Collect new output and exit notifications from background commands.
+
+        Emits one 'output' Event per stream that produced text and one more
+        when a command has exited; exited commands are removed from tracking.
+        """
+        events = []
+        # Iterate over a snapshot: exited commands are deleted inside the loop,
+        # which is not allowed while iterating the live dict view.
+        for id, cmd in list(self.background_commands.items()):
+            stdout, stderr, exit_code = cmd.get_logs()
+            if stdout is not None:
+                events.append(Event('output', {
+                    'output': stdout,
+                    'stream': 'stdout',
+                    'id': id,
+                    'command': cmd.command,
+                }))
+            if stderr is not None:
+                events.append(Event('output', {
+                    'output': stderr,
+                    'stream': 'stderr',
+                    'id': id,
+                    'command': cmd.command,
+                }))
+            if exit_code is not None:
+                events.append(Event('output', {
+                    'exit_code': exit_code,
+                    'output': 'Background command %d exited with code %d' % (id, exit_code),
+                    'id': id,
+                    'command': cmd.command,
+                }))
+                del self.background_commands[id]
+        return events

+ 11 - 6
agenthub/langchains_agent/utils/event.py → opendevin/lib/event.py

@@ -1,9 +1,14 @@
 import os
 import os
 import json
 import json
-import agenthub.langchains_agent.utils.actions as actions
+import opendevin.lib.actions as actions
+
+ACTION_TYPES = ['run', 'kill', 'browse', 'read', 'write', 'recall', 'think', 'output', 'error', 'finish']
+RUNNABLE_ACTIONS = ['run', 'kill', 'browse', 'read', 'write', 'recall']
 
 
 class Event:
 class Event:
     def __init__(self, action, args):
     def __init__(self, action, args):
+        if action not in ACTION_TYPES:
+            raise ValueError('Invalid action type: ' + action)
         self.action = action
         self.action = action
         self.args = args
         self.args = args
 
 
@@ -17,18 +22,18 @@ class Event:
         }
         }
 
 
     def is_runnable(self):
     def is_runnable(self):
-        return self.action in ['run', 'kill', 'browse', 'read', 'write', 'recall']
+        return self.action in RUNNABLE_ACTIONS
 
 
-    def run(self, agent):
+    def run(self, agent_controller):
         if self.action == 'run':
         if self.action == 'run':
             cmd = self.args['command']
             cmd = self.args['command']
             background = False
             background = False
             if 'background' in self.args and self.args['background']:
             if 'background' in self.args and self.args['background']:
                 background = True
                 background = True
-            return actions.run(cmd, agent, background)
+            return agent_controller.command_manager.run_command(cmd, background)
         if self.action == 'kill':
         if self.action == 'kill':
             id = self.args['id']
             id = self.args['id']
-            return actions.kill(id, agent)
+            return agent_controller.command_manager.kill_command(id)
         elif self.action == 'browse':
         elif self.action == 'browse':
             url = self.args['url']
             url = self.args['url']
             return actions.browse(url)
             return actions.browse(url)
@@ -40,6 +45,6 @@ class Event:
             contents = self.args['contents']
             contents = self.args['contents']
             return actions.write(path, contents)
             return actions.write(path, contents)
         elif self.action == 'recall':
         elif self.action == 'recall':
-            return agent.memory.search(self.args['query'])
+            return agent_controller.agent.search_memory(self.args['query'])
         else:
         else:
             raise ValueError('Invalid action type')
             raise ValueError('Invalid action type')

+ 4 - 1
opendevin/main.py

@@ -3,6 +3,7 @@ import argparse
 
 
 import agenthub  # for the agent registry
 import agenthub  # for the agent registry
 from opendevin.agent import Agent
 from opendevin.agent import Agent
+from opendevin.controller import AgentController
 
 
 if __name__ == "__main__":
 if __name__ == "__main__":
     parser = argparse.ArgumentParser(description="Run an agent with a specific task")
     parser = argparse.ArgumentParser(description="Run an agent with a specific task")
@@ -18,4 +19,6 @@ if __name__ == "__main__":
         workspace_dir=args.directory,
         workspace_dir=args.directory,
         model_name=args.model_name
         model_name=args.model_name
     )
     )
-    agent.run()
+
+    controller = AgentController(agent)
+    controller.start_loop()