ai
/
OpenHands


			
							123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241
							import agenthub.monologue_agent.utils.prompts as prompts
from agenthub.monologue_agent.response_parser import MonologueResponseParser
from agenthub.monologue_agent.utils.prompts import INITIAL_THOUGHTS
from opendevin.controller.agent import Agent
from opendevin.controller.state.state import State
from opendevin.core.config import config
from opendevin.core.exceptions import AgentNoInstructionError
from opendevin.core.schema import ActionType
from opendevin.events.action import (
    Action,
    AgentRecallAction,
    BrowseURLAction,
    CmdRunAction,
    FileReadAction,
    FileWriteAction,
    MessageAction,
    NullAction,
)
from opendevin.events.observation import (
    AgentRecallObservation,
    BrowserOutputObservation,
    CmdOutputObservation,
    FileReadObservation,
    NullObservation,
    Observation,
)
from opendevin.events.serialization.event import event_to_memory
from opendevin.llm.llm import LLM
from opendevin.memory.condenser import MemoryCondenser
from opendevin.runtime.tools import RuntimeTool

if config.agent.memory_enabled:
    from opendevin.memory.memory import LongTermMemory

MAX_TOKEN_COUNT_PADDING = 512
MAX_OUTPUT_LENGTH = 5000


class MonologueAgent(Agent):
    VERSION = '1.0'
    """
    The Monologue Agent utilizes long and short term memory to complete tasks.
    Long term memory is stored as a LongTermMemory object and the model uses it to search for examples from the past.
    Short term memory is stored as a Monologue object and the model can condense it as necessary.
    """

    _initialized = False
    initial_thoughts: list[dict[str, str]]
    memory: 'LongTermMemory | None'
    memory_condenser: MemoryCondenser
    runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
    response_parser = MonologueResponseParser()

    def __init__(self, llm: LLM):
        """
        Initializes the Monologue Agent with an llm.

        Parameters:
        - llm (LLM): The llm to be used by this agent
        """
        super().__init__(llm)

    def _initialize(self, task: str):
        """
        Utilizes the INITIAL_THOUGHTS list to give the agent a context for its capabilities
        and how to navigate the WORKSPACE_MOUNT_PATH_IN_SANDBOX in `config` (e.g., /workspace by default).
        Short circuited to return when already initialized.
        Will execute again when called after reset.

        Parameters:
        - task (str): The initial goal statement provided by the user

        Raises:
        - AgentNoInstructionError: If task is not provided
        """

        if self._initialized:
            return

        if task is None or task == '':
            raise AgentNoInstructionError()

        self.initial_thoughts = []
        if config.agent.memory_enabled:
            self.memory = LongTermMemory()
        else:
            self.memory = None

        self.memory_condenser = MemoryCondenser()

        self._add_initial_thoughts(task)
        self._initialized = True

    def _add_initial_thoughts(self, task):
        previous_action = ''
        for thought in INITIAL_THOUGHTS:
            thought = thought.replace('$TASK', task)
            if previous_action != '':
                observation: Observation = NullObservation(content='')
                if previous_action in {ActionType.RUN, ActionType.PUSH}:
                    observation = CmdOutputObservation(
                        content=thought, command_id=0, command=''
                    )
                elif previous_action == ActionType.READ:
                    observation = FileReadObservation(content=thought, path='')
                elif previous_action == ActionType.RECALL:
                    observation = AgentRecallObservation(content=thought, memories=[])
                elif previous_action == ActionType.BROWSE:
                    observation = BrowserOutputObservation(
                        content=thought, url='', screenshot=''
                    )
                self.initial_thoughts.append(event_to_memory(observation))
                previous_action = ''
            else:
                action: Action = NullAction()
                if thought.startswith('RUN'):
                    command = thought.split('RUN ')[1]
                    action = CmdRunAction(command)
                    previous_action = ActionType.RUN
                elif thought.startswith('WRITE'):
                    parts = thought.split('WRITE ')[1].split(' > ')
                    path = parts[1]
                    content = parts[0]
                    action = FileWriteAction(path=path, content=content)
                elif thought.startswith('READ'):
                    path = thought.split('READ ')[1]
                    action = FileReadAction(path=path)
                    previous_action = ActionType.READ
                elif thought.startswith('RECALL'):
                    query = thought.split('RECALL ')[1]
                    action = AgentRecallAction(query=query)
                    previous_action = ActionType.RECALL
                elif thought.startswith('BROWSE'):
                    url = thought.split('BROWSE ')[1]
                    action = BrowseURLAction(url=url)
                    previous_action = ActionType.BROWSE
                else:
                    action = MessageAction(thought)
                self.initial_thoughts.append(event_to_memory(action))

    def step(self, state: State) -> Action:
        """
        Modifies the current state by adding the most recent actions and observations, then prompts the model to think about it's next action to take using monologue, memory, and hint.

        Parameters:
        - state (State): The current state based on previous steps taken

        Returns:
        - Action: The next action to take based on LLM response
        """

        goal = state.get_current_user_intent()
        self._initialize(goal)

        recent_events: list[dict[str, str]] = []

        # add the events from state.history
        for prev_action, obs in state.history:
            if not isinstance(prev_action, NullAction):
                recent_events.append(event_to_memory(prev_action))
            if not isinstance(obs, NullObservation):
                recent_events.append(self._truncate_output(event_to_memory(obs)))

        # add the last messages to long term memory
        if self.memory is not None and state.history and len(state.history) > 0:
            self.memory.add_event(event_to_memory(state.history[-1][0]))
            self.memory.add_event(
                self._truncate_output(event_to_memory(state.history[-1][1]))
            )

        # the action prompt with initial thoughts and recent events
        prompt = prompts.get_request_action_prompt(
            goal,
            self.initial_thoughts,
            recent_events,
            state.background_commands_obs,
        )

        messages: list[dict[str, str]] = [
            {'role': 'user', 'content': prompt},
        ]

        # format all as a single message, a monologue
        resp = self.llm.do_completion(messages=messages)

        # keep track of max_chars fallback option
        state.num_of_chars += len(prompt) + len(
            resp['choices'][0]['message']['content']
        )

        action = self.response_parser.parse(resp)
        self.latest_action = action
        return action

    def _truncate_output(
        self, observation: dict, max_chars: int = MAX_OUTPUT_LENGTH
    ) -> dict[str, str]:
        """
        Truncates the output of an observation to a maximum number of characters.

        Parameters:
        - output (str): The observation whose output to truncate
        - max_chars (int): The maximum number of characters to allow

        Returns:
        - str: The truncated output
        """
        if (
            'args' in observation
            and 'output' in observation['args']
            and len(observation['args']['output']) > max_chars
        ):
            output = observation['args']['output']
            half = max_chars // 2
            observation['args']['output'] = (
                output[:half]
                + '\n[... Output truncated due to length...]\n'
                + output[-half:]
            )
        return observation

    def search_memory(self, query: str) -> list[str]:
        """
        Uses VectorIndexRetriever to find related memories within the long term memory.
        Uses search to produce top 10 results.

        Parameters:
        - query (str): The query that we want to find related memories for

        Returns:
        - list[str]: A list of top 10 text results that matched the query
        """
        if self.memory is None:
            return []
        return self.memory.search(query)

    def reset(self) -> None:
        super().reset()

        # Reset the initial monologue and memory
        self._initialized = False