# agent.py
  1. import agenthub.monologue_agent.utils.prompts as prompts
  2. from agenthub.monologue_agent.response_parser import MonologueResponseParser
  3. from agenthub.monologue_agent.utils.prompts import INITIAL_THOUGHTS
  4. from opendevin.controller.agent import Agent
  5. from opendevin.controller.state.state import State
  6. from opendevin.core.config import config
  7. from opendevin.core.exceptions import AgentNoInstructionError
  8. from opendevin.core.schema import ActionType
  9. from opendevin.events.action import (
  10. Action,
  11. AgentRecallAction,
  12. BrowseURLAction,
  13. CmdRunAction,
  14. FileReadAction,
  15. FileWriteAction,
  16. MessageAction,
  17. NullAction,
  18. )
  19. from opendevin.events.observation import (
  20. AgentRecallObservation,
  21. BrowserOutputObservation,
  22. CmdOutputObservation,
  23. FileReadObservation,
  24. NullObservation,
  25. Observation,
  26. )
  27. from opendevin.events.serialization.event import event_to_memory
  28. from opendevin.llm.llm import LLM
  29. from opendevin.memory.condenser import MemoryCondenser
  30. from opendevin.runtime.tools import RuntimeTool
  31. if config.agent.memory_enabled:
  32. from opendevin.memory.memory import LongTermMemory
  33. MAX_TOKEN_COUNT_PADDING = 512
  34. MAX_OUTPUT_LENGTH = 5000
  35. class MonologueAgent(Agent):
  36. VERSION = '1.0'
  37. """
  38. The Monologue Agent utilizes long and short term memory to complete tasks.
  39. Long term memory is stored as a LongTermMemory object and the model uses it to search for examples from the past.
  40. Short term memory is stored as a Monologue object and the model can condense it as necessary.
  41. """
  42. _initialized = False
  43. initial_thoughts: list[dict[str, str]]
  44. memory: 'LongTermMemory | None'
  45. memory_condenser: MemoryCondenser
  46. runtime_tools: list[RuntimeTool] = [RuntimeTool.BROWSER]
  47. response_parser = MonologueResponseParser()
  48. def __init__(self, llm: LLM):
  49. """
  50. Initializes the Monologue Agent with an llm.
  51. Parameters:
  52. - llm (LLM): The llm to be used by this agent
  53. """
  54. super().__init__(llm)
  55. def _initialize(self, task: str):
  56. """
  57. Utilizes the INITIAL_THOUGHTS list to give the agent a context for its capabilities
  58. and how to navigate the WORKSPACE_MOUNT_PATH_IN_SANDBOX in `config` (e.g., /workspace by default).
  59. Short circuited to return when already initialized.
  60. Will execute again when called after reset.
  61. Parameters:
  62. - task (str): The initial goal statement provided by the user
  63. Raises:
  64. - AgentNoInstructionError: If task is not provided
  65. """
  66. if self._initialized:
  67. return
  68. if task is None or task == '':
  69. raise AgentNoInstructionError()
  70. self.initial_thoughts = []
  71. if config.agent.memory_enabled:
  72. self.memory = LongTermMemory()
  73. else:
  74. self.memory = None
  75. self.memory_condenser = MemoryCondenser()
  76. self._add_initial_thoughts(task)
  77. self._initialized = True
  78. def _add_initial_thoughts(self, task):
  79. previous_action = ''
  80. for thought in INITIAL_THOUGHTS:
  81. thought = thought.replace('$TASK', task)
  82. if previous_action != '':
  83. observation: Observation = NullObservation(content='')
  84. if previous_action in {ActionType.RUN, ActionType.PUSH}:
  85. observation = CmdOutputObservation(
  86. content=thought, command_id=0, command=''
  87. )
  88. elif previous_action == ActionType.READ:
  89. observation = FileReadObservation(content=thought, path='')
  90. elif previous_action == ActionType.RECALL:
  91. observation = AgentRecallObservation(content=thought, memories=[])
  92. elif previous_action == ActionType.BROWSE:
  93. observation = BrowserOutputObservation(
  94. content=thought, url='', screenshot=''
  95. )
  96. self.initial_thoughts.append(event_to_memory(observation))
  97. previous_action = ''
  98. else:
  99. action: Action = NullAction()
  100. if thought.startswith('RUN'):
  101. command = thought.split('RUN ')[1]
  102. action = CmdRunAction(command)
  103. previous_action = ActionType.RUN
  104. elif thought.startswith('WRITE'):
  105. parts = thought.split('WRITE ')[1].split(' > ')
  106. path = parts[1]
  107. content = parts[0]
  108. action = FileWriteAction(path=path, content=content)
  109. elif thought.startswith('READ'):
  110. path = thought.split('READ ')[1]
  111. action = FileReadAction(path=path)
  112. previous_action = ActionType.READ
  113. elif thought.startswith('RECALL'):
  114. query = thought.split('RECALL ')[1]
  115. action = AgentRecallAction(query=query)
  116. previous_action = ActionType.RECALL
  117. elif thought.startswith('BROWSE'):
  118. url = thought.split('BROWSE ')[1]
  119. action = BrowseURLAction(url=url)
  120. previous_action = ActionType.BROWSE
  121. else:
  122. action = MessageAction(thought)
  123. self.initial_thoughts.append(event_to_memory(action))
  124. def step(self, state: State) -> Action:
  125. """
  126. Modifies the current state by adding the most recent actions and observations, then prompts the model to think about it's next action to take using monologue, memory, and hint.
  127. Parameters:
  128. - state (State): The current state based on previous steps taken
  129. Returns:
  130. - Action: The next action to take based on LLM response
  131. """
  132. goal = state.get_current_user_intent()
  133. self._initialize(goal)
  134. recent_events: list[dict[str, str]] = []
  135. # add the events from state.history
  136. for prev_action, obs in state.history:
  137. if not isinstance(prev_action, NullAction):
  138. recent_events.append(event_to_memory(prev_action))
  139. if not isinstance(obs, NullObservation):
  140. recent_events.append(self._truncate_output(event_to_memory(obs)))
  141. # add the last messages to long term memory
  142. if self.memory is not None and state.history and len(state.history) > 0:
  143. self.memory.add_event(event_to_memory(state.history[-1][0]))
  144. self.memory.add_event(
  145. self._truncate_output(event_to_memory(state.history[-1][1]))
  146. )
  147. # the action prompt with initial thoughts and recent events
  148. prompt = prompts.get_request_action_prompt(
  149. goal,
  150. self.initial_thoughts,
  151. recent_events,
  152. state.background_commands_obs,
  153. )
  154. messages: list[dict[str, str]] = [
  155. {'role': 'user', 'content': prompt},
  156. ]
  157. # format all as a single message, a monologue
  158. resp = self.llm.do_completion(messages=messages)
  159. # keep track of max_chars fallback option
  160. state.num_of_chars += len(prompt) + len(
  161. resp['choices'][0]['message']['content']
  162. )
  163. action = self.response_parser.parse(resp)
  164. self.latest_action = action
  165. return action
  166. def _truncate_output(
  167. self, observation: dict, max_chars: int = MAX_OUTPUT_LENGTH
  168. ) -> dict[str, str]:
  169. """
  170. Truncates the output of an observation to a maximum number of characters.
  171. Parameters:
  172. - output (str): The observation whose output to truncate
  173. - max_chars (int): The maximum number of characters to allow
  174. Returns:
  175. - str: The truncated output
  176. """
  177. if (
  178. 'args' in observation
  179. and 'output' in observation['args']
  180. and len(observation['args']['output']) > max_chars
  181. ):
  182. output = observation['args']['output']
  183. half = max_chars // 2
  184. observation['args']['output'] = (
  185. output[:half]
  186. + '\n[... Output truncated due to length...]\n'
  187. + output[-half:]
  188. )
  189. return observation
  190. def search_memory(self, query: str) -> list[str]:
  191. """
  192. Uses VectorIndexRetriever to find related memories within the long term memory.
  193. Uses search to produce top 10 results.
  194. Parameters:
  195. - query (str): The query that we want to find related memories for
  196. Returns:
  197. - list[str]: A list of top 10 text results that matched the query
  198. """
  199. if self.memory is None:
  200. return []
  201. return self.memory.search(query)
  202. def reset(self) -> None:
  203. super().reset()
  204. # Reset the initial monologue and memory
  205. self._initialized = False