# agent.py
  1. import agenthub.monologue_agent.utils.prompts as prompts
  2. from agenthub.monologue_agent.utils.monologue import Monologue
  3. from opendevin.controller.agent import Agent
  4. from opendevin.controller.state.state import State
  5. from opendevin.core.config import config
  6. from opendevin.core.exceptions import AgentNoInstructionError
  7. from opendevin.core.schema import ActionType
  8. from opendevin.events.action import (
  9. Action,
  10. AgentRecallAction,
  11. BrowseURLAction,
  12. CmdRunAction,
  13. FileReadAction,
  14. FileWriteAction,
  15. MessageAction,
  16. NullAction,
  17. )
  18. from opendevin.events.observation import (
  19. AgentRecallObservation,
  20. BrowserOutputObservation,
  21. CmdOutputObservation,
  22. FileReadObservation,
  23. NullObservation,
  24. Observation,
  25. )
  26. from opendevin.llm.llm import LLM
  27. if config.agent.memory_enabled:
  28. from agenthub.monologue_agent.utils.memory import LongTermMemory
  29. MAX_TOKEN_COUNT_PADDING = 512
  30. MAX_OUTPUT_LENGTH = 5000
  31. INITIAL_THOUGHTS = [
  32. 'I exist!',
  33. 'Hmm...looks like I can type in a command line prompt',
  34. 'Looks like I have a web browser too!',
  35. "Here's what I want to do: $TASK",
  36. 'How am I going to get there though?',
  37. 'It seems like I have some kind of short term memory.',
  38. 'Each of my thoughts seems to be stored in a JSON array.',
  39. 'It seems whatever I say next will be added as an object to the list.',
  40. 'But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.',
  41. 'Fortunately I have long term memory!',
  42. 'I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!',
  43. "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
  44. "Let's try it out!",
  45. 'RECALL what it is I want to do',
  46. "Here's what I want to do: $TASK",
  47. 'How am I going to get there though?',
  48. "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
  49. 'RUN echo "hello world"',
  50. 'hello world',
  51. 'Cool! I bet I can write files too using the write action.',
  52. 'WRITE echo "console.log(\'hello world\')" > test.js',
  53. '',
  54. "I just created test.js. I'll try and run it now.",
  55. 'RUN node test.js',
  56. 'hello world',
  57. 'It works!',
  58. "I'm going to try reading it now using the read action.",
  59. 'READ test.js',
  60. "console.log('hello world')",
  61. 'Nice! I can read files too!',
  62. 'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
  63. "Let's try that...",
  64. 'BROWSE google.com',
  65. '<form><input type="text"></input><button type="submit"></button></form>',
  66. 'I can browse the web too!',
  67. 'And once I have completed my task, I can use the finish action to stop working.',
  68. "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
  69. 'Very cool. Now to accomplish my task.',
  70. "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
  71. 'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
  72. "OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
  73. 'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
  74. ]
  75. class MonologueAgent(Agent):
  76. """
  77. The Monologue Agent utilizes long and short term memory to complete tasks.
  78. Long term memory is stored as a LongTermMemory object and the model uses it to search for examples from the past.
  79. Short term memory is stored as a Monologue object and the model can condense it as necessary.
  80. """
  81. _initialized = False
  82. monologue: Monologue
  83. memory: 'LongTermMemory | None'
  84. def __init__(self, llm: LLM):
  85. """
  86. Initializes the Monologue Agent with an llm, monologue, and memory.
  87. Parameters:
  88. - llm (LLM): The llm to be used by this agent
  89. """
  90. super().__init__(llm)
  91. def _add_event(self, event: dict):
  92. """
  93. Adds a new event to the agent's monologue and memory.
  94. Monologue automatically condenses when it gets too large.
  95. Parameters:
  96. - event (dict): The event that will be added to monologue and memory
  97. """
  98. if (
  99. 'args' in event
  100. and 'output' in event['args']
  101. and len(event['args']['output']) > MAX_OUTPUT_LENGTH
  102. ):
  103. event['args']['output'] = (
  104. event['args']['output'][:MAX_OUTPUT_LENGTH] + '...'
  105. )
  106. self.monologue.add_event(event)
  107. if self.memory is not None:
  108. self.memory.add_event(event)
  109. # Test monologue token length
  110. prompt = prompts.get_request_action_prompt(
  111. '',
  112. self.monologue.get_thoughts(),
  113. [],
  114. )
  115. messages = [{'content': prompt, 'role': 'user'}]
  116. token_count = self.llm.get_token_count(messages)
  117. if token_count + MAX_TOKEN_COUNT_PADDING > self.llm.max_input_tokens:
  118. self.monologue.condense(self.llm)
  119. def _initialize(self, task: str):
  120. """
  121. Utilizes the INITIAL_THOUGHTS list to give the agent a context for its capabilities
  122. and how to navigate the WORKSPACE_MOUNT_PATH_IN_SANDBOX in `config` (e.g., /workspace by default).
  123. Short circuited to return when already initialized.
  124. Will execute again when called after reset.
  125. Parameters:
  126. - task (str): The initial goal statement provided by the user
  127. Raises:
  128. - AgentNoInstructionError: If task is not provided
  129. """
  130. if self._initialized:
  131. return
  132. if task is None or task == '':
  133. raise AgentNoInstructionError()
  134. self.monologue = Monologue()
  135. if config.agent.memory_enabled:
  136. self.memory = LongTermMemory()
  137. else:
  138. self.memory = None
  139. self._add_initial_thoughts(task)
  140. self._initialized = True
  141. def _add_initial_thoughts(self, task):
  142. previous_action = ''
  143. for thought in INITIAL_THOUGHTS:
  144. thought = thought.replace('$TASK', task)
  145. if previous_action != '':
  146. observation: Observation = NullObservation(content='')
  147. if previous_action in {ActionType.RUN, ActionType.PUSH}:
  148. observation = CmdOutputObservation(
  149. content=thought, command_id=0, command=''
  150. )
  151. elif previous_action == ActionType.READ:
  152. observation = FileReadObservation(content=thought, path='')
  153. elif previous_action == ActionType.RECALL:
  154. observation = AgentRecallObservation(content=thought, memories=[])
  155. elif previous_action == ActionType.BROWSE:
  156. observation = BrowserOutputObservation(
  157. content=thought, url='', screenshot=''
  158. )
  159. self._add_event(observation.to_memory())
  160. previous_action = ''
  161. else:
  162. action: Action = NullAction()
  163. if thought.startswith('RUN'):
  164. command = thought.split('RUN ')[1]
  165. action = CmdRunAction(command)
  166. previous_action = ActionType.RUN
  167. elif thought.startswith('WRITE'):
  168. parts = thought.split('WRITE ')[1].split(' > ')
  169. path = parts[1]
  170. content = parts[0]
  171. action = FileWriteAction(path=path, content=content)
  172. elif thought.startswith('READ'):
  173. path = thought.split('READ ')[1]
  174. action = FileReadAction(path=path)
  175. previous_action = ActionType.READ
  176. elif thought.startswith('RECALL'):
  177. query = thought.split('RECALL ')[1]
  178. action = AgentRecallAction(query=query)
  179. previous_action = ActionType.RECALL
  180. elif thought.startswith('BROWSE'):
  181. url = thought.split('BROWSE ')[1]
  182. action = BrowseURLAction(url=url)
  183. previous_action = ActionType.BROWSE
  184. else:
  185. action = MessageAction(thought)
  186. self._add_event(action.to_memory())
  187. def step(self, state: State) -> Action:
  188. """
  189. Modifies the current state by adding the most recent actions and observations, then prompts the model to think about it's next action to take using monologue, memory, and hint.
  190. Parameters:
  191. - state (State): The current state based on previous steps taken
  192. Returns:
  193. - Action: The next action to take based on LLM response
  194. """
  195. self._initialize(state.plan.main_goal)
  196. for prev_action, obs in state.updated_info:
  197. self._add_event(prev_action.to_memory())
  198. self._add_event(obs.to_memory())
  199. state.updated_info = []
  200. prompt = prompts.get_request_action_prompt(
  201. state.plan.main_goal,
  202. self.monologue.get_thoughts(),
  203. state.background_commands_obs,
  204. )
  205. messages = [{'content': prompt, 'role': 'user'}]
  206. resp = self.llm.completion(messages=messages)
  207. action_resp = resp['choices'][0]['message']['content']
  208. state.num_of_chars += len(prompt) + len(action_resp)
  209. action = prompts.parse_action_response(action_resp)
  210. self.latest_action = action
  211. return action
  212. def search_memory(self, query: str) -> list[str]:
  213. """
  214. Uses VectorIndexRetriever to find related memories within the long term memory.
  215. Uses search to produce top 10 results.
  216. Parameters:
  217. - query (str): The query that we want to find related memories for
  218. Returns:
  219. - list[str]: A list of top 10 text results that matched the query
  220. """
  221. if self.memory is None:
  222. return []
  223. return self.memory.search(query)
  224. def reset(self) -> None:
  225. super().reset()
  226. # Reset the initial monologue and memory
  227. self._initialized = False