__init__.py 8.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197
  1. from typing import List, Dict, Type
  2. import agenthub.langchains_agent.utils.llm as llm
  3. from opendevin.agent import Agent
  4. from opendevin.action import (
  5. Action,
  6. CmdRunAction,
  7. CmdKillAction,
  8. BrowseURLAction,
  9. FileReadAction,
  10. FileWriteAction,
  11. AgentRecallAction,
  12. AgentThinkAction,
  13. AgentFinishAction,
  14. )
  15. from opendevin.observation import (
  16. Observation,
  17. CmdOutputObservation,
  18. BrowserOutputObservation,
  19. )
  20. from opendevin.state import State
  21. from agenthub.langchains_agent.utils.monologue import Monologue
  22. from agenthub.langchains_agent.utils.memory import LongTermMemory
  23. INITIAL_THOUGHTS = [
  24. "I exist!",
  25. "Hmm...looks like I can type in a command line prompt",
  26. "Looks like I have a web browser too!",
  27. "Here's what I want to do: $TASK",
  28. "How am I going to get there though?",
  29. "It seems like I have some kind of short term memory.",
  30. "Each of my thoughts seems to be stored in a numbered list.",
  31. "It seems whatever I say next will be added to the list.",
  32. "But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
  33. "Fortunately I have long term memory!",
  34. "I can just say RECALL, followed by the thing I want to remember. And then related thoughts just spill out!",
  35. "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
  36. "Let's try it out!",
  37. "RECALL what it is I want to do",
  38. "Here's what I want to do: $TASK",
  39. "How am I going to get there though?",
  40. "Neat! And it looks like it's easy for me to use the command line too! I just have to say RUN followed by the command I want to run. The command output just jumps into my head!",
  41. 'RUN echo "hello world"',
  42. "hello world",
  43. "Cool! I bet I can read and edit files too.",
  44. "RUN echo \"console.log('hello world')\" > test.js",
  45. "",
  46. "I just created test.js. I'll try and run it now.",
  47. "RUN node test.js",
  48. "hello world",
  49. "it works!",
  50. "And if I want to use the browser, I just need to say BROWSE, followed by a website I want to visit, or an action I want to take on the current site",
  51. "Let's try that...",
  52. "BROWSE google.com",
  53. '<form><input type="text"></input><button type="submit"></button></form>',
  54. "Very cool. Now to accomplish my task.",
  55. "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
  56. "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
  57. "OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
  58. "It seems like there might be an existing project here. I should probably start by running `ls` to see what's here.",
  59. ]
  60. MAX_OUTPUT_LENGTH = 5000
  61. MAX_MONOLOGUE_LENGTH = 20000
  62. ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
  63. "run": CmdRunAction,
  64. "kill": CmdKillAction,
  65. "browse": BrowseURLAction,
  66. "read": FileReadAction,
  67. "write": FileWriteAction,
  68. "recall": AgentRecallAction,
  69. "think": AgentThinkAction,
  70. "finish": AgentFinishAction,
  71. }
  72. CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}
  73. class LangchainsAgent(Agent):
  74. _initialized = False
  75. def __init__(self, model_name: str):
  76. super().__init__(model_name)
  77. self.monologue = Monologue(self.model_name)
  78. self.memory = LongTermMemory()
  79. def _add_event(self, event: dict):
  80. if 'output' in event['args'] and len(event['args']['output']) > MAX_OUTPUT_LENGTH:
  81. event['args']['output'] = event['args']['output'][:MAX_OUTPUT_LENGTH] + "..."
  82. self.monologue.add_event(event)
  83. self.memory.add_event(event)
  84. if self.monologue.get_total_length() > MAX_MONOLOGUE_LENGTH:
  85. self.monologue.condense()
  86. def _initialize(self):
  87. if self._initialized:
  88. return
  89. if self.instruction is None or self.instruction == "":
  90. raise ValueError("Instruction must be provided")
  91. next_is_output = False
  92. for thought in INITIAL_THOUGHTS:
  93. thought = thought.replace("$TASK", self.instruction)
  94. if next_is_output:
  95. d = {"action": "output", "args": {"output": thought}}
  96. next_is_output = False
  97. else:
  98. if thought.startswith("RUN"):
  99. command = thought.split("RUN ")[1]
  100. d = {"action": "run", "args": {"command": command}}
  101. next_is_output = True
  102. elif thought.startswith("RECALL"):
  103. query = thought.split("RECALL ")[1]
  104. d = {"action": "recall", "args": {"query": query}}
  105. next_is_output = True
  106. elif thought.startswith("BROWSE"):
  107. url = thought.split("BROWSE ")[1]
  108. d = {"action": "browse", "args": {"url": url}}
  109. next_is_output = True
  110. else:
  111. d = {"action": "think", "args": {"thought": thought}}
  112. self._add_event(d)
  113. self._initialized = True
  114. def step(self, state: State) -> Action:
  115. self._initialize()
  116. # TODO: make langchains agent use Action & Observation
  117. # completly from ground up
  118. # Translate state to action_dict
  119. for prev_action, obs in state.updated_info:
  120. if isinstance(obs, CmdOutputObservation):
  121. if obs.error:
  122. d = {"action": "error", "args": {"output": obs.content}}
  123. else:
  124. d = {"action": "output", "args": {"output": obs.content}}
  125. # elif isinstance(obs, UserMessageObservation):
  126. # d = {"action": "output", "args": {"output": obs.message}}
  127. # elif isinstance(obs, AgentMessageObservation):
  128. # d = {"action": "output", "args": {"output": obs.message}}
  129. elif isinstance(obs, (BrowserOutputObservation, Observation)):
  130. d = {"action": "output", "args": {"output": obs.content}}
  131. else:
  132. raise NotImplementedError(f"Unknown observation type: {obs}")
  133. self._add_event(d)
  134. if isinstance(prev_action, CmdRunAction):
  135. d = {"action": "run", "args": {"command": prev_action.command}}
  136. elif isinstance(prev_action, CmdKillAction):
  137. d = {"action": "kill", "args": {"id": prev_action.id}}
  138. elif isinstance(prev_action, BrowseURLAction):
  139. d = {"action": "browse", "args": {"url": prev_action.url}}
  140. elif isinstance(prev_action, FileReadAction):
  141. d = {"action": "read", "args": {"file": prev_action.path}}
  142. elif isinstance(prev_action, FileWriteAction):
  143. d = {"action": "write", "args": {"file": prev_action.path, "content": prev_action.contents}}
  144. elif isinstance(prev_action, AgentRecallAction):
  145. d = {"action": "recall", "args": {"query": prev_action.query}}
  146. elif isinstance(prev_action, AgentThinkAction):
  147. d = {"action": "think", "args": {"thought": prev_action.thought}}
  148. elif isinstance(prev_action, AgentFinishAction):
  149. d = {"action": "finish"}
  150. else:
  151. raise NotImplementedError(f"Unknown action type: {prev_action}")
  152. self._add_event(d)
  153. state.updated_info = []
  154. action_dict = llm.request_action(
  155. self.instruction,
  156. self.monologue.get_thoughts(),
  157. self.model_name,
  158. state.background_commands_obs,
  159. )
  160. if action_dict is None:
  161. action_dict = {"action": "think", "args": {"thought": "..."}}
  162. # Translate action_dict to Action
  163. action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
  164. self.latest_action = action
  165. return action
  166. def search_memory(self, query: str) -> List[str]:
  167. return self.memory.search(query)
# Register this class with the Agent base class under the name
# "LangchainsAgent" (runs as an import-time side effect of this module).
# NOTE(review): presumably this is what lets callers look the agent up by
# name - confirm against Agent.register.
Agent.register("LangchainsAgent", LangchainsAgent)