codeact_agent.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180
  1. import re
  2. from typing import List, Mapping
  3. from agenthub.codeact_agent.prompt import EXAMPLES, SYSTEM_MESSAGE
  4. from opendevin.action import (
  5. Action,
  6. AgentEchoAction,
  7. AgentFinishAction,
  8. AgentTalkAction,
  9. CmdRunAction,
  10. IPythonRunCellAction,
  11. NullAction,
  12. )
  13. from opendevin.agent import Agent
  14. from opendevin.llm.llm import LLM
  15. from opendevin.observation import (
  16. AgentMessageObservation,
  17. CmdOutputObservation,
  18. IPythonRunCellObservation,
  19. UserMessageObservation,
  20. )
  21. from opendevin.sandbox.plugins import (
  22. JupyterRequirement,
  23. PluginRequirement,
  24. SWEAgentCommandsRequirement,
  25. )
  26. from opendevin.state import State
  def parse_response(response) -> str:
      """
      Pull the assistant text out of an LLM completion and re-close any execute tag.

      The text is read from `response.choices[0].message.content`. For each of the
      `bash` and `ipython` tags, if the opening `<execute_...>` tag is present but
      its closing tag is missing, the closing tag is appended so the text can be
      matched as a complete block later.

      Parameters:
      - response: completion object exposing `choices[0].message.content`

      Returns:
      - str: the message content, with missing closing execute tags appended
      """
      text = response.choices[0].message.content
      for tag in ('bash', 'ipython'):
          opening = f'<execute_{tag}>'
          closing = f'</execute_{tag}>'
          # Only repair a block that was opened and never closed.
          if opening in text and closing not in text:
              text += closing
      return text
  def truncate_observation(observation: str, max_chars: int=5000) -> str:
      """
      Truncate the middle of the observation if it is too long.

      Observations no longer than `max_chars` are returned unchanged. Longer ones
      keep their first and last `max_chars // 2` characters, joined by a marker
      line; the marker is added on top of the kept characters, so the result can
      exceed `max_chars`.

      Parameters:
      - observation (str): the text to shorten
      - max_chars (int): length threshold above which truncation happens

      Returns:
      - str: the original text, or its head and tail around the truncation marker
      """
      if len(observation) <= max_chars:
          return observation
      # Clamp so a zero or negative budget keeps nothing instead of producing
      # surprising slices.
      half = max(max_chars, 0) // 2
      head = observation[:half]
      # Slice from an explicit start index: `observation[-half:]` with half == 0
      # is `observation[0:]`, i.e. the WHOLE string rather than an empty tail.
      tail = observation[len(observation) - half:]
      return head + '\n[... Observation truncated due to length ...]\n' + tail
  class CodeActAgent(Agent):
      """
      The Code Act Agent is a minimalist agent.

      The agent works by passing the model a list of action-observation pairs and
      prompting the model to take the next step. The model's reply is searched for
      an <execute_bash> or <execute_ipython> block; a reply with neither is treated
      as a message to the user.
      """

      sandbox_plugins: List[PluginRequirement] = [JupyterRequirement(), SWEAgentCommandsRequirement()]

      # Action types accepted as `prev_action` entries in `state.updated_info`.
      SUPPORTED_ACTIONS = (
          CmdRunAction,
          IPythonRunCellAction,
          AgentEchoAction,
          AgentTalkAction,
          NullAction,
      )

      # Observation types accepted as `obs` entries in `state.updated_info`.
      SUPPORTED_OBSERVATIONS = (
          AgentMessageObservation,
          UserMessageObservation,
          CmdOutputObservation,
          IPythonRunCellObservation,
      )

      def __init__(
          self,
          llm: LLM,
      ) -> None:
          """
          Initializes a new instance of the CodeActAgent class.

          Parameters:
          - llm (LLM): The llm to be used by this agent
          """
          super().__init__(llm)
          # Chat history sent to the LLM; populated lazily on the first step().
          self.messages: List[Mapping[str, str]] = []

      def step(self, state: State) -> Action:
          """
          Performs one step using the Code Act Agent.
          This includes gathering info on previous steps and prompting the model to make a command to execute.

          Parameters:
          - state (State): used to get the main goal and the updated (action, observation) pairs

          Returns:
          - CmdRunAction(command, thought) - the reply contains an <execute_bash> block
          - IPythonRunCellAction(code, thought) - the reply contains an <execute_ipython> block
          - AgentTalkAction(content) - the reply contains neither block
          - AgentFinishAction() - a user/agent message is '/exit', or the bash command is 'exit'

          Raises:
          - AssertionError - the main goal is unset on the first step, or an action/observation
            in state.updated_info is not one of the supported types
          - NotImplementedError - an observation passed the supported-type check but has no
            handler (defensive; only reachable if the two lists drift apart)
          """
          if not self.messages:
              # First call: seed the conversation with the system prompt and the task.
              assert state.plan.main_goal, 'Expecting instruction to be set'
              self.messages = [
                  {'role': 'system', 'content': SYSTEM_MESSAGE},
                  {
                      'role': 'user',
                      'content': (
                          f'Here is an example of how you can interact with the environment for task solving:\n{EXAMPLES}\n\n'
                          f"NOW, LET'S START!\n\n{state.plan.main_goal}"
                      )
                  },
              ]

          for prev_action, obs in state.updated_info or []:
              assert isinstance(
                  prev_action, self.SUPPORTED_ACTIONS
              ), f'{prev_action.__class__} is not supported (supported: {self.SUPPORTED_ACTIONS})'
              # prev_action is already added to self.messages when returned

              # handle observations
              assert isinstance(
                  obs, self.SUPPORTED_OBSERVATIONS
              ), f'{obs.__class__} is not supported (supported: {self.SUPPORTED_OBSERVATIONS})'

              if isinstance(obs, (AgentMessageObservation, UserMessageObservation)):
                  self.messages.append({'role': 'user', 'content': obs.content})
                  # User wants to exit
                  if obs.content.strip() == '/exit':
                      return AgentFinishAction()
              elif isinstance(obs, CmdOutputObservation):
                  content = 'OBSERVATION:\n' + truncate_observation(obs.content)
                  content += f'\n[Command {obs.command_id} finished with exit code {obs.exit_code}]'
                  self.messages.append({'role': 'user', 'content': content})
              elif isinstance(obs, IPythonRunCellObservation):
                  # replace base64 images with a placeholder so the payload
                  # does not end up in the prompt
                  lines = [
                      '![image](data:image/png;base64, ...) already displayed to user'
                      if '![image](data:image/png;base64,' in line
                      else line
                      for line in ('OBSERVATION:\n' + obs.content).split('\n')
                  ]
                  content = truncate_observation('\n'.join(lines))
                  self.messages.append({'role': 'user', 'content': content})
              else:
                  raise NotImplementedError(
                      f'Unknown observation type: {obs.__class__}'
                  )

          # Generation stops at a closing execute tag; parse_response appends
          # the missing closing tag so the patterns below can match.
          response = self.llm.completion(
              messages=self.messages,
              stop=[
                  '</execute_ipython>',
                  '</execute_bash>',
              ],
              temperature=0.0
          )

          action_str: str = parse_response(response)
          state.num_of_chars += sum(
              len(message['content']) for message in self.messages
          ) + len(action_str)
          self.messages.append({'role': 'assistant', 'content': action_str})

          if bash_command := re.search(r'<execute_bash>(.*)</execute_bash>', action_str, re.DOTALL):
              # remove the command from the action string to get thought
              thought = action_str.replace(bash_command.group(0), '').strip()
              # a command was found
              command_group = bash_command.group(1).strip()
              if command_group == 'exit':
                  return AgentFinishAction()
              return CmdRunAction(command=command_group, thought=thought)
          elif python_code := re.search(r'<execute_ipython>(.*)</execute_ipython>', action_str, re.DOTALL):
              # a code block was found
              code_group = python_code.group(1).strip()
              thought = action_str.replace(python_code.group(0), '').strip()
              return IPythonRunCellAction(code=code_group, thought=thought)
          else:
              # We assume the LLM is GOOD enough that when it returns pure natural language
              # it wants to talk to the user
              return AgentTalkAction(content=action_str)

      def search_memory(self, query: str) -> List[str]:
          """
          Not supported by this agent.

          Raises:
          - NotImplementedError - always
          """
          raise NotImplementedError('Implement this abstract method')