prompts.py 6.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179
  1. from typing import List, Dict, Type
  2. from langchain_core.pydantic_v1 import BaseModel
  3. from langchain.prompts import PromptTemplate
  4. from langchain_core.output_parsers import JsonOutputParser
  5. from opendevin import config
# When the "DEBUG" config value is truthy (it defaults to False), enable
# LangChain's global debug setting. The import is done lazily so that
# `langchain.globals` is only loaded when debugging is requested.
if config.get_or_default("DEBUG", False):
    from langchain.globals import set_debug
    set_debug(True)
  9. from . import json
  10. from opendevin.action import (
  11. Action,
  12. CmdRunAction,
  13. CmdKillAction,
  14. BrowseURLAction,
  15. FileReadAction,
  16. FileWriteAction,
  17. AgentRecallAction,
  18. AgentThinkAction,
  19. AgentFinishAction,
  20. AgentSummarizeAction,
  21. )
  22. from opendevin.observation import (
  23. CmdOutputObservation,
  24. )
  25. ACTION_TYPE_TO_CLASS: Dict[str, Type[Action]] = {
  26. "run": CmdRunAction,
  27. "kill": CmdKillAction,
  28. "browse": BrowseURLAction,
  29. "read": FileReadAction,
  30. "write": FileWriteAction,
  31. "recall": AgentRecallAction,
  32. "think": AgentThinkAction,
  33. "summarize": AgentSummarizeAction,
  34. "finish": AgentFinishAction,
  35. }
  36. CLASS_TO_ACTION_TYPE: Dict[Type[Action], str] = {v: k for k, v in ACTION_TYPE_TO_CLASS.items()}
# Template for the prompt that asks the agent for its next thought or action.
# get_request_action_prompt renders it through PromptTemplate, filling four
# placeholders: {task}, {monologue}, {background_commands} and {hint}.
# Every action name listed in the text is a key of ACTION_TYPE_TO_CLASS.
ACTION_PROMPT = """
You're a thoughtful robot. Your main task is to {task}.
Don't expand the scope of your task--just complete it as written.
This is your internal monologue, in JSON format:
```json
{monologue}
```
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
What is your next thought or action? Your response must be in JSON format.
It must be an object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
Here are the possible actions:
* `read` - reads the contents of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the contents to a file. Arguments:
* `path` - the path of the file to write
* `contents` - the contents to write to the file
* `run` - runs a command. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `recall` - recalls a past memory. Arguments:
* `query` - the query to search for
* `think` - make a plan, set a goal, or record your thoughts. Arguments:
* `thought` - the thought to record
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
{background_commands}
You MUST take time to think in between read, write, run, browse, and recall actions.
You should never act twice in a row without thinking. But if your last several
actions are all "think" actions, you should consider taking a different action.
Notes:
* your environment is Debian Linux. You can install software with `apt`
* you can use `git commit` to stash your work, but you don't have access to a remote repository
* your working directory will not change, even if you run `cd`. All commands will be run in the `/workspace` directory.
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
What is your next thought or action? Again, you must reply with JSON, and only with JSON.
{hint}
"""
# Template for the prompt that asks the LLM to condense a monologue.
# get_summarize_monologue_prompt renders it through PromptTemplate, filling
# the single {monologue} placeholder. The reply format requested here (an
# object with a `new_monologue` array of `action`/`args` entries) is what
# parse_summary_response reads back.
MONOLOGUE_SUMMARY_PROMPT = """
Below is the internal monologue of an automated LLM agent. Each
thought is an item in a JSON array. The thoughts may be memories,
actions taken by the agent, or outputs from those actions.
Please return a new, smaller JSON array, which summarizes the
internal monologue. You can summarize individual thoughts, and
you can condense related thoughts together with a description
of their content.
```json
{monologue}
```
Make the summaries as pithy and informative as possible.
Be specific about what happened and what was learned. The summary
will be used as keywords for searching for the original memory.
Be sure to preserve any key words or important information.
Your response must be in JSON format. It must be an object with the
key `new_monologue`, which is a JSON array containing the summarized monologue.
Each entry in the array must have an `action` key, and an `args` key.
The action key may be `summarize`, and `args.summary` should contain the summary.
You can also use the same action and args from the source monologue.
"""
# Shape of a single action object in an LLM reply; handed to
# JsonOutputParser as its `pydantic_object` in parse_action_response.
class _ActionDict(BaseModel):
    # Name of the action (looked up in ACTION_TYPE_TO_CLASS by the parser code).
    action: str
    # Keyword arguments for that action.
    args: dict
# Shape of the LLM's reply to the summary prompt; handed to
# JsonOutputParser as its `pydantic_object` in parse_summary_response.
class NewMonologue(BaseModel):
    # The condensed monologue, one action object per entry.
    new_monologue: List[_ActionDict]
  105. def get_summarize_monologue_prompt(thoughts):
  106. prompt = PromptTemplate.from_template(MONOLOGUE_SUMMARY_PROMPT)
  107. return prompt.format(monologue=json.dumps({'old_monologue': thoughts}, indent=2))
  108. def get_request_action_prompt(
  109. task: str,
  110. thoughts: List[dict],
  111. background_commands_obs: List[CmdOutputObservation] = [],
  112. ):
  113. hint = ''
  114. if len(thoughts) > 0:
  115. latest_thought = thoughts[-1]
  116. if latest_thought["action"] == 'think':
  117. if latest_thought["args"]['thought'].startswith("OK so my task is"):
  118. hint = "You're just getting started! What should you do first?"
  119. else:
  120. hint = "You've been thinking a lot lately. Maybe it's time to take action?"
  121. elif latest_thought["action"] == 'error':
  122. hint = "Looks like that last command failed. Maybe you need to fix it, or try something else."
  123. bg_commands_message = ""
  124. if len(background_commands_obs) > 0:
  125. bg_commands_message = "The following commands are running in the background:"
  126. for command_obs in background_commands_obs:
  127. bg_commands_message += f"\n`{command_obs.command_id}`: {command_obs.command}"
  128. bg_commands_message += "\nYou can end any process by sending a `kill` action with the numerical `id` above."
  129. latest_thought = thoughts[-1]
  130. prompt = PromptTemplate.from_template(ACTION_PROMPT)
  131. return prompt.format(
  132. task=task,
  133. monologue=json.dumps(thoughts, indent=2),
  134. background_commands=bg_commands_message,
  135. hint=hint,
  136. )
  137. def parse_action_response(response: str) -> Action:
  138. parser = JsonOutputParser(pydantic_object=_ActionDict)
  139. action_dict = parser.parse(response)
  140. if 'content' in action_dict:
  141. # The LLM gets confused here. Might as well be robust
  142. action_dict['contents'] = action_dict.pop('content')
  143. action = ACTION_TYPE_TO_CLASS[action_dict["action"]](**action_dict["args"])
  144. return action
  145. def parse_summary_response(response: str) -> List[Action]:
  146. parser = JsonOutputParser(pydantic_object=NewMonologue)
  147. parsed = parser.parse(response)
  148. #thoughts = [ACTION_TYPE_TO_CLASS[t['action']](**t['args']) for t in parsed['new_monologue']]
  149. return parsed['new_monologue']