# prompts.py
  1. from opendevin.core.config import config
  2. from opendevin.core.utils import json
  3. from opendevin.events.observation import (
  4. CmdOutputObservation,
  5. )
  6. from opendevin.events.action import (
  7. Action,
  8. )
  9. from opendevin.events.serialization.action import action_from_dict
# Template for the prompt that asks the LLM for its next single thought or
# action, answered as one JSON object with `action` and `args` fields.
# It is filled in with %-style mapping substitution and contains these
# placeholders: task, monologue, background_commands, user, timeout, hint.
# NOTE(review): the `run` action description says background commands do not
# need a trailing `&`, while the Notes section suggests `node server.js &` --
# consider reconciling the two instructions.
ACTION_PROMPT = """
You're a thoughtful robot. Your main task is this:
%(task)s
Don't expand the scope of your task--just complete it as written.
This is your internal monologue, in JSON format:
%(monologue)s
Your most recent thought is at the bottom of that monologue. Continue your train of thought.
What is your next single thought or action? Your response must be in JSON format.
It must be a single object, and it must contain two fields:
* `action`, which is one of the actions below
* `args`, which is a map of key-value pairs, specifying the arguments for that action
Here are the possible actions:
* `read` - reads the content of a file. Arguments:
* `path` - the path of the file to read
* `write` - writes the content to a file. Arguments:
* `path` - the path of the file to write
* `content` - the content to write to the file
* `run` - runs a command. Arguments:
* `command` - the command to run
* `background` - if true, run the command in the background, so that other commands can be run concurrently. Useful for e.g. starting a server. You won't be able to see the logs. You don't need to end the command with `&`, just set this to true.
* `kill` - kills a background command
* `command_id` - the ID of the background command to kill
* `browse` - opens a web page. Arguments:
* `url` - the URL to open
* `push` - Push a branch from the current repo to github:
* `owner` - the owner of the repo to push to
* `repo` - the name of the repo to push to
* `branch` - the name of the branch to push
* `recall` - recalls a past memory. Arguments:
* `query` - the query to search for
* `message` - make a plan, set a goal, record your thoughts, or ask for more input from the user. Arguments:
* `content` - the message to record
* `wait_for_response` - set to `true` to wait for the user to respond before proceeding
* `finish` - if you're absolutely certain that you've completed your task and have tested your work, use the finish action to stop working.
%(background_commands)s
You MUST take time to think in between read, write, run, kill, browse, push, and recall actions--do this with the `message` action.
You should never act twice in a row without thinking. But if your last several
actions are all `message` actions, you should consider taking a different action.
Notes:
* you are logged in as %(user)s, but sudo will always work without a password.
* all non-background commands will be forcibly stopped if they remain running for over %(timeout)s seconds.
* your environment is Debian Linux. You can install software with `sudo apt-get`, but remember to use -y.
* don't run interactive commands, or commands that don't return (e.g. `node server.js`). You may run commands in the background (e.g. `node server.js &`)
* don't run interactive text editors (e.g. `nano` or 'vim'), instead use the 'write' or 'read' action.
* don't run gui applications (e.g. software IDEs (like vs code or codium), web browsers (like firefox or chromium), or other complex software packages). Use non-interactive cli applications, or special actions instead.
* whenever an action fails, always send a `message` about why it may have happened before acting again.
What is your next single thought or action? Again, you must reply with JSON, and only with JSON. You must respond with exactly one 'action' object.
%(hint)s
"""
# Template for the prompt that asks the LLM to condense the agent's monologue.
# It has a single %-style placeholder, `monologue`, and instructs the model to
# reply with a JSON object whose `new_monologue` key holds the summarized array.
MONOLOGUE_SUMMARY_PROMPT = """
Below is the internal monologue of an automated LLM agent. Each
thought is an item in a JSON array. The thoughts may be memories,
actions taken by the agent, or outputs from those actions.
Please return a new, smaller JSON array, which summarizes the
internal monologue. You can summarize individual thoughts, and
you can condense related thoughts together with a description
of their content.
%(monologue)s
Make the summaries as pithy and informative as possible.
Be specific about what happened and what was learned. The summary
will be used as keywords for searching for the original memory.
Be sure to preserve any key words or important information.
Your response must be in JSON format. It must be an object with the
key `new_monologue`, which is a JSON array containing the summarized monologue.
Each entry in the array must have an `action` key, and an `args` key.
The action key may be `summarize`, and `args.summary` should contain the summary.
You can also use the same action and args from the source monologue.
"""
# Scripted sequence of first-person thoughts, in order. Some entries are
# written as pseudo-actions (prefixed RECALL, RUN, WRITE, READ, BROWSE) and are
# followed by an entry holding the output that action would produce.
# NOTE(review): the name suggests these seed the agent's monologue, and `$TASK`
# looks like a placeholder replaced with the real task text by the consumer --
# confirm against the code that reads this list.
INITIAL_THOUGHTS = [
    'I exist!',
    'Hmm...looks like I can type in a command line prompt',
    'Looks like I have a web browser too!',
    "Here's what I want to do: $TASK",
    'How am I going to get there though?',
    'It seems like I have some kind of short term memory.',
    'Each of my thoughts seems to be stored in a JSON array.',
    'It seems whatever I say next will be added as an object to the list.',
    'But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.',
    'Fortunately I have long term memory!',
    'I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!',
    "Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
    "Let's try it out!",
    'RECALL what it is I want to do',
    "Here's what I want to do: $TASK",
    'How am I going to get there though?',
    "Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
    'RUN echo "hello world"',
    'hello world',
    'Cool! I bet I can write files too using the write action.',
    'WRITE echo "console.log(\'hello world\')" > test.js',
    '',
    "I just created test.js. I'll try and run it now.",
    'RUN node test.js',
    'hello world',
    'It works!',
    "I'm going to try reading it now using the read action.",
    'READ test.js',
    "console.log('hello world')",
    'Nice! I can read files too!',
    'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
    "Let's try that...",
    'BROWSE google.com',
    '<form><input type="text"></input><button type="submit"></button></form>',
    'I can browse the web too!',
    'And once I have completed my task, I can use the finish action to stop working.',
    "But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
    'Very cool. Now to accomplish my task.',
    "I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
    'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
    "OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
    'It seems like there might be an existing project here. I should probably start by running `pwd` and `ls` to orient myself.',
]
  122. def get_summarize_monologue_prompt(thoughts: list[dict]):
  123. """
  124. Gets the prompt for summarizing the monologue
  125. Returns:
  126. - str: A formatted string with the current monologue within the prompt
  127. """
  128. return MONOLOGUE_SUMMARY_PROMPT % {
  129. 'monologue': json.dumps({'old_monologue': thoughts}, indent=2),
  130. }
  131. def get_request_action_prompt(
  132. task: str,
  133. thoughts: list[dict],
  134. recent_events: list[dict],
  135. background_commands_obs: list[CmdOutputObservation] | None = None,
  136. ):
  137. """
  138. Gets the action prompt formatted with appropriate values.
  139. Parameters:
  140. - task (str): The current task the agent is trying to accomplish
  141. - thoughts (list[dict]): The agent's current thoughts
  142. - background_commands_obs (list[CmdOutputObservation]): list of all observed background commands running
  143. Returns:
  144. - str: Formatted prompt string with hint, task, monologue, and background commands included
  145. """
  146. if background_commands_obs is None:
  147. background_commands_obs = []
  148. hint = ''
  149. if len(recent_events) > 0:
  150. latest_event = recent_events[-1]
  151. if 'action' in latest_event:
  152. if (
  153. latest_event['action'] == 'message'
  154. and 'source' in latest_event
  155. and latest_event['source'] == 'agent'
  156. ):
  157. hint = (
  158. "You've been thinking a lot lately. Maybe it's time to take action?"
  159. )
  160. elif latest_event['action'] == 'error':
  161. hint = 'Looks like that last command failed. Maybe you need to fix it, or try something else.'
  162. else:
  163. hint = "You're just getting started! What should you do first?"
  164. bg_commands_message = ''
  165. if len(background_commands_obs) > 0:
  166. bg_commands_message = 'The following commands are running in the background:'
  167. for command_obs in background_commands_obs:
  168. bg_commands_message += (
  169. f'\n`{command_obs.command_id}`: {command_obs.command}'
  170. )
  171. bg_commands_message += '\nYou can end any process by sending a `kill` action with the numerical `command_id` above.'
  172. user = 'opendevin' if config.run_as_devin else 'root'
  173. monologue = thoughts + recent_events
  174. return ACTION_PROMPT % {
  175. 'task': task,
  176. 'monologue': json.dumps(monologue, indent=2),
  177. 'background_commands': bg_commands_message,
  178. 'hint': hint,
  179. 'user': user,
  180. 'timeout': config.sandbox_timeout,
  181. 'WORKSPACE_MOUNT_PATH_IN_SANDBOX': config.workspace_mount_path_in_sandbox,
  182. }
  183. def parse_action_response(orig_response: str) -> Action:
  184. """
  185. Parses a string to find an action within it
  186. Parameters:
  187. - response (str): The string to be parsed
  188. Returns:
  189. - Action: The action that was found in the response string
  190. """
  191. # attempt to load the JSON dict from the response
  192. action_dict = json.loads(orig_response)
  193. if 'content' in action_dict:
  194. # The LLM gets confused here. Might as well be robust
  195. action_dict['contents'] = action_dict.pop('content')
  196. return action_from_dict(action_dict)
  197. def parse_summary_response(response: str) -> list[dict]:
  198. """
  199. Parses a summary of the monologue
  200. Parameters:
  201. - response (str): The response string to be parsed
  202. Returns:
  203. - list[dict]: The list of summaries output by the model
  204. """
  205. parsed = json.loads(response)
  206. return parsed['new_monologue']