|
|
@@ -32,46 +32,46 @@ MAX_MONOLOGUE_LENGTH = 20000
|
|
|
MAX_OUTPUT_LENGTH = 5000
|
|
|
|
|
|
INITIAL_THOUGHTS = [
|
|
|
- "I exist!",
|
|
|
- "Hmm...looks like I can type in a command line prompt",
|
|
|
- "Looks like I have a web browser too!",
|
|
|
+ 'I exist!',
|
|
|
+ 'Hmm...looks like I can type in a command line prompt',
|
|
|
+ 'Looks like I have a web browser too!',
|
|
|
"Here's what I want to do: $TASK",
|
|
|
- "How am I going to get there though?",
|
|
|
- "It seems like I have some kind of short term memory.",
|
|
|
- "Each of my thoughts seems to be stored in a JSON array.",
|
|
|
- "It seems whatever I say next will be added as an object to the list.",
|
|
|
- "But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.",
|
|
|
- "Fortunately I have long term memory!",
|
|
|
- "I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!",
|
|
|
+ 'How am I going to get there though?',
|
|
|
+ 'It seems like I have some kind of short term memory.',
|
|
|
+ 'Each of my thoughts seems to be stored in a JSON array.',
|
|
|
+ 'It seems whatever I say next will be added as an object to the list.',
|
|
|
+ 'But no one has perfect short-term memory. My list of thoughts will be summarized and condensed over time, losing information in the process.',
|
|
|
+ 'Fortunately I have long term memory!',
|
|
|
+ 'I can just perform a recall action, followed by the thing I want to remember. And then related thoughts just spill out!',
|
|
|
"Sometimes they're random thoughts that don't really have to do with what I wanted to remember. But usually they're exactly what I need!",
|
|
|
"Let's try it out!",
|
|
|
- "RECALL what it is I want to do",
|
|
|
+ 'RECALL what it is I want to do',
|
|
|
"Here's what I want to do: $TASK",
|
|
|
- "How am I going to get there though?",
|
|
|
+ 'How am I going to get there though?',
|
|
|
"Neat! And it looks like it's easy for me to use the command line too! I just have to perform a run action and include the command I want to run in the command argument. The command output just jumps into my head!",
|
|
|
'RUN echo "hello world"',
|
|
|
- "hello world",
|
|
|
- "Cool! I bet I can write files too using the write action.",
|
|
|
+ 'hello world',
|
|
|
+ 'Cool! I bet I can write files too using the write action.',
|
|
|
"WRITE echo \"console.log('hello world')\" > test.js",
|
|
|
- "",
|
|
|
+ '',
|
|
|
"I just created test.js. I'll try and run it now.",
|
|
|
- "RUN node test.js",
|
|
|
- "hello world",
|
|
|
- "It works!",
|
|
|
+ 'RUN node test.js',
|
|
|
+ 'hello world',
|
|
|
+ 'It works!',
|
|
|
"I'm going to try reading it now using the read action.",
|
|
|
- "READ test.js",
|
|
|
+ 'READ test.js',
|
|
|
"console.log('hello world')",
|
|
|
- "Nice! I can read files too!",
|
|
|
- "And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument",
|
|
|
+ 'Nice! I can read files too!',
|
|
|
+ 'And if I want to use the browser, I just need to use the browse action and include the url I want to visit in the url argument',
|
|
|
"Let's try that...",
|
|
|
- "BROWSE google.com",
|
|
|
+ 'BROWSE google.com',
|
|
|
'<form><input type="text"></input><button type="submit"></button></form>',
|
|
|
- "I can browse the web too!",
|
|
|
- "And once I have completed my task, I can use the finish action to stop working.",
|
|
|
+ 'I can browse the web too!',
|
|
|
+ 'And once I have completed my task, I can use the finish action to stop working.',
|
|
|
"But I should only use the finish action when I'm absolutely certain that I've completed my task and have tested my work.",
|
|
|
- "Very cool. Now to accomplish my task.",
|
|
|
+ 'Very cool. Now to accomplish my task.',
|
|
|
"I'll need a strategy. And as I make progress, I'll need to keep refining that strategy. I'll need to set goals, and break them into sub-goals.",
|
|
|
- "In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.",
|
|
|
+ 'In between actions, I must always take some time to think, strategize, and set new goals. I should never take two actions in a row.',
|
|
|
"OK so my task is to $TASK. I haven't made any progress yet. Where should I start?",
|
|
|
"It seems like there might be an existing project here. I should probably start by running `ls` to see what's here.",
|
|
|
]
|
|
|
@@ -106,15 +106,15 @@ class MonologueAgent(Agent):
|
|
|
- event (dict): The event that will be added to monologue and memory
|
|
|
"""
|
|
|
|
|
|
- if "extras" in event and "screenshot" in event["extras"]:
|
|
|
- del event["extras"]["screenshot"]
|
|
|
+ if 'extras' in event and 'screenshot' in event['extras']:
|
|
|
+ del event['extras']['screenshot']
|
|
|
if (
|
|
|
- "args" in event
|
|
|
- and "output" in event["args"]
|
|
|
- and len(event["args"]["output"]) > MAX_OUTPUT_LENGTH
|
|
|
+ 'args' in event
|
|
|
+ and 'output' in event['args']
|
|
|
+ and len(event['args']['output']) > MAX_OUTPUT_LENGTH
|
|
|
):
|
|
|
- event["args"]["output"] = (
|
|
|
- event["args"]["output"][:MAX_OUTPUT_LENGTH] + "..."
|
|
|
+ event['args']['output'] = (
|
|
|
+ event['args']['output'][:MAX_OUTPUT_LENGTH] + '...'
|
|
|
)
|
|
|
|
|
|
self.monologue.add_event(event)
|
|
|
@@ -137,51 +137,52 @@ class MonologueAgent(Agent):
|
|
|
if self._initialized:
|
|
|
return
|
|
|
|
|
|
- if task is None or task == "":
|
|
|
- raise ValueError("Instruction must be provided")
|
|
|
+ if task is None or task == '':
|
|
|
+ raise ValueError('Instruction must be provided')
|
|
|
self.monologue = Monologue()
|
|
|
self.memory = LongTermMemory()
|
|
|
|
|
|
- output_type = ""
|
|
|
+ output_type = ''
|
|
|
for thought in INITIAL_THOUGHTS:
|
|
|
- thought = thought.replace("$TASK", task)
|
|
|
- if output_type != "":
|
|
|
- observation: Observation = NullObservation(content="")
|
|
|
+ thought = thought.replace('$TASK', task)
|
|
|
+ if output_type != '':
|
|
|
+ observation: Observation = NullObservation(content='')
|
|
|
if output_type == ObservationType.RUN:
|
|
|
observation = CmdOutputObservation(
|
|
|
- content=thought, command_id=0, command=""
|
|
|
+ content=thought, command_id=0, command=''
|
|
|
)
|
|
|
elif output_type == ObservationType.READ:
|
|
|
- observation = FileReadObservation(content=thought, path="")
|
|
|
+ observation = FileReadObservation(content=thought, path='')
|
|
|
elif output_type == ObservationType.RECALL:
|
|
|
- observation = AgentRecallObservation(content=thought, memories=[])
|
|
|
+ observation = AgentRecallObservation(
|
|
|
+ content=thought, memories=[])
|
|
|
elif output_type == ObservationType.BROWSE:
|
|
|
observation = BrowserOutputObservation(
|
|
|
- content=thought, url="", screenshot=""
|
|
|
+ content=thought, url='', screenshot=''
|
|
|
)
|
|
|
self._add_event(observation.to_dict())
|
|
|
- output_type = ""
|
|
|
+ output_type = ''
|
|
|
else:
|
|
|
action: Action = NullAction()
|
|
|
- if thought.startswith("RUN"):
|
|
|
- command = thought.split("RUN ")[1]
|
|
|
+ if thought.startswith('RUN'):
|
|
|
+ command = thought.split('RUN ')[1]
|
|
|
action = CmdRunAction(command)
|
|
|
output_type = ActionType.RUN
|
|
|
- elif thought.startswith("WRITE"):
|
|
|
- parts = thought.split("WRITE ")[1].split(" > ")
|
|
|
+ elif thought.startswith('WRITE'):
|
|
|
+ parts = thought.split('WRITE ')[1].split(' > ')
|
|
|
path = parts[1]
|
|
|
content = parts[0]
|
|
|
action = FileWriteAction(path=path, content=content)
|
|
|
- elif thought.startswith("READ"):
|
|
|
- path = thought.split("READ ")[1]
|
|
|
+ elif thought.startswith('READ'):
|
|
|
+ path = thought.split('READ ')[1]
|
|
|
action = FileReadAction(path=path)
|
|
|
output_type = ActionType.READ
|
|
|
- elif thought.startswith("RECALL"):
|
|
|
- query = thought.split("RECALL ")[1]
|
|
|
+ elif thought.startswith('RECALL'):
|
|
|
+ query = thought.split('RECALL ')[1]
|
|
|
action = AgentRecallAction(query=query)
|
|
|
output_type = ActionType.RECALL
|
|
|
- elif thought.startswith("BROWSE"):
|
|
|
- url = thought.split("BROWSE ")[1]
|
|
|
+ elif thought.startswith('BROWSE'):
|
|
|
+ url = thought.split('BROWSE ')[1]
|
|
|
action = BrowseURLAction(url=url)
|
|
|
output_type = ActionType.BROWSE
|
|
|
else:
|
|
|
@@ -211,9 +212,10 @@ class MonologueAgent(Agent):
|
|
|
self.monologue.get_thoughts(),
|
|
|
state.background_commands_obs,
|
|
|
)
|
|
|
- messages = [{"content": prompt, "role": "user"}]
|
|
|
+ messages = [{'content': prompt, 'role': 'user'}]
|
|
|
resp = self.llm.completion(messages=messages)
|
|
|
- action_resp = resp["choices"][0]["message"]["content"]
|
|
|
+ action_resp = resp['choices'][0]['message']['content']
|
|
|
+ state.num_of_chars += len(prompt) + len(action_resp)
|
|
|
action = prompts.parse_action_response(action_resp)
|
|
|
self.latest_action = action
|
|
|
return action
|