agent_controller.py 25 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600
  1. import asyncio
  2. import traceback
  3. from typing import Type
  4. from openhands.controller.agent import Agent
  5. from openhands.controller.state.state import State, TrafficControlState
  6. from openhands.controller.stuck import StuckDetector
  7. from openhands.core.config import AgentConfig, LLMConfig
  8. from openhands.core.exceptions import (
  9. LLMMalformedActionError,
  10. LLMNoActionError,
  11. LLMResponseError,
  12. )
  13. from openhands.core.logger import openhands_logger as logger
  14. from openhands.core.schema import AgentState
  15. from openhands.events import EventSource, EventStream, EventStreamSubscriber
  16. from openhands.events.action import (
  17. Action,
  18. ActionConfirmationStatus,
  19. AddTaskAction,
  20. AgentDelegateAction,
  21. AgentFinishAction,
  22. AgentRejectAction,
  23. ChangeAgentStateAction,
  24. CmdRunAction,
  25. IPythonRunCellAction,
  26. MessageAction,
  27. ModifyTaskAction,
  28. NullAction,
  29. )
  30. from openhands.events.event import Event
  31. from openhands.events.observation import (
  32. AgentDelegateObservation,
  33. AgentStateChangedObservation,
  34. CmdOutputObservation,
  35. ErrorObservation,
  36. Observation,
  37. )
  38. from openhands.llm.llm import LLM
  39. from openhands.runtime.utils.shutdown_listener import should_continue
  40. # note: RESUME is only available on web GUI
  41. TRAFFIC_CONTROL_REMINDER = (
  42. "Please click on resume button if you'd like to continue, or start a new task."
  43. )
  44. class AgentController:
  45. id: str
  46. agent: Agent
  47. max_iterations: int
  48. event_stream: EventStream
  49. state: State
  50. confirmation_mode: bool
  51. agent_to_llm_config: dict[str, LLMConfig]
  52. agent_configs: dict[str, AgentConfig]
  53. agent_task: asyncio.Task | None = None
  54. parent: 'AgentController | None' = None
  55. delegate: 'AgentController | None' = None
  56. _pending_action: Action | None = None
  57. def __init__(
  58. self,
  59. agent: Agent,
  60. event_stream: EventStream,
  61. max_iterations: int,
  62. max_budget_per_task: float | None = None,
  63. agent_to_llm_config: dict[str, LLMConfig] | None = None,
  64. agent_configs: dict[str, AgentConfig] | None = None,
  65. sid: str = 'default',
  66. confirmation_mode: bool = False,
  67. initial_state: State | None = None,
  68. is_delegate: bool = False,
  69. headless_mode: bool = True,
  70. ):
  71. """Initializes a new instance of the AgentController class.
  72. Args:
  73. agent: The agent instance to control.
  74. event_stream: The event stream to publish events to.
  75. max_iterations: The maximum number of iterations the agent can run.
  76. max_budget_per_task: The maximum budget (in USD) allowed per task, beyond which the agent will stop.
  77. agent_to_llm_config: A dictionary mapping agent names to LLM configurations in the case that
  78. we delegate to a different agent.
  79. agent_configs: A dictionary mapping agent names to agent configurations in the case that
  80. we delegate to a different agent.
  81. sid: The session ID of the agent.
  82. initial_state: The initial state of the controller.
  83. is_delegate: Whether this controller is a delegate.
  84. headless_mode: Whether the agent is run in headless mode.
  85. """
  86. self._step_lock = asyncio.Lock()
  87. self.id = sid
  88. self.agent = agent
  89. self.headless_mode = headless_mode
  90. # subscribe to the event stream
  91. self.event_stream = event_stream
  92. self.event_stream.subscribe(
  93. EventStreamSubscriber.AGENT_CONTROLLER, self.on_event, append=is_delegate
  94. )
  95. # state from the previous session, state from a parent agent, or a fresh state
  96. self.set_initial_state(
  97. state=initial_state,
  98. max_iterations=max_iterations,
  99. confirmation_mode=confirmation_mode,
  100. )
  101. self.max_budget_per_task = max_budget_per_task
  102. self.agent_to_llm_config = agent_to_llm_config if agent_to_llm_config else {}
  103. self.agent_configs = agent_configs if agent_configs else {}
  104. self._initial_max_iterations = max_iterations
  105. self._initial_max_budget_per_task = max_budget_per_task
  106. # stuck helper
  107. self._stuck_detector = StuckDetector(self.state)
  108. if not is_delegate:
  109. self.agent_task = asyncio.create_task(self._start_step_loop())
  110. async def close(self):
  111. """Closes the agent controller, canceling any ongoing tasks and unsubscribing from the event stream."""
  112. if self.agent_task is not None:
  113. self.agent_task.cancel()
  114. await self.set_agent_state_to(AgentState.STOPPED)
  115. self.event_stream.unsubscribe(EventStreamSubscriber.AGENT_CONTROLLER)
  116. def update_state_before_step(self):
  117. self.state.iteration += 1
  118. self.state.local_iteration += 1
  119. async def update_state_after_step(self):
  120. # update metrics especially for cost
  121. self.state.local_metrics = self.agent.llm.metrics
  122. async def report_error(self, message: str, exception: Exception | None = None):
  123. """Reports an error to the user and sends the exception to the LLM next step, in the hope it can self-correct.
  124. This method should be called for a particular type of errors, which have:
  125. - a user-friendly message, which will be shown in the chat box. This should not be a raw exception message.
  126. - an ErrorObservation that can be sent to the LLM by the agent, with the exception message, so it can self-correct next time.
  127. """
  128. self.state.last_error = message
  129. if exception:
  130. self.state.last_error += f': {exception}'
  131. self.event_stream.add_event(ErrorObservation(message), EventSource.AGENT)
  132. async def _start_step_loop(self):
  133. """The main loop for the agent's step-by-step execution."""
  134. logger.info(f'[Agent Controller {self.id}] Starting step loop...')
  135. while should_continue():
  136. try:
  137. await self._step()
  138. except asyncio.CancelledError:
  139. logger.info('AgentController task was cancelled')
  140. break
  141. except Exception as e:
  142. traceback.print_exc()
  143. logger.error(f'Error while running the agent: {e}')
  144. logger.error(traceback.format_exc())
  145. await self.report_error(
  146. 'There was an unexpected error while running the agent', exception=e
  147. )
  148. await self.set_agent_state_to(AgentState.ERROR)
  149. break
  150. await asyncio.sleep(0.1)
  151. async def on_event(self, event: Event):
  152. """Callback from the event stream. Notifies the controller of incoming events.
  153. Args:
  154. event (Event): The incoming event to process.
  155. """
  156. if isinstance(event, Action):
  157. await self._handle_action(event)
  158. elif isinstance(event, Observation):
  159. await self._handle_observation(event)
  160. async def _handle_action(self, action: Action):
  161. """Handles actions from the event stream.
  162. Args:
  163. action (Action): The action to handle.
  164. """
  165. if isinstance(action, ChangeAgentStateAction):
  166. await self.set_agent_state_to(action.agent_state) # type: ignore
  167. elif isinstance(action, MessageAction):
  168. await self._handle_message_action(action)
  169. elif isinstance(action, AgentDelegateAction):
  170. await self.start_delegate(action)
  171. elif isinstance(action, AddTaskAction):
  172. self.state.root_task.add_subtask(
  173. action.parent, action.goal, action.subtasks
  174. )
  175. elif isinstance(action, ModifyTaskAction):
  176. self.state.root_task.set_subtask_state(action.task_id, action.state)
  177. elif isinstance(action, AgentFinishAction):
  178. self.state.outputs = action.outputs
  179. self.state.metrics.merge(self.state.local_metrics)
  180. await self.set_agent_state_to(AgentState.FINISHED)
  181. elif isinstance(action, AgentRejectAction):
  182. self.state.outputs = action.outputs
  183. self.state.metrics.merge(self.state.local_metrics)
  184. await self.set_agent_state_to(AgentState.REJECTED)
  185. async def _handle_observation(self, observation: Observation):
  186. """Handles observation from the event stream.
  187. Args:
  188. observation (observation): The observation to handle.
  189. """
  190. if (
  191. self._pending_action
  192. and hasattr(self._pending_action, 'is_confirmed')
  193. and self._pending_action.is_confirmed
  194. == ActionConfirmationStatus.AWAITING_CONFIRMATION
  195. ):
  196. return
  197. logger.info(observation, extra={'msg_type': 'OBSERVATION'})
  198. if self._pending_action and self._pending_action.id == observation.cause:
  199. self._pending_action = None
  200. if self.state.agent_state == AgentState.USER_CONFIRMED:
  201. await self.set_agent_state_to(AgentState.RUNNING)
  202. if self.state.agent_state == AgentState.USER_REJECTED:
  203. await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
  204. return
  205. if isinstance(observation, CmdOutputObservation):
  206. return
  207. elif isinstance(observation, AgentDelegateObservation):
  208. self.state.history.on_event(observation)
  209. elif isinstance(observation, ErrorObservation):
  210. if self.state.agent_state == AgentState.ERROR:
  211. self.state.metrics.merge(self.state.local_metrics)
  212. async def _handle_message_action(self, action: MessageAction):
  213. """Handles message actions from the event stream.
  214. Args:
  215. action (MessageAction): The message action to handle.
  216. """
  217. if action.source == EventSource.USER:
  218. logger.info(
  219. action, extra={'msg_type': 'ACTION', 'event_source': EventSource.USER}
  220. )
  221. if self.get_agent_state() != AgentState.RUNNING:
  222. await self.set_agent_state_to(AgentState.RUNNING)
  223. elif action.source == EventSource.AGENT and action.wait_for_response:
  224. await self.set_agent_state_to(AgentState.AWAITING_USER_INPUT)
  225. def reset_task(self):
  226. """Resets the agent's task."""
  227. self.almost_stuck = 0
  228. self.agent.reset()
  229. async def set_agent_state_to(self, new_state: AgentState):
  230. """Updates the agent's state and handles side effects. Can emit events to the event stream.
  231. Args:
  232. new_state (AgentState): The new state to set for the agent.
  233. """
  234. logger.debug(
  235. f'[Agent Controller {self.id}] Setting agent({self.agent.name}) state from {self.state.agent_state} to {new_state}'
  236. )
  237. if new_state == self.state.agent_state:
  238. return
  239. if new_state == AgentState.STOPPED or new_state == AgentState.ERROR:
  240. self.reset_task()
  241. elif (
  242. new_state == AgentState.RUNNING
  243. and self.state.agent_state == AgentState.PAUSED
  244. and self.state.traffic_control_state == TrafficControlState.THROTTLING
  245. ):
  246. # user intends to interrupt traffic control and let the task resume temporarily
  247. self.state.traffic_control_state = TrafficControlState.PAUSED
  248. # User has chosen to deliberately continue - lets double the max iterations
  249. if (
  250. self.state.iteration is not None
  251. and self.state.max_iterations is not None
  252. and self._initial_max_iterations is not None
  253. ):
  254. if self.state.iteration >= self.state.max_iterations:
  255. self.state.max_iterations += self._initial_max_iterations
  256. if (
  257. self.state.metrics.accumulated_cost is not None
  258. and self.max_budget_per_task is not None
  259. and self._initial_max_budget_per_task is not None
  260. ):
  261. if self.state.metrics.accumulated_cost >= self.max_budget_per_task:
  262. self.max_budget_per_task += self._initial_max_budget_per_task
  263. elif self._pending_action is not None and (
  264. new_state == AgentState.USER_CONFIRMED
  265. or new_state == AgentState.USER_REJECTED
  266. ):
  267. if hasattr(self._pending_action, 'thought'):
  268. self._pending_action.thought = '' # type: ignore[union-attr]
  269. if new_state == AgentState.USER_CONFIRMED:
  270. self._pending_action.is_confirmed = ActionConfirmationStatus.CONFIRMED # type: ignore[attr-defined]
  271. else:
  272. self._pending_action.is_confirmed = ActionConfirmationStatus.REJECTED # type: ignore[attr-defined]
  273. self.event_stream.add_event(self._pending_action, EventSource.AGENT)
  274. self.state.agent_state = new_state
  275. self.event_stream.add_event(
  276. AgentStateChangedObservation('', self.state.agent_state), EventSource.AGENT
  277. )
  278. if new_state == AgentState.INIT and self.state.resume_state:
  279. await self.set_agent_state_to(self.state.resume_state)
  280. self.state.resume_state = None
  281. def get_agent_state(self):
  282. """Returns the current state of the agent.
  283. Returns:
  284. AgentState: The current state of the agent.
  285. """
  286. return self.state.agent_state
  287. async def start_delegate(self, action: AgentDelegateAction):
  288. """Start a delegate agent to handle a subtask.
  289. OpenHands is a multi-agentic system. A `task` is a conversation between
  290. OpenHands (the whole system) and the user, which might involve one or more inputs
  291. from the user. It starts with an initial input (typically a task statement) from
  292. the user, and ends with either an `AgentFinishAction` initiated by the agent, a
  293. stop initiated by the user, or an error.
  294. A `subtask` is a conversation between an agent and the user, or another agent. If a `task`
  295. is conducted by a single agent, then it's also a `subtask`. Otherwise, a `task` consists of
  296. multiple `subtasks`, each executed by one agent.
  297. Args:
  298. action (AgentDelegateAction): The action containing information about the delegate agent to start.
  299. """
  300. agent_cls: Type[Agent] = Agent.get_cls(action.agent)
  301. agent_config = self.agent_configs.get(action.agent, self.agent.config)
  302. llm_config = self.agent_to_llm_config.get(action.agent, self.agent.llm.config)
  303. llm = LLM(config=llm_config)
  304. delegate_agent = agent_cls(llm=llm, config=agent_config)
  305. state = State(
  306. inputs=action.inputs or {},
  307. local_iteration=0,
  308. iteration=self.state.iteration,
  309. max_iterations=self.state.max_iterations,
  310. delegate_level=self.state.delegate_level + 1,
  311. # global metrics should be shared between parent and child
  312. metrics=self.state.metrics,
  313. )
  314. logger.info(
  315. f'[Agent Controller {self.id}]: start delegate, creating agent {delegate_agent.name} using LLM {llm}'
  316. )
  317. self.delegate = AgentController(
  318. sid=self.id + '-delegate',
  319. agent=delegate_agent,
  320. event_stream=self.event_stream,
  321. max_iterations=self.state.max_iterations,
  322. max_budget_per_task=self.max_budget_per_task,
  323. agent_to_llm_config=self.agent_to_llm_config,
  324. agent_configs=self.agent_configs,
  325. initial_state=state,
  326. is_delegate=True,
  327. headless_mode=self.headless_mode,
  328. )
  329. await self.delegate.set_agent_state_to(AgentState.RUNNING)
  330. async def _step(self) -> None:
  331. """Executes a single step of the parent or delegate agent. Detects stuck agents and limits on the number of iterations and the task budget."""
  332. if self.get_agent_state() != AgentState.RUNNING:
  333. await asyncio.sleep(1)
  334. return
  335. if self._pending_action:
  336. await asyncio.sleep(1)
  337. return
  338. if self.delegate is not None:
  339. assert self.delegate != self
  340. if self.delegate.get_agent_state() == AgentState.PAUSED:
  341. await asyncio.sleep(1)
  342. else:
  343. await self._delegate_step()
  344. return
  345. logger.info(
  346. f'{self.agent.name} LEVEL {self.state.delegate_level} LOCAL STEP {self.state.local_iteration} GLOBAL STEP {self.state.iteration}',
  347. extra={'msg_type': 'STEP'},
  348. )
  349. # check if agent hit the resources limit
  350. stop_step = False
  351. if self.state.iteration >= self.state.max_iterations:
  352. stop_step = await self._handle_traffic_control(
  353. 'iteration', self.state.iteration, self.state.max_iterations
  354. )
  355. if self.max_budget_per_task is not None:
  356. current_cost = self.state.metrics.accumulated_cost
  357. if current_cost > self.max_budget_per_task:
  358. stop_step = await self._handle_traffic_control(
  359. 'budget', current_cost, self.max_budget_per_task
  360. )
  361. if stop_step:
  362. return
  363. self.update_state_before_step()
  364. action: Action = NullAction()
  365. try:
  366. action = self.agent.step(self.state)
  367. if action is None:
  368. raise LLMNoActionError('No action was returned')
  369. except (LLMMalformedActionError, LLMNoActionError, LLMResponseError) as e:
  370. # report to the user
  371. # and send the underlying exception to the LLM for self-correction
  372. await self.report_error(str(e))
  373. return
  374. if action.runnable:
  375. if self.state.confirmation_mode and (
  376. type(action) is CmdRunAction or type(action) is IPythonRunCellAction
  377. ):
  378. action.is_confirmed = ActionConfirmationStatus.AWAITING_CONFIRMATION
  379. self._pending_action = action
  380. if not isinstance(action, NullAction):
  381. if (
  382. hasattr(action, 'is_confirmed')
  383. and action.is_confirmed
  384. == ActionConfirmationStatus.AWAITING_CONFIRMATION
  385. ):
  386. await self.set_agent_state_to(AgentState.AWAITING_USER_CONFIRMATION)
  387. self.event_stream.add_event(action, EventSource.AGENT)
  388. await self.update_state_after_step()
  389. logger.info(action, extra={'msg_type': 'ACTION'})
  390. if self._is_stuck():
  391. await self.report_error('Agent got stuck in a loop')
  392. await self.set_agent_state_to(AgentState.ERROR)
  393. async def _delegate_step(self):
  394. """Executes a single step of the delegate agent."""
  395. logger.debug(f'[Agent Controller {self.id}] Delegate not none, awaiting...')
  396. await self.delegate._step() # type: ignore[union-attr]
  397. logger.debug(f'[Agent Controller {self.id}] Delegate step done')
  398. assert self.delegate is not None
  399. delegate_state = self.delegate.get_agent_state()
  400. logger.debug(f'[Agent Controller {self.id}] Delegate state: {delegate_state}')
  401. if delegate_state == AgentState.ERROR:
  402. # update iteration that shall be shared across agents
  403. self.state.iteration = self.delegate.state.iteration
  404. # close the delegate upon error
  405. await self.delegate.close()
  406. self.delegate = None
  407. self.delegateAction = None
  408. await self.report_error('Delegator agent encountered an error')
  409. elif delegate_state in (AgentState.FINISHED, AgentState.REJECTED):
  410. logger.info(
  411. f'[Agent Controller {self.id}] Delegate agent has finished execution'
  412. )
  413. # retrieve delegate result
  414. outputs = self.delegate.state.outputs if self.delegate.state else {}
  415. # update iteration that shall be shared across agents
  416. self.state.iteration = self.delegate.state.iteration
  417. # close delegate controller: we must close the delegate controller before adding new events
  418. await self.delegate.close()
  419. # update delegate result observation
  420. # TODO: replace this with AI-generated summary (#2395)
  421. formatted_output = ', '.join(
  422. f'{key}: {value}' for key, value in outputs.items()
  423. )
  424. content = (
  425. f'{self.delegate.agent.name} finishes task with {formatted_output}'
  426. )
  427. obs: Observation = AgentDelegateObservation(
  428. outputs=outputs, content=content
  429. )
  430. # clean up delegate status
  431. self.delegate = None
  432. self.delegateAction = None
  433. self.event_stream.add_event(obs, EventSource.AGENT)
  434. return
  435. async def _handle_traffic_control(
  436. self, limit_type: str, current_value: float, max_value: float
  437. ):
  438. """Handles agent state after hitting the traffic control limit.
  439. Args:
  440. limit_type (str): The type of limit that was hit.
  441. current_value (float): The current value of the limit.
  442. max_value (float): The maximum value of the limit.
  443. """
  444. stop_step = False
  445. if self.state.traffic_control_state == TrafficControlState.PAUSED:
  446. logger.info('Hitting traffic control, temporarily resume upon user request')
  447. self.state.traffic_control_state = TrafficControlState.NORMAL
  448. else:
  449. self.state.traffic_control_state = TrafficControlState.THROTTLING
  450. if self.headless_mode:
  451. # set to ERROR state if running in headless mode
  452. # since user cannot resume on the web interface
  453. await self.report_error(
  454. f'Agent reached maximum {limit_type} in headless mode, task stopped. '
  455. f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}'
  456. )
  457. await self.set_agent_state_to(AgentState.ERROR)
  458. else:
  459. await self.report_error(
  460. f'Agent reached maximum {limit_type}, task paused. '
  461. f'Current {limit_type}: {current_value:.2f}, max {limit_type}: {max_value:.2f}. '
  462. f'{TRAFFIC_CONTROL_REMINDER}'
  463. )
  464. await self.set_agent_state_to(AgentState.PAUSED)
  465. stop_step = True
  466. return stop_step
  467. def get_state(self):
  468. """Returns the current running state object.
  469. Returns:
  470. State: The current state object.
  471. """
  472. return self.state
  473. def set_initial_state(
  474. self,
  475. state: State | None,
  476. max_iterations: int,
  477. confirmation_mode: bool = False,
  478. ):
  479. """Sets the initial state for the agent, either from the previous session, or from a parent agent, or by creating a new one.
  480. Args:
  481. state: The state to initialize with, or None to create a new state.
  482. max_iterations: The maximum number of iterations allowed for the task.
  483. confirmation_mode: Whether to enable confirmation mode.
  484. """
  485. # state from the previous session, state from a parent agent, or a new state
  486. # note that this is called twice when restoring a previous session, first with state=None
  487. if state is None:
  488. self.state = State(
  489. inputs={},
  490. max_iterations=max_iterations,
  491. confirmation_mode=confirmation_mode,
  492. )
  493. else:
  494. self.state = state
  495. # when restored from a previous session, the State object will have history, start_id, and end_id
  496. # connect it to the event stream
  497. self.state.history.set_event_stream(self.event_stream)
  498. # if start_id was not set in State, we're starting fresh, at the top of the stream
  499. start_id = self.state.start_id
  500. if start_id == -1:
  501. start_id = self.event_stream.get_latest_event_id() + 1
  502. else:
  503. logger.debug(f'AgentController {self.id} restoring from event {start_id}')
  504. # make sure history is in sync
  505. self.state.start_id = start_id
  506. self.state.history.start_id = start_id
  507. # if there was an end_id saved in State, set it in history
  508. # currently not used, later useful for delegates
  509. if self.state.end_id > -1:
  510. self.state.history.end_id = self.state.end_id
  511. def _is_stuck(self):
  512. """Checks if the agent or its delegate is stuck in a loop.
  513. Returns:
  514. bool: True if the agent is stuck, False otherwise.
  515. """
  516. # check if delegate stuck
  517. if self.delegate and self.delegate._is_stuck():
  518. return True
  519. return self._stuck_detector.is_stuck()
  520. def __repr__(self):
  521. return (
  522. f'AgentController(id={self.id}, agent={self.agent!r}, '
  523. f'event_stream={self.event_stream!r}, '
  524. f'state={self.state!r}, agent_task={self.agent_task!r}, '
  525. f'delegate={self.delegate!r}, _pending_action={self._pending_action!r})'
  526. )