1 год назад · 8dee334236
--- a/openhands/controller/agent_controller.py
+++ b/openhands/controller/agent_controller.py
@@ -5,6 +5,7 @@ import traceback
 
															 from typing import Callable, ClassVar, Type
														
 
															 import litellm
														
 
															+from litellm.exceptions import ContextWindowExceededError
														
 
															 from openhands.controller.agent import Agent
														
 
															 from openhands.controller.state.state import State, TrafficControlState
														
@@ -485,6 +486,15 @@ class AgentController:
 
															                 EventSource.AGENT,
														
 
															             )
														
 
															             return
														
 
															+        except ContextWindowExceededError:
														
 
															+            # When context window is exceeded, keep roughly half of agent interactions
														
 
															+            self.state.history = self._apply_conversation_window(self.state.history)
														
 
															+
														
 
															+            # Save the ID of the first event in our truncated history for future reloading
														
 
															+            if self.state.history:
														
 
															+                self.state.start_id = self.state.history[0].id
														
 
															+            # Don't add error event - let the agent retry with reduced context
														
 
															+            return
														
 
															         if action.runnable:
														
 
															             if self.state.confirmation_mode and (
														
@@ -659,6 +669,12 @@ class AgentController:
 
															         - For delegate events (between AgentDelegateAction and AgentDelegateObservation):
														
 
															             - Excludes all events between the action and observation
														
 
															             - Includes the delegate action and observation themselves
														
 
															+
														
 
															+        The history is loaded in two parts if truncation_id is set:
														
 
															+        1. First user message from start_id onwards
														
 
															+        2. Rest of history from truncation_id to the end
														
 
															+
														
 
															+        Otherwise loads normally from start_id.
														
 
															         """
														
 
															         # define range of events to fetch
														
@@ -680,8 +696,33 @@ class AgentController:
 
															             self.state.history = []
														
 
															             return
														
 
															-        # Get all events, filtering out backend events and hidden events
														
 
															-        events = list(
														
 
															+        events: list[Event] = []
														
 
															+
														
 
															+        # If we have a truncation point, get first user message and then rest of history
														
 
															+        if hasattr(self.state, 'truncation_id') and self.state.truncation_id > 0:
														
 
															+            # Find first user message from stream
														
 
															+            first_user_msg = next(
														
 
															+                (
														
 
															+                    e
														
 
															+                    for e in self.event_stream.get_events(
														
 
															+                        start_id=start_id,
														
 
															+                        end_id=end_id,
														
 
															+                        reverse=False,
														
 
															+                        filter_out_type=self.filter_out,
														
 
															+                        filter_hidden=True,
														
 
															+                    )
														
 
															+                    if isinstance(e, MessageAction) and e.source == EventSource.USER
														
 
															+                ),
														
 
															+                None,
														
 
															+            )
														
 
															+            if first_user_msg:
														
 
															+                events.append(first_user_msg)
														
 
															+
														
 
															+            # the rest of the events are from the truncation point
														
 
															+            start_id = self.state.truncation_id
														
 
															+
														
 
															+        # Get rest of history
														
 
															+        events_to_add = list(
														
 
															             self.event_stream.get_events(
														
 
															                 start_id=start_id,
														
 
															                 end_id=end_id,
														
@@ -690,6 +731,7 @@ class AgentController:
 
															                 filter_hidden=True,
														
 
															             )
														
 
															         )
														
 
															+        events.extend(events_to_add)
														
 
															         # Find all delegate action/observation pairs
														
 
															         delegate_ranges: list[tuple[int, int]] = []
														
@@ -744,6 +786,92 @@ class AgentController:
 
															         # make sure history is in sync
														
 
															         self.state.start_id = start_id
														
 
															+    def _apply_conversation_window(self, events: list[Event]) -> list[Event]:
														
 
															+        """Cuts history roughly in half when context window is exceeded, preserving action-observation pairs
														
 
															+        and ensuring the first user message is always included.
														
 
															+
														
 
															+        The algorithm:
														
 
															+        1. Cut history in half
														
 
															+        2. Check first event in new history:
														
 
															+           - If Observation: find and include its Action
														
 
															+           - If MessageAction: ensure its related Action-Observation pair isn't split
														
 
															+        3. Always include the first user message
														
 
															+
														
 
															+        Args:
														
 
															+            events: List of events to filter
														
 
															+
														
 
															+        Returns:
														
 
															+            Filtered list of events keeping newest half while preserving pairs
														
 
															+        """
														
 
															+        if not events:
														
 
															+            return events
														
 
															+
														
 
															+        # Find first user message - we'll need to ensure it's included
														
 
															+        first_user_msg = next(
														
 
															+            (
														
 
															+                e
														
 
															+                for e in events
														
 
															+                if isinstance(e, MessageAction) and e.source == EventSource.USER
														
 
															+            ),
														
 
															+            None,
														
 
															+        )
														
 
															+
														
 
															+        # cut in half
														
 
															+        mid_point = max(1, len(events) // 2)
														
 
															+        kept_events = events[mid_point:]
														
 
															+
														
 
															+        # Handle first event in truncated history
														
 
															+        if kept_events:
														
 
															+            i = 0
														
 
															+            while i < len(kept_events):
														
 
															+                first_event = kept_events[i]
														
 
															+                if isinstance(first_event, Observation) and first_event.cause:
														
 
															+                    # Find its action and include it
														
 
															+                    matching_action = next(
														
 
															+                        (
														
 
															+                            e
														
 
															+                            for e in reversed(events[:mid_point])
														
 
															+                            if isinstance(e, Action) and e.id == first_event.cause
														
 
															+                        ),
														
 
															+                        None,
														
 
															+                    )
														
 
															+                    if matching_action:
														
 
															+                        kept_events = [matching_action] + kept_events
														
 
															+                    else:
														
 
															+                        self.log(
														
 
															+                            'warning',
														
 
															+                            f'Found Observation without matching Action at id={first_event.id}',
														
 
															+                        )
														
 
															+                        # drop this observation
														
 
															+                        kept_events = kept_events[1:]
														
 
															+                    break
														
 
															+
														
 
															+                elif isinstance(first_event, MessageAction) or (
														
 
															+                    isinstance(first_event, Action)
														
 
															+                    and first_event.source == EventSource.USER
														
 
															+                ):
														
 
															+                    # if it's a message action or a user action, keep it and continue to find the next event
														
 
															+                    i += 1
														
 
															+                    continue
														
 
															+
														
 
															+                else:
														
 
															+                    # if it's an action with source == EventSource.AGENT, we're good
														
 
															+                    break
														
 
															+
														
 
															+        # Save where to continue from in next reload
														
 
															+        if kept_events:
														
 
															+            self.state.truncation_id = kept_events[0].id
														
 
															+
														
 
															+        # Ensure first user message is included
														
 
															+        if first_user_msg and first_user_msg not in kept_events:
														
 
															+            kept_events = [first_user_msg] + kept_events
														
 
															+
														
 
															+        # start_id points to first user message
														
 
															+        if first_user_msg:
														
 
															+            self.state.start_id = first_user_msg.id
														
 
															+
														
 
															+        return kept_events
														
 
															+
														
 
															     def _is_stuck(self):
														
 
															         """Checks if the agent or its delegate is stuck in a loop.
														
--- a/openhands/controller/state/state.py
+++ b/openhands/controller/state/state.py
@@ -92,6 +92,8 @@ class State:
 
															     # start_id and end_id track the range of events in history
														
 
															     start_id: int = -1
														
 
															     end_id: int = -1
														
 
															+    # truncation_id tracks where to load history after context window truncation
														
 
															+    truncation_id: int = -1
														
 
															     almost_stuck: int = 0
														
 
															     delegates: dict[tuple[int, int], tuple[str, str]] = field(default_factory=dict)
														
 
															     # NOTE: This will never be used by the controller, but it can be used by different
														
--- a/tests/unit/test_truncation.py
+++ b/tests/unit/test_truncation.py
@@ -0,0 +1,188 @@
 
															+from unittest.mock import MagicMock
														
 
															+
														
 
															+import pytest
														
 
															+
														
 
															+from openhands.controller.agent_controller import AgentController
														
 
															+from openhands.events import EventSource
														
 
															+from openhands.events.action import CmdRunAction, MessageAction
														
 
															+from openhands.events.observation import CmdOutputObservation
														
 
															+
														
 
															+
														
 
															+@pytest.fixture
														
 
															+def mock_event_stream():
														
 
															+    stream = MagicMock()
														
 
															+    # Mock get_events to return an empty list by default
														
 
															+    stream.get_events.return_value = []
														
 
															+    return stream
														
 
															+
														
 
															+
														
 
															+@pytest.fixture
														
 
															+def mock_agent():
														
 
															+    agent = MagicMock()
														
 
															+    agent.llm = MagicMock()
														
 
															+    agent.llm.config = MagicMock()
														
 
															+    return agent
														
 
															+
														
 
															+
														
 
															+class TestTruncation:
														
 
															+    def test_apply_conversation_window_basic(self, mock_event_stream, mock_agent):
														
 
															+        controller = AgentController(
														
 
															+            agent=mock_agent,
														
 
															+            event_stream=mock_event_stream,
														
 
															+            max_iterations=10,
														
 
															+            sid='test_truncation',
														
 
															+            confirmation_mode=False,
														
 
															+            headless_mode=True,
														
 
															+        )
														
 
															+
														
 
															+        # Create a sequence of events with IDs
														
 
															+        first_msg = MessageAction(content='Hello, start task', wait_for_response=False)
														
 
															+        first_msg._source = EventSource.USER
														
 
															+        first_msg._id = 1
														
 
															+
														
 
															+        cmd1 = CmdRunAction(command='ls')
														
 
															+        cmd1._id = 2
														
 
															+        obs1 = CmdOutputObservation(command='ls', content='file1.txt', command_id=2)
														
 
															+        obs1._id = 3
														
 
															+        obs1._cause = 2
														
 
															+
														
 
															+        cmd2 = CmdRunAction(command='pwd')
														
 
															+        cmd2._id = 4
														
 
															+        obs2 = CmdOutputObservation(command='pwd', content='/home', command_id=4)
														
 
															+        obs2._id = 5
														
 
															+        obs2._cause = 4
														
 
															+
														
 
															+        events = [first_msg, cmd1, obs1, cmd2, obs2]
														
 
															+
														
 
															+        # Apply truncation
														
 
															+        truncated = controller._apply_conversation_window(events)
														
 
															+
														
 
															+        # Should keep first user message and roughly half of other events
														
 
															+        assert (
														
 
															+            len(truncated) >= 3
														
 
															+        )  # First message + at least one action-observation pair
														
 
															+        assert truncated[0] == first_msg  # First message always preserved
														
 
															+        assert controller.state.start_id == first_msg._id
														
 
															+        assert controller.state.truncation_id is not None
														
 
															+
														
 
															+        # Verify pairs aren't split
														
 
															+        for i, event in enumerate(truncated[1:]):
														
 
															+            if isinstance(event, CmdOutputObservation):
														
 
															+                assert any(e._id == event._cause for e in truncated[: i + 1])
														
 
															+
														
 
															+    def test_context_window_exceeded_handling(self, mock_event_stream, mock_agent):
														
 
															+        controller = AgentController(
														
 
															+            agent=mock_agent,
														
 
															+            event_stream=mock_event_stream,
														
 
															+            max_iterations=10,
														
 
															+            sid='test_truncation',
														
 
															+            confirmation_mode=False,
														
 
															+            headless_mode=True,
														
 
															+        )
														
 
															+
														
 
															+        # Setup initial history with IDs
														
 
															+        first_msg = MessageAction(content='Start task', wait_for_response=False)
														
 
															+        first_msg._source = EventSource.USER
														
 
															+        first_msg._id = 1
														
 
															+
														
 
															+        # Add agent question
														
 
															+        agent_msg = MessageAction(
														
 
															+            content='What task would you like me to perform?', wait_for_response=True
														
 
															+        )
														
 
															+        agent_msg._source = EventSource.AGENT
														
 
															+        agent_msg._id = 2
														
 
															+
														
 
															+        # Add user response
														
 
															+        user_response = MessageAction(
														
 
															+            content='Please list all files and show me current directory',
														
 
															+            wait_for_response=False,
														
 
															+        )
														
 
															+        user_response._source = EventSource.USER
														
 
															+        user_response._id = 3
														
 
															+
														
 
															+        cmd1 = CmdRunAction(command='ls')
														
 
															+        cmd1._id = 4
														
 
															+        obs1 = CmdOutputObservation(command='ls', content='file1.txt', command_id=4)
														
 
															+        obs1._id = 5
														
 
															+        obs1._cause = 4
														
 
															+
														
 
															+        # Update mock event stream to include new messages
														
 
															+        mock_event_stream.get_events.return_value = [
														
 
															+            first_msg,
														
 
															+            agent_msg,
														
 
															+            user_response,
														
 
															+            cmd1,
														
 
															+            obs1,
														
 
															+        ]
														
 
															+        controller.state.history = [first_msg, agent_msg, user_response, cmd1, obs1]
														
 
															+        original_history_len = len(controller.state.history)
														
 
															+
														
 
															+        # Simulate ContextWindowExceededError and truncation
														
 
															+        controller.state.history = controller._apply_conversation_window(
														
 
															+            controller.state.history
														
 
															+        )
														
 
															+
														
 
															+        # Verify truncation occurred
														
 
															+        assert len(controller.state.history) < original_history_len
														
 
															+        assert controller.state.start_id == first_msg._id
														
 
															+        assert controller.state.truncation_id is not None
														
 
															+        assert controller.state.truncation_id > controller.state.start_id
														
 
															+
														
 
															+    def test_history_restoration_after_truncation(self, mock_event_stream, mock_agent):
														
 
															+        controller = AgentController(
														
 
															+            agent=mock_agent,
														
 
															+            event_stream=mock_event_stream,
														
 
															+            max_iterations=10,
														
 
															+            sid='test_truncation',
														
 
															+            confirmation_mode=False,
														
 
															+            headless_mode=True,
														
 
															+        )
														
 
															+
														
 
															+        # Create events with IDs
														
 
															+        first_msg = MessageAction(content='Start task', wait_for_response=False)
														
 
															+        first_msg._source = EventSource.USER
														
 
															+        first_msg._id = 1
														
 
															+
														
 
															+        events = [first_msg]
														
 
															+        for i in range(5):
														
 
															+            cmd = CmdRunAction(command=f'cmd{i}')
														
 
															+            cmd._id = i + 2
														
 
															+            obs = CmdOutputObservation(
														
 
															+                command=f'cmd{i}', content=f'output{i}', command_id=cmd._id
														
 
															+            )
														
 
															+            obs._cause = cmd._id
														
 
															+            events.extend([cmd, obs])
														
 
															+
														
 
															+        # Set up initial history
														
 
															+        controller.state.history = events.copy()
														
 
															+
														
 
															+        # Force truncation
														
 
															+        controller.state.history = controller._apply_conversation_window(
														
 
															+            controller.state.history
														
 
															+        )
														
 
															+
														
 
															+        # Save state
														
 
															+        saved_start_id = controller.state.start_id
														
 
															+        saved_truncation_id = controller.state.truncation_id
														
 
															+        saved_history_len = len(controller.state.history)
														
 
															+
														
 
															+        # Set up mock event stream for new controller
														
 
															+        mock_event_stream.get_events.return_value = controller.state.history
														
 
															+
														
 
															+        # Create new controller with saved state
														
 
															+        new_controller = AgentController(
														
 
															+            agent=mock_agent,
														
 
															+            event_stream=mock_event_stream,
														
 
															+            max_iterations=10,
														
 
															+            sid='test_truncation',
														
 
															+            confirmation_mode=False,
														
 
															+            headless_mode=True,
														
 
															+        )
														
 
															+        new_controller.state.start_id = saved_start_id
														
 
															+        new_controller.state.truncation_id = saved_truncation_id
														
 
															+        new_controller.state.history = mock_event_stream.get_events()
														
 
															+
														
 
															+        # Verify restoration
														
 
															+        assert len(new_controller.state.history) == saved_history_len
														
 
															+        assert new_controller.state.history[0] == first_msg
														
 
															+        assert new_controller.state.start_id == saved_start_id