Переглянути джерело

Use more specific action types for openhands-aci commands (#5508)

Co-authored-by: Xingyao Wang <xingyao@all-hands.dev>
Ryan H. Tran 1 рік тому
батько
коміт
7a0488c012

+ 6 - 0
frontend/src/i18n/translation.json

@@ -2014,6 +2014,9 @@
   "ACTION_MESSAGE$READ": {
     "en": "Reading the contents of a file"
   },
+  "ACTION_MESSAGE$EDIT": {
+    "en": "Editing the contents of a file"
+  },
   "ACTION_MESSAGE$WRITE": {
     "en": "Writing to a file"
   },
@@ -2029,6 +2032,9 @@
   "OBSERVATION_MESSAGE$READ": {
     "en": "Read the contents of a file"
   },
+  "OBSERVATION_MESSAGE$EDIT": {
+    "en": "Edited the contents of a file"
+  },
   "OBSERVATION_MESSAGE$WRITE": {
     "en": "Wrote to a file"
   },

+ 15 - 0
frontend/src/services/observations.ts

@@ -46,6 +46,9 @@ export function handleObservationMessage(message: ObservationMessage) {
         store.dispatch(addAssistantMessage(message.content));
       }
       break;
+    case ObservationType.READ:
+    case ObservationType.EDIT:
+      break; // We don't display the default message for these observations
     default:
       store.dispatch(addAssistantMessage(message.message));
       break;
@@ -84,6 +87,18 @@ export function handleObservationMessage(message: ObservationMessage) {
           }),
         );
         break;
+      case "read":
+      case "edit":
+        store.dispatch(
+          addAssistantObservation({
+            ...baseObservation,
+            observation,
+            extras: {
+              path: String(message.extras.path || ""),
+            },
+          }),
+        );
+        break;
       case "run_ipython":
         store.dispatch(
           addAssistantObservation({

+ 4 - 2
frontend/src/state/chat-slice.ts

@@ -19,6 +19,7 @@ const HANDLED_ACTIONS: OpenHandsEventType[] = [
   "write",
   "read",
   "browse",
+  "edit",
 ];
 
 function getRiskText(risk: ActionSecurityRisk) {
@@ -101,8 +102,6 @@ export const chatSlice = createSlice({
           content = `${content.slice(0, MAX_CONTENT_LENGTH)}...`;
         }
         text = `${action.payload.args.path}\n${content}`;
-      } else if (actionID === "read") {
-        text = action.payload.args.path;
       } else if (actionID === "browse") {
         text = `Browsing ${action.payload.args.url}`;
       }
@@ -161,6 +160,9 @@ export const chatSlice = createSlice({
         }
         content = `\`\`\`\n${content}\n\`\`\``;
         causeMessage.content = content; // Observation content includes the action
+      } else if (observationID === "read" || observationID === "edit") {
+        const { content } = observation.payload;
+        causeMessage.content = `\`\`\`${observationID === "edit" ? "diff" : "python"}\n${content}\n\`\`\``; // Content is already truncated by the ACI
       } else if (observationID === "browse") {
         let content = `**URL:** ${observation.payload.extras.url}\n`;
         if (observation.payload.extras.error) {

+ 10 - 0
frontend/src/types/core/actions.ts

@@ -104,6 +104,7 @@ export interface FileReadAction extends OpenHandsActionEvent<"read"> {
   args: {
     path: string;
     thought: string;
+    translated_ipython_code: string | null;
   };
 }
 
@@ -116,6 +117,14 @@ export interface FileWriteAction extends OpenHandsActionEvent<"write"> {
   };
 }
 
+export interface FileEditAction extends OpenHandsActionEvent<"edit"> {
+  source: "agent";
+  args: {
+    path: string;
+    translated_ipython_code: string;
+  };
+}
+
 export interface RejectAction extends OpenHandsActionEvent<"reject"> {
   source: "agent";
   args: {
@@ -133,6 +142,7 @@ export type OpenHandsAction =
   | BrowseAction
   | BrowseInteractiveAction
   | FileReadAction
+  | FileEditAction
   | FileWriteAction
   | AddTaskAction
   | ModifyTaskAction

+ 1 - 0
frontend/src/types/core/base.ts

@@ -4,6 +4,7 @@ export type OpenHandsEventType =
   | "run"
   | "read"
   | "write"
+  | "edit"
   | "run_ipython"
   | "delegate"
   | "browse"

+ 8 - 0
frontend/src/types/core/observations.ts

@@ -67,6 +67,13 @@ export interface ReadObservation extends OpenHandsObservationEvent<"read"> {
   };
 }
 
+export interface EditObservation extends OpenHandsObservationEvent<"edit"> {
+  source: "agent";
+  extras: {
+    path: string;
+  };
+}
+
 export interface ErrorObservation extends OpenHandsObservationEvent<"error"> {
   source: "user";
   extras: {
@@ -82,4 +89,5 @@ export type OpenHandsObservation =
   | BrowseObservation
   | WriteObservation
   | ReadObservation
+  | EditObservation
   | ErrorObservation;

+ 3 - 0
frontend/src/types/observation-type.tsx

@@ -2,6 +2,9 @@ enum ObservationType {
   // The contents of a file
   READ = "read",
 
+  // The diff of a file edit
+  EDIT = "edit",
+
   // The HTML contents of a URL
   BROWSE = "browse",
 

+ 9 - 0
openhands/agenthub/codeact_agent/codeact_agent.py

@@ -18,6 +18,7 @@ from openhands.events.action import (
     BrowseURLAction,
     CmdRunAction,
     FileEditAction,
+    FileReadAction,
     IPythonRunCellAction,
     MessageAction,
 )
@@ -26,6 +27,7 @@ from openhands.events.observation import (
     BrowserOutputObservation,
     CmdOutputObservation,
     FileEditObservation,
+    FileReadObservation,
     IPythonRunCellObservation,
     UserRejectObservation,
 )
@@ -128,6 +130,7 @@ class CodeActAgent(Agent):
                 - CmdRunAction: For executing bash commands
                 - IPythonRunCellAction: For running IPython code
                 - FileEditAction: For editing files
+                - FileReadAction: For reading files using openhands-aci commands
                 - BrowseInteractiveAction: For browsing the web
                 - AgentFinishAction: For ending the interaction
                 - MessageAction: For sending messages
@@ -151,6 +154,7 @@ class CodeActAgent(Agent):
                 AgentDelegateAction,
                 IPythonRunCellAction,
                 FileEditAction,
+                FileReadAction,
                 BrowseInteractiveAction,
                 BrowseURLAction,
             ),
@@ -239,6 +243,7 @@ class CodeActAgent(Agent):
         - CmdOutputObservation: Formats command execution results with exit codes
         - IPythonRunCellObservation: Formats IPython cell execution results, replacing base64 images
         - FileEditObservation: Formats file editing results
+        - FileReadObservation: Formats file reading results from openhands-aci
         - AgentDelegateObservation: Formats results from delegated agent tasks
         - ErrorObservation: Formats error messages from failed actions
         - UserRejectObservation: Formats user rejection messages
@@ -288,6 +293,10 @@ class CodeActAgent(Agent):
         elif isinstance(obs, FileEditObservation):
             text = truncate_content(str(obs), max_message_chars)
             message = Message(role='user', content=[TextContent(text=text)])
+        elif isinstance(obs, FileReadObservation):
+            message = Message(
+                role='user', content=[TextContent(text=obs.content)]
+            )  # Content is already truncated by openhands-aci
         elif isinstance(obs, BrowserOutputObservation):
             text = obs.get_agent_obs_text()
             message = Message(

+ 17 - 2
openhands/agenthub/codeact_agent/function_calling.py

@@ -22,9 +22,11 @@ from openhands.events.action import (
     BrowseURLAction,
     CmdRunAction,
     FileEditAction,
+    FileReadAction,
     IPythonRunCellAction,
     MessageAction,
 )
+from openhands.events.event import FileEditSource, FileReadSource
 from openhands.events.tool import ToolCallMetadata
 
 _BASH_DESCRIPTION = """Execute a bash command in the terminal.
@@ -192,7 +194,7 @@ LLMBasedFileEditTool = ChatCompletionToolParam(
                     'type': 'string',
                     'description': 'The absolute path to the file to be edited.',
                 },
-                'new_content_draft': {
+                'content': {
                     'type': 'string',
                     'description': 'A draft of the new content for the file being edited. Note that the assistant may skip unchanged lines.',
                 },
@@ -506,7 +508,20 @@ def response_to_actions(response: ModelResponse) -> list[Action]:
                 logger.debug(
                     f'TOOL CALL: str_replace_editor -> file_editor with code: {code}'
                 )
-                action = IPythonRunCellAction(code=code, include_extra=False)
+
+                if arguments['command'] == 'view':
+                    action = FileReadAction(
+                        path=arguments['path'],
+                        translated_ipython_code=code,
+                        impl_source=FileReadSource.OH_ACI,
+                    )
+                else:
+                    action = FileEditAction(
+                        path=arguments['path'],
+                        content='',  # dummy value -- we don't need it
+                        translated_ipython_code=code,
+                        impl_source=FileEditSource.OH_ACI,
+                    )
             elif tool_call.function.name == 'browser':
                 action = BrowseInteractiveAction(browser_actions=arguments['code'])
             elif tool_call.function.name == 'web_read':

+ 5 - 0
openhands/events/action/files.py

@@ -3,6 +3,7 @@ from typing import ClassVar
 
 from openhands.core.schema import ActionType
 from openhands.events.action.action import Action, ActionSecurityRisk
+from openhands.events.event import FileEditSource, FileReadSource
 
 
 @dataclass
@@ -19,6 +20,8 @@ class FileReadAction(Action):
     action: str = ActionType.READ
     runnable: ClassVar[bool] = True
     security_risk: ActionSecurityRisk | None = None
+    impl_source: FileReadSource = FileReadSource.DEFAULT
+    translated_ipython_code: str = ''  # translated openhands-aci IPython code
 
     @property
     def message(self) -> str:
@@ -64,6 +67,8 @@ class FileEditAction(Action):
     action: str = ActionType.EDIT
     runnable: ClassVar[bool] = True
     security_risk: ActionSecurityRisk | None = None
+    impl_source: FileEditSource = FileEditSource.LLM_BASED_EDIT
+    translated_ipython_code: str = ''
 
     def __repr__(self) -> str:
         ret = '**FileEditAction**\n'

+ 10 - 0
openhands/events/event.py

@@ -12,6 +12,16 @@ class EventSource(str, Enum):
     ENVIRONMENT = 'environment'
 
 
+class FileEditSource(str, Enum):
+    LLM_BASED_EDIT = 'llm_based_edit'
+    OH_ACI = 'oh_aci'  # openhands-aci
+
+
+class FileReadSource(str, Enum):
+    OH_ACI = 'oh_aci'  # openhands-aci
+    DEFAULT = 'default'
+
+
 @dataclass
 class Event:
     @property

+ 7 - 0
openhands/events/observation/files.py

@@ -2,6 +2,7 @@ from dataclasses import dataclass
 from difflib import SequenceMatcher
 
 from openhands.core.schema import ObservationType
+from openhands.events.event import FileEditSource, FileReadSource
 from openhands.events.observation.observation import Observation
 
 
@@ -11,6 +12,7 @@ class FileReadObservation(Observation):
 
     path: str
     observation: str = ObservationType.READ
+    impl_source: FileReadSource = FileReadSource.DEFAULT
 
     @property
     def message(self) -> str:
@@ -39,6 +41,8 @@ class FileEditObservation(Observation):
     old_content: str
     new_content: str
     observation: str = ObservationType.EDIT
+    impl_source: FileEditSource = FileEditSource.LLM_BASED_EDIT
+    formatted_output_and_error: str = ''
 
     @property
     def message(self) -> str:
@@ -122,6 +126,9 @@ class FileEditObservation(Observation):
         return '\n'.join(result)
 
     def __str__(self) -> str:
+        if self.impl_source == FileEditSource.OH_ACI:
+            return self.formatted_output_and_error
+
         ret = ''
         if not self.prev_exist:
             assert (

+ 58 - 7
openhands/runtime/action_execution_server.py

@@ -25,6 +25,7 @@ from fastapi import Depends, FastAPI, HTTPException, Request, UploadFile
 from fastapi.exceptions import RequestValidationError
 from fastapi.responses import JSONResponse, StreamingResponse
 from fastapi.security import APIKeyHeader
+from openhands_aci.utils.diff import get_diff
 from pydantic import BaseModel
 from starlette.exceptions import HTTPException as StarletteHTTPException
 from uvicorn import run
@@ -39,9 +40,11 @@ from openhands.events.action import (
     FileWriteAction,
     IPythonRunCellAction,
 )
+from openhands.events.event import FileEditSource, FileReadSource
 from openhands.events.observation import (
     CmdOutputObservation,
     ErrorObservation,
+    FileEditObservation,
     FileReadObservation,
     FileWriteObservation,
     IPythonRunCellObservation,
@@ -202,24 +205,64 @@ class ActionExecutor:
             obs: IPythonRunCellObservation = await _jupyter_plugin.run(action)
             obs.content = obs.content.rstrip()
             matches = re.findall(
-                r'<oh_aci_output>(.*?)</oh_aci_output>', obs.content, re.DOTALL
+                r'<oh_aci_output_[0-9a-f]{32}>(.*?)</oh_aci_output_[0-9a-f]{32}>',
+                obs.content,
+                re.DOTALL,
             )
             if matches:
-                results = []
-                for match in matches:
+                results: list[str] = []
+                if len(matches) == 1:
+                    # Use specific actions/observations types
+                    match = matches[0]
                     try:
                         result_dict = json.loads(match)
-                        results.append(
-                            result_dict.get('formatted_output_and_error', '')
-                        )
+                        if result_dict.get('path'):  # Successful output
+                            if (
+                                result_dict['new_content'] is not None
+                            ):  # File edit commands
+                                diff = get_diff(
+                                    old_contents=result_dict['old_content']
+                                    or '',  # old_content is None when file is created
+                                    new_contents=result_dict['new_content'],
+                                    filepath=result_dict['path'],
+                                )
+                                return FileEditObservation(
+                                    content=diff,
+                                    path=result_dict['path'],
+                                    old_content=result_dict['old_content'],
+                                    new_content=result_dict['new_content'],
+                                    prev_exist=result_dict['prev_exist'],
+                                    impl_source=FileEditSource.OH_ACI,
+                                    formatted_output_and_error=result_dict[
+                                        'formatted_output_and_error'
+                                    ],
+                                )
+                            else:  # File view commands
+                                return FileReadObservation(
+                                    content=result_dict['formatted_output_and_error'],
+                                    path=result_dict['path'],
+                                    impl_source=FileReadSource.OH_ACI,
+                                )
+                        else:  # Error output
+                            results.append(result_dict['formatted_output_and_error'])
                     except json.JSONDecodeError:
                         # Handle JSON decoding errors if necessary
                         results.append(
                             f"Invalid JSON in 'openhands-aci' output: {match}"
                         )
+                else:
+                    for match in matches:
+                        try:
+                            result_dict = json.loads(match)
+                            results.append(result_dict['formatted_output_and_error'])
+                        except json.JSONDecodeError:
+                            # Handle JSON decoding errors if necessary
+                            results.append(
+                                f"Invalid JSON in 'openhands-aci' output: {match}"
+                            )
 
                 # Combine the results (e.g., join them) or handle them as required
-                obs.content = '\n'.join(results)
+                obs.content = '\n'.join(str(result) for result in results)
 
             if action.include_extra:
                 obs.content += (
@@ -239,6 +282,14 @@ class ActionExecutor:
         return str(filepath)
 
     async def read(self, action: FileReadAction) -> Observation:
+        if action.impl_source == FileReadSource.OH_ACI:
+            return await self.run_ipython(
+                IPythonRunCellAction(
+                    code=action.translated_ipython_code,
+                    include_extra=False,
+                )
+            )
+
         # NOTE: the client code is running inside the sandbox,
         # so there's no need to check permission
         working_dir = self.bash_session.workdir

+ 20 - 1
openhands/runtime/utils/edit.py

@@ -8,7 +8,13 @@ from openhands_aci.utils.diff import get_diff
 
 from openhands.core.config import AppConfig
 from openhands.core.logger import openhands_logger as logger
-from openhands.events.action import FileEditAction, FileReadAction, FileWriteAction
+from openhands.events.action import (
+    FileEditAction,
+    FileReadAction,
+    FileWriteAction,
+    IPythonRunCellAction,
+)
+from openhands.events.event import FileEditSource
 from openhands.events.observation import (
     ErrorObservation,
     FileEditObservation,
@@ -88,6 +94,10 @@ class FileEditRuntimeInterface(ABC):
     def write(self, action: FileWriteAction) -> Observation:
         pass
 
+    @abstractmethod
+    def run_ipython(self, action: IPythonRunCellAction) -> Observation:
+        pass
+
 
 class FileEditRuntimeMixin(FileEditRuntimeInterface):
     # Most LLMs have output token limit of 4k tokens.
@@ -198,6 +208,15 @@ class FileEditRuntimeMixin(FileEditRuntimeInterface):
         return None
 
     def edit(self, action: FileEditAction) -> Observation:
+        if action.impl_source == FileEditSource.OH_ACI:
+            # Translate to ipython command to file_editor
+            return self.run_ipython(
+                IPythonRunCellAction(
+                    code=action.translated_ipython_code,
+                    include_extra=False,
+                )
+            )
+
         obs = self.read(FileReadAction(path=action.path))
         if (
             isinstance(obs, ErrorObservation)

+ 5 - 6
poetry.lock

@@ -1,4 +1,4 @@
-# This file is automatically @generated by Poetry 1.8.5 and should not be changed by hand.
+# This file is automatically @generated by Poetry 1.8.4 and should not be changed by hand.
 
 [[package]]
 name = "aiohappyeyeballs"
@@ -5414,7 +5414,6 @@ optional = false
 python-versions = ">=3.6"
 files = [
     {file = "opencv-python-4.10.0.84.tar.gz", hash = "sha256:72d234e4582e9658ffea8e9cae5b63d488ad06994ef12d81dc303b17472f3526"},
-    {file = "opencv_python-4.10.0.84-cp37-abi3-macosx_11_0_arm64.whl", hash = "sha256:fc182f8f4cda51b45f01c64e4cbedfc2f00aff799debebc305d8d0210c43f251"},
     {file = "opencv_python-4.10.0.84-cp37-abi3-macosx_12_0_x86_64.whl", hash = "sha256:71e575744f1d23f79741450254660442785f45a0797212852ee5199ef12eed98"},
     {file = "opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:09a332b50488e2dda866a6c5573ee192fe3583239fb26ff2f7f9ceb0bc119ea6"},
     {file = "opencv_python-4.10.0.84-cp37-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:9ace140fc6d647fbe1c692bcb2abce768973491222c067c131d80957c595b71f"},
@@ -5427,13 +5426,13 @@ numpy = {version = ">=1.26.0", markers = "python_version >= \"3.12\""}
 
 [[package]]
 name = "openhands-aci"
-version = "0.1.2"
+version = "0.1.4"
 description = "An Agent-Computer Interface (ACI) designed for software development agents OpenHands."
 optional = false
 python-versions = "<4.0,>=3.12"
 files = [
-    {file = "openhands_aci-0.1.2-py3-none-any.whl", hash = "sha256:a2fcae7a2f1047d516d6862742c7a2f8ea988c6a58295599bc305c99b8d53067"},
-    {file = "openhands_aci-0.1.2.tar.gz", hash = "sha256:c3c91aa3f13554159168b44a7f86bf333da30067fa6370a46ed785bf4240631b"},
+    {file = "openhands_aci-0.1.4-py3-none-any.whl", hash = "sha256:a7cefc969a856e971a5ecf3765de9ab2e8eb4e46c623aca9088f388b8f8d972f"},
+    {file = "openhands_aci-0.1.4.tar.gz", hash = "sha256:ae3207308f7757179ae77ce70a448deec9e2d77a1390ae0f5bede39925ec5446"},
 ]
 
 [package.dependencies]
@@ -10049,4 +10048,4 @@ testing = ["coverage[toml]", "zope.event", "zope.testing"]
 [metadata]
 lock-version = "2.0"
 python-versions = "^3.12"
-content-hash = "3893da8994f1a0ad86331b468baa432c14023b33c0764243da412edfa4d683f6"
+content-hash = "7b0dda83687d6a1285cc60f9a79ab5cc966ca199c85ad23d57668df9b2cf8816"

+ 1 - 1
pyproject.toml

@@ -64,7 +64,7 @@ modal = ">=0.66.26,<0.69.0"
 runloop-api-client = "0.11.0"
 pygithub = "^2.5.0"
 joblib = "*"
-openhands-aci = "0.1.2"
+openhands-aci = "0.1.4"
 python-socketio = "^5.11.4"
 redis = "^5.2.0"
 

+ 64 - 0
tests/runtime/test_ipython.py

@@ -11,10 +11,12 @@ from conftest import (
 from openhands.core.logger import openhands_logger as logger
 from openhands.events.action import (
     CmdRunAction,
+    FileEditAction,
     FileReadAction,
     FileWriteAction,
     IPythonRunCellAction,
 )
+from openhands.events.event import FileEditSource
 from openhands.events.observation import (
     CmdOutputObservation,
     ErrorObservation,
@@ -314,3 +316,65 @@ print(file_editor(command='undo_edit', path='{sandbox_dir}/test.txt'))
     assert obs.exit_code == 0
 
     _close_test_runtime(runtime)
+
+
+def test_file_read_and_edit_via_oh_aci(temp_dir, runtime_cls, run_as_openhands):
+    runtime = _load_runtime(temp_dir, runtime_cls, run_as_openhands)
+    sandbox_dir = _get_sandbox_folder(runtime)
+
+    actions = [
+        {
+            'command': 'create',
+            'test_code': f"print(file_editor(command='create', path='{sandbox_dir}/test.txt', file_text='Line 1\\nLine 2\\nLine 3'))",
+            'action_cls': FileEditAction,
+            'assertions': ['File created successfully'],
+        },
+        {
+            'command': 'view',
+            'test_code': f"print(file_editor(command='view', path='{sandbox_dir}/test.txt'))",
+            'action_cls': FileReadAction,
+            'assertions': ['Line 1', 'Line 2', 'Line 3'],
+        },
+        {
+            'command': 'str_replace',
+            'test_code': f"print(file_editor(command='str_replace', path='{sandbox_dir}/test.txt', old_str='Line 2', new_str='New Line 2'))",
+            'action_cls': FileEditAction,
+            'assertions': ['New Line 2'],
+        },
+        {
+            'command': 'undo_edit',
+            'test_code': f"print(file_editor(command='undo_edit', path='{sandbox_dir}/test.txt'))",
+            'action_cls': FileEditAction,
+            'assertions': ['Last edit to', 'undone successfully'],
+        },
+        {
+            'command': 'insert',
+            'test_code': f"print(file_editor(command='insert', path='{sandbox_dir}/test.txt', insert_line=2, new_str='Line 4'))",
+            'action_cls': FileEditAction,
+            'assertions': ['Line 4'],
+        },
+    ]
+
+    for action_info in actions:
+        action_cls = action_info['action_cls']
+
+        kwargs = {
+            'path': f'{sandbox_dir}/test.txt',
+            'translated_ipython_code': action_info['test_code'],
+            'impl_source': FileEditSource.OH_ACI,
+        }
+        if action_info['action_cls'] == FileEditAction:
+            kwargs['content'] = ''  # dummy value required for FileEditAction
+
+        action = action_cls(**kwargs)
+
+        logger.info(action, extra={'msg_type': 'ACTION'})
+        obs = runtime.run_action(action)
+        logger.info(obs, extra={'msg_type': 'OBSERVATION'})
+        for assertion in action_info['assertions']:
+            if action_cls == FileReadAction:
+                assert assertion in obs.content
+            else:
+                assert assertion in str(obs)
+
+    _close_test_runtime(runtime)

+ 8 - 1
tests/unit/test_action_serialization.py

@@ -132,7 +132,14 @@ def test_browse_interactive_action_serialization_deserialization():
 def test_file_read_action_serialization_deserialization():
     original_action_dict = {
         'action': 'read',
-        'args': {'path': '/path/to/file.txt', 'start': 0, 'end': -1, 'thought': 'None'},
+        'args': {
+            'path': '/path/to/file.txt',
+            'start': 0,
+            'end': -1,
+            'thought': 'None',
+            'impl_source': 'default',
+            'translated_ipython_code': '',
+        },
     }
     serialization_deserialization(original_action_dict, FileReadAction)