Browse Source

fix(agent, browsing): too long tool description for openai (#4778)

Xingyao Wang 1 year ago
parent
commit
8166bf768a
1 changed file with 18 additions and 9 deletions
  1. +18 -9
      openhands/agenthub/codeact_agent/function_calling.py

+ 18 - 9
openhands/agenthub/codeact_agent/function_calling.py

@@ -284,6 +284,17 @@ _browser_action_space = HighLevelActionSet(
 
 
 _BROWSER_DESCRIPTION = """Interact with the browser using Python code.
+
+See the description of "code" parameter for more details.
+
+Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
+More than 2-3 actions usually leads to failure or unexpected behavior. Example:
+fill('a12', 'example with "quotes"')
+click('a51')
+click('48', button='middle', modifiers=['Shift'])
+"""
+
+_BROWSER_TOOL_DESCRIPTION = """
 The following 15 functions are available. Nothing else is supported.
 
 goto(url: str)
@@ -385,20 +396,15 @@ upload_file(bid: str, file: str | list[str])
         upload_file('572', '/home/user/my_receipt.pdf')
 
         upload_file('63', ['/home/bob/Documents/image.jpg', '/home/bob/Documents/file.zip'])
-
-Multiple actions can be provided at once, but will be executed sequentially without any feedback from the page.
-More than 2-3 actions usually leads to failure or unexpected behavior. Example:
-fill('a12', 'example with "quotes"')
-click('a51')
-click('48', button='middle', modifiers=['Shift'])
 """
 
+
 for _, action in _browser_action_space.action_set.items():
     assert (
-        action.signature in _BROWSER_DESCRIPTION
+        action.signature in _BROWSER_TOOL_DESCRIPTION
     ), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.signature}'
     assert (
-        action.description in _BROWSER_DESCRIPTION
+        action.description in _BROWSER_TOOL_DESCRIPTION
     ), f'Browser description mismatch. Please double check if the BrowserGym updated their action space.\n\nAction: {action.description}'
 
 BrowserTool = ChatCompletionToolParam(
@@ -411,7 +417,10 @@ BrowserTool = ChatCompletionToolParam(
             'properties': {
                 'code': {
                     'type': 'string',
-                    'description': 'The Python code that interacts with the browser.',
+                    'description': (
+                        'The Python code that interacts with the browser.\n'
+                        + _BROWSER_TOOL_DESCRIPTION
+                    ),
                 }
             },
             'required': ['code'],