Просмотр исходного кода

Fix browsing actions to be more robust (#4226)

Engel Nyst 1 год назад
Родитель
Коммит
6b1f23a20a
2 изменённых файла: 77 добавлений и 4 удаления
  1. 23 4
      agenthub/browsing_agent/response_parser.py
  2. 54 0
      tests/unit/test_browsing_agent_parser.py

+ 23 - 4
agenthub/browsing_agent/response_parser.py

@@ -72,17 +72,36 @@ class BrowsingActionParserBrowseInteractive(ActionParser):
         return True
 
     def parse(self, action_str: str) -> Action:
-        thought = action_str.split('```')[0].strip()
-        action_str = action_str.split('```')[1].strip()
+        # Split action_str on the ``` fence into browser_actions and thought.
+        # The LLM may return only the action, or a thought followed by the action.
+        # NOTE(review): indexing parts[1] below assumes action_str contains at least one ``` fence; without one it raises IndexError.
+        # When both are returned, the text looks like this:
+        ### Based on the current state of the page and the goal of finding out the president of the USA, the next action should involve searching for information related to the president.
+        ### To achieve this, we can navigate to a reliable source such as a search engine or a specific website that provides information about the current president of the USA.
+        ### Here is an example of a valid action to achieve this:
+        ### ```
+        ### goto('https://www.whitehouse.gov/about-the-white-house/presidents/'
+        # By the time it reaches this method, BrowsingResponseParser.parse_response has also appended )``` to the end of the string.
+
+        # When the LLM returns only the action, the text looks like this:
+        ### goto('https://www.whitehouse.gov/about-the-white-house/presidents/')
+        # and parse_response has appended )``` to it, so the text after the fence (parts[1]) is empty and parts[0] is the action.
+        parts = action_str.split('```')
+        browser_actions = (
+            parts[1].strip() if parts[1].strip() != '' else parts[0].strip()
+        )
+        thought = parts[0].strip() if parts[1].strip() != '' else ''
+
+        # If the LLM wants to talk to the user, extract the message: the first argument of the send_msg_to_user(...) call, read from that line's AST.
         msg_content = ''
-        for sub_action in action_str.split('\n'):
+        for sub_action in browser_actions.split('\n'):
             if 'send_msg_to_user(' in sub_action:
                 tree = ast.parse(sub_action)
                 args = tree.body[0].value.args  # type: ignore
                 msg_content = args[0].value
 
         return BrowseInteractiveAction(
-            browser_actions=action_str,
+            browser_actions=browser_actions,
             thought=thought,
             browsergym_send_msg_to_user=msg_content,
         )

+ 54 - 0
tests/unit/test_browsing_agent_parser.py

@@ -0,0 +1,54 @@
+import pytest
+
+from agenthub.browsing_agent.response_parser import (
+    BrowseInteractiveAction,
+    BrowsingResponseParser,
+)
+
+
+@pytest.mark.parametrize(
+    'action_str, expected',
+    [
+        ("click('81'", "click('81')```"),
+        (
+            '"We need to search the internet\n```goto("google.com")',
+            '"We need to search the internet\n```goto("google.com"))```',
+        ),
+        ("```click('81'", "```click('81')```"),
+        ("click('81')", "click('81'))```"),
+    ],
+)
+def test_parse_response(action_str: str, expected: str) -> None:
+    # BrowsingResponseParser.parse_response should return the message content with )``` appended, even when the call is already closed.
+    parser = BrowsingResponseParser()
+    response = {'choices': [{'message': {'content': action_str}}]}
+    result = parser.parse_response(response)
+    assert result == expected
+
+
+@pytest.mark.parametrize(
+    'action_str, expected_browser_actions, expected_thought, expected_msg_content',
+    [
+        ("click('81')```", "click('81')", '', ''),
+        ("```click('81')```", "click('81')", '', ''),
+        (
+            "We need to perform a click\n```click('81')",
+            "click('81')",
+            'We need to perform a click',
+            '',
+        ),
+    ],
+)
+def test_parse_action(
+    action_str: str,
+    expected_browser_actions: str,
+    expected_thought: str,
+    expected_msg_content: str,
+) -> None:
+    # BrowsingResponseParser.parse_action should split the fenced string into browser actions, thought, and user message.
+    parser = BrowsingResponseParser()
+    action = parser.parse_action(action_str)
+    assert isinstance(action, BrowseInteractiveAction)
+    assert action.browser_actions == expected_browser_actions
+    assert action.thought == expected_thought
+    assert action.browsergym_send_msg_to_user == expected_msg_content