@@ -1,63 +1,131 @@
 import pytest
 
-from openhands.core.config import get_parser
+from openhands.core.config import _DEFAULT_AGENT, _MAX_ITERATIONS, get_parser
+
+
+def test_parser_default_values():
+    parser = get_parser()
+    args = parser.parse_args([])
+
+    assert args.directory is None
+    assert args.task == ''
+    assert args.file is None
+    assert args.agent_cls == _DEFAULT_AGENT
+    assert args.max_iterations == _MAX_ITERATIONS
+    assert args.max_budget_per_task is None
+    assert args.eval_output_dir == 'evaluation/evaluation_outputs/outputs'
+    assert args.eval_n_limit is None
+    assert args.eval_num_workers == 4
+    assert args.eval_note is None
+    assert args.llm_config is None
+    assert args.name == 'default'
+
+
+def test_parser_custom_values():
+    parser = get_parser()
+    args = parser.parse_args(
+        [
+            '-d',
+            '/path/to/dir',
+            '-t',
+            'custom task',
+            '-f',
+            'task.txt',
+            '-c',
+            'CustomAgent',
+            '-i',
+            '50',
+            '-b',
+            '100.5',
+            '--eval-output-dir',
+            'custom/output',
+            '--eval-n-limit',
+            '10',
+            '--eval-num-workers',
+            '8',
+            '--eval-note',
+            'Test run',
+            '-l',
+            'gpt4',
+            '-n',
+            'test_session',
+        ]
+    )
+
+    assert args.directory == '/path/to/dir'
+    assert args.task == 'custom task'
+    assert args.file == 'task.txt'
+    assert args.agent_cls == 'CustomAgent'
+    assert args.max_iterations == 50
+    assert args.max_budget_per_task == pytest.approx(100.5)
+    assert args.eval_output_dir == 'custom/output'
+    assert args.eval_n_limit == 10
+    assert args.eval_num_workers == 8
+    assert args.eval_note == 'Test run'
+    assert args.llm_config == 'gpt4'
+    assert args.name == 'test_session'
+
+
+def test_parser_file_overrides_task():
+    parser = get_parser()
+    args = parser.parse_args(['-t', 'task from command', '-f', 'task_file.txt'])
+
+    assert args.task == 'task from command'
+    assert args.file == 'task_file.txt'
+
+
+def test_parser_invalid_max_iterations():
+    parser = get_parser()
+    with pytest.raises(SystemExit):
+        parser.parse_args(['-i', 'not_a_number'])
+
+
+def test_parser_invalid_max_budget():
+    parser = get_parser()
+    with pytest.raises(SystemExit):
+        parser.parse_args(['-b', 'not_a_number'])
+
+
+def test_parser_invalid_eval_n_limit():
+    parser = get_parser()
+    with pytest.raises(SystemExit):
+        parser.parse_args(['--eval-n-limit', 'not_a_number'])
+
+
+def test_parser_invalid_eval_num_workers():
+    parser = get_parser()
+    with pytest.raises(SystemExit):
+        parser.parse_args(['--eval-num-workers', 'not_a_number'])
 
 
 def test_help_message(capsys):
     parser = get_parser()
-    with pytest.raises(SystemExit):  # `--help` causes SystemExit
+    with pytest.raises(SystemExit):
         parser.parse_args(['--help'])
     captured = capsys.readouterr()
- expected_help_message = """
|
|
|
-usage: pytest [-h] [-d DIRECTORY] [-t TASK] [-f FILE] [-c AGENT_CLS]
|
|
|
- [-i MAX_ITERATIONS] [-b MAX_BUDGET_PER_TASK]
|
|
|
- [--eval-output-dir EVAL_OUTPUT_DIR]
|
|
|
- [--eval-n-limit EVAL_N_LIMIT]
|
|
|
- [--eval-num-workers EVAL_NUM_WORKERS] [--eval-note EVAL_NOTE]
|
|
|
- [-l LLM_CONFIG] [-n NAME]
|
|
|
-
|
|
|
-Run an agent with a specific task
|
|
|
-
|
|
|
-options:
|
|
|
- -h, --help show this help message and exit
|
|
|
- -d DIRECTORY, --directory DIRECTORY
|
|
|
- The working directory for the agent
|
|
|
- -t TASK, --task TASK The task for the agent to perform
|
|
|
- -f FILE, --file FILE Path to a file containing the task. Overrides -t if
|
|
|
- both are provided.
|
|
|
- -c AGENT_CLS, --agent-cls AGENT_CLS
|
|
|
- Name of the default agent to use
|
|
|
- -i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS
|
|
|
- The maximum number of iterations to run the agent
|
|
|
- -b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK
|
|
|
- The maximum budget allowed per task, beyond which the
|
|
|
- agent will stop.
|
|
|
- --eval-output-dir EVAL_OUTPUT_DIR
|
|
|
- The directory to save evaluation output
|
|
|
- --eval-n-limit EVAL_N_LIMIT
|
|
|
- The number of instances to evaluate
|
|
|
- --eval-num-workers EVAL_NUM_WORKERS
|
|
|
- The number of workers to use for evaluation
|
|
|
- --eval-note EVAL_NOTE
|
|
|
- The note to add to the evaluation directory
|
|
|
- -l LLM_CONFIG, --llm-config LLM_CONFIG
|
|
|
- Replace default LLM ([llm] section in config.toml)
|
|
|
- config with the specified LLM config, e.g. "llama3"
|
|
|
- for [llm.llama3] section in config.toml
|
|
|
- -n NAME, --name NAME Name for the session
|
|
|
-"""
|
|
|
-
-    actual_lines = captured.out.strip().split('\n')
-    print('\n'.join(actual_lines))
-    expected_lines = expected_help_message.strip().split('\n')
-
-    # Ensure both outputs have the same number of lines
-    assert len(actual_lines) == len(
-        expected_lines
-    ), 'The number of lines in the help message does not match.'
-
-    # Compare each line
-    for actual, expected in zip(actual_lines, expected_lines):
-        assert (
-            actual.strip() == expected.strip()
-        ), f"Expected '{expected}', got '{actual}'"
+    help_output = captured.out
+
+    expected_elements = [
+        'usage:',
+        'Run an agent with a specific task',
+        'options:',
+        '-h, --help',
+        '-d DIRECTORY, --directory DIRECTORY',
+        '-t TASK, --task TASK',
+        '-f FILE, --file FILE',
+        '-c AGENT_CLS, --agent-cls AGENT_CLS',
+        '-i MAX_ITERATIONS, --max-iterations MAX_ITERATIONS',
+        '-b MAX_BUDGET_PER_TASK, --max-budget-per-task MAX_BUDGET_PER_TASK',
+        '--eval-output-dir EVAL_OUTPUT_DIR',
+        '--eval-n-limit EVAL_N_LIMIT',
+        '--eval-num-workers EVAL_NUM_WORKERS',
+        '--eval-note EVAL_NOTE',
+        '-l LLM_CONFIG, --llm-config LLM_CONFIG',
+        '-n NAME, --name NAME',
+    ]
+
+    for element in expected_elements:
+        assert element in help_output, f"Expected '{element}' to be in the help message"
+
+    option_count = help_output.count('  -')
+    assert option_count == 13, f'Expected 13 options, found {option_count}'