Просмотр исходного кода

(feat) implement typescript linting for CodeActAgent (#3452)

* tweaks to linter.py to prep for typescript linting (not implemented yet)

* fix 2 linter unit tests

* simpler basic_lint output; updated unit test

* fix default gpt-4o model name in aider default config

* linter.py: use tsc (typescript compiler) for linting; added more tests

* make typescript linting be more forgiving

* use npx instead of npm to install typescript in Dockerfile.j2

* Fix merge mistake

* removed npx call from Dockerfile.j2

* fix run_cmd to use code parameter; replace regex in test

* fix test_lint_file_fail_typescript to ignore leading path characters

* added TODO comment to extract_error_line_from

* fixed bug in ts_lint with wrong line number parsing
tobitege 1 год назад
Родитель
Сommit
c7886168e1

+ 2 - 1
openhands/runtime/plugins/agent_skills/file_ops/file_ops.py

@@ -95,7 +95,8 @@ def _lint_file(file_path: str) -> tuple[str | None, int | None]:
     if not lint_error:
         # Linting successful. No issues found.
         return None, None
-    return 'ERRORS:\n' + lint_error.text, lint_error.lines[0]
+    first_error_line = lint_error.lines[0] if lint_error.lines else None
+    return 'ERRORS:\n' + lint_error.text, first_error_line
 
 
 def _print_window(file_path, targeted_line, window, return_str=False):

+ 85 - 13
openhands/runtime/plugins/agent_skills/utils/aider/linter.py

@@ -26,8 +26,11 @@ class Linter:
 
         self.languages = dict(
             python=self.py_lint,
+            javascript=self.ts_lint,
+            typescript=self.ts_lint,
         )
         self.all_lint_cmd = None
+        self.ts_installed = self.check_ts_installed()
 
     def set_linter(self, lang, cmd):
         if lang:
@@ -47,9 +50,15 @@ class Linter:
         cmd = cmd.split()
 
         process = subprocess.Popen(
-            cmd, cwd=self.root, stdout=subprocess.PIPE, stderr=subprocess.STDOUT
+            cmd,
+            cwd=self.root,
+            stdout=subprocess.PIPE,
+            stderr=subprocess.STDOUT,
+            stdin=subprocess.PIPE,  # Add stdin parameter
         )
-        stdout, _ = process.communicate()
+        stdout, _ = process.communicate(
+            input=code.encode()
+        )  # Pass the code to the process
         errors = stdout.decode().strip()
         self.returncode = process.returncode
         if self.returncode == 0:
@@ -109,6 +118,64 @@ class Linter:
             error = basic_lint(rel_fname, code)
         return error
 
+    def check_ts_installed(self):
+        """Check if TypeScript is installed."""
+        try:
+            subprocess.run(
+                ['tsc', '--version'],
+                check=True,
+                stdout=subprocess.PIPE,
+                stderr=subprocess.PIPE,
+            )
+            return True
+        except (subprocess.CalledProcessError, FileNotFoundError):
+            return False
+
+    def ts_lint(self, fname, rel_fname, code):
+        """Use typescript compiler to check for errors. If TypeScript is not installed return None."""
+        if self.ts_installed:
+            tsc_cmd = 'tsc --noEmit --allowJs --checkJs --strict --noImplicitAny --strictNullChecks --strictFunctionTypes --strictBindCallApply --strictPropertyInitialization --noImplicitThis --alwaysStrict'
+            try:
+                tsc_res = self.run_cmd(tsc_cmd, rel_fname, code)
+                if tsc_res:
+                    # Parse the TSC output
+                    error_lines = []
+                    for line in tsc_res.text.split('\n'):
+                        # Extract lines and column numbers
+                        if ': error TS' in line or ': warning TS' in line:
+                            try:
+                                location_part = line.split('(')[1].split(')')[0]
+                                line_num, _ = map(int, location_part.split(','))
+                                error_lines.append(line_num)
+                            except (IndexError, ValueError):
+                                continue
+                    return LintResult(text=tsc_res.text, lines=error_lines)
+            except FileNotFoundError:
+                pass
+
+        # If still no errors, check for missing semicolons
+        lines = code.split('\n')
+        error_lines = []
+        for i, line in enumerate(lines):
+            stripped_line = line.strip()
+            if (
+                stripped_line
+                and not stripped_line.endswith(';')
+                and not stripped_line.endswith('{')
+                and not stripped_line.endswith('}')
+                and not stripped_line.startswith('//')
+            ):
+                error_lines.append(i + 1)
+
+        if error_lines:
+            error_message = (
+                f"{rel_fname}({error_lines[0]},1): error TS1005: ';' expected."
+            )
+            return LintResult(text=error_message, lines=error_lines)
+
+        # If tsc is not available return None (basic_lint causes other problems!)
+        return None
+
 
 def lint_python_compile(fname, code):
     try:
@@ -137,10 +204,7 @@ def lint_python_compile(fname, code):
 
 
 def basic_lint(fname, code):
-    """
-    Use tree-sitter to look for syntax errors, display them with tree context.
-    """
-
+    """Use tree-sitter to look for syntax errors, display them with tree context."""
     lang = filename_to_lang(fname)
     if not lang:
         return
@@ -151,11 +215,19 @@ def basic_lint(fname, code):
     errors = traverse_tree(tree.root_node)
     if not errors:
         return
-    return LintResult(text=f'{fname}:{errors[0]}', lines=errors)
+
+    error_messages = [
+        f'{fname}:{line}:{col}: {error_details}' for line, col, error_details in errors
+    ]
+    return LintResult(
+        text='\n'.join(error_messages), lines=[line for line, _, _ in errors]
+    )
 
 
 def extract_error_line_from(lint_error):
-    # moved from openhands.agentskills#_lint_file
+    # TODO: this is a temporary fix to extract the error line from the error message
+    # it should be replaced with a more robust/unified solution
+    first_error_line = None
     for line in lint_error.splitlines(True):
         if line.strip():
             # The format of the error message is: <filename>:<line>:<column>: <error code> <error message>
@@ -191,12 +263,14 @@ def tree_context(fname, code, line_nums):
     return output
 
 
-# Traverse the tree to find errors
 def traverse_tree(node):
+    """Traverses the tree to find errors"""
     errors = []
     if node.type == 'ERROR' or node.is_missing:
         line_no = node.start_point[0] + 1
-        errors.append(line_no)
+        col_no = node.start_point[1] + 1
+        error_type = 'Missing node' if node.is_missing else 'Syntax error'
+        errors.append((line_no, col_no, error_type))
 
     for child in node.children:
         errors += traverse_tree(child)
@@ -205,9 +279,7 @@ def traverse_tree(node):
 
 
 def main():
-    """
-    Main function to parse files provided as command line arguments.
-    """
+    """Main function to parse files provided as command line arguments."""
     if len(sys.argv) < 2:
         print('Usage: python linter.py <file1> <file2> ...')
         sys.exit(1)

+ 1 - 1
openhands/runtime/plugins/agent_skills/utils/config.py

@@ -18,7 +18,7 @@ def _get_openai_base_url():
 
 
 def _get_openai_model():
-    return os.getenv('OPENAI_MODEL', 'gpt-4o-2024-05-13')
+    return os.getenv('OPENAI_MODEL', 'gpt-4o')
 
 
 def _get_max_token():

+ 64 - 5
tests/unit/test_agent_skill.py

@@ -29,6 +29,7 @@ from openhands.runtime.plugins.agent_skills.file_reader.file_readers import (
     parse_pdf,
     parse_pptx,
 )
+from openhands.runtime.plugins.agent_skills.utils.aider import Linter
 
 
 # CURRENT_FILE must be reset for each test
@@ -1277,22 +1278,22 @@ def test_find_file_cwd(tmp_path, monkeypatch):
 def test_find_file_not_exist_file():
     with io.StringIO() as buf:
         with contextlib.redirect_stdout(buf):
-            find_file('unexist.txt')
+            find_file('nonexist.txt')
         result = buf.getvalue()
     assert result is not None
 
-    expected = '[No matches found for "unexist.txt" in ./]\n'
+    expected = '[No matches found for "nonexist.txt" in ./]\n'
     assert result.split('\n') == expected.split('\n')
 
 
 def test_find_file_not_exist_file_specific_path(tmp_path):
     with io.StringIO() as buf:
         with contextlib.redirect_stdout(buf):
-            find_file('unexist.txt', str(tmp_path))
+            find_file('nonexist.txt', str(tmp_path))
         result = buf.getvalue()
     assert result is not None
 
-    expected = f'[No matches found for "unexist.txt" in {tmp_path}]\n'
+    expected = f'[No matches found for "nonexist.txt" in {tmp_path}]\n'
     assert result.split('\n') == expected.split('\n')
 
 
@@ -1569,7 +1570,7 @@ def test_lint_file_fail_non_python(tmp_path, capsys):
             '(this is the end of the file)\n'
             '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n'
             'ERRORS:\n'
-            f'{file_path}:1\n'
+            f'{file_path}:1:1: Syntax error\n'
             '[This is how your edit would have looked if applied]\n'
             '-------------------------------------------------\n'
             '(this is the beginning of the file)\n'
@@ -1588,3 +1589,61 @@ def test_lint_file_fail_non_python(tmp_path, capsys):
             'DO NOT re-run the same failed edit command. Running it again will lead to the same error.\n'
         )
         assert result.split('\n') == expected.split('\n')
+
+
+def test_lint_file_fail_typescript(tmp_path, capsys):
+    linter = Linter()
+    if not linter.ts_installed:
+        return
+    with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}):
+        current_line = 1
+        file_path = tmp_path / 'test.ts'
+        file_path.write_text('')
+
+        open_file(str(file_path), current_line)
+        insert_content_at_line(
+            str(file_path),
+            1,
+            "function greet(name: string) {\n    console.log('Hello, ' + name)",
+        )
+        result = capsys.readouterr().out
+        assert result is not None
+
+        # Note: the tsc (typescript compiler) message is different from a
+        # compared to a python linter message, like line and column in brackets:
+        expected_lines = [
+            f'[File: {file_path} (1 lines total)]',
+            '(this is the beginning of the file)',
+            '1|',
+            '(this is the end of the file)',
+            '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]',
+            'ERRORS:',
+            f"{file_path}(3,1): error TS1005: '}}' expected.",
+            '[This is how your edit would have looked if applied]',
+            '-------------------------------------------------',
+            '(this is the beginning of the file)',
+            '1|function greet(name: string) {',
+            "2|    console.log('Hello, ' + name)",
+            '(this is the end of the file)',
+            '-------------------------------------------------',
+            '',
+            '[This is the original code before your edit]',
+            '-------------------------------------------------',
+            '(this is the beginning of the file)',
+            '1|',
+            '(this is the end of the file)',
+            '-------------------------------------------------',
+            'Your changes have NOT been applied. Please fix your edit command and try again.',
+            'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.',
+            'DO NOT re-run the same failed edit command. Running it again will lead to the same error.',
+            '',
+        ]
+        result_lines = result.split('\n')
+        assert len(result_lines) == len(expected_lines), "Number of lines doesn't match"
+
+        for i, (result_line, expected_line) in enumerate(
+            zip(result_lines, expected_lines)
+        ):
+            assert result_line.lstrip('./') == expected_line.lstrip(
+                './'
+            ), f"Line {i+1} doesn't match"

+ 66 - 2
tests/unit/test_aider_linter.py

@@ -1,4 +1,5 @@
 import os
+from unittest.mock import patch
 
 import pytest
 
@@ -64,6 +65,48 @@ def linter(tmp_path):
     return Linter(root=tmp_path)
 
 
+@pytest.fixture
+def temp_typescript_file_errors(tmp_path):
+    # Fixture to create a temporary TypeScript file with errors
+    temp_name = os.path.join(tmp_path, 'lint-test.ts')
+    with open(temp_name, 'w', encoding='utf-8') as tmp_file:
+        tmp_file.write("""function foo() {
+    console.log("Hello, World!")
+foo()
+""")
+    tmp_file.close()
+    yield temp_name
+    os.remove(temp_name)
+
+
+@pytest.fixture
+def temp_typescript_file_errors_semicolon(tmp_path):
+    # Fixture to create a temporary TypeScript file with missing semicolon
+    temp_name = os.path.join(tmp_path, 'lint-test.ts')
+    with open(temp_name, 'w', encoding='utf-8') as tmp_file:
+        tmp_file.write("""function printHelloWorld() {
+    console.log('Hello World')
+}""")
+    tmp_file.close()
+    yield temp_name
+    os.remove(temp_name)
+
+
+@pytest.fixture
+def temp_typescript_file_correct(tmp_path):
+    # Fixture to create a temporary TypeScript file with correct code
+    temp_name = os.path.join(tmp_path, 'lint-test.ts')
+    with open(temp_name, 'w', encoding='utf-8') as tmp_file:
+        tmp_file.write("""function foo(): void {
+  console.log("Hello, World!");
+}
+foo();
+""")
+    tmp_file.close()
+    yield temp_name
+    os.remove(temp_name)
+
+
 def test_get_rel_fname(linter, temp_file, tmp_path):
     # Test get_rel_fname method
     rel_fname = linter.get_rel_fname(temp_file)
@@ -119,7 +162,7 @@ def test_basic_lint(temp_file):
     result = basic_lint(temp_file, poorly_formatted_code)
 
     assert isinstance(result, LintResult)
-    assert result.text == f'{temp_file}:2'
+    assert result.text.startswith(f'{temp_file}:2:9')
     assert 2 in result.lines
 
 
@@ -136,7 +179,7 @@ def test_basic_lint_fail_returns_text_and_lines(temp_file):
     result = basic_lint(temp_file, poorly_formatted_code)
 
     assert isinstance(result, LintResult)
-    assert result.text == f'{temp_file}:2'
+    assert result.text.startswith(f'{temp_file}:2:9')
     assert 2 in result.lines
 
 
@@ -199,3 +242,24 @@ def test_lint_fail_ruby(linter, temp_ruby_file_errors):
 def test_lint_fail_ruby_no_parentheses(linter, temp_ruby_file_errors_parentheses):
     errors = linter.lint(temp_ruby_file_errors_parentheses)
     assert errors is not None
+
+
+def test_lint_pass_typescript(linter, temp_typescript_file_correct):
+    if linter.ts_installed:
+        result = linter.lint(temp_typescript_file_correct)
+        assert result is None
+
+
+def test_lint_fail_typescript(linter, temp_typescript_file_errors):
+    if linter.ts_installed:
+        errors = linter.lint(temp_typescript_file_errors)
+        assert errors is not None
+
+
+def test_lint_fail_typescript_missing_semicolon(
+    linter, temp_typescript_file_errors_semicolon
+):
+    if linter.ts_installed:
+        with patch.dict(os.environ, {'ENABLE_AUTO_LINT': 'True'}):
+            errors = linter.lint(temp_typescript_file_errors_semicolon)
+            assert errors is not None