import os from collections import defaultdict from difflib import SequenceMatcher from openhands.linter.base import BaseLinter, LinterException, LintResult from openhands.linter.languages.python import PythonLinter from openhands.linter.languages.treesitter import TreesitterBasicLinter class DefaultLinter(BaseLinter): def __init__(self): self.linters: dict[str, list[BaseLinter]] = defaultdict(list) self.linters['.py'] = [PythonLinter()] # Add treesitter linter as a fallback for all linters self.basic_linter = TreesitterBasicLinter() for extension in self.basic_linter.supported_extensions: self.linters[extension].append(self.basic_linter) self._supported_extensions = list(self.linters.keys()) @property def supported_extensions(self) -> list[str]: return self._supported_extensions def lint(self, file_path: str) -> list[LintResult]: if not os.path.isabs(file_path): raise LinterException(f'File path {file_path} is not an absolute path') file_extension = os.path.splitext(file_path)[1] linters: list[BaseLinter] = self.linters.get(file_extension, []) for linter in linters: res = linter.lint(file_path) # We always return the first linter's result (higher priority) if res: return res return [] def lint_file_diff( self, original_file_path: str, updated_file_path: str ) -> list[LintResult]: """Only return lint errors that are introduced by the diff. Args: original_file_path: The original file path. updated_file_path: The updated file path. Returns: A list of lint errors that are introduced by the diff. """ # 1. Lint the original and updated file original_lint_errors: list[LintResult] = self.lint(original_file_path) updated_lint_errors: list[LintResult] = self.lint(updated_file_path) # 2. Load the original and updated file content with open(original_file_path, 'r') as f: old_lines = f.readlines() with open(updated_file_path, 'r') as f: new_lines = f.readlines() # 3. Get line numbers that are changed & unchanged # Map the line number of the original file to the updated file # NOTE: this only works for lines that are not changed (i.e., equal) old_to_new_line_no_mapping: dict[int, int] = {} replace_or_inserted_lines: list[int] = [] for ( tag, old_idx_start, old_idx_end, new_idx_start, new_idx_end, ) in SequenceMatcher( isjunk=None, a=old_lines, b=new_lines, ).get_opcodes(): if tag == 'equal': for idx, _ in enumerate(old_lines[old_idx_start:old_idx_end]): old_to_new_line_no_mapping[old_idx_start + idx + 1] = ( new_idx_start + idx + 1 ) elif tag == 'replace' or tag == 'insert': for idx, _ in enumerate(old_lines[old_idx_start:old_idx_end]): replace_or_inserted_lines.append(new_idx_start + idx + 1) else: # omit the case of delete pass # 4. Get pre-existing errors in unchanged lines # increased error elsewhere introduced by the newlines # i.e., we omit errors that are already in original files and report new one new_line_no_to_original_errors: dict[int, list[LintResult]] = defaultdict(list) for error in original_lint_errors: if error.line in old_to_new_line_no_mapping: new_line_no_to_original_errors[ old_to_new_line_no_mapping[error.line] ].append(error) # 5. Select errors from lint results in new file to report selected_errors = [] for error in updated_lint_errors: # 5.1. Error introduced by replace/insert if error.line in replace_or_inserted_lines: selected_errors.append(error) # 5.2. Error introduced by modified lines that impacted # the unchanged lines that HAVE pre-existing errors elif error.line in new_line_no_to_original_errors: # skip if the error is already reported # or add if the error is new if not any( original_error.message == error.message and original_error.column == error.column for original_error in new_line_no_to_original_errors[error.line] ): selected_errors.append(error) # 5.3. Error introduced by modified lines that impacted # the unchanged lines that have NO pre-existing errors else: selected_errors.append(error) # 6. Sort errors by line and column selected_errors.sort(key=lambda x: (x.line, x.column)) return selected_errors