| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122 |
- import os
- from collections import defaultdict
- from difflib import SequenceMatcher
- from openhands.linter.base import BaseLinter, LinterException, LintResult
- from openhands.linter.languages.python import PythonLinter
- from openhands.linter.languages.treesitter import TreesitterBasicLinter
class DefaultLinter(BaseLinter):
    """Linter that dispatches to a list of linters chosen by file extension.

    ``.py`` files are handled by ``PythonLinter`` first. The tree-sitter
    linter is appended to the list of every extension it reports as
    supported, so it acts as a lower-priority fallback.
    """

    def __init__(self):
        # Extension (including the leading dot) -> linters in priority order.
        self.linters: dict[str, list[BaseLinter]] = defaultdict(list)
        self.linters['.py'] = [PythonLinter()]

        # Add treesitter linter as a fallback for all linters
        self.basic_linter = TreesitterBasicLinter()
        for extension in self.basic_linter.supported_extensions:
            self.linters[extension].append(self.basic_linter)
        self._supported_extensions = list(self.linters.keys())

    @property
    def supported_extensions(self) -> list[str]:
        """Return every file extension that has at least one linter registered."""
        return self._supported_extensions

    def lint(self, file_path: str) -> list[LintResult]:
        """Lint a single file with the linters registered for its extension.

        Args:
            file_path: Absolute path of the file to lint.

        Returns:
            The first non-empty result produced by the registered linters, in
            priority order, or an empty list when no linter is registered for
            the extension or none of them reports anything.

        Raises:
            LinterException: If ``file_path`` is not an absolute path.
        """
        if not os.path.isabs(file_path):
            raise LinterException(f'File path {file_path} is not an absolute path')
        file_extension = os.path.splitext(file_path)[1]
        # ``.get`` (rather than indexing) avoids inserting an empty entry into
        # the defaultdict for unknown extensions.
        linters: list[BaseLinter] = self.linters.get(file_extension, [])
        for linter in linters:
            res = linter.lint(file_path)
            # We always return the first linter's result (higher priority)
            if res:
                return res
        return []

    @staticmethod
    def _diff_line_numbers(
        old_lines: list[str], new_lines: list[str]
    ) -> tuple[dict[int, int], set[int]]:
        """Classify lines of the updated file as unchanged or replaced/inserted.

        Args:
            old_lines: Lines of the original file.
            new_lines: Lines of the updated file.

        Returns:
            A pair ``(old_to_new, changed)`` where ``old_to_new`` maps the
            1-based line number of every unchanged line in the original file
            to its 1-based line number in the updated file, and ``changed`` is
            the set of 1-based line numbers in the updated file that were
            replaced or inserted.
        """
        old_to_new: dict[int, int] = {}
        changed: set[int] = set()
        matcher = SequenceMatcher(isjunk=None, a=old_lines, b=new_lines)
        for tag, old_start, old_end, new_start, new_end in matcher.get_opcodes():
            if tag == 'equal':
                # Equal blocks have the same length on both sides.
                for offset in range(old_end - old_start):
                    old_to_new[old_start + offset + 1] = new_start + offset + 1
            elif tag in ('replace', 'insert'):
                # The affected lines of the UPDATED file are
                # new_lines[new_start:new_end]; the old range is empty for
                # 'insert' and may differ in length for 'replace', so it must
                # not be used to enumerate them.
                changed.update(range(new_start + 1, new_end + 1))
            # 'delete' contributes no lines to the updated file.
        return old_to_new, changed

    def lint_file_diff(
        self, original_file_path: str, updated_file_path: str
    ) -> list[LintResult]:
        """Only return lint errors that are introduced by the diff.

        Args:
            original_file_path: The original file path.
            updated_file_path: The updated file path.

        Returns:
            A list of lint errors that are introduced by the diff, sorted by
            line and then column.

        Raises:
            LinterException: If either path is not an absolute path.
        """
        # 1. Lint the original and updated file
        original_lint_errors: list[LintResult] = self.lint(original_file_path)
        updated_lint_errors: list[LintResult] = self.lint(updated_file_path)

        # 2. Load the original and updated file content
        with open(original_file_path, 'r') as f:
            old_lines = f.readlines()
        with open(updated_file_path, 'r') as f:
            new_lines = f.readlines()

        # 3. Get line numbers that are changed & unchanged
        # NOTE: the old -> new mapping only covers lines that are not changed
        # (i.e., equal)
        old_to_new_line_no_mapping, replace_or_inserted_lines = (
            self._diff_line_numbers(old_lines, new_lines)
        )

        # 4. Get pre-existing errors in unchanged lines, keyed by the line
        # number they now have in the updated file, so that errors already
        # present in the original file can be omitted from the report.
        new_line_no_to_original_errors: dict[int, list[LintResult]] = defaultdict(list)
        for error in original_lint_errors:
            if error.line in old_to_new_line_no_mapping:
                new_line_no_to_original_errors[
                    old_to_new_line_no_mapping[error.line]
                ].append(error)

        # 5. Select errors from lint results in new file to report
        selected_errors = []
        for error in updated_lint_errors:
            # 5.1. Error introduced by replace/insert
            if error.line in replace_or_inserted_lines:
                selected_errors.append(error)
            # 5.2. Error on an unchanged line that HAS pre-existing errors:
            # skip it if the same error (message and column) was already
            # there, otherwise report it as new.
            elif error.line in new_line_no_to_original_errors:
                if not any(
                    original_error.message == error.message
                    and original_error.column == error.column
                    for original_error in new_line_no_to_original_errors[error.line]
                ):
                    selected_errors.append(error)
            # 5.3. Error on an unchanged line that has NO pre-existing errors,
            # i.e. caused indirectly by the modified lines
            else:
                selected_errors.append(error)

        # 6. Sort errors by line and column
        selected_errors.sort(key=lambda x: (x.line, x.column))
        return selected_errors
|