agentskills.py 38 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
370470570670770870971071171271371471571671771871972072172272372472572672772872973073173273373473573673773873974074174274374474574674774874975075175275375475575675775875976076176276376476576676776876977077177277377477577677777877978078178278378478578678778878979079179279379479579679779879980080180280380480580680780880981081181281381481581681781881982082182282382482582682782882983083183283383483583683783883984084184284384484584684784884985085185285385485585685785885986086186286386486586686786886987087187287387487587687787887988088188288388488588688788888989089189289389489589689789889990090190290390490590690790890991091191291391491591691791891992092192292392492592692792892993093193293393493593693793893994094194294394494594694794894995095195295395495595695795895996096196296396496596696796896997097197297397497597697797897998098198298398498598698798898999099199299399499599699799899910001001100210031004100510061007100810091010101110121013101410151016101710181019102010211022102310241025102610271028102910301031103210331034103510361037103810391040104110421043104410451046104710481049105010511052105310541055105610571058105910601061106210631064106510661067106810691070107110721073107410751076107710781079108010811082108310841085108610871088108910901091109210931094109510961097109810991100110111021103110411051106110711081109111011111112111311141115
  1. """agentskills.py
  2. This module provides various file manipulation skills for the OpenDevin agent.
  3. Functions:
  4. - open_file(path: str, line_number: int | None = 1, context_lines: int = 100): Opens a file and optionally moves to a specific line.
  5. - goto_line(line_number): Moves the window to show the specified line number.
  6. - scroll_down(): Moves the window down by the number of lines specified in WINDOW.
  7. - scroll_up(): Moves the window up by the number of lines specified in WINDOW.
  8. - create_file(filename): Creates and opens a new file with the given name.
  9. - search_dir(search_term, dir_path='./'): Searches for a term in all files in the specified directory.
  10. - search_file(search_term, file_path=None): Searches for a term in the specified file or the currently open file.
  11. - find_file(file_name, dir_path='./'): Finds all files with the given name in the specified directory.
  12. - edit_file_by_replace(file_name: str, to_replace: str, new_content: str): Replaces lines in a file with the given content.
  13. - insert_content_at_line(file_name: str, line_number: int, content: str): Inserts given content at the specified line number in a file.
  14. - append_file(file_name: str, content: str): Appends the given content to the end of the specified file.
  15. """
  16. import base64
  17. import os
  18. import re
  19. import shutil
  20. import tempfile
  21. from inspect import signature
  22. import docx
  23. import PyPDF2
  24. from openai import OpenAI
  25. from pptx import Presentation
  26. from pylatexenc.latex2text import LatexNodes2Text
  27. if __package__ is None or __package__ == '':
  28. from aider import Linter
  29. else:
  30. from .aider import Linter
# Path of the file currently shown in the viewer; None until a file has been opened or edited.
CURRENT_FILE: str | None = None
# 1-based line number the viewer window is currently positioned on.
CURRENT_LINE = 1
# Number of lines shown per window; also the step used by scroll_up()/scroll_down().
WINDOW = 100

# Message appended to the output of a successful edit.
# This is also used in unit tests!
MSG_FILE_UPDATED = '[File updated (edited at line {line_number}). Please review the changes and make sure they are correct (correct indentation, no duplicate lines, etc). Edit the file again if necessary.]'
  36. # ==================================================================================================
  37. # OPENAI
  38. # TODO: Move this to EventStream Actions when EventStreamRuntime is fully implemented
  39. # NOTE: we need to get env vars inside functions because they will be set in IPython
  40. # AFTER the agentskills is imported (the case for EventStreamRuntime)
  41. # ==================================================================================================
  42. def _get_openai_api_key():
  43. return os.getenv('OPENAI_API_KEY', os.getenv('SANDBOX_ENV_OPENAI_API_KEY', ''))
  44. def _get_openai_base_url():
  45. return os.getenv('OPENAI_BASE_URL', 'https://api.openai.com/v1')
  46. def _get_openai_model():
  47. return os.getenv('OPENAI_MODEL', 'gpt-4o-2024-05-13')
  48. def _get_max_token():
  49. return os.getenv('MAX_TOKEN', 500)
  50. def _get_openai_client():
  51. client = OpenAI(api_key=_get_openai_api_key(), base_url=_get_openai_base_url())
  52. return client
  53. # ==================================================================================================
  54. def _is_valid_filename(file_name) -> bool:
  55. if not file_name or not isinstance(file_name, str) or not file_name.strip():
  56. return False
  57. invalid_chars = '<>:"/\\|?*'
  58. if os.name == 'nt': # Windows
  59. invalid_chars = '<>:"/\\|?*'
  60. elif os.name == 'posix': # Unix-like systems
  61. invalid_chars = '\0'
  62. for char in invalid_chars:
  63. if char in file_name:
  64. return False
  65. return True
  66. def _is_valid_path(path) -> bool:
  67. if not path or not isinstance(path, str):
  68. return False
  69. try:
  70. return os.path.exists(os.path.normpath(path))
  71. except PermissionError:
  72. return False
  73. def _create_paths(file_name) -> bool:
  74. try:
  75. dirname = os.path.dirname(file_name)
  76. if dirname:
  77. os.makedirs(dirname, exist_ok=True)
  78. return True
  79. except PermissionError:
  80. return False
  81. def _check_current_file(file_path: str | None = None) -> bool:
  82. global CURRENT_FILE
  83. if not file_path:
  84. file_path = CURRENT_FILE
  85. if not file_path or not os.path.isfile(file_path):
  86. raise ValueError('No file open. Use the open_file function first.')
  87. return True
  88. def _clamp(value, min_value, max_value):
  89. return max(min_value, min(value, max_value))
  90. def _lint_file(file_path: str) -> tuple[str | None, int | None]:
  91. """Lint the file at the given path and return a tuple with a boolean indicating if there are errors,
  92. and the line number of the first error, if any.
  93. Returns:
  94. tuple[str | None, int | None]: (lint_error, first_error_line_number)
  95. """
  96. linter = Linter(root=os.getcwd())
  97. lint_error = linter.lint(file_path)
  98. if not lint_error:
  99. # Linting successful. No issues found.
  100. return None, None
  101. return 'ERRORS:\n' + lint_error.text, lint_error.lines[0]
  102. def _print_window(file_path, targeted_line, window, return_str=False):
  103. global CURRENT_LINE
  104. _check_current_file(file_path)
  105. with open(file_path) as file:
  106. content = file.read()
  107. # Ensure the content ends with a newline character
  108. if not content.endswith('\n'):
  109. content += '\n'
  110. lines = content.splitlines(True) # Keep all line ending characters
  111. total_lines = len(lines)
  112. # cover edge cases
  113. CURRENT_LINE = _clamp(targeted_line, 1, total_lines)
  114. half_window = max(1, window // 2)
  115. # Ensure at least one line above and below the targeted line
  116. start = max(1, CURRENT_LINE - half_window)
  117. end = min(total_lines, CURRENT_LINE + half_window)
  118. # Adjust start and end to ensure at least one line above and below
  119. if start == 1:
  120. end = min(total_lines, start + window - 1)
  121. if end == total_lines:
  122. start = max(1, end - window + 1)
  123. output = ''
  124. # only display this when there's at least one line above
  125. if start > 1:
  126. output += f'({start - 1} more lines above)\n'
  127. else:
  128. output += '(this is the beginning of the file)\n'
  129. for i in range(start, end + 1):
  130. _new_line = f'{i}|{lines[i-1]}'
  131. if not _new_line.endswith('\n'):
  132. _new_line += '\n'
  133. output += _new_line
  134. if end < total_lines:
  135. output += f'({total_lines - end} more lines below)\n'
  136. else:
  137. output += '(this is the end of the file)\n'
  138. output = output.rstrip()
  139. if return_str:
  140. return output
  141. else:
  142. print(output)
  143. def _cur_file_header(current_file, total_lines) -> str:
  144. if not current_file:
  145. return ''
  146. return f'[File: {os.path.abspath(current_file)} ({total_lines} lines total)]\n'
  147. def open_file(
  148. path: str, line_number: int | None = 1, context_lines: int | None = WINDOW
  149. ) -> None:
  150. """Opens the file at the given path in the editor. If line_number is provided, the window will be moved to include that line.
  151. It only shows the first 100 lines by default! Max `context_lines` supported is 2000, use `scroll up/down`
  152. to view the file if you want to see more.
  153. Args:
  154. path: str: The path to the file to open, preferred absolute path.
  155. line_number: int | None = 1: The line number to move to. Defaults to 1.
  156. context_lines: int | None = 100: Only shows this number of lines in the context window (usually from line 1), with line_number as the center (if possible). Defaults to 100.
  157. """
  158. global CURRENT_FILE, CURRENT_LINE, WINDOW
  159. if not os.path.isfile(path):
  160. raise FileNotFoundError(f'File {path} not found')
  161. CURRENT_FILE = os.path.abspath(path)
  162. with open(CURRENT_FILE) as file:
  163. total_lines = max(1, sum(1 for _ in file))
  164. if not isinstance(line_number, int) or line_number < 1 or line_number > total_lines:
  165. raise ValueError(f'Line number must be between 1 and {total_lines}')
  166. CURRENT_LINE = line_number
  167. # Override WINDOW with context_lines
  168. if context_lines is None or context_lines < 1:
  169. context_lines = WINDOW
  170. output = _cur_file_header(CURRENT_FILE, total_lines)
  171. output += _print_window(
  172. CURRENT_FILE, CURRENT_LINE, _clamp(context_lines, 1, 2000), return_str=True
  173. )
  174. print(output)
  175. def goto_line(line_number: int) -> None:
  176. """Moves the window to show the specified line number.
  177. Args:
  178. line_number: int: The line number to move to.
  179. """
  180. global CURRENT_FILE, CURRENT_LINE, WINDOW
  181. _check_current_file()
  182. with open(str(CURRENT_FILE)) as file:
  183. total_lines = max(1, sum(1 for _ in file))
  184. if not isinstance(line_number, int) or line_number < 1 or line_number > total_lines:
  185. raise ValueError(f'Line number must be between 1 and {total_lines}')
  186. CURRENT_LINE = _clamp(line_number, 1, total_lines)
  187. output = _cur_file_header(CURRENT_FILE, total_lines)
  188. output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True)
  189. print(output)
  190. def scroll_down() -> None:
  191. """Moves the window down by 100 lines.
  192. Args:
  193. None
  194. """
  195. global CURRENT_FILE, CURRENT_LINE, WINDOW
  196. _check_current_file()
  197. with open(str(CURRENT_FILE)) as file:
  198. total_lines = max(1, sum(1 for _ in file))
  199. CURRENT_LINE = _clamp(CURRENT_LINE + WINDOW, 1, total_lines)
  200. output = _cur_file_header(CURRENT_FILE, total_lines)
  201. output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True)
  202. print(output)
  203. def scroll_up() -> None:
  204. """Moves the window up by 100 lines.
  205. Args:
  206. None
  207. """
  208. global CURRENT_FILE, CURRENT_LINE, WINDOW
  209. _check_current_file()
  210. with open(str(CURRENT_FILE)) as file:
  211. total_lines = max(1, sum(1 for _ in file))
  212. CURRENT_LINE = _clamp(CURRENT_LINE - WINDOW, 1, total_lines)
  213. output = _cur_file_header(CURRENT_FILE, total_lines)
  214. output += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True)
  215. print(output)
  216. def create_file(filename: str) -> None:
  217. """Creates and opens a new file with the given name.
  218. Args:
  219. filename: str: The name of the file to create.
  220. """
  221. if os.path.exists(filename):
  222. raise FileExistsError(f"File '{filename}' already exists.")
  223. with open(filename, 'w') as file:
  224. file.write('\n')
  225. open_file(filename)
  226. print(f'[File {filename} created.]')
# First line of the report returned by the edit helpers when auto-linting finds errors after an edit.
LINTER_ERROR_MSG = '[Your proposed edit has introduced new syntax error(s). Please understand the errors and retry your edit command.]\n'
  228. class LineNumberError(Exception):
  229. pass
  230. def _append_impl(lines, content):
  231. """Internal method to handle appending to a file.
  232. Args:
  233. lines: list[str]: The lines in the original file.
  234. content: str: The content to append to the file.
  235. Returns:
  236. content: str: The new content of the file.
  237. n_added_lines: int: The number of lines added to the file.
  238. """
  239. content_lines = content.splitlines(keepends=True)
  240. n_added_lines = len(content_lines)
  241. if lines and not (len(lines) == 1 and lines[0].strip() == ''):
  242. # file is not empty
  243. if not lines[-1].endswith('\n'):
  244. lines[-1] += '\n'
  245. new_lines = lines + content_lines
  246. content = ''.join(new_lines)
  247. else:
  248. # file is empty
  249. content = ''.join(content_lines)
  250. return content, n_added_lines
  251. def _insert_impl(lines, start, content):
  252. """Internal method to handle inserting to a file.
  253. Args:
  254. lines: list[str]: The lines in the original file.
  255. start: int: The start line number for inserting.
  256. content: str: The content to insert to the file.
  257. Returns:
  258. content: str: The new content of the file.
  259. n_added_lines: int: The number of lines added to the file.
  260. Raises:
  261. LineNumberError: If the start line number is invalid.
  262. """
  263. inserted_lines = [content + '\n' if not content.endswith('\n') else content]
  264. if len(lines) == 0:
  265. new_lines = inserted_lines
  266. elif start is not None:
  267. if len(lines) == 1 and lines[0].strip() == '':
  268. # if the file with only 1 line and that line is empty
  269. lines = []
  270. if len(lines) == 0:
  271. new_lines = inserted_lines
  272. else:
  273. new_lines = lines[: start - 1] + inserted_lines + lines[start - 1 :]
  274. else:
  275. raise LineNumberError(
  276. f'Invalid line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).'
  277. )
  278. content = ''.join(new_lines)
  279. n_added_lines = len(inserted_lines)
  280. return content, n_added_lines
  281. def _edit_impl(lines, start, end, content):
  282. """Internal method to handle editing a file.
  283. REQUIRES (should be checked by caller):
  284. start <= end
  285. start and end are between 1 and len(lines) (inclusive)
  286. content ends with a newline
  287. Args:
  288. lines: list[str]: The lines in the original file.
  289. start: int: The start line number for editing.
  290. end: int: The end line number for editing.
  291. content: str: The content to replace the lines with.
  292. Returns:
  293. content: str: The new content of the file.
  294. n_added_lines: int: The number of lines added to the file.
  295. """
  296. # Handle cases where start or end are None
  297. if start is None:
  298. start = 1 # Default to the beginning
  299. if end is None:
  300. end = len(lines) # Default to the end
  301. # Check arguments
  302. if not (1 <= start <= len(lines)):
  303. raise LineNumberError(
  304. f'Invalid start line number: {start}. Line numbers must be between 1 and {len(lines)} (inclusive).'
  305. )
  306. if not (1 <= end <= len(lines)):
  307. raise LineNumberError(
  308. f'Invalid end line number: {end}. Line numbers must be between 1 and {len(lines)} (inclusive).'
  309. )
  310. if start > end:
  311. raise LineNumberError(
  312. f'Invalid line range: {start}-{end}. Start must be less than or equal to end.'
  313. )
  314. if not content.endswith('\n'):
  315. content += '\n'
  316. content_lines = content.splitlines(True)
  317. n_added_lines = len(content_lines)
  318. new_lines = lines[: start - 1] + content_lines + lines[end:]
  319. content = ''.join(new_lines)
  320. return content, n_added_lines
def _edit_file_impl(
    file_name: str,
    start: int | None = None,
    end: int | None = None,
    content: str = '',
    is_insert: bool = False,
    is_append: bool = False,
) -> str:
    """Internal method to handle common logic for edit_/append_file methods.

    Builds the new file content with `_append_impl`, `_insert_impl` or
    `_edit_impl`, writes it to a temporary file and moves that file over the
    original. When the `ENABLE_AUTO_LINT` environment variable is 'true', the
    file is linted before and after the edit; if the edit introduced lint
    errors the original content is restored and an error report is returned.

    Args:
        file_name: str: The name of the file to edit or append to.
        start: int | None = None: The start line number for editing. Ignored if is_append is True.
        end: int | None = None: The end line number for editing. Ignored if is_append is True.
        content: str: The content to replace the lines with or to append.
        is_insert: bool = False: Whether to insert content at the given line number instead of editing.
        is_append: bool = False: Whether to append content to the file instead of editing.

    Returns:
        str: The text to show to the caller: either an error report, or the
        file header, a window around the edit and the MSG_FILE_UPDATED notice.

    Raises:
        FileNotFoundError: If the file name or path is invalid or is not an existing file.
        PermissionError: If the parent directories cannot be accessed or created.
        ValueError: If both is_insert and is_append are True.
    """
    ret_str = ''
    global CURRENT_FILE, CURRENT_LINE, WINDOW

    ERROR_MSG = f'[Error editing file {file_name}. Please confirm the file is correct.]'
    ERROR_MSG_SUFFIX = (
        'Your changes have NOT been applied. Please fix your edit command and try again.\n'
        'You either need to 1) Open the correct file and try again or 2) Specify the correct line number arguments.\n'
        'DO NOT re-run the same failed edit command. Running it again will lead to the same error.'
    )

    # Argument validation: these errors propagate to the caller (they are
    # raised before the try block below).
    if not _is_valid_filename(file_name):
        raise FileNotFoundError('Invalid file name.')

    if not _is_valid_path(file_name):
        raise FileNotFoundError('Invalid path or file name.')

    if not _create_paths(file_name):
        raise PermissionError('Could not access or create directories.')

    if not os.path.isfile(file_name):
        raise FileNotFoundError(f'File {file_name} not found.')

    if is_insert and is_append:
        raise ValueError('Cannot insert and append at the same time.')

    # Use a temporary file to write changes
    content = str(content or '')
    temp_file_path = ''
    src_abs_path = os.path.abspath(file_name)
    first_error_line = None

    try:
        n_added_lines = None

        # lint the original file
        # NOTE: the env var is read here, at call time, not at import time.
        enable_auto_lint = os.getenv('ENABLE_AUTO_LINT', 'false').lower() == 'true'
        if enable_auto_lint:
            original_lint_error, _ = _lint_file(file_name)

        # Create a temporary file
        with tempfile.NamedTemporaryFile('w', delete=False) as temp_file:
            temp_file_path = temp_file.name

            # Read the original file and check if empty and for a trailing newline
            with open(file_name) as original_file:
                lines = original_file.readlines()

            if is_append:
                content, n_added_lines = _append_impl(lines, content)
            elif is_insert:
                try:
                    content, n_added_lines = _insert_impl(lines, start, content)
                except LineNumberError as e:
                    # NOTE(review): this early return does not remove the
                    # temporary file created above (delete=False) -- confirm
                    # whether that leak is intended.
                    ret_str += (f'{ERROR_MSG}\n' f'{e}\n' f'{ERROR_MSG_SUFFIX}') + '\n'
                    return ret_str
            else:
                try:
                    content, n_added_lines = _edit_impl(lines, start, end, content)
                except LineNumberError as e:
                    # NOTE(review): same as above, the temporary file is left behind.
                    ret_str += (f'{ERROR_MSG}\n' f'{e}\n' f'{ERROR_MSG_SUFFIX}') + '\n'
                    return ret_str

            if not content.endswith('\n'):
                content += '\n'

            # Write the new content to the temporary file
            temp_file.write(content)

        # Replace the original file with the temporary file atomically
        shutil.move(temp_file_path, src_abs_path)

        # Handle linting
        # NOTE: we need to get env var inside this function
        # because the env var will be set AFTER the agentskills is imported
        if enable_auto_lint:
            # BACKUP the original file
            original_file_backup_path = os.path.join(
                os.path.dirname(file_name),
                f'.backup.{os.path.basename(file_name)}',
            )
            with open(original_file_backup_path, 'w') as f:
                f.writelines(lines)

            lint_error, first_error_line = _lint_file(file_name)

            # Select the errors caused by the modification
            def extract_last_part(line):
                # Text after the last ':' of a lint line, or the whole line if it has no ':'.
                parts = line.split(':')
                if len(parts) > 1:
                    return parts[-1].strip()
                return line.strip()

            def subtract_strings(str1, str2) -> str:
                # Keep the lines of str2 whose last part does not occur among
                # the last parts of str1's lines.
                lines1 = str1.splitlines()
                lines2 = str2.splitlines()

                last_parts1 = [extract_last_part(line) for line in lines1]

                remaining_lines = [
                    line
                    for line in lines2
                    if extract_last_part(line) not in last_parts1
                ]

                result = '\n'.join(remaining_lines)
                return result

            # Errors that already existed before the edit are not held against it.
            if original_lint_error and lint_error:
                lint_error = subtract_strings(original_lint_error, lint_error)
                if lint_error == '':
                    lint_error = None
                    first_error_line = None

            if lint_error is not None:
                # Pick the line around which both versions are displayed.
                if first_error_line is not None:
                    show_line = int(first_error_line)
                elif is_append:
                    # original end-of-file
                    show_line = len(lines)
                # insert OR edit WILL provide meaningful line numbers
                elif start is not None and end is not None:
                    show_line = int((start + end) / 2)
                else:
                    raise ValueError('Invalid state. This should never happen.')

                ret_str += LINTER_ERROR_MSG
                ret_str += lint_error + '\n'

                editor_lines = n_added_lines + 20

                ret_str += '[This is how your edit would have looked if applied]\n'
                ret_str += '-------------------------------------------------\n'
                ret_str += (
                    _print_window(file_name, show_line, editor_lines, return_str=True)
                    + '\n'
                )
                ret_str += '-------------------------------------------------\n\n'

                ret_str += '[This is the original code before your edit]\n'
                ret_str += '-------------------------------------------------\n'
                ret_str += (
                    _print_window(
                        original_file_backup_path,
                        show_line,
                        editor_lines,
                        return_str=True,
                    )
                    + '\n'
                )
                ret_str += '-------------------------------------------------\n'

                ret_str += (
                    'Your changes have NOT been applied. Please fix your edit command and try again.\n'
                    'You either need to 1) Specify the correct start/end line arguments or 2) Correct your edit code.\n'
                    'DO NOT re-run the same failed edit command. Running it again will lead to the same error.'
                )

                # recover the original file
                with open(original_file_backup_path) as fin, open(
                    file_name, 'w'
                ) as fout:
                    fout.write(fin.read())
                # NOTE(review): the backup file appears to be removed only on
                # this failure path; when linting passes it seems to stay on
                # disk -- confirm.
                os.remove(original_file_backup_path)
                return ret_str

    except FileNotFoundError as e:
        ret_str += f'File not found: {e}\n'
    except IOError as e:
        ret_str += f'An error occurred while handling the file: {e}\n'
    except ValueError as e:
        ret_str += f'Invalid input: {e}\n'
    except Exception as e:
        # Clean up the temporary file if an error occurs
        if temp_file_path and os.path.exists(temp_file_path):
            os.remove(temp_file_path)
        print(f'An unexpected error occurred: {e}')
        raise e

    # NOTE(review): after one of the handled exceptions above, execution
    # continues here, where `lines` may not have been assigned -- confirm.

    # Update the file information and print the updated content
    with open(file_name, 'r', encoding='utf-8') as file:
        n_total_lines = max(1, len(file.readlines()))
    if first_error_line is not None and int(first_error_line) > 0:
        CURRENT_LINE = first_error_line
    else:
        if is_append:
            CURRENT_LINE = max(1, len(lines))  # end of original file
        else:
            # Falls back to the last line when start is None (or 0).
            CURRENT_LINE = start or n_total_lines or 1
    ret_str += f'[File: {os.path.abspath(file_name)} ({n_total_lines} lines total after edit)]\n'
    CURRENT_FILE = file_name
    ret_str += _print_window(CURRENT_FILE, CURRENT_LINE, WINDOW, return_str=True) + '\n'
    ret_str += MSG_FILE_UPDATED.format(line_number=CURRENT_LINE)
    return ret_str
  499. def edit_file_by_replace(file_name: str, to_replace: str, new_content: str) -> None:
  500. """Edit a file. This will search for `to_replace` in the given file and replace it with `new_content`.
  501. Every *to_replace* must *EXACTLY MATCH* the existing source code, character for character, including all comments, docstrings, etc.
  502. Include enough lines to make code in `to_replace` unique. `to_replace` should NOT be empty.
  503. For example, given a file "/workspace/example.txt" with the following content:
  504. ```
  505. line 1
  506. line 2
  507. line 2
  508. line 3
  509. ```
  510. EDITING: If you want to replace the second occurrence of "line 2", you can make `to_replace` unique:
  511. edit_file_by_replace(
  512. '/workspace/example.txt',
  513. to_replace='line 2\nline 3',
  514. new_content='new line\nline 3',
  515. )
  516. This will replace only the second "line 2" with "new line". The first "line 2" will remain unchanged.
  517. The resulting file will be:
  518. ```
  519. line 1
  520. line 2
  521. new line
  522. line 3
  523. ```
  524. REMOVAL: If you want to remove "line 2" and "line 3", you can set `new_content` to an empty string:
  525. edit_file_by_replace(
  526. '/workspace/example.txt',
  527. to_replace='line 2\nline 3',
  528. new_content='',
  529. )
  530. Args:
  531. file_name: str: The name of the file to edit.
  532. to_replace: str: The content to search for and replace.
  533. new_content: str: The new content to replace the old content with.
  534. """
  535. # FIXME: support replacing *all* occurrences
  536. if to_replace.strip() == '':
  537. raise ValueError('`to_replace` must not be empty.')
  538. if to_replace == new_content:
  539. raise ValueError('`to_replace` and `new_content` must be different.')
  540. # search for `to_replace` in the file
  541. # if found, replace it with `new_content`
  542. # if not found, perform a fuzzy search to find the closest match and replace it with `new_content`
  543. with open(file_name, 'r') as file:
  544. file_content = file.read()
  545. if file_content.count(to_replace) > 1:
  546. raise ValueError(
  547. '`to_replace` appears more than once, please include enough lines to make code in `to_replace` unique.'
  548. )
  549. start = file_content.find(to_replace)
  550. if start != -1:
  551. # Convert start from index to line number
  552. start_line_number = file_content[:start].count('\n') + 1
  553. end_line_number = start_line_number + len(to_replace.splitlines()) - 1
  554. else:
  555. def _fuzzy_transform(s: str) -> str:
  556. # remove all space except newline
  557. return re.sub(r'[^\S\n]+', '', s)
  558. # perform a fuzzy search (remove all spaces except newlines)
  559. to_replace_fuzzy = _fuzzy_transform(to_replace)
  560. file_content_fuzzy = _fuzzy_transform(file_content)
  561. # find the closest match
  562. start = file_content_fuzzy.find(to_replace_fuzzy)
  563. if start == -1:
  564. print(
  565. f'[No exact match found in {file_name} for\n```\n{to_replace}\n```\n]'
  566. )
  567. return
  568. # Convert start from index to line number for fuzzy match
  569. start_line_number = file_content_fuzzy[:start].count('\n') + 1
  570. end_line_number = start_line_number + len(to_replace.splitlines()) - 1
  571. ret_str = _edit_file_impl(
  572. file_name,
  573. start=start_line_number,
  574. end=end_line_number,
  575. content=new_content,
  576. is_insert=False,
  577. )
  578. # lint_error = bool(LINTER_ERROR_MSG in ret_str)
  579. # TODO: automatically tries to fix linter error (maybe involve some static analysis tools on the location near the edit to figure out indentation)
  580. print(ret_str)
  581. def insert_content_at_line(file_name: str, line_number: int, content: str) -> None:
  582. """Insert content at the given line number in a file.
  583. This will NOT modify the content of the lines before OR after the given line number.
  584. For example, if the file has the following content:
  585. ```
  586. line 1
  587. line 2
  588. line 3
  589. ```
  590. and you call `insert_content_at_line('file.txt', 2, 'new line')`, the file will be updated to:
  591. ```
  592. line 1
  593. new line
  594. line 2
  595. line 3
  596. ```
  597. Args:
  598. file_name: str: The name of the file to edit.
  599. line_number: int: The line number (starting from 1) to insert the content after.
  600. content: str: The content to insert.
  601. """
  602. ret_str = _edit_file_impl(
  603. file_name,
  604. start=line_number,
  605. end=line_number,
  606. content=content,
  607. is_insert=True,
  608. is_append=False,
  609. )
  610. print(ret_str)
  611. def append_file(file_name: str, content: str) -> None:
  612. """Append content to the given file.
  613. It appends text `content` to the end of the specified file.
  614. Args:
  615. file_name: str: The name of the file to edit.
  616. line_number: int: The line number (starting from 1) to insert the content after.
  617. content: str: The content to insert.
  618. """
  619. ret_str = _edit_file_impl(
  620. file_name,
  621. start=None,
  622. end=None,
  623. content=content,
  624. is_insert=False,
  625. is_append=True,
  626. )
  627. print(ret_str)
  628. def search_dir(search_term: str, dir_path: str = './') -> None:
  629. """Searches for search_term in all files in dir. If dir is not provided, searches in the current directory.
  630. Args:
  631. search_term: str: The term to search for.
  632. dir_path: str: The path to the directory to search.
  633. """
  634. if not os.path.isdir(dir_path):
  635. raise FileNotFoundError(f'Directory {dir_path} not found')
  636. matches = []
  637. for root, _, files in os.walk(dir_path):
  638. for file in files:
  639. if file.startswith('.'):
  640. continue
  641. file_path = os.path.join(root, file)
  642. with open(file_path, 'r', errors='ignore') as f:
  643. for line_num, line in enumerate(f, 1):
  644. if search_term in line:
  645. matches.append((file_path, line_num, line.strip()))
  646. if not matches:
  647. print(f'No matches found for "{search_term}" in {dir_path}')
  648. return
  649. num_matches = len(matches)
  650. num_files = len(set(match[0] for match in matches))
  651. if num_files > 100:
  652. print(
  653. f'More than {num_files} files matched for "{search_term}" in {dir_path}. Please narrow your search.'
  654. )
  655. return
  656. print(f'[Found {num_matches} matches for "{search_term}" in {dir_path}]')
  657. for file_path, line_num, line in matches:
  658. print(f'{file_path} (Line {line_num}): {line}')
  659. print(f'[End of matches for "{search_term}" in {dir_path}]')
  660. def search_file(search_term: str, file_path: str | None = None) -> None:
  661. """Searches for search_term in file. If file is not provided, searches in the current open file.
  662. Args:
  663. search_term: str: The term to search for.
  664. file_path: str | None: The path to the file to search.
  665. """
  666. global CURRENT_FILE
  667. if file_path is None:
  668. file_path = CURRENT_FILE
  669. if file_path is None:
  670. raise FileNotFoundError(
  671. 'No file specified or open. Use the open_file function first.'
  672. )
  673. if not os.path.isfile(file_path):
  674. raise FileNotFoundError(f'File {file_path} not found')
  675. matches = []
  676. with open(file_path) as file:
  677. for i, line in enumerate(file, 1):
  678. if search_term in line:
  679. matches.append((i, line.strip()))
  680. if matches:
  681. print(f'[Found {len(matches)} matches for "{search_term}" in {file_path}]')
  682. for match in matches:
  683. print(f'Line {match[0]}: {match[1]}')
  684. print(f'[End of matches for "{search_term}" in {file_path}]')
  685. else:
  686. print(f'[No matches found for "{search_term}" in {file_path}]')
  687. def find_file(file_name: str, dir_path: str = './') -> None:
  688. """Finds all files with the given name in the specified directory.
  689. Args:
  690. file_name: str: The name of the file to find.
  691. dir_path: str: The path to the directory to search.
  692. """
  693. if not os.path.isdir(dir_path):
  694. raise FileNotFoundError(f'Directory {dir_path} not found')
  695. matches = []
  696. for root, _, files in os.walk(dir_path):
  697. for file in files:
  698. if file_name in file:
  699. matches.append(os.path.join(root, file))
  700. if matches:
  701. print(f'[Found {len(matches)} matches for "{file_name}" in {dir_path}]')
  702. for match in matches:
  703. print(f'{match}')
  704. print(f'[End of matches for "{file_name}" in {dir_path}]')
  705. else:
  706. print(f'[No matches found for "{file_name}" in {dir_path}]')
  707. def parse_pdf(file_path: str) -> None:
  708. """Parses the content of a PDF file and prints it.
  709. Args:
  710. file_path: str: The path to the file to open.
  711. """
  712. print(f'[Reading PDF file from {file_path}]')
  713. content = PyPDF2.PdfReader(file_path)
  714. text = ''
  715. for page_idx in range(len(content.pages)):
  716. text += (
  717. f'@@ Page {page_idx + 1} @@\n'
  718. + content.pages[page_idx].extract_text()
  719. + '\n\n'
  720. )
  721. print(text.strip())
  722. def parse_docx(file_path: str) -> None:
  723. """Parses the content of a DOCX file and prints it.
  724. Args:
  725. file_path: str: The path to the file to open.
  726. """
  727. print(f'[Reading DOCX file from {file_path}]')
  728. content = docx.Document(file_path)
  729. text = ''
  730. for i, para in enumerate(content.paragraphs):
  731. text += f'@@ Page {i + 1} @@\n' + para.text + '\n\n'
  732. print(text)
  733. def parse_latex(file_path: str) -> None:
  734. """Parses the content of a LaTex file and prints it.
  735. Args:
  736. file_path: str: The path to the file to open.
  737. """
  738. print(f'[Reading LaTex file from {file_path}]')
  739. with open(file_path) as f:
  740. data = f.read()
  741. text = LatexNodes2Text().latex_to_text(data)
  742. print(text.strip())
  743. def _base64_img(file_path: str) -> str:
  744. with open(file_path, 'rb') as image_file:
  745. encoded_image = base64.b64encode(image_file.read()).decode('utf-8')
  746. return encoded_image
  747. def _base64_video(file_path: str, frame_interval: int = 10) -> list[str]:
  748. import cv2
  749. video = cv2.VideoCapture(file_path)
  750. base64_frames = []
  751. frame_count = 0
  752. while video.isOpened():
  753. success, frame = video.read()
  754. if not success:
  755. break
  756. if frame_count % frame_interval == 0:
  757. _, buffer = cv2.imencode('.jpg', frame)
  758. base64_frames.append(base64.b64encode(buffer).decode('utf-8'))
  759. frame_count += 1
  760. video.release()
  761. return base64_frames
  762. def _prepare_image_messages(task: str, base64_image: str):
  763. return [
  764. {
  765. 'role': 'user',
  766. 'content': [
  767. {'type': 'text', 'text': task},
  768. {
  769. 'type': 'image_url',
  770. 'image_url': {'url': f'data:image/jpeg;base64,{base64_image}'},
  771. },
  772. ],
  773. }
  774. ]
  775. def parse_audio(file_path: str, model: str = 'whisper-1') -> None:
  776. """Parses the content of an audio file and prints it.
  777. Args:
  778. file_path: str: The path to the audio file to transcribe.
  779. model: str: The audio model to use for transcription. Defaults to 'whisper-1'.
  780. """
  781. print(f'[Transcribing audio file from {file_path}]')
  782. try:
  783. # TODO: record the COST of the API call
  784. with open(file_path, 'rb') as audio_file:
  785. transcript = _get_openai_client().audio.translations.create(
  786. model=model, file=audio_file
  787. )
  788. print(transcript.text)
  789. except Exception as e:
  790. print(f'Error transcribing audio file: {e}')
  791. def parse_image(
  792. file_path: str, task: str = 'Describe this image as detail as possible.'
  793. ) -> None:
  794. """Parses the content of an image file and prints the description.
  795. Args:
  796. file_path: str: The path to the file to open.
  797. task: str: The task description for the API call. Defaults to 'Describe this image as detail as possible.'.
  798. """
  799. print(f'[Reading image file from {file_path}]')
  800. # TODO: record the COST of the API call
  801. try:
  802. base64_image = _base64_img(file_path)
  803. response = _get_openai_client().chat.completions.create(
  804. model=_get_openai_model(),
  805. messages=_prepare_image_messages(task, base64_image),
  806. max_tokens=_get_max_token(),
  807. )
  808. content = response.choices[0].message.content
  809. print(content)
  810. except Exception as error:
  811. print(f'Error with the request: {error}')
  812. def parse_video(
  813. file_path: str,
  814. task: str = 'Describe this image as detail as possible.',
  815. frame_interval: int = 30,
  816. ) -> None:
  817. """Parses the content of an image file and prints the description.
  818. Args:
  819. file_path: str: The path to the video file to open.
  820. task: str: The task description for the API call. Defaults to 'Describe this image as detail as possible.'.
  821. frame_interval: int: The interval between frames to analyze. Defaults to 30.
  822. """
  823. print(
  824. f'[Processing video file from {file_path} with frame interval {frame_interval}]'
  825. )
  826. task = task or 'This is one frame from a video, please summarize this frame.'
  827. base64_frames = _base64_video(file_path)
  828. selected_frames = base64_frames[::frame_interval]
  829. if len(selected_frames) > 30:
  830. new_interval = len(base64_frames) // 30
  831. selected_frames = base64_frames[::new_interval]
  832. print(f'Totally {len(selected_frames)} would be analyze...\n')
  833. idx = 0
  834. for base64_frame in selected_frames:
  835. idx += 1
  836. print(f'Process the {file_path}, current No. {idx * frame_interval} frame...')
  837. # TODO: record the COST of the API call
  838. try:
  839. response = _get_openai_client().chat.completions.create(
  840. model=_get_openai_model(),
  841. messages=_prepare_image_messages(task, base64_frame),
  842. max_tokens=_get_max_token(),
  843. )
  844. content = response.choices[0].message.content
  845. current_frame_content = f"Frame {idx}'s content: {content}\n"
  846. print(current_frame_content)
  847. except Exception as error:
  848. print(f'Error with the request: {error}')
  849. def parse_pptx(file_path: str) -> None:
  850. """Parses the content of a pptx file and prints it.
  851. Args:
  852. file_path: str: The path to the file to open.
  853. """
  854. print(f'[Reading PowerPoint file from {file_path}]')
  855. try:
  856. pres = Presentation(str(file_path))
  857. text = []
  858. for slide_idx, slide in enumerate(pres.slides):
  859. text.append(f'@@ Slide {slide_idx + 1} @@')
  860. for shape in slide.shapes:
  861. if hasattr(shape, 'text'):
  862. text.append(shape.text)
  863. print('\n'.join(text))
  864. except Exception as e:
  865. print(f'Error reading PowerPoint file: {e}')
# Public skills exported by this module. The order matters: the DOCUMENTATION
# string built below iterates over this list and emits one entry per name.
__all__ = [
    # file operations (open/navigate, create/edit, search)
    'open_file',
    'goto_line',
    'scroll_down',
    'scroll_up',
    'create_file',
    'edit_file_by_replace',
    'insert_content_at_line',
    'append_file',
    'search_dir',
    'search_file',
    'find_file',
    # document readers (PDF, DOCX, LaTeX, PPTX)
    'parse_pdf',
    'parse_docx',
    'parse_latex',
    'parse_pptx',
]
  885. # This is called from OpenDevin's side
  886. # If SANDBOX_ENV_OPENAI_API_KEY is set, we will be able to use these tools in the sandbox environment
  887. if _get_openai_api_key() and _get_openai_base_url():
  888. __all__ += ['parse_audio', 'parse_video', 'parse_image']
  889. DOCUMENTATION = ''
  890. for func_name in __all__:
  891. func = globals()[func_name]
  892. cur_doc = func.__doc__
  893. # remove indentation from docstring and extra empty lines
  894. cur_doc = '\n'.join(filter(None, map(lambda x: x.strip(), cur_doc.split('\n'))))
  895. # now add a consistent 4 indentation
  896. cur_doc = '\n'.join(map(lambda x: ' ' * 4 + x, cur_doc.split('\n')))
  897. fn_signature = f'{func.__name__}' + str(signature(func))
  898. DOCUMENTATION += f'{fn_signature}:\n{cur_doc}\n\n'