test_agent_skill.py 23 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717
  1. import contextlib
  2. import io
  3. import sys
  4. import docx
  5. import pytest
  6. from openhands.runtime.plugins.agent_skills.file_ops.file_ops import (
  7. WINDOW,
  8. _print_window,
  9. find_file,
  10. goto_line,
  11. open_file,
  12. scroll_down,
  13. scroll_up,
  14. search_dir,
  15. search_file,
  16. )
  17. from openhands.runtime.plugins.agent_skills.file_reader.file_readers import (
  18. parse_docx,
  19. parse_latex,
  20. parse_pdf,
  21. parse_pptx,
  22. )
  # The shared CURRENT_FILE global has to be cleared ahead of every test.
  @pytest.fixture(autouse=True)
  def reset_current_file():
      """Set ``agentskills.CURRENT_FILE`` to ``None``.

      Declared with ``autouse=True`` so pytest applies it to every test in
      this module without the test having to request it.
      """
      from openhands.runtime.plugins.agent_skills import agentskills as skills

      skills.CURRENT_FILE = None
  def _numbered_test_lines(start, end) -> str:
      """Return the text ``'<n>|'`` for every n in ``start..end`` (inclusive).

      The entries are joined with newlines and one trailing newline is
      appended, so an empty range produces a single ``'\\n'``.
      """
      numbered = [f'{line_no}|' for line_no in range(start, end + 1)]
      joined = '\n'.join(numbered)
      return joined + '\n'
  def _generate_test_file_with_lines(temp_path, num_lines) -> str:
      """Create ``test_file.py`` under ``temp_path`` holding ``num_lines`` newlines.

      Returns the object produced by ``temp_path / 'test_file.py'``.
      NOTE(review): the ``-> str`` annotation does not match that when
      ``temp_path`` is a ``pathlib.Path`` -- confirm what callers expect.
      """
      blank_content = '\n' * num_lines
      target = temp_path / 'test_file.py'
      target.write_text(blank_content)
      return target
  def _generate_ruby_test_file_with_lines(temp_path, num_lines) -> str:
      """Create ``test_file.rb`` under ``temp_path`` holding ``num_lines`` newlines.

      Returns the object produced by ``temp_path / 'test_file.rb'``.
      NOTE(review): the ``-> str`` annotation does not match that when
      ``temp_path`` is a ``pathlib.Path`` -- confirm what callers expect.
      """
      blank_content = '\n' * num_lines
      target = temp_path / 'test_file.rb'
      target.write_text(blank_content)
      return target
  def _calculate_window_bounds(current_line, total_lines, window_size):
      """Calculate the bounds of the window around the current line.

      Args:
          current_line: 1-based line number the window is centred on.
          total_lines: Number of lines in the file; ``end`` never exceeds it.
          window_size: Nominal number of lines in one window.

      Returns:
          A ``(start, end)`` tuple of 1-based, inclusive line numbers.
      """
      half_window = window_size // 2
      # Compare against 1, not 0: line numbers are 1-based, so a centred start
      # of exactly 0 must also snap to the top of the file.
      if current_line - half_window < 1:
          start = 1
          end = window_size
      else:
          start = current_line - half_window
          end = current_line + half_window
      # A window cannot extend past the last line of the file.
      return start, min(end, total_lines)
  def _capture_file_operation_error(operation, expected_error_msg):
      """Call ``operation`` with stdout captured and check what it printed.

      The captured text is stripped of surrounding whitespace and must equal
      ``expected_error_msg``; otherwise ``AssertionError`` is raised.
      """
      with io.StringIO() as captured, contextlib.redirect_stdout(captured):
          operation()
          printed = captured.getvalue()
      assert printed.strip() == expected_error_msg
  # Separator line: 49 dashes terminated by a newline (50 characters in total).
  SEP = '\n'.rjust(50, '-')
  # =============================================================================
  def test_open_file_unexist_path():
      """``open_file`` on a missing path prints a 'not found' error."""

      def open_missing():
          open_file('/unexist/path/a.txt')

      _capture_file_operation_error(
          open_missing,
          'ERROR: File /unexist/path/a.txt not found.',
      )
  def test_open_file(tmp_path):
      """``open_file`` on a five-line file prints the header, all lines and both boundary markers."""
      assert tmp_path is not None
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')

      # redirect_stdout hands back the buffer it was given.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          open_file(str(temp_file_path))
      result = captured.getvalue()
      assert result is not None

      expected = ''.join(
          [
              f'[File: {temp_file_path} (5 lines total)]\n',
              '(this is the beginning of the file)\n',
              '1|Line 1\n',
              '2|Line 2\n',
              '3|Line 3\n',
              '4|Line 4\n',
              '5|Line 5\n',
              '(this is the end of the file)\n',
          ]
      )
      assert result.split('\n') == expected.split('\n')
  def test_open_file_with_indentation(tmp_path):
      """Leading whitespace on a line must be reproduced in the ``open_file`` output."""
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('Line 1\n Line 2\nLine 3\nLine 4\nLine 5')

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          open_file(str(temp_file_path))
      result = captured.getvalue()
      assert result is not None

      expected = ''.join(
          [
              f'[File: {temp_file_path} (5 lines total)]\n',
              '(this is the beginning of the file)\n',
              '1|Line 1\n',
              '2| Line 2\n',
              '3|Line 3\n',
              '4|Line 4\n',
              '5|Line 5\n',
              '(this is the end of the file)\n',
          ]
      )
      assert result.split('\n') == expected.split('\n')
  def test_open_file_long(tmp_path):
      """``open_file(path, 1, 50)`` on a 1000-line file shows lines 1-50 and the scroll hint."""
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('\n'.join(f'Line {n}' for n in range(1, 1001)))

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          open_file(str(temp_file_path), 1, 50)
      result = captured.getvalue()
      assert result is not None

      pieces = [
          f'[File: {temp_file_path} (1000 lines total)]\n',
          '(this is the beginning of the file)\n',
      ]
      pieces.extend(f'{n}|Line {n}\n' for n in range(1, 51))
      pieces.append('(950 more lines below)\n')
      pieces.append('[Use `scroll_down` to view the next 100 lines of the file!]\n')
      expected = ''.join(pieces)
      assert result.split('\n') == expected.split('\n')
  def test_open_file_long_with_lineno(tmp_path):
      """Opening a 1000-line file at line 100 shows the window computed around that line."""
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('\n'.join(f'Line {n}' for n in range(1, 1001)))
      cur_line = 100

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          open_file(str(temp_file_path), cur_line)
      result = captured.getvalue()
      assert result is not None

      # The expected window is whatever _calculate_window_bounds reports for
      # cur_line; the markers above/below depend on whether it touches an edge.
      start, end = _calculate_window_bounds(cur_line, 1000, WINDOW)
      above = (
          '(this is the beginning of the file)\n'
          if start == 1
          else f'({start - 1} more lines above)\n'
      )
      below = (
          '(this is the end of the file)\n'
          if end == 1000
          else f'({1000 - end} more lines below)\n'
      )
      pieces = [f'[File: {temp_file_path} (1000 lines total)]\n', above]
      pieces.extend(f'{n}|Line {n}\n' for n in range(start, end + 1))
      pieces.append(below)
      pieces.append('[Use `scroll_down` to view the next 100 lines of the file!]\n')
      expected = ''.join(pieces)
      assert result.split('\n') == expected.split('\n')
  def test_goto_line(tmp_path):
      """After opening a 1000-line file, ``goto_line(500)`` prints the window around line 500."""
      temp_file_path = tmp_path / 'a.txt'
      total_lines = 1000
      temp_file_path.write_text(
          '\n'.join(f'Line {n}' for n in range(1, total_lines + 1))
      )
      header = f'[File: {temp_file_path} ({total_lines} lines total)]\n'

      # Step 1: a plain open shows the first WINDOW lines plus the scroll hint.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          open_file(str(temp_file_path))
      result = captured.getvalue()
      assert result is not None

      pieces = [header, '(this is the beginning of the file)\n']
      pieces.extend(f'{n}|Line {n}\n' for n in range(1, WINDOW + 1))
      pieces.append(f'({total_lines - WINDOW} more lines below)\n')
      pieces.append('[Use `scroll_down` to view the next 100 lines of the file!]\n')
      expected = ''.join(pieces)
      assert result.split('\n') == expected.split('\n')

      # Step 2: jump to line 500; no scroll hint is expected in this output.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          goto_line(500)
      result = captured.getvalue()
      assert result is not None

      cur_line = 500
      start, end = _calculate_window_bounds(cur_line, total_lines, WINDOW)
      above = (
          '(this is the beginning of the file)\n'
          if start == 1
          else f'({start - 1} more lines above)\n'
      )
      below = (
          '(this is the end of the file)\n'
          if end == total_lines
          else f'({total_lines - end} more lines below)\n'
      )
      pieces = [header, above]
      pieces.extend(f'{n}|Line {n}\n' for n in range(start, end + 1))
      pieces.append(below)
      expected = ''.join(pieces)
      assert result.split('\n') == expected.split('\n')
  def test_goto_line_negative(tmp_path):
      """``goto_line(-1)`` on a four-line file reports the valid line range."""
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('\n'.join(f'Line {n}' for n in range(1, 5)))

      # Open the file with its output discarded; only the goto_line message matters.
      with contextlib.redirect_stdout(io.StringIO()):
          open_file(str(temp_file_path))

      def jump_before_start():
          goto_line(-1)

      _capture_file_operation_error(
          jump_before_start, 'ERROR: Line number must be between 1 and 4.'
      )
  def test_goto_line_out_of_bound(tmp_path):
      """``goto_line(100)`` on a nine-line file reports the valid line range."""
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('\n'.join(f'Line {n}' for n in range(1, 10)))

      # Open the file with its output discarded; only the goto_line message matters.
      with contextlib.redirect_stdout(io.StringIO()):
          open_file(str(temp_file_path))

      def jump_past_end():
          goto_line(100)

      _capture_file_operation_error(
          jump_past_end, 'ERROR: Line number must be between 1 and 9.'
      )
  def test_scroll_down(tmp_path):
      """``scroll_down`` after opening a long file prints lines WINDOW+1 .. 2*WINDOW+1."""
      temp_file_path = tmp_path / 'a.txt'
      total_lines = 1000
      temp_file_path.write_text(
          '\n'.join(f'Line {n}' for n in range(1, total_lines + 1))
      )

      def render(start, end, scroll_hint):
          # Build the expected printout for the inclusive window start..end.
          pieces = [f'[File: {temp_file_path} ({total_lines} lines total)]\n']
          if start == 1:
              pieces.append('(this is the beginning of the file)\n')
          else:
              pieces.append(f'({start - 1} more lines above)\n')
          pieces.extend(f'{n}|Line {n}\n' for n in range(start, end + 1))
          if end == total_lines:
              pieces.append('(this is the end of the file)\n')
          else:
              pieces.append(f'({total_lines - end} more lines below)\n')
          if scroll_hint:
              pieces.append(
                  '[Use `scroll_down` to view the next 100 lines of the file!]\n'
              )
          return ''.join(pieces)

      # Opening the file shows the first window together with the scroll hint.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          open_file(str(temp_file_path))
      result = captured.getvalue()
      assert result is not None
      start, end = _calculate_window_bounds(1, total_lines, WINDOW)
      expected = render(start, end, scroll_hint=True)
      assert result.split('\n') == expected.split('\n')

      # Scrolling down shows the next window, without the scroll hint.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          scroll_down()
      result = captured.getvalue()
      assert result is not None
      expected = render(WINDOW + 1, 2 * WINDOW + 1, scroll_hint=False)
      assert result.split('\n') == expected.split('\n')
  def test_scroll_up(tmp_path):
      """``scroll_up`` after opening at line 300 prints the window starting WINDOW lines earlier."""
      temp_file_path = tmp_path / 'a.txt'
      total_lines = 1000
      temp_file_path.write_text(
          '\n'.join(f'Line {n}' for n in range(1, total_lines + 1))
      )
      cur_line = 300

      def render(start, end, scroll_hint):
          # Build the expected printout for the inclusive window start..end.
          pieces = [f'[File: {temp_file_path} ({total_lines} lines total)]\n']
          if start == 1:
              pieces.append('(this is the beginning of the file)\n')
          else:
              pieces.append(f'({start - 1} more lines above)\n')
          pieces.extend(f'{n}|Line {n}\n' for n in range(start, end + 1))
          if end == total_lines:
              pieces.append('(this is the end of the file)\n')
          else:
              pieces.append(f'({total_lines - end} more lines below)\n')
          if scroll_hint:
              pieces.append(
                  '[Use `scroll_down` to view the next 100 lines of the file!]\n'
              )
          return ''.join(pieces)

      # Opening at cur_line shows the window around it together with the scroll hint.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          open_file(str(temp_file_path), cur_line)
      result = captured.getvalue()
      assert result is not None
      start, end = _calculate_window_bounds(cur_line, total_lines, WINDOW)
      expected = render(start, end, scroll_hint=True)
      assert result.split('\n') == expected.split('\n')

      # Scrolling up moves the reference line back by WINDOW; no scroll hint here.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          scroll_up()
      result = captured.getvalue()
      assert result is not None
      cur_line -= WINDOW
      expected = render(cur_line, cur_line + WINDOW, scroll_hint=False)
      assert result.split('\n') == expected.split('\n')
  def test_scroll_down_edge(tmp_path):
      """``scroll_down`` on a file that is already fully shown re-prints the same view.

      Both the initial ``open_file`` output and the ``scroll_down`` output are
      compared against the same expected text, so "unchanged" is really checked.
      """
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('\n'.join(f'Line {n}' for n in range(1, 10)))

      pieces = [
          f'[File: {temp_file_path} (9 lines total)]\n',
          '(this is the beginning of the file)\n',
      ]
      pieces.extend(f'{n}|Line {n}\n' for n in range(1, 10))
      pieces.append('(this is the end of the file)\n')
      expected = ''.join(pieces)

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          open_file(str(temp_file_path))
      result = captured.getvalue()
      assert result is not None
      # This first capture used to be discarded without any comparison.
      assert result.split('\n') == expected.split('\n')

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          scroll_down()
      result = captured.getvalue()
      assert result is not None

      # expected should be unchanged
      assert result.split('\n') == expected.split('\n')
  def test_print_window_internal(tmp_path):
      """``_print_window`` around line 50 with window 2 prints lines 49-51, backticks intact."""
      test_file_path = tmp_path / 'a.txt'
      test_file_path.write_text('')
      open_file(str(test_file_path))

      # Fill the file only after it has been opened: 100 lines containing backticks.
      test_file_path.write_text(''.join(f'Line `{n}`\n' for n in range(1, 101)))

      # Parameters for the call under test
      current_line = 50
      window = 2

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          _print_window(str(test_file_path), current_line, window, return_str=False)
      result = captured.getvalue()

      expected = ''.join(
          [
              '(48 more lines above)\n',
              '49|Line `49`\n',
              '50|Line `50`\n',
              '51|Line `51`\n',
              '(49 more lines below)\n',
          ]
      )
      assert result == expected
  def test_open_file_large_line_number(tmp_path):
      """``open_file(path, 800, 100)`` on a 999-line file prints lines 750-850 with both markers."""
      test_file_path = tmp_path / 'a.txt'
      test_file_path.write_text('')
      open_file(str(test_file_path))

      # Fill the file only after it has been opened: 999 lines containing backticks.
      test_file_path.write_text(''.join(f'Line `{n}`\n' for n in range(1, 1000)))

      # Parameters for the call under test
      current_line = 800
      window = 100

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          open_file(str(test_file_path), current_line, window)
      result = captured.getvalue()

      pieces = [
          f'[File: {test_file_path} (999 lines total)]\n',
          '(749 more lines above)\n',
      ]
      pieces.extend(f'{n}|Line `{n}`\n' for n in range(750, 850 + 1))
      pieces.append('(149 more lines below)\n')
      pieces.append('[Use `scroll_down` to view the next 100 lines of the file!]\n')
      expected = ''.join(pieces)
      assert result == expected
  def test_search_dir(tmp_path):
      """``search_dir`` reports the single file (of 100) that contains the search term."""
      # 100 files share the same five lines; only a50.txt also ends with "bingo"
      for idx in range(1, 101):
          text = 'Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n'
          if idx == 50:
              text += 'bingo'
          (tmp_path / f'a{idx}.txt').write_text(text)

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          search_dir('bingo', str(tmp_path))
      result = captured.getvalue()
      assert result is not None

      expected = ''.join(
          [
              f'[Found 1 matches for "bingo" in {tmp_path}]\n',
              f'{tmp_path}/a50.txt (Line 6): bingo\n',
              f'[End of matches for "bingo" in {tmp_path}]\n',
          ]
      )
      assert result.split('\n') == expected.split('\n')
  def test_search_dir_not_exist_term(tmp_path):
      """``search_dir`` prints a 'No matches found' line when no file contains the term."""
      # 100 files, none of which contains the search term
      for idx in range(1, 101):
          (tmp_path / f'a{idx}.txt').write_text(
              'Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n'
          )

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          search_dir('non-exist', str(tmp_path))
      result = captured.getvalue()
      assert result is not None

      expected = f'No matches found for "non-exist" in {tmp_path}\n'
      assert result.split('\n') == expected.split('\n')
  def test_search_dir_too_much_match(tmp_path):
      """``search_dir`` asks for a narrower search when 999 files all match."""
      # 999 files, every one of them containing "Line 5"
      for idx in range(1, 1000):
          (tmp_path / f'a{idx}.txt').write_text(
              'Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n'
          )

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          search_dir('Line 5', str(tmp_path))
      result = captured.getvalue()
      assert result is not None

      expected = f'More than 999 files matched for "Line 5" in {tmp_path}. Please narrow your search.\n'
      assert result.split('\n') == expected.split('\n')
  def test_search_dir_cwd(tmp_path, monkeypatch):
      """``search_dir`` without a directory argument searches ``./`` (the current directory)."""
      # monkeypatch restores the working directory after the test
      monkeypatch.chdir(tmp_path)

      # 100 files share the same five lines; only a50.txt also ends with "bingo"
      for idx in range(1, 101):
          text = 'Line 1\nLine 2\nLine 3\nLine 4\nLine 5\n'
          if idx == 50:
              text += 'bingo'
          (tmp_path / f'a{idx}.txt').write_text(text)

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          search_dir('bingo')
      result = captured.getvalue()
      assert result is not None

      expected = ''.join(
          [
              '[Found 1 matches for "bingo" in ./]\n',
              './a50.txt (Line 6): bingo\n',
              '[End of matches for "bingo" in ./]\n',
          ]
      )
      assert result.split('\n') == expected.split('\n')
  def test_search_file(tmp_path):
      """``search_file`` lists the one line of the file that contains the term."""
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          search_file('Line 5', str(temp_file_path))
      result = captured.getvalue()
      assert result is not None

      expected = ''.join(
          [
              f'[Found 1 matches for "Line 5" in {temp_file_path}]\n',
              'Line 5: Line 5\n',
              f'[End of matches for "Line 5" in {temp_file_path}]\n',
          ]
      )
      assert result.split('\n') == expected.split('\n')
  def test_search_file_not_exist_term(tmp_path):
      """``search_file`` prints a bracketed 'No matches found' line for an absent term."""
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          search_file('Line 6', str(temp_file_path))
      result = captured.getvalue()
      assert result is not None

      expected = f'[No matches found for "Line 6" in {temp_file_path}]\n'
      assert result.split('\n') == expected.split('\n')
  def test_search_file_not_exist_file():
      """``search_file`` on a missing path prints a 'not found' error."""

      def search_missing():
          search_file('Line 6', '/unexist/path/a.txt')

      _capture_file_operation_error(
          search_missing,
          'ERROR: File /unexist/path/a.txt not found.',
      )
  def test_find_file(tmp_path):
      """``find_file`` lists the matching file found under the given directory."""
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          find_file('a.txt', str(tmp_path))
      result = captured.getvalue()
      assert result is not None

      expected = ''.join(
          [
              f'[Found 1 matches for "a.txt" in {tmp_path}]\n',
              f'{tmp_path}/a.txt\n',
              f'[End of matches for "a.txt" in {tmp_path}]\n',
          ]
      )
      assert result.split('\n') == expected.split('\n')
  def test_find_file_cwd(tmp_path, monkeypatch):
      """``find_file`` without a directory argument finds a file in the current directory.

      The expected header and footer follow the message format asserted in
      ``test_find_file``, with the ``./`` default directory shown by
      ``test_find_file_not_exist_file``.
      """
      # monkeypatch restores the working directory after the test
      monkeypatch.chdir(tmp_path)
      temp_file_path = tmp_path / 'a.txt'
      temp_file_path.write_text('Line 1\nLine 2\nLine 3\nLine 4\nLine 5')

      with contextlib.redirect_stdout(io.StringIO()) as captured:
          find_file('a.txt')
      result = captured.getvalue()
      assert result is not None

      # ``getvalue()`` never returns None, so check the printed report itself.
      lines = result.split('\n')
      assert lines[0] == '[Found 1 matches for "a.txt" in ./]'
      assert lines[1].endswith('a.txt')
      assert lines[2] == '[End of matches for "a.txt" in ./]'
  def test_find_file_not_exist_file():
      """``find_file`` with the default directory prints 'No matches found ... in ./'."""
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          find_file('nonexist.txt')
      result = captured.getvalue()
      assert result is not None

      expected = '[No matches found for "nonexist.txt" in ./]\n'
      assert result.split('\n') == expected.split('\n')
  def test_find_file_not_exist_file_specific_path(tmp_path):
      """``find_file`` in an explicit directory prints 'No matches found' naming that directory."""
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          find_file('nonexist.txt', str(tmp_path))
      result = captured.getvalue()
      assert result is not None

      expected = f'[No matches found for "nonexist.txt" in {tmp_path}]\n'
      assert result.split('\n') == expected.split('\n')
  def test_parse_docx(tmp_path):
      """``parse_docx`` prints a header line followed by the text of the generated document."""
      # Create a DOCX file with two paragraphs
      test_docx_path = tmp_path / 'test.docx'
      doc = docx.Document()
      doc.add_paragraph('Hello, this is a test document.')
      doc.add_paragraph('This is the second paragraph.')
      doc.save(str(test_docx_path))

      # redirect_stdout puts sys.stdout back even if parse_docx raises; a manual
      # swap without try/finally would leave it replaced for later tests.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          parse_docx(str(test_docx_path))
      output = captured.getvalue()

      # Check if the output is correct
      expected_output = (
          f'[Reading DOCX file from {test_docx_path}]\n'
          '@@ Page 1 @@\nHello, this is a test document.\n\n'
          '@@ Page 2 @@\nThis is the second paragraph.\n\n\n'
      )
      assert output == expected_output, f'Expected output does not match. Got: {output}'
  def test_parse_latex(tmp_path):
      """``parse_latex`` prints a header line followed by the body text of the document."""
      # Create a LaTeX file with some content. The text is assembled line by
      # line so it does not depend on how this source file is indented.
      test_latex_path = tmp_path / 'test.tex'
      latex_source = '\n'.join(
          [
              '',
              r'\documentclass{article}',
              r'\begin{document}',
              'Hello, this is a test LaTeX document.',
              r'\end{document}',
              '',
          ]
      )
      with open(test_latex_path, 'w') as f:
          f.write(latex_source)

      # redirect_stdout puts sys.stdout back even if parse_latex raises; a manual
      # swap without try/finally would leave it replaced for later tests.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          parse_latex(str(test_latex_path))
      output = captured.getvalue()

      # Check if the output is correct
      expected_output = (
          f'[Reading LaTex file from {test_latex_path}]\n'
          'Hello, this is a test LaTeX document.\n'
      )
      assert output == expected_output, f'Expected output does not match. Got: {output}'
  def test_parse_pdf(tmp_path):
      """``parse_pdf`` prints a header line, a page marker and the text drawn on the page."""
      # Create a one-page PDF file with a single line of text
      test_pdf_path = tmp_path / 'test.pdf'
      from reportlab.lib.pagesizes import letter
      from reportlab.pdfgen import canvas

      c = canvas.Canvas(str(test_pdf_path), pagesize=letter)
      c.drawString(100, 750, 'Hello, this is a test PDF document.')
      c.save()

      # redirect_stdout puts sys.stdout back even if parse_pdf raises; a manual
      # swap without try/finally would leave it replaced for later tests.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          parse_pdf(str(test_pdf_path))
      output = captured.getvalue()

      # Check if the output is correct
      expected_output = (
          f'[Reading PDF file from {test_pdf_path}]\n'
          '@@ Page 1 @@\n'
          'Hello, this is a test PDF document.\n'
      )
      assert output == expected_output, f'Expected output does not match. Got: {output}'
  def test_parse_pptx(tmp_path):
      """``parse_pptx`` prints a header line and then each slide's title under a slide marker."""
      # Create a two-slide presentation; each slide only has a title
      test_pptx_path = tmp_path / 'test.pptx'
      from pptx import Presentation

      pres = Presentation()
      slide1 = pres.slides.add_slide(pres.slide_layouts[0])
      title1 = slide1.shapes.title
      title1.text = 'Hello, this is the first test PPTX slide.'
      slide2 = pres.slides.add_slide(pres.slide_layouts[0])
      title2 = slide2.shapes.title
      title2.text = 'Hello, this is the second test PPTX slide.'
      pres.save(str(test_pptx_path))

      # redirect_stdout puts sys.stdout back even if parse_pptx raises; a manual
      # swap without try/finally would leave it replaced for later tests.
      with contextlib.redirect_stdout(io.StringIO()) as captured:
          parse_pptx(str(test_pptx_path))
      output = captured.getvalue()

      expected_output = (
          f'[Reading PowerPoint file from {test_pptx_path}]\n'
          '@@ Slide 1 @@\n'
          'Hello, this is the first test PPTX slide.\n\n'
          '@@ Slide 2 @@\n'
          'Hello, this is the second test PPTX slide.\n\n'
      )
      assert output == expected_output, f'Expected output does not match. Got: {output}'