| 12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879 |
- import pytest
- from pathlib import Path
- from worker.html_convert.converter_base import ConverterBase
class TestConverterBase:
    """Tests for the markdown clean-up helpers exposed by ``ConverterBase``.

    Exercises ``extract_content_after_first_h1``, ``fix_inline_links``,
    ``add_url_header`` and ``filter_markdown``.
    """

    @pytest.fixture
    def converter(self):
        """Return a fresh ``ConverterBase`` instance for each test."""
        return ConverterBase()

    def test_extract_content_after_first_h1(self, converter):
        """Text before the first H1 is dropped and the H1 itself is kept.

        The fixture contains an H2 (``## PERMALINK``) ahead of the H1, so the
        test also checks that a deeper heading is not taken for the H1.
        """
        # The fixture lines are flush-left on purpose: any leading whitespace
        # inside the triple-quoted literal would become part of the markdown.
        sample_md = """
Some header content to skip
## PERMALINK
Copy
# Main Title Here
Content starts here
"""
        expected = "# Main Title Here\n\nContent starts here"
        result = converter.extract_content_after_first_h1(sample_md)
        # Only surrounding whitespace is ignored; the blank line after the
        # title is part of the expectation.
        assert result.strip() == expected.strip()

    # Each case is its own parametrized test, so one failing case no longer
    # hides the result of the cases that follow it.
    @pytest.mark.parametrize(
        ("markdown", "expected"),
        [
            # The angle-bracketed target is an absolute URL: it replaces the
            # prefix that precedes it.
            pytest.param(
                "[Author Name](https://example.com/<https://actual.com/path>)",
                "[Author Name](https://actual.com/path)",
                id="absolute-url-in-brackets",
            ),
            # The angle-bracketed target is a root-relative path: it is
            # appended to the prefix without doubling the slash.
            pytest.param(
                "[PMC Copyright](https://pmc.ncbi.nlm.nih.gov/articles/PMC9919988/</about/copyright/>)",
                "[PMC Copyright](https://pmc.ncbi.nlm.nih.gov/articles/PMC9919988/about/copyright/)",
                id="root-relative-path-in-brackets",
            ),
            # A link without an angle-bracketed target must pass through
            # unchanged.
            pytest.param(
                "[Normal Link](https://correct.com/path)",
                "[Normal Link](https://correct.com/path)",
                id="already-correct-link",
            ),
            # Empty link text with a fragment-only angle-bracketed target;
            # the leading space in the input is preserved in the output.
            pytest.param(
                " [](https://www.mdpi.com/1420-3049/29/22/<#table_body_display_molecules-29-05310-t003>)",
                " [](https://www.mdpi.com/1420-3049/29/22/#table_body_display_molecules-29-05310-t003)",
                id="empty-link-text-with-fragment",
            ),
        ],
    )
    def test_fix_inline_links(self, converter, markdown, expected):
        """Links with an angle-bracketed inner target are rewritten as expected."""
        assert converter.fix_inline_links(markdown) == expected

    def test_add_url_header(self, converter):
        """The URL is prepended as a self-titled link followed by a blank line."""
        content = "Some markdown content"
        url = "https://example.com"
        expected = "[https://example.com](https://example.com)\n\nSome markdown content"
        assert converter.add_url_header(content, url) == expected

    def test_filter_markdown_integration(self, converter):
        """``filter_markdown`` followed by ``add_url_header`` yields the final document.

        The expected text shows the pre-H1 lines removed, the bracketed link
        target repaired, and the URL header placed on top.
        """
        # Flush-left for the same reason as in the H1 extraction test above.
        sample_md = """
[ Skip to main content ](https://pmc.ncbi.nlm.nih.gov/articles/PMC9919988/<#main-content>)

## PERMALINK
Copy
# Main Title Here
### Author Name
[Author](https://example.com/<https://actual.com/path>)
"""
        url = "https://example.com"
        expected = (
            "[https://example.com](https://example.com)\n\n"
            "# Main Title Here\n\n"
            "### Author Name\n"
            "[Author](https://actual.com/path)"
        )
        filtered = converter.filter_markdown(sample_md)
        result = converter.add_url_header(filtered, url)
        assert result.strip() == expected.strip()
|