Option to render links as [Some Text ](/link), adds the ability to change-detect on hyperlink changes

This commit is contained in:
dgtlmoon
2022-04-09 10:35:14 +02:00
committed by GitHub
parent 1890881977
commit 9809af142d
9 changed files with 322 additions and 12 deletions

View File

@@ -0,0 +1,38 @@
#!/usr/bin/python3
"""Test suite for the method to extract text from an html string"""
from ..html_tools import html_to_text
def test_html_to_text_func():
test_html = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
<a href="/first_link"> More Text </a>
</br>
So let's see what happens. </br>
<a href="second_link.com"> Even More Text </a>
</body>
</html>
"""
# extract text, with 'render_anchor_tag_content' set to False
text_content = html_to_text(test_html, render_anchor_tag_content=False)
no_links_text = \
"Some initial text\n\nWhich is across multiple " \
"lines\n\nMore Text So let's see what happens. Even More Text"
# check that no links are in the extracted text
assert text_content == no_links_text
# extract text, with 'render_anchor_tag_content' set to True
text_content = html_to_text(test_html, render_anchor_tag_content=True)
links_text = \
"Some initial text\n\nWhich is across multiple lines\n\n[ More Text " \
"](/first_link) So let's see what happens. [ Even More Text ]" \
"(second_link.com)"
# check that links are present in the extracted text
assert text_content == links_text