mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-12-11 18:45:34 +00:00
Option to render links as [Some Text ](/link), adds the ability to change-detect on hyperlink changes
This commit is contained in:
38
changedetectionio/tests/test_html_to_text.py
Normal file
38
changedetectionio/tests/test_html_to_text.py
Normal file
@@ -0,0 +1,38 @@
|
||||
#!/usr/bin/python3
|
||||
"""Test suite for the method to extract text from an html string"""
|
||||
from ..html_tools import html_to_text
|
||||
|
||||
|
||||
def test_html_to_text_func():
    """Check html_to_text output with anchor rendering switched off and on.

    The same HTML fixture is converted twice: once with
    ``render_anchor_tag_content=False`` (only the anchor text is expected in
    the output) and once with ``render_anchor_tag_content=True`` (each anchor
    is expected as ``[text](href)``).
    """
    # Fixture markup: two anchors (one root-relative href, one bare host)
    # mixed with plain text, a paragraph and `</br>` tags.
    html_source = (
        "<html>\n"
        "<body>\n"
        "Some initial text</br>\n"
        "<p>Which is across multiple lines</p>\n"
        '<a href="/first_link"> More Text </a>\n'
        "</br>\n"
        "So let's see what happens. </br>\n"
        '<a href="second_link.com"> Even More Text </a>\n'
        "</body>\n"
        "</html>\n"
    )

    # Anchors disabled: the link targets must not appear in the text.
    expected_plain = (
        "Some initial text\n\n"
        "Which is across multiple lines\n\n"
        "More Text So let's see what happens. Even More Text"
    )
    plain_result = html_to_text(html_source, render_anchor_tag_content=False)
    assert plain_result == expected_plain

    # Anchors enabled: every link is rendered as "[text](href)".
    expected_with_links = (
        "Some initial text\n\n"
        "Which is across multiple lines\n\n"
        "[ More Text ](/first_link) So let's see what happens. "
        "[ Even More Text ](second_link.com)"
    )
    linked_result = html_to_text(html_source, render_anchor_tag_content=True)
    assert linked_result == expected_with_links
|
||||
Reference in New Issue
Block a user