From 47f7698b328accb1851d47788f91660feab28012 Mon Sep 17 00:00:00 2001 From: dgtlmoon Date: Wed, 23 Jun 2021 12:29:14 +1000 Subject: [PATCH] CSS Filter - strip text of whitespacing, preserve new lines where applicable, remove extra newlines --- backend/fetch_site_status.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/backend/fetch_site_status.py b/backend/fetch_site_status.py index 6856a63f..9c64623b 100644 --- a/backend/fetch_site_status.py +++ b/backend/fetch_site_status.py @@ -89,7 +89,10 @@ class perform_site_check(): soup = BeautifulSoup(r.content, "html.parser") stripped_text_from_html = "" for item in soup.select(css_filter): - text = str(item.get_text()).strip() + '\n' + # By default, bs4's get_text will lump the text together + text = str(item.get_text(separator="\n", strip=True)).strip() + # Try to cut back on excessive linefeeds if there are any + text = text.replace("\n\n","\n") stripped_text_from_html += text else: