mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-02 15:47:19 +00:00
Compare commits
3 Commits
lev-test
...
update-ins
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc33d49840 | ||
|
|
c30f96c4cd | ||
|
|
c8310b7e93 |
@@ -409,23 +409,6 @@ def has_ldjson_product_info(content):
|
||||
x=bool(pricing_data)
|
||||
return x
|
||||
|
||||
|
||||
def workarounds_for_obfuscations(content):
    """
    Strip markup tricks that make prices and other information un-renderable by Inscriptis.

    Some sites split a value across HTML comments that contain only whitespace,
    e.g. HomeDepot.com style <span>$<!-- -->90<!-- -->.<!-- -->74</span>
    https://github.com/weblyzard/inscriptis/issues/45

    This could go into its own Pip package in the future, for faster updates.

    :param content: HTML text to clean; may be empty or None.
    :return: `content` with every comment that holds only whitespace (one or more
             whitespace characters) removed. Falsy input is returned unchanged.
    """
    if not content:
        return content

    # Raw string: '\s' inside a plain literal is an invalid escape sequence and
    # triggers a warning on modern Python; the pattern itself is unchanged.
    return re.sub(r'<!--\s+-->', '', content)
|
||||
|
||||
|
||||
def get_triggered_text(content, trigger_text):
|
||||
triggered_text = []
|
||||
result = strip_ignore_text(content=content,
|
||||
|
||||
@@ -151,7 +151,6 @@ class perform_site_check(difference_detection_processor):
|
||||
if is_html or watch.is_source_type_url:
|
||||
|
||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||
self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
|
||||
html_content = self.fetcher.content
|
||||
|
||||
# If not JSON, and if it's not text/plain..
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from .util import live_server_setup
|
||||
from .util import live_server_setup, wait_for_all_checks
|
||||
|
||||
|
||||
def set_original_ignore_response():
|
||||
@@ -21,7 +21,7 @@ def set_original_ignore_response():
|
||||
def test_obfuscations(client, live_server):
|
||||
set_original_ignore_response()
|
||||
live_server_setup(live_server)
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page
|
||||
test_url = url_for('test_endpoint', _external=True)
|
||||
res = client.post(
|
||||
@@ -32,12 +32,12 @@ def test_obfuscations(client, live_server):
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
# Give the thread time to pick it up
|
||||
time.sleep(3)
|
||||
wait_for_all_checks(client)
|
||||
|
||||
# Check HTML conversion detected and worked
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
|
||||
# whitespace appears but it renders https://github.com/weblyzard/inscriptis/issues/45#issuecomment-1923339265
|
||||
assert b'$90.74' in res.data
|
||||
|
||||
@@ -8,7 +8,7 @@ flask_expects_json~=1.7
|
||||
flask_restful
|
||||
flask_wtf~=1.2
|
||||
flask~=2.3
|
||||
inscriptis~=2.2
|
||||
inscriptis~=2.4
|
||||
pytz
|
||||
timeago~=1.0
|
||||
validators~=0.21
|
||||
|
||||
Reference in New Issue
Block a user