mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-18 15:36:11 +00:00
Compare commits
3 Commits
2486-chars
...
update-ins
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
dc33d49840 | ||
|
|
c30f96c4cd | ||
|
|
c8310b7e93 |
@@ -409,23 +409,6 @@ def has_ldjson_product_info(content):
|
|||||||
x=bool(pricing_data)
|
x=bool(pricing_data)
|
||||||
return x
|
return x
|
||||||
|
|
||||||
|
|
||||||
def workarounds_for_obfuscations(content):
|
|
||||||
"""
|
|
||||||
Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis
|
|
||||||
This could go into its own Pip package in the future, for faster updates
|
|
||||||
"""
|
|
||||||
|
|
||||||
# HomeDepot.com style <span>$<!-- -->90<!-- -->.<!-- -->74</span>
|
|
||||||
# https://github.com/weblyzard/inscriptis/issues/45
|
|
||||||
if not content:
|
|
||||||
return content
|
|
||||||
|
|
||||||
content = re.sub('<!--\s+-->', '', content)
|
|
||||||
|
|
||||||
return content
|
|
||||||
|
|
||||||
|
|
||||||
def get_triggered_text(content, trigger_text):
|
def get_triggered_text(content, trigger_text):
|
||||||
triggered_text = []
|
triggered_text = []
|
||||||
result = strip_ignore_text(content=content,
|
result = strip_ignore_text(content=content,
|
||||||
|
|||||||
@@ -151,7 +151,6 @@ class perform_site_check(difference_detection_processor):
|
|||||||
if is_html or watch.is_source_type_url:
|
if is_html or watch.is_source_type_url:
|
||||||
|
|
||||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||||
self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
|
|
||||||
html_content = self.fetcher.content
|
html_content = self.fetcher.content
|
||||||
|
|
||||||
# If not JSON, and if it's not text/plain..
|
# If not JSON, and if it's not text/plain..
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from .util import live_server_setup
|
from .util import live_server_setup, wait_for_all_checks
|
||||||
|
|
||||||
|
|
||||||
def set_original_ignore_response():
|
def set_original_ignore_response():
|
||||||
@@ -21,7 +21,7 @@ def set_original_ignore_response():
|
|||||||
def test_obfuscations(client, live_server):
|
def test_obfuscations(client, live_server):
|
||||||
set_original_ignore_response()
|
set_original_ignore_response()
|
||||||
live_server_setup(live_server)
|
live_server_setup(live_server)
|
||||||
time.sleep(1)
|
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
test_url = url_for('test_endpoint', _external=True)
|
test_url = url_for('test_endpoint', _external=True)
|
||||||
res = client.post(
|
res = client.post(
|
||||||
@@ -32,12 +32,12 @@ def test_obfuscations(client, live_server):
|
|||||||
assert b"1 Imported" in res.data
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(3)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
# Check HTML conversion detected and worked
|
# Check HTML conversion detected and worked
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("preview_page", uuid="first"),
|
url_for("preview_page", uuid="first"),
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
# whitespace appears but it renders https://github.com/weblyzard/inscriptis/issues/45#issuecomment-1923339265
|
||||||
assert b'$90.74' in res.data
|
assert b'$90.74' in res.data
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ flask_expects_json~=1.7
|
|||||||
flask_restful
|
flask_restful
|
||||||
flask_wtf~=1.2
|
flask_wtf~=1.2
|
||||||
flask~=2.3
|
flask~=2.3
|
||||||
inscriptis~=2.2
|
inscriptis~=2.4
|
||||||
pytz
|
pytz
|
||||||
timeago~=1.0
|
timeago~=1.0
|
||||||
validators~=0.21
|
validators~=0.21
|
||||||
|
|||||||
Reference in New Issue
Block a user