mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-12-08 09:05:36 +00:00
Fetcher / Parser - Automatically attempt to extract JSON from document when document contains JSON but could be wrapped in HTML (#1593)
This commit is contained in:
@@ -64,6 +64,24 @@ and it can also be repeated
|
||||
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
|
||||
|
||||
|
||||
def test_unittest_inline_extract_body():
|
||||
content = """
|
||||
<html>
|
||||
<head></head>
|
||||
<body>
|
||||
<pre style="word-wrap: break-word; white-space: pre-wrap;">
|
||||
{"testKey": 42}
|
||||
</pre>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
from .. import html_tools
|
||||
|
||||
# See that the JSON wrapped in the <pre> inside the HTML <body> is found, and matches our filter
|
||||
text = html_tools.extract_json_as_string(content, "json:$.testKey")
|
||||
assert text == '42'
|
||||
|
||||
def set_original_ext_response():
|
||||
data = """
|
||||
[
|
||||
@@ -437,7 +455,6 @@ def test_ignore_json_order(client, live_server):
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
def test_correct_header_detect(client, live_server):
|
||||
|
||||
# Like in https://github.com/dgtlmoon/changedetection.io/pull/1593
|
||||
# Specify extra html that JSON is sometimes wrapped in - when using Browserless/Puppeteer etc
|
||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||
@@ -453,11 +470,17 @@ def test_correct_header_detect(client, live_server):
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
res = client.get(url_for("index"))
|
||||
# This will be fixed in #1593
|
||||
assert b'No parsable JSON found in this document' in res.data
|
||||
|
||||
# Fixed in #1593
|
||||
assert b'No parsable JSON found in this document' not in res.data
|
||||
|
||||
res = client.get(
|
||||
url_for("preview_page", uuid="first"),
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b'"world":' in res.data
|
||||
assert res.data.count(b'{') >= 2
|
||||
|
||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||
assert b'Deleted' in res.data
|
||||
|
||||
Reference in New Issue
Block a user