oops

Content Fetchers / Browsers - Improvements for pluggable extra fetchers/browsers.
2026-03-18 01:38:13 +00:00 · 2026-03-15 16:56:05 +01:00 · 2026-03-15 16:33:06 +01:00
4 changed files with 9 additions and 72 deletions
--- a/changedetectionio/init.py
+++ b/changedetectionio/init.py
@@ -2,7 +2,7 @@

 # Read more https://github.com/dgtlmoon/changedetection.io/wiki
 # Semver means never use .01, or 00. Should be .1.
-__version__ = '0.54.6'
+__version__ = '0.54.5'

 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
--- a/changedetectionio/html_tools.py
+++ b/changedetectionio/html_tools.py
@@ -487,25 +487,13 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
        except json.JSONDecodeError as e:
            logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
    else:
-        # Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
-        # Server may claim application/json but actually return JSONP
-        jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
-        if jsonp_match:
-            try:
-                inner = jsonp_match.group(1).strip()
-                logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
-                stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
-            except json.JSONDecodeError as e:
-                logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
-
-        if not stripped_text_from_html:
-            # Probably something else, go fish inside for it
-            try:
-                stripped_text_from_html = extract_json_blob_from_html(content=content,
-                                                                      ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
-                                                                      json_filter=json_filter)
-            except json.JSONDecodeError as e:
-                logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
+        # Probably something else, go fish inside for it
+        try:
+            stripped_text_from_html = extract_json_blob_from_html(content=content,
+                                                                  ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
+                                                                  json_filter=json_filter                                                                  )
+        except json.JSONDecodeError as e:
+            logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")

    if not stripped_text_from_html:
        # Re 265 - Just return an empty string when filter not found
--- a/changedetectionio/processors/magic.py
+++ b/changedetectionio/processors/magic.py
@@ -100,13 +100,7 @@ class guess_stream_type():
        if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
            self.is_rss = True
        elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
-            # JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
-            # A JSONP response starts with an identifier followed by '(' - not valid JSON
-            if re.match(r'^\w[\w.]*\s*\(', test_content):
-                logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
-                self.is_plaintext = True
-            else:
-                self.is_json = True
+            self.is_json = True
        elif 'pdf' in magic_content_header:
            self.is_pdf = True
        # magic will call a rss document 'xml'
--- a/changedetectionio/tests/test_jsonpath_jq_selector.py
+++ b/changedetectionio/tests/test_jsonpath_jq_selector.py
@@ -16,51 +16,6 @@ except ModuleNotFoundError:



-def test_jsonp_treated_as_plaintext():
-    from ..processors.magic import guess_stream_type
-
-    # JSONP content (server wrongly claims application/json) should be detected as plaintext
-    # Callback names are arbitrary identifiers, not always 'cb'
-    jsonp_content = 'jQuery123456({ "version": "8.0.41", "url": "https://example.com/app.apk" })'
-    result = guess_stream_type(http_content_header="application/json", content=jsonp_content)
-    assert result.is_json is False
-    assert result.is_plaintext is True
-
-    # Variation with dotted callback name e.g. jQuery.cb(...)
-    jsonp_dotted = 'some.callback({ "version": "1.0" })'
-    result = guess_stream_type(http_content_header="application/json", content=jsonp_dotted)
-    assert result.is_json is False
-    assert result.is_plaintext is True
-
-    # Real JSON should still be detected as JSON
-    json_content = '{ "version": "8.0.41", "url": "https://example.com/app.apk" }'
-    result = guess_stream_type(http_content_header="application/json", content=json_content)
-    assert result.is_json is True
-    assert result.is_plaintext is False
-
-
-def test_jsonp_json_filter_extraction():
-    from .. import html_tools
-
-    # Tough case: dotted namespace callback, trailing semicolon, deeply nested content with arrays
-    jsonp_content = 'weixin.update.callback({"platforms": {"android": {"variants": [{"arch": "arm64", "versionName": "8.0.68", "url": "https://example.com/app-arm64.apk"}, {"arch": "arm32", "versionName": "8.0.41", "url": "https://example.com/app-arm32.apk"}]}}});'
-
-    # Deep nested jsonpath filter into array element
-    text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[0].versionName")
-    assert text == '"8.0.68"'
-
-    # Filter that selects the second array element
-    text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[1].arch")
-    assert text == '"arm32"'
-
-    if jq_support:
-        text = html_tools.extract_json_as_string(jsonp_content, "jq:.platforms.android.variants[0].versionName")
-        assert text == '"8.0.68"'
-
-        text = html_tools.extract_json_as_string(jsonp_content, "jqraw:.platforms.android.variants[1].url")
-        assert text == "https://example.com/app-arm32.apk"
-
-
 def test_unittest_inline_html_extract():
    # So lets pretend that the JSON we want is inside some HTML
    content="""
Author	SHA1	Message	Date
dgtlmoon	cd83704a5e	oops	2026-03-15 16:56:05 +01:00
dgtlmoon	6333f72578	Content Fetchers / Browsers - Improvements for pluggable extra fetchers/browsers.	2026-03-15 16:33:06 +01:00