mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-03-21 11:17:57 +00:00
Compare commits
4 Commits
JSONP-supp
...
socketio-c
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
6eb71c9eee | ||
|
|
34c2c05bc5 | ||
|
|
0da8dfb09a | ||
|
|
b747e06c3e |
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.54.5'
|
||||
__version__ = '0.54.6'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
@@ -154,9 +154,8 @@
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -352,7 +351,7 @@ nav
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p>
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
|
||||
@@ -487,13 +487,25 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
||||
else:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter )
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
|
||||
# Server may claim application/json but actually return JSONP
|
||||
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
|
||||
if jsonp_match:
|
||||
try:
|
||||
inner = jsonp_match.group(1).strip()
|
||||
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
|
||||
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Re 265 - Just return an empty string when filter not found
|
||||
|
||||
@@ -100,7 +100,13 @@ class guess_stream_type():
|
||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
self.is_rss = True
|
||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
|
||||
# A JSONP response starts with an identifier followed by '(' - not valid JSON
|
||||
if re.match(r'^\w[\w.]*\s*\(', test_content):
|
||||
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
|
||||
self.is_plaintext = True
|
||||
else:
|
||||
self.is_json = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
# magic will call a rss document 'xml'
|
||||
|
||||
@@ -199,8 +199,31 @@ def handle_watch_update(socketio, **kwargs):
|
||||
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
||||
|
||||
|
||||
def _suppress_werkzeug_ws_abrupt_disconnect_noise():
|
||||
"""Patch BaseWSGIServer.log to suppress the AssertionError traceback that fires when
|
||||
a browser closes a WebSocket connection mid-handshake (e.g. closing a tab).
|
||||
The exception is caught inside run_wsgi and routed to self.server.log() — it never
|
||||
propagates out, so wrapping run_wsgi doesn't help. Patching the log method is the
|
||||
only reliable intercept point. The error is cosmetic: Socket.IO already handles the
|
||||
disconnect correctly via its own disconnect handler and timeout logic."""
|
||||
try:
|
||||
from werkzeug.serving import BaseWSGIServer
|
||||
_original_log = BaseWSGIServer.log
|
||||
|
||||
def _filtered_log(self, type, message, *args):
|
||||
if type == 'error' and 'write() before start_response' in message:
|
||||
return
|
||||
_original_log(self, type, message, *args)
|
||||
|
||||
BaseWSGIServer.log = _filtered_log
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def init_socketio(app, datastore):
|
||||
"""Initialize SocketIO with the main Flask app"""
|
||||
_suppress_werkzeug_ws_abrupt_disconnect_noise()
|
||||
|
||||
import platform
|
||||
import sys
|
||||
|
||||
|
||||
@@ -116,6 +116,14 @@ $(document).ready(function () {
|
||||
$('#realtime-conn-error').show();
|
||||
});
|
||||
|
||||
// Tell the server we're leaving cleanly so it can release the connection
|
||||
// immediately rather than waiting for a timeout.
|
||||
// Note: this only fires for voluntary closes (tab/window close, navigation away).
|
||||
// Hard kills, crashes and network drops will still timeout normally on the server.
|
||||
window.addEventListener('beforeunload', function () {
|
||||
socket.disconnect();
|
||||
});
|
||||
|
||||
socket.on('queue_size', function (data) {
|
||||
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
|
||||
if(queueSizePagerInfoText) {
|
||||
|
||||
@@ -16,6 +16,51 @@ except ModuleNotFoundError:
|
||||
|
||||
|
||||
|
||||
def test_jsonp_treated_as_plaintext():
|
||||
from ..processors.magic import guess_stream_type
|
||||
|
||||
# JSONP content (server wrongly claims application/json) should be detected as plaintext
|
||||
# Callback names are arbitrary identifiers, not always 'cb'
|
||||
jsonp_content = 'jQuery123456({ "version": "8.0.41", "url": "https://example.com/app.apk" })'
|
||||
result = guess_stream_type(http_content_header="application/json", content=jsonp_content)
|
||||
assert result.is_json is False
|
||||
assert result.is_plaintext is True
|
||||
|
||||
# Variation with dotted callback name e.g. jQuery.cb(...)
|
||||
jsonp_dotted = 'some.callback({ "version": "1.0" })'
|
||||
result = guess_stream_type(http_content_header="application/json", content=jsonp_dotted)
|
||||
assert result.is_json is False
|
||||
assert result.is_plaintext is True
|
||||
|
||||
# Real JSON should still be detected as JSON
|
||||
json_content = '{ "version": "8.0.41", "url": "https://example.com/app.apk" }'
|
||||
result = guess_stream_type(http_content_header="application/json", content=json_content)
|
||||
assert result.is_json is True
|
||||
assert result.is_plaintext is False
|
||||
|
||||
|
||||
def test_jsonp_json_filter_extraction():
|
||||
from .. import html_tools
|
||||
|
||||
# Tough case: dotted namespace callback, trailing semicolon, deeply nested content with arrays
|
||||
jsonp_content = 'weixin.update.callback({"platforms": {"android": {"variants": [{"arch": "arm64", "versionName": "8.0.68", "url": "https://example.com/app-arm64.apk"}, {"arch": "arm32", "versionName": "8.0.41", "url": "https://example.com/app-arm32.apk"}]}}});'
|
||||
|
||||
# Deep nested jsonpath filter into array element
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[0].versionName")
|
||||
assert text == '"8.0.68"'
|
||||
|
||||
# Filter that selects the second array element
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[1].arch")
|
||||
assert text == '"arm32"'
|
||||
|
||||
if jq_support:
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "jq:.platforms.android.variants[0].versionName")
|
||||
assert text == '"8.0.68"'
|
||||
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "jqraw:.platforms.android.variants[1].url")
|
||||
assert text == "https://example.com/app-arm32.apk"
|
||||
|
||||
|
||||
def test_unittest_inline_html_extract():
|
||||
# So lets pretend that the JSON we want is inside some HTML
|
||||
content="""
|
||||
|
||||
Reference in New Issue
Block a user