mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-03-20 18:58:03 +00:00
Compare commits
6 Commits
JSONP-supp
...
restock-to
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
0a1d218076 | ||
|
|
6181b09b16 | ||
|
|
5f9fa15a6a | ||
|
|
34c2c05bc5 | ||
|
|
0da8dfb09a | ||
|
|
b747e06c3e |
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
# Semver means never use .01, or 00. Should be .1.
|
||||
__version__ = '0.54.5'
|
||||
__version__ = '0.54.6'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
|
||||
@@ -40,12 +40,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
contents = ''
|
||||
now = time.time()
|
||||
try:
|
||||
import asyncio
|
||||
processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
|
||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
||||
watch_uuid=uuid
|
||||
)
|
||||
|
||||
update_handler.call_browser(preferred_proxy_id=preferred_proxy)
|
||||
asyncio.run(update_handler.call_browser(preferred_proxy_id=preferred_proxy))
|
||||
# title, size is len contents not len xfer
|
||||
except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
|
||||
if e.status_code == 404:
|
||||
|
||||
@@ -154,9 +154,8 @@
|
||||
</span>
|
||||
</div>
|
||||
<div class="pure-control-group">
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">{{ _('Connect using Bright Data and Oxylabs Proxies, find out more here.') }}</a>
|
||||
|
||||
<br>
|
||||
{{ _('Tip:') }} <a href="{{ url_for('settings.settings_page')}}#proxies">{{ _('Connect using Bright Data proxies, find out more here.') }}</a>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
@@ -352,7 +351,7 @@ nav
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.') }}</p>
|
||||
<p><strong>{{ _('Tip') }}</strong>: {{ _('"Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.') }}</p>
|
||||
|
||||
<div class="pure-control-group" id="extra-proxies-setting">
|
||||
{{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
|
||||
|
||||
@@ -487,13 +487,25 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
||||
else:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter )
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
|
||||
# Server may claim application/json but actually return JSONP
|
||||
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
|
||||
if jsonp_match:
|
||||
try:
|
||||
inner = jsonp_match.group(1).strip()
|
||||
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
|
||||
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Probably something else, go fish inside for it
|
||||
try:
|
||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||
json_filter=json_filter)
|
||||
except json.JSONDecodeError as e:
|
||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||
|
||||
if not stripped_text_from_html:
|
||||
# Re 265 - Just return an empty string when filter not found
|
||||
|
||||
@@ -100,7 +100,13 @@ class guess_stream_type():
|
||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||
self.is_rss = True
|
||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||
self.is_json = True
|
||||
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
|
||||
# A JSONP response starts with an identifier followed by '(' - not valid JSON
|
||||
if re.match(r'^\w[\w.]*\s*\(', test_content):
|
||||
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
|
||||
self.is_plaintext = True
|
||||
else:
|
||||
self.is_json = True
|
||||
elif 'pdf' in magic_content_header:
|
||||
self.is_pdf = True
|
||||
# magic will call a rss document 'xml'
|
||||
|
||||
@@ -1,6 +1,7 @@
|
||||
|
||||
from babel.numbers import parse_decimal
|
||||
from changedetectionio.model.Watch import model as BaseWatch
|
||||
from decimal import Decimal, InvalidOperation
|
||||
from typing import Union
|
||||
import re
|
||||
|
||||
@@ -10,6 +11,8 @@ supports_browser_steps = True
|
||||
supports_text_filters_and_triggers = True
|
||||
supports_text_filters_and_triggers_elements = True
|
||||
supports_request_type = True
|
||||
_price_re = re.compile(r"Price:\s*(\d+(?:\.\d+)?)", re.IGNORECASE)
|
||||
|
||||
|
||||
class Restock(dict):
|
||||
|
||||
@@ -63,6 +66,17 @@ class Restock(dict):
|
||||
|
||||
super().__setitem__(key, value)
|
||||
|
||||
def get_price_from_history_str(history_str):
|
||||
m = _price_re.search(history_str)
|
||||
if not m:
|
||||
return None
|
||||
|
||||
try:
|
||||
return str(Decimal(m.group(1)))
|
||||
except InvalidOperation:
|
||||
return None
|
||||
|
||||
|
||||
class Watch(BaseWatch):
|
||||
def __init__(self, *arg, **kw):
|
||||
super().__init__(*arg, **kw)
|
||||
@@ -76,13 +90,27 @@ class Watch(BaseWatch):
|
||||
def extra_notification_token_values(self):
|
||||
values = super().extra_notification_token_values()
|
||||
values['restock'] = self.get('restock', {})
|
||||
|
||||
values['restock']['previous_price'] = None
|
||||
if self.history_n >= 2:
|
||||
history = self.history
|
||||
if history and len(history) >=2:
|
||||
"""Unfortunately for now timestamp is stored as string key"""
|
||||
sorted_keys = sorted(list(history), key=lambda x: int(x))
|
||||
sorted_keys.reverse()
|
||||
|
||||
price_str = self.get_history_snapshot(timestamp=sorted_keys[-1])
|
||||
if price_str:
|
||||
values['restock']['previous_price'] = get_price_from_history_str(price_str)
|
||||
return values
|
||||
|
||||
def extra_notification_token_placeholder_info(self):
|
||||
values = super().extra_notification_token_placeholder_info()
|
||||
|
||||
values.append(('restock.price', "Price detected"))
|
||||
values.append(('restock.in_stock', "In stock status"))
|
||||
values.append(('restock.original_price', "Original price at first check"))
|
||||
values.append(('restock.previous_price', "Previous price in history"))
|
||||
|
||||
return values
|
||||
|
||||
|
||||
@@ -199,8 +199,31 @@ def handle_watch_update(socketio, **kwargs):
|
||||
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
||||
|
||||
|
||||
def _suppress_werkzeug_ws_abrupt_disconnect_noise():
|
||||
"""Patch BaseWSGIServer.log to suppress the AssertionError traceback that fires when
|
||||
a browser closes a WebSocket connection mid-handshake (e.g. closing a tab).
|
||||
The exception is caught inside run_wsgi and routed to self.server.log() — it never
|
||||
propagates out, so wrapping run_wsgi doesn't help. Patching the log method is the
|
||||
only reliable intercept point. The error is cosmetic: Socket.IO already handles the
|
||||
disconnect correctly via its own disconnect handler and timeout logic."""
|
||||
try:
|
||||
from werkzeug.serving import BaseWSGIServer
|
||||
_original_log = BaseWSGIServer.log
|
||||
|
||||
def _filtered_log(self, type, message, *args):
|
||||
if type == 'error' and 'write() before start_response' in message:
|
||||
return
|
||||
_original_log(self, type, message, *args)
|
||||
|
||||
BaseWSGIServer.log = _filtered_log
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def init_socketio(app, datastore):
|
||||
"""Initialize SocketIO with the main Flask app"""
|
||||
_suppress_werkzeug_ws_abrupt_disconnect_noise()
|
||||
|
||||
import platform
|
||||
import sys
|
||||
|
||||
|
||||
@@ -116,6 +116,14 @@ $(document).ready(function () {
|
||||
$('#realtime-conn-error').show();
|
||||
});
|
||||
|
||||
// Tell the server we're leaving cleanly so it can release the connection
|
||||
// immediately rather than waiting for a timeout.
|
||||
// Note: this only fires for voluntary closes (tab/window close, navigation away).
|
||||
// Hard kills, crashes and network drops will still timeout normally on the server.
|
||||
window.addEventListener('beforeunload', function () {
|
||||
socket.disconnect();
|
||||
});
|
||||
|
||||
socket.on('queue_size', function (data) {
|
||||
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
|
||||
if(queueSizePagerInfoText) {
|
||||
|
||||
@@ -16,6 +16,51 @@ except ModuleNotFoundError:
|
||||
|
||||
|
||||
|
||||
def test_jsonp_treated_as_plaintext():
|
||||
from ..processors.magic import guess_stream_type
|
||||
|
||||
# JSONP content (server wrongly claims application/json) should be detected as plaintext
|
||||
# Callback names are arbitrary identifiers, not always 'cb'
|
||||
jsonp_content = 'jQuery123456({ "version": "8.0.41", "url": "https://example.com/app.apk" })'
|
||||
result = guess_stream_type(http_content_header="application/json", content=jsonp_content)
|
||||
assert result.is_json is False
|
||||
assert result.is_plaintext is True
|
||||
|
||||
# Variation with dotted callback name e.g. jQuery.cb(...)
|
||||
jsonp_dotted = 'some.callback({ "version": "1.0" })'
|
||||
result = guess_stream_type(http_content_header="application/json", content=jsonp_dotted)
|
||||
assert result.is_json is False
|
||||
assert result.is_plaintext is True
|
||||
|
||||
# Real JSON should still be detected as JSON
|
||||
json_content = '{ "version": "8.0.41", "url": "https://example.com/app.apk" }'
|
||||
result = guess_stream_type(http_content_header="application/json", content=json_content)
|
||||
assert result.is_json is True
|
||||
assert result.is_plaintext is False
|
||||
|
||||
|
||||
def test_jsonp_json_filter_extraction():
|
||||
from .. import html_tools
|
||||
|
||||
# Tough case: dotted namespace callback, trailing semicolon, deeply nested content with arrays
|
||||
jsonp_content = 'weixin.update.callback({"platforms": {"android": {"variants": [{"arch": "arm64", "versionName": "8.0.68", "url": "https://example.com/app-arm64.apk"}, {"arch": "arm32", "versionName": "8.0.41", "url": "https://example.com/app-arm32.apk"}]}}});'
|
||||
|
||||
# Deep nested jsonpath filter into array element
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[0].versionName")
|
||||
assert text == '"8.0.68"'
|
||||
|
||||
# Filter that selects the second array element
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[1].arch")
|
||||
assert text == '"arm32"'
|
||||
|
||||
if jq_support:
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "jq:.platforms.android.variants[0].versionName")
|
||||
assert text == '"8.0.68"'
|
||||
|
||||
text = html_tools.extract_json_as_string(jsonp_content, "jqraw:.platforms.android.variants[1].url")
|
||||
assert text == "https://example.com/app-arm32.apk"
|
||||
|
||||
|
||||
def test_unittest_inline_html_extract():
|
||||
# So lets pretend that the JSON we want is inside some HTML
|
||||
content="""
|
||||
|
||||
@@ -350,6 +350,7 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
|
||||
res = client.get(url_for("settings.settings_page"))
|
||||
|
||||
assert b'{{restock.original_price}}' in res.data
|
||||
assert b'{{restock.previous_price}}' in res.data
|
||||
assert b'Original price at first check' in res.data
|
||||
|
||||
#####################
|
||||
@@ -358,7 +359,7 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
|
||||
url_for("settings.settings_page"),
|
||||
data={"application-notification_urls": notification_url,
|
||||
"application-notification_title": "title new price {{restock.price}}",
|
||||
"application-notification_body": "new price {{restock.price}}",
|
||||
"application-notification_body": "new price {{restock.price}} previous price {{restock.previous_price}} instock {{restock.in_stock}}",
|
||||
"application-notification_format": default_notification_format,
|
||||
"requests-time_between_check-minutes": 180,
|
||||
'application-fetch_backend': "html_requests"},
|
||||
@@ -372,8 +373,6 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
|
||||
|
||||
assert b"Settings updated." in res.data
|
||||
|
||||
|
||||
set_original_response(props_markup=instock_props[0], price='960.45', datastore_path=datastore_path)
|
||||
# A change in price, should trigger a change by default
|
||||
set_original_response(props_markup=instock_props[0], price='1950.45', datastore_path=datastore_path)
|
||||
client.get(url_for("ui.form_watch_checknow"))
|
||||
@@ -384,6 +383,7 @@ def test_change_with_notification_values(client, live_server, measure_memory_usa
|
||||
notification = f.read()
|
||||
assert "new price 1950.45" in notification
|
||||
assert "title new price 1950.45" in notification
|
||||
assert "previous price 960.45" in notification
|
||||
|
||||
## Now test the "SEND TEST NOTIFICATION" is working
|
||||
os.unlink(os.path.join(datastore_path, "notification.txt"))
|
||||
|
||||
Reference in New Issue
Block a user