mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2026-03-18 17:58:04 +00:00
Compare commits
3 Commits
JSONP-supp
...
cpu-memory
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
259e44940c | ||
|
|
1dbd25cdb4 | ||
|
|
bb2c9855ee |
17
.github/workflows/containers.yml
vendored
17
.github/workflows/containers.yml
vendored
@@ -103,14 +103,6 @@ jobs:
|
|||||||
ghcr.io/${{ github.repository }}
|
ghcr.io/${{ github.repository }}
|
||||||
tags: |
|
tags: |
|
||||||
type=raw,value=dev
|
type=raw,value=dev
|
||||||
labels: |
|
|
||||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
|
||||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
|
||||||
org.opencontainers.image.documentation=https://changedetection.io
|
|
||||||
org.opencontainers.image.revision=${{ github.sha }}
|
|
||||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
|
||||||
org.opencontainers.image.title=changedetection.io
|
|
||||||
org.opencontainers.image.url=https://changedetection.io
|
|
||||||
|
|
||||||
- name: Build and push :dev
|
- name: Build and push :dev
|
||||||
id: docker_build
|
id: docker_build
|
||||||
@@ -150,15 +142,6 @@ jobs:
|
|||||||
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
||||||
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
||||||
type=raw,value=latest
|
type=raw,value=latest
|
||||||
labels: |
|
|
||||||
org.opencontainers.image.created=${{ github.event.release.published_at }}
|
|
||||||
org.opencontainers.image.description=Website, webpage change detection, monitoring and notifications.
|
|
||||||
org.opencontainers.image.documentation=https://changedetection.io
|
|
||||||
org.opencontainers.image.revision=${{ github.sha }}
|
|
||||||
org.opencontainers.image.source=https://github.com/dgtlmoon/changedetection.io
|
|
||||||
org.opencontainers.image.title=changedetection.io
|
|
||||||
org.opencontainers.image.url=https://changedetection.io
|
|
||||||
org.opencontainers.image.version=${{ github.event.release.tag_name }}
|
|
||||||
|
|
||||||
- name: Build and push :tag
|
- name: Build and push :tag
|
||||||
id: docker_build_tag_release
|
id: docker_build_tag_release
|
||||||
|
|||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||||
# Semver means never use .01, or 00. Should be .1.
|
# Semver means never use .01, or 00. Should be .1.
|
||||||
__version__ = '0.54.5'
|
__version__ = '0.54.4'
|
||||||
|
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
from json.decoder import JSONDecodeError
|
from json.decoder import JSONDecodeError
|
||||||
|
|||||||
@@ -177,13 +177,6 @@ class Tag(Resource):
|
|||||||
|
|
||||||
new_uuid = self.datastore.add_tag(title=title)
|
new_uuid = self.datastore.add_tag(title=title)
|
||||||
if new_uuid:
|
if new_uuid:
|
||||||
# Apply any extra fields (e.g. processor_config_restock_diff) beyond just title
|
|
||||||
extra = {k: v for k, v in json_data.items() if k != 'title'}
|
|
||||||
if extra:
|
|
||||||
tag = self.datastore.data['settings']['application']['tags'].get(new_uuid)
|
|
||||||
if tag:
|
|
||||||
tag.update(extra)
|
|
||||||
tag.commit()
|
|
||||||
return {'uuid': new_uuid}, 201
|
return {'uuid': new_uuid}, 201
|
||||||
else:
|
else:
|
||||||
return "Invalid or unsupported tag", 400
|
return "Invalid or unsupported tag", 400
|
||||||
|
|||||||
@@ -338,7 +338,7 @@ class WatchHistoryDiff(Resource):
|
|||||||
word_diff = True
|
word_diff = True
|
||||||
|
|
||||||
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
|
# Get boolean diff preferences with defaults from DIFF_PREFERENCES_CONFIG
|
||||||
changes_only = strtobool(request.args.get('changesOnly', 'false'))
|
changes_only = strtobool(request.args.get('changesOnly', 'true'))
|
||||||
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
|
ignore_whitespace = strtobool(request.args.get('ignoreWhitespace', 'false'))
|
||||||
include_removed = strtobool(request.args.get('removed', 'true'))
|
include_removed = strtobool(request.args.get('removed', 'true'))
|
||||||
include_added = strtobool(request.args.get('added', 'true'))
|
include_added = strtobool(request.args.get('added', 'true'))
|
||||||
@@ -349,7 +349,7 @@ class WatchHistoryDiff(Resource):
|
|||||||
previous_version_file_contents=from_version_file_contents,
|
previous_version_file_contents=from_version_file_contents,
|
||||||
newest_version_file_contents=to_version_file_contents,
|
newest_version_file_contents=to_version_file_contents,
|
||||||
ignore_junk=ignore_whitespace,
|
ignore_junk=ignore_whitespace,
|
||||||
include_equal=not changes_only,
|
include_equal=changes_only,
|
||||||
include_removed=include_removed,
|
include_removed=include_removed,
|
||||||
include_added=include_added,
|
include_added=include_added,
|
||||||
include_replaced=include_replaced,
|
include_replaced=include_replaced,
|
||||||
|
|||||||
@@ -102,35 +102,6 @@ def run_async_in_browser_loop(coro):
|
|||||||
else:
|
else:
|
||||||
raise RuntimeError("Browser steps event loop is not available")
|
raise RuntimeError("Browser steps event loop is not available")
|
||||||
|
|
||||||
async def _close_session_resources(session_data, label=''):
|
|
||||||
"""Close all browser resources for a session in the correct order.
|
|
||||||
|
|
||||||
browserstepper.cleanup() closes page+context but not the browser itself.
|
|
||||||
For CloakBrowser, browser.close() is what stops the local Chromium process via pw.stop().
|
|
||||||
For the default CDP path, playwright_context.stop() shuts down the playwright instance.
|
|
||||||
"""
|
|
||||||
browserstepper = session_data.get('browserstepper')
|
|
||||||
if browserstepper:
|
|
||||||
try:
|
|
||||||
await browserstepper.cleanup()
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error cleaning up browserstepper{label}: {e}")
|
|
||||||
|
|
||||||
browser = session_data.get('browser')
|
|
||||||
if browser:
|
|
||||||
try:
|
|
||||||
await asyncio.wait_for(browser.close(), timeout=5.0)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Error closing browser{label}: {e}")
|
|
||||||
|
|
||||||
playwright_context = session_data.get('playwright_context')
|
|
||||||
if playwright_context:
|
|
||||||
try:
|
|
||||||
await playwright_context.stop()
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Error stopping playwright context{label}: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
def cleanup_expired_sessions():
|
def cleanup_expired_sessions():
|
||||||
"""Remove expired browsersteps sessions and cleanup their resources"""
|
"""Remove expired browsersteps sessions and cleanup their resources"""
|
||||||
global browsersteps_sessions, browsersteps_watch_to_session
|
global browsersteps_sessions, browsersteps_watch_to_session
|
||||||
@@ -148,10 +119,13 @@ def cleanup_expired_sessions():
|
|||||||
logger.debug(f"Cleaning up expired browsersteps session {session_id}")
|
logger.debug(f"Cleaning up expired browsersteps session {session_id}")
|
||||||
session_data = browsersteps_sessions[session_id]
|
session_data = browsersteps_sessions[session_id]
|
||||||
|
|
||||||
try:
|
# Cleanup playwright resources asynchronously
|
||||||
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for session {session_id}"))
|
browserstepper = session_data.get('browserstepper')
|
||||||
except Exception as e:
|
if browserstepper:
|
||||||
logger.error(f"Error cleaning up session {session_id}: {e}")
|
try:
|
||||||
|
run_async_in_browser_loop(browserstepper.cleanup())
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error cleaning up session {session_id}: {e}")
|
||||||
|
|
||||||
# Remove from sessions dict
|
# Remove from sessions dict
|
||||||
del browsersteps_sessions[session_id]
|
del browsersteps_sessions[session_id]
|
||||||
@@ -178,10 +152,12 @@ def cleanup_session_for_watch(watch_uuid):
|
|||||||
|
|
||||||
session_data = browsersteps_sessions.get(session_id)
|
session_data = browsersteps_sessions.get(session_id)
|
||||||
if session_data:
|
if session_data:
|
||||||
try:
|
browserstepper = session_data.get('browserstepper')
|
||||||
run_async_in_browser_loop(_close_session_resources(session_data, label=f" for watch {watch_uuid}"))
|
if browserstepper:
|
||||||
except Exception as e:
|
try:
|
||||||
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
run_async_in_browser_loop(browserstepper.cleanup())
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error cleaning up session {session_id} for watch {watch_uuid}: {e}")
|
||||||
|
|
||||||
# Remove from sessions dict
|
# Remove from sessions dict
|
||||||
del browsersteps_sessions[session_id]
|
del browsersteps_sessions[session_id]
|
||||||
@@ -202,69 +178,59 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
import time
|
import time
|
||||||
from playwright.async_api import async_playwright
|
from playwright.async_api import async_playwright
|
||||||
|
|
||||||
|
# We keep the playwright session open for many minutes
|
||||||
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
|
||||||
|
|
||||||
browsersteps_start_session = {'start_time': time.time()}
|
browsersteps_start_session = {'start_time': time.time()}
|
||||||
|
|
||||||
# Build proxy dict first — needed by both the CDP path and fetcher-specific launchers
|
# Create a new async playwright instance for browser steps
|
||||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
playwright_instance = async_playwright()
|
||||||
proxy = None
|
playwright_context = await playwright_instance.start()
|
||||||
if proxy_id:
|
|
||||||
proxy_url = datastore.proxy_list.get(proxy_id, {}).get('url')
|
|
||||||
if proxy_url:
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
parsed = urlparse(proxy_url)
|
|
||||||
proxy = {'server': proxy_url}
|
|
||||||
if parsed.username:
|
|
||||||
proxy['username'] = parsed.username
|
|
||||||
if parsed.password:
|
|
||||||
proxy['password'] = parsed.password
|
|
||||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
|
||||||
|
|
||||||
# Resolve the fetcher class for this watch so we can ask it to launch its own browser
|
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||||
# if it supports that (e.g. CloakBrowser, which runs locally rather than via CDP)
|
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||||
watch = datastore.data['watching'][watch_uuid]
|
a = "?" if not '?' in base_url else '&'
|
||||||
from changedetectionio import content_fetchers
|
base_url += a + f"timeout={keepalive_ms}"
|
||||||
fetcher_name = watch.get_fetch_backend or 'system'
|
|
||||||
if fetcher_name == 'system':
|
|
||||||
fetcher_name = datastore.data['settings']['application'].get('fetch_backend', 'html_requests')
|
|
||||||
fetcher_class = getattr(content_fetchers, fetcher_name, None)
|
|
||||||
|
|
||||||
browser = None
|
|
||||||
playwright_context = None
|
|
||||||
|
|
||||||
# If the fetcher has its own browser launch for the live steps UI, use it.
|
|
||||||
# get_browsersteps_browser(proxy, keepalive_ms) returns (browser, playwright_context_or_None)
|
|
||||||
# or None to fall back to the default CDP path.
|
|
||||||
if fetcher_class and hasattr(fetcher_class, 'get_browsersteps_browser'):
|
|
||||||
result = await fetcher_class.get_browsersteps_browser(proxy=proxy, keepalive_ms=keepalive_ms)
|
|
||||||
if result is not None:
|
|
||||||
browser, playwright_context = result
|
|
||||||
logger.debug(f"Browser Steps: using fetcher-specific browser for '{fetcher_name}'")
|
|
||||||
|
|
||||||
# Default: connect to the remote Playwright/sockpuppetbrowser via CDP
|
|
||||||
if browser is None:
|
|
||||||
playwright_instance = async_playwright()
|
|
||||||
playwright_context = await playwright_instance.start()
|
|
||||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
|
||||||
a = "?" if '?' not in base_url else '&'
|
|
||||||
base_url += a + f"timeout={keepalive_ms}"
|
|
||||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
|
||||||
logger.debug(f"Browser Steps: using CDP connection to {base_url}")
|
|
||||||
|
|
||||||
|
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
||||||
browsersteps_start_session['browser'] = browser
|
browsersteps_start_session['browser'] = browser
|
||||||
browsersteps_start_session['playwright_context'] = playwright_context
|
browsersteps_start_session['playwright_context'] = playwright_context
|
||||||
|
|
||||||
|
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||||
|
proxy = None
|
||||||
|
if proxy_id:
|
||||||
|
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
|
||||||
|
if proxy_url:
|
||||||
|
|
||||||
|
# Playwright needs separate username and password values
|
||||||
|
from urllib.parse import urlparse
|
||||||
|
parsed = urlparse(proxy_url)
|
||||||
|
proxy = {'server': proxy_url}
|
||||||
|
|
||||||
|
if parsed.username:
|
||||||
|
proxy['username'] = parsed.username
|
||||||
|
|
||||||
|
if parsed.password:
|
||||||
|
proxy['password'] = parsed.password
|
||||||
|
|
||||||
|
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||||
|
|
||||||
|
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
||||||
browserstepper = browser_steps.browsersteps_live_ui(
|
browserstepper = browser_steps.browsersteps_live_ui(
|
||||||
playwright_browser=browser,
|
playwright_browser=browser,
|
||||||
proxy=proxy,
|
proxy=proxy,
|
||||||
start_url=watch.link,
|
start_url=datastore.data['watching'][watch_uuid].link,
|
||||||
headers=watch.get('headers')
|
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Initialize the async connection
|
||||||
await browserstepper.connect(proxy=proxy)
|
await browserstepper.connect(proxy=proxy)
|
||||||
|
|
||||||
browsersteps_start_session['browserstepper'] = browserstepper
|
browsersteps_start_session['browserstepper'] = browserstepper
|
||||||
|
|
||||||
|
# For test
|
||||||
|
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
||||||
|
|
||||||
return browsersteps_start_session
|
return browsersteps_start_session
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -10,8 +10,7 @@ from changedetectionio import html_tools
|
|||||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||||
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
preview_blueprint = Blueprint('ui_preview', __name__, template_folder="../ui/templates")
|
||||||
|
|
||||||
|
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET'])
|
||||||
@preview_blueprint.route("/preview/<uuid_str:uuid>", methods=['GET', 'POST'])
|
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def preview_page(uuid):
|
def preview_page(uuid):
|
||||||
"""
|
"""
|
||||||
@@ -60,8 +59,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
versions = []
|
versions = []
|
||||||
timestamp = None
|
timestamp = None
|
||||||
|
|
||||||
|
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||||
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
|
||||||
|
is_html_webdriver = False
|
||||||
|
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||||
|
is_html_webdriver = True
|
||||||
|
|
||||||
triggered_line_numbers = []
|
triggered_line_numbers = []
|
||||||
ignored_line_numbers = []
|
ignored_line_numbers = []
|
||||||
@@ -71,9 +74,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
flash(gettext("Preview unavailable - No fetch/check completed or triggers not reached"), "error")
|
||||||
else:
|
else:
|
||||||
# So prepare the latest preview or not
|
# So prepare the latest preview or not
|
||||||
preferred_version = request.values.get('version') if request.method == 'POST' else request.args.get('version')
|
preferred_version = request.args.get('version')
|
||||||
|
|
||||||
|
|
||||||
versions = list(watch.history.keys())
|
versions = list(watch.history.keys())
|
||||||
timestamp = versions[-1]
|
timestamp = versions[-1]
|
||||||
if preferred_version and preferred_version in versions:
|
if preferred_version and preferred_version in versions:
|
||||||
|
|||||||
@@ -17,7 +17,7 @@
|
|||||||
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
<script src="{{ url_for('static_content', group='js', filename='tabs.js') }}" defer></script>
|
||||||
{% if versions|length >= 2 %}
|
{% if versions|length >= 2 %}
|
||||||
<div id="diff-form" style="text-align: center;">
|
<div id="diff-form" style="text-align: center;">
|
||||||
<form class="pure-form " action="{{url_for('ui.ui_preview.preview_page', uuid=uuid)}}" method="POST">
|
<form class="pure-form " action="" method="POST">
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
<label for="preview-version">{{ _('Select timestamp') }}</label> <select id="preview-version"
|
||||||
name="from_version"
|
name="from_version"
|
||||||
@@ -28,7 +28,6 @@
|
|||||||
</option>
|
</option>
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</select>
|
</select>
|
||||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
|
||||||
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
<button type="submit" class="pure-button pure-button-primary">{{ _('Go') }}</button>
|
||||||
|
|
||||||
</fieldset>
|
</fieldset>
|
||||||
|
|||||||
@@ -487,25 +487,13 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
|||||||
except json.JSONDecodeError as e:
|
except json.JSONDecodeError as e:
|
||||||
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
|
||||||
else:
|
else:
|
||||||
# Check for JSONP wrapper: someCallback({...}) or some.namespace({...})
|
# Probably something else, go fish inside for it
|
||||||
# Server may claim application/json but actually return JSONP
|
try:
|
||||||
jsonp_match = re.match(r'^\w[\w.]*\s*\((.+)\)\s*;?\s*$', content.lstrip("\ufeff").strip(), re.DOTALL)
|
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
||||||
if jsonp_match:
|
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
||||||
try:
|
json_filter=json_filter )
|
||||||
inner = jsonp_match.group(1).strip()
|
except json.JSONDecodeError as e:
|
||||||
logger.warning(f"Content looks like JSONP, attempting to extract inner JSON for filter '{json_filter}'")
|
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
||||||
stripped_text_from_html = _parse_json(json.loads(inner), json_filter)
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
logger.warning(f"Error processing JSONP inner content {content[:20]}...{str(e)})")
|
|
||||||
|
|
||||||
if not stripped_text_from_html:
|
|
||||||
# Probably something else, go fish inside for it
|
|
||||||
try:
|
|
||||||
stripped_text_from_html = extract_json_blob_from_html(content=content,
|
|
||||||
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
|
|
||||||
json_filter=json_filter)
|
|
||||||
except json.JSONDecodeError as e:
|
|
||||||
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
|
|
||||||
|
|
||||||
if not stripped_text_from_html:
|
if not stripped_text_from_html:
|
||||||
# Re 265 - Just return an empty string when filter not found
|
# Re 265 - Just return an empty string when filter not found
|
||||||
|
|||||||
@@ -388,25 +388,6 @@ class model(EntityPersistenceMixin, watch_base):
|
|||||||
|
|
||||||
return self.get('fetch_backend')
|
return self.get('fetch_backend')
|
||||||
|
|
||||||
@property
|
|
||||||
def fetcher_supports_screenshots(self):
|
|
||||||
"""Return True if the fetcher configured for this watch supports screenshots.
|
|
||||||
|
|
||||||
Resolves 'system' via self._datastore, then checks supports_screenshots on
|
|
||||||
the actual fetcher class. Works for built-in and plugin fetchers alike.
|
|
||||||
"""
|
|
||||||
from changedetectionio import content_fetchers
|
|
||||||
|
|
||||||
fetcher_name = self.get_fetch_backend # already handles is_pdf → html_requests
|
|
||||||
if not fetcher_name or fetcher_name == 'system':
|
|
||||||
fetcher_name = self._datastore['settings']['application'].get('fetch_backend', 'html_requests')
|
|
||||||
|
|
||||||
fetcher_class = getattr(content_fetchers, fetcher_name, None)
|
|
||||||
if fetcher_class is None:
|
|
||||||
return False
|
|
||||||
|
|
||||||
return bool(getattr(fetcher_class, 'supports_screenshots', False))
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def is_pdf(self):
|
def is_pdf(self):
|
||||||
url = str(self.get("url") or "").lower()
|
url = str(self.get("url") or "").lower()
|
||||||
|
|||||||
@@ -42,7 +42,10 @@ def render_form(watch, datastore, request, url_for, render_template, flash, redi
|
|||||||
# Get error information for the template
|
# Get error information for the template
|
||||||
screenshot_url = watch.get_screenshot()
|
screenshot_url = watch.get_screenshot()
|
||||||
|
|
||||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||||
|
is_html_webdriver = False
|
||||||
|
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||||
|
is_html_webdriver = True
|
||||||
|
|
||||||
password_enabled_and_share_is_off = False
|
password_enabled_and_share_is_off = False
|
||||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||||
|
|||||||
@@ -100,13 +100,7 @@ class guess_stream_type():
|
|||||||
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
|
||||||
self.is_rss = True
|
self.is_rss = True
|
||||||
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
|
||||||
# JSONP detection: server claims application/json but content is actually JSONP (e.g. cb({...}))
|
self.is_json = True
|
||||||
# A JSONP response starts with an identifier followed by '(' - not valid JSON
|
|
||||||
if re.match(r'^\w[\w.]*\s*\(', test_content):
|
|
||||||
logger.warning(f"Content-Type header claims JSON but content looks like JSONP (starts with identifier+parenthesis) - treating as plaintext")
|
|
||||||
self.is_plaintext = True
|
|
||||||
else:
|
|
||||||
self.is_json = True
|
|
||||||
elif 'pdf' in magic_content_header:
|
elif 'pdf' in magic_content_header:
|
||||||
self.is_pdf = True
|
self.is_pdf = True
|
||||||
# magic will call a rss document 'xml'
|
# magic will call a rss document 'xml'
|
||||||
|
|||||||
@@ -154,7 +154,11 @@ def render(watch, datastore, request, url_for, render_template, flash, redirect,
|
|||||||
|
|
||||||
screenshot_url = watch.get_screenshot()
|
screenshot_url = watch.get_screenshot()
|
||||||
|
|
||||||
is_html_webdriver = watch.fetcher_supports_screenshots
|
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||||
|
|
||||||
|
is_html_webdriver = False
|
||||||
|
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||||
|
is_html_webdriver = True
|
||||||
|
|
||||||
password_enabled_and_share_is_off = False
|
password_enabled_and_share_is_off = False
|
||||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||||
|
|||||||
@@ -29,11 +29,9 @@ def register_watch_operation_handlers(socketio, datastore):
|
|||||||
# Perform the operation
|
# Perform the operation
|
||||||
if op == 'pause':
|
if op == 'pause':
|
||||||
watch.toggle_pause()
|
watch.toggle_pause()
|
||||||
watch.commit()
|
|
||||||
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
|
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
|
||||||
elif op == 'mute':
|
elif op == 'mute':
|
||||||
watch.toggle_mute()
|
watch.toggle_mute()
|
||||||
watch.commit()
|
|
||||||
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
|
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
|
||||||
elif op == 'recheck':
|
elif op == 'recheck':
|
||||||
# Import here to avoid circular imports
|
# Import here to avoid circular imports
|
||||||
|
|||||||
@@ -170,14 +170,6 @@ def test_api_simple(client, live_server, measure_memory_usage, datastore_path):
|
|||||||
headers={'x-api-key': api_key},
|
headers={'x-api-key': api_key},
|
||||||
)
|
)
|
||||||
assert b'(changed) Which is across' in res.data
|
assert b'(changed) Which is across' in res.data
|
||||||
assert b'Some text thats the same' in res.data
|
|
||||||
|
|
||||||
# Fetch the difference between two versions (default text format)
|
|
||||||
res = client.get(
|
|
||||||
url_for("watchhistorydiff", uuid=watch_uuid, from_timestamp='previous', to_timestamp='latest')+"?changesOnly=true",
|
|
||||||
headers={'x-api-key': api_key},
|
|
||||||
)
|
|
||||||
assert b'Some text thats the same' not in res.data
|
|
||||||
|
|
||||||
# Test htmlcolor format
|
# Test htmlcolor format
|
||||||
res = client.get(
|
res = client.get(
|
||||||
|
|||||||
@@ -178,44 +178,23 @@ def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_p
|
|||||||
|
|
||||||
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
|
def test_api_tag_restock_processor_config(client, live_server, measure_memory_usage, datastore_path):
|
||||||
"""
|
"""
|
||||||
Test that a tag/group can be created and updated with processor_config_restock_diff via the API.
|
Test that a tag/group can be updated with processor_config_restock_diff via the API.
|
||||||
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
|
Since Tag extends WatchBase, processor config fields injected into WatchBase are also valid for tags.
|
||||||
"""
|
"""
|
||||||
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
|
||||||
|
|
||||||
set_original_response(datastore_path=datastore_path)
|
set_original_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
# Create a tag with processor_config_restock_diff in a single POST (issue #3966)
|
# Create a tag
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("tag"),
|
url_for("tag"),
|
||||||
data=json.dumps({
|
data=json.dumps({"title": "Restock Group"}),
|
||||||
"title": "Restock Group",
|
|
||||||
"overrides_watch": True,
|
|
||||||
"processor_config_restock_diff": {
|
|
||||||
"in_stock_processing": "in_stock_only",
|
|
||||||
"follow_price_changes": True,
|
|
||||||
"price_change_min": 7777777
|
|
||||||
}
|
|
||||||
}),
|
|
||||||
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
headers={'content-type': 'application/json', 'x-api-key': api_key}
|
||||||
)
|
)
|
||||||
assert res.status_code == 201, f"POST tag with restock config failed: {res.data}"
|
assert res.status_code == 201
|
||||||
tag_uuid = res.json.get('uuid')
|
tag_uuid = res.json.get('uuid')
|
||||||
|
|
||||||
# Verify processor config was saved during creation (the bug: these were discarded)
|
# Update tag with valid processor_config_restock_diff
|
||||||
res = client.get(
|
|
||||||
url_for("tag", uuid=tag_uuid),
|
|
||||||
headers={'x-api-key': api_key}
|
|
||||||
)
|
|
||||||
assert res.status_code == 200
|
|
||||||
tag_data = res.json
|
|
||||||
assert tag_data.get('overrides_watch') == True, "overrides_watch should be saved on POST"
|
|
||||||
assert tag_data.get('processor_config_restock_diff', {}).get('in_stock_processing') == 'in_stock_only', \
|
|
||||||
"processor_config_restock_diff should be saved on POST"
|
|
||||||
assert tag_data.get('processor_config_restock_diff', {}).get('price_change_min') == 7777777, \
|
|
||||||
"price_change_min should be saved on POST"
|
|
||||||
|
|
||||||
# Update tag with valid processor_config_restock_diff via PUT
|
|
||||||
res = client.put(
|
res = client.put(
|
||||||
url_for("tag", uuid=tag_uuid),
|
url_for("tag", uuid=tag_uuid),
|
||||||
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
headers={'x-api-key': api_key, 'content-type': 'application/json'},
|
||||||
|
|||||||
@@ -48,15 +48,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
|||||||
# Check this class does not appear (that we didnt see the actual source)
|
# Check this class does not appear (that we didnt see the actual source)
|
||||||
assert b'foobar-detection' not in res.data
|
assert b'foobar-detection' not in res.data
|
||||||
|
|
||||||
# Check POST preview
|
|
||||||
res = client.post(
|
|
||||||
url_for("ui.ui_preview.preview_page", uuid="first"),
|
|
||||||
follow_redirects=True
|
|
||||||
)
|
|
||||||
# Check this class does not appear (that we didnt see the actual source)
|
|
||||||
assert b'foobar-detection' not in res.data
|
|
||||||
|
|
||||||
|
|
||||||
# Make a change
|
# Make a change
|
||||||
set_modified_response(datastore_path=datastore_path)
|
set_modified_response(datastore_path=datastore_path)
|
||||||
|
|
||||||
|
|||||||
@@ -16,51 +16,6 @@ except ModuleNotFoundError:
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
def test_jsonp_treated_as_plaintext():
|
|
||||||
from ..processors.magic import guess_stream_type
|
|
||||||
|
|
||||||
# JSONP content (server wrongly claims application/json) should be detected as plaintext
|
|
||||||
# Callback names are arbitrary identifiers, not always 'cb'
|
|
||||||
jsonp_content = 'jQuery123456({ "version": "8.0.41", "url": "https://example.com/app.apk" })'
|
|
||||||
result = guess_stream_type(http_content_header="application/json", content=jsonp_content)
|
|
||||||
assert result.is_json is False
|
|
||||||
assert result.is_plaintext is True
|
|
||||||
|
|
||||||
# Variation with dotted callback name e.g. jQuery.cb(...)
|
|
||||||
jsonp_dotted = 'some.callback({ "version": "1.0" })'
|
|
||||||
result = guess_stream_type(http_content_header="application/json", content=jsonp_dotted)
|
|
||||||
assert result.is_json is False
|
|
||||||
assert result.is_plaintext is True
|
|
||||||
|
|
||||||
# Real JSON should still be detected as JSON
|
|
||||||
json_content = '{ "version": "8.0.41", "url": "https://example.com/app.apk" }'
|
|
||||||
result = guess_stream_type(http_content_header="application/json", content=json_content)
|
|
||||||
assert result.is_json is True
|
|
||||||
assert result.is_plaintext is False
|
|
||||||
|
|
||||||
|
|
||||||
def test_jsonp_json_filter_extraction():
|
|
||||||
from .. import html_tools
|
|
||||||
|
|
||||||
# Tough case: dotted namespace callback, trailing semicolon, deeply nested content with arrays
|
|
||||||
jsonp_content = 'weixin.update.callback({"platforms": {"android": {"variants": [{"arch": "arm64", "versionName": "8.0.68", "url": "https://example.com/app-arm64.apk"}, {"arch": "arm32", "versionName": "8.0.41", "url": "https://example.com/app-arm32.apk"}]}}});'
|
|
||||||
|
|
||||||
# Deep nested jsonpath filter into array element
|
|
||||||
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[0].versionName")
|
|
||||||
assert text == '"8.0.68"'
|
|
||||||
|
|
||||||
# Filter that selects the second array element
|
|
||||||
text = html_tools.extract_json_as_string(jsonp_content, "json:$.platforms.android.variants[1].arch")
|
|
||||||
assert text == '"arm32"'
|
|
||||||
|
|
||||||
if jq_support:
|
|
||||||
text = html_tools.extract_json_as_string(jsonp_content, "jq:.platforms.android.variants[0].versionName")
|
|
||||||
assert text == '"8.0.68"'
|
|
||||||
|
|
||||||
text = html_tools.extract_json_as_string(jsonp_content, "jqraw:.platforms.android.variants[1].url")
|
|
||||||
assert text == "https://example.com/app-arm32.apk"
|
|
||||||
|
|
||||||
|
|
||||||
def test_unittest_inline_html_extract():
|
def test_unittest_inline_html_extract():
|
||||||
# So lets pretend that the JSON we want is inside some HTML
|
# So lets pretend that the JSON we want is inside some HTML
|
||||||
content="""
|
content="""
|
||||||
|
|||||||
Binary file not shown.
File diff suppressed because it is too large
Load Diff
@@ -40,7 +40,7 @@ orjson~=3.11
|
|||||||
# jq not available on Windows so must be installed manually
|
# jq not available on Windows so must be installed manually
|
||||||
|
|
||||||
# Notification library
|
# Notification library
|
||||||
apprise==1.9.8
|
apprise==1.9.7
|
||||||
|
|
||||||
diff_match_patch
|
diff_match_patch
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user