Compare commits

..

3 Commits

Author SHA1 Message Date
dgtlmoon
7b3d054a4a Tweak to tests 2025-03-27 10:24:41 +01:00
dgtlmoon
3d17a85c79 Clear title on save 2025-03-27 09:59:12 +01:00
dgtlmoon
694a8e2fe7 Re #2782 - Should be "Clone & Edit" without watch history 2025-03-27 09:52:22 +01:00
70 changed files with 1255 additions and 1995 deletions

View File

@@ -28,6 +28,7 @@ jobs:
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.11'
skip-pypuppeteer: true
test-application-3-12:
needs: lint-code

View File

@@ -7,7 +7,7 @@ on:
description: 'Python version to use'
required: true
type: string
default: '3.11'
default: '3.10'
skip-pypuppeteer:
description: 'Skip PyPuppeteer (not supported in 3.11/3.12)'
required: false

View File

@@ -1,5 +1,8 @@
# pip dependencies install stage
# @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py
# If you know how to fix it, please do! and test it for both 3.10 and 3.11
ARG PYTHON_VERSION=3.11
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder

View File

@@ -1,9 +1,9 @@
recursive-include changedetectionio/api *
recursive-include changedetectionio/apprise_plugin *
recursive-include changedetectionio/blueprint *
recursive-include changedetectionio/content_fetchers *
recursive-include changedetectionio/conditions *
recursive-include changedetectionio/model *
recursive-include changedetectionio/notification *
recursive-include changedetectionio/processors *
recursive-include changedetectionio/static *
recursive-include changedetectionio/templates *

View File

@@ -89,7 +89,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
#### Key Features
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
- Target elements with xPath 1 and xPath 2, CSS Selectors, Easily monitor complex JSON with JSONPath or jq
- Target elements with xPath(1.0) and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
- Switch between fast non-JS and Chrome JS based "fetchers"
- Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums)
- Easily specify how often a site should be checked
@@ -105,12 +105,6 @@ We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) glob
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
### Conditional web page changes
Easily [configure conditional actions](https://changedetection.io/tutorial/conditional-actions-web-page-changes), for example, only trigger when a price is above or below a preset amount, or [when a web page includes (or does not include) a keyword](https://changedetection.io/tutorial/how-monitor-keywords-any-website)
<img src="./docs/web-page-change-conditions.png" style="max-width:80%;" alt="Conditional web page changes" title="Conditional web page changes" />
### Schedule web page watches in any timezone, limit by day of week and time.
Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours.

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.49.12'
__version__ = '0.49.9'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -11,7 +11,6 @@ os.environ['EVENTLET_NO_GREENDNS'] = 'yes'
import eventlet
import eventlet.wsgi
import getopt
import platform
import signal
import socket
import sys
@@ -20,6 +19,7 @@ from changedetectionio import store
from changedetectionio.flask_app import changedetection_app
from loguru import logger
# Only global so we can access it in the signal handler
app = None
datastore = None
@@ -29,6 +29,8 @@ def get_version():
# Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
def sigshutdown_handler(_signo, _stack_frame):
global app
global datastore
name = signal.Signals(_signo).name
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown')
datastore.sync_to_json()
@@ -145,19 +147,6 @@ def main():
signal.signal(signal.SIGTERM, sigshutdown_handler)
signal.signal(signal.SIGINT, sigshutdown_handler)
# Custom signal handler for memory cleanup
def sigusr_clean_handler(_signo, _stack_frame):
from changedetectionio.gc_cleanup import memory_cleanup
logger.info('SIGUSR1 received: Running memory cleanup')
return memory_cleanup(app)
# Register the SIGUSR1 signal handler
# Only register the signal handler if running on Linux
if platform.system() == "Linux":
signal.signal(signal.SIGUSR1, sigusr_clean_handler)
else:
logger.info("SIGUSR1 handler only registered on Linux, skipped.")
# Go into cleanup mode
if do_cleanup:

View File

@@ -1,7 +1,5 @@
# Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API
# Probably other ways to solve this when the backend switches to some ORM
from changedetectionio.notification import valid_notification_formats
def build_time_between_check_json_schema():
# Setup time between check schema
@@ -100,6 +98,8 @@ def build_watch_json_schema(d):
}
}
from changedetectionio.notification import valid_notification_formats
schema['properties']['notification_format'] = {'type': 'string',
'enum': list(valid_notification_formats.keys())
}

View File

@@ -0,0 +1,12 @@
from changedetectionio import apprise_plugin
import apprise
# Create our AppriseAsset and populate it with some of our new values:
# https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object
asset = apprise.AppriseAsset(
image_url_logo='https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
)
asset.app_id = "changedetection.io"
asset.app_desc = "ChangeDetection.io best and simplest website monitoring and change detection"
asset.app_url = "https://changedetection.io"

View File

@@ -0,0 +1,98 @@
# include the decorator
from apprise.decorators import notify
from loguru import logger
from requests.structures import CaseInsensitiveDict
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
import requests
import json
import re
from urllib.parse import unquote_plus
from apprise.utils.parse import parse_url as apprise_parse_url
url = kwargs['meta'].get('url')
schema = kwargs['meta'].get('schema').lower().strip()
# Choose POST, GET etc from requests
method = re.sub(rf's$', '', schema)
requests_method = getattr(requests, method)
params = CaseInsensitiveDict({}) # Added to requests
auth = None
has_error = False
# Convert /foobar?+some-header=hello to proper header dictionary
results = apprise_parse_url(url)
# Add our headers that the user can potentially over-ride if they wish
# to to our returned result set and tidy entries by unquoting them
headers = CaseInsensitiveDict({unquote_plus(x): unquote_plus(y)
for x, y in results['qsd+'].items()})
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
# but here we are making straight requests, so we need todo convert this against apprise's logic
for k, v in results['qsd'].items():
if not k.strip('+-') in results['qsd+'].keys():
params[unquote_plus(k)] = unquote_plus(v)
# Determine Authentication
auth = ''
if results.get('user') and results.get('password'):
auth = (unquote_plus(results.get('user')), unquote_plus(results.get('user')))
elif results.get('user'):
auth = (unquote_plus(results.get('user')))
# If it smells like it could be JSON and no content-type was already set, offer a default content type.
if body and '{' in body[:100] and not headers.get('Content-Type'):
json_header = 'application/json; charset=utf-8'
try:
# Try if it's JSON
json.loads(body)
headers['Content-Type'] = json_header
except ValueError as e:
logger.warning(f"Could not automatically add '{json_header}' header to the notification because the document failed to parse as JSON: {e}")
pass
# POSTS -> HTTPS etc
if schema.lower().endswith('s'):
url = re.sub(rf'^{schema}', 'https', results.get('url'))
else:
url = re.sub(rf'^{schema}', 'http', results.get('url'))
status_str = ''
try:
r = requests_method(url,
auth=auth,
data=body.encode('utf-8') if type(body) is str else body,
headers=headers,
params=params
)
if not (200 <= r.status_code < 300):
status_str = f"Error sending '{method.upper()}' request to {url} - Status: {r.status_code}: '{r.reason}'"
logger.error(status_str)
has_error = True
else:
logger.info(f"Sent '{method.upper()}' request to {url}")
has_error = False
except requests.RequestException as e:
status_str = f"Error sending '{method.upper()}' request to {url} - {str(e)}"
logger.error(status_str)
has_error = True
if has_error:
raise TypeError(status_str)
return True

View File

@@ -20,7 +20,10 @@ def login_optionally_required(func):
has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
# Permitted
if request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'):
if request.endpoint and 'static_content' in request.endpoint and request.view_args and request.view_args.get('group') == 'styles':
return func(*args, **kwargs)
# Permitted
elif request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'):
return func(*args, **kwargs)
elif request.method in flask_login.config.EXEMPT_METHODS:
return func(*args, **kwargs)

View File

@@ -23,6 +23,7 @@ from loguru import logger
browsersteps_sessions = {}
io_interface_context = None
import json
import base64
import hashlib
from flask import Response
@@ -33,8 +34,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
from . import nonContext
from . import browser_steps
import time
global browsersteps_sessions
global io_interface_context
# We keep the playwright session open for many minutes
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
@@ -101,6 +104,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# A new session was requested, return sessionID
import uuid
global browsersteps_sessions
browsersteps_session_id = str(uuid.uuid4())
watch_uuid = request.args.get('uuid')
@@ -144,6 +149,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
def browsersteps_ui_update():
import base64
import playwright._impl._errors
global browsersteps_sessions
from changedetectionio.blueprint.browser_steps import browser_steps
remaining =0

View File

@@ -4,7 +4,7 @@ import re
from random import randint
from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
from changedetectionio.content_fetchers.base import manage_user_agent
from changedetectionio.safe_jinja import render as jinja_render
@@ -293,16 +293,19 @@ class browsersteps_live_ui(steppable_browser_interface):
def get_current_state(self):
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
import importlib.resources
import json
# because we for now only run browser steps in playwright mode (not puppeteer mode)
from changedetectionio.content_fetchers.playwright import capture_full_page
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
now = time.time()
self.page.wait_for_timeout(1 * 1000)
screenshot = capture_full_page(page=self.page)
full_height = self.page.evaluate("document.documentElement.scrollHeight")
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
screenshot = capture_stitched_together_full_page(self.page)
else:
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
@@ -310,21 +313,13 @@ class browsersteps_live_ui(steppable_browser_interface):
self.page.evaluate("var include_filters=''")
# Go find the interactive elements
# @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)
self.page.request_gc()
scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
"visualselector_xpath_selectors": scan_elements,
"max_height": MAX_TOTAL_HEIGHT
}))
self.page.request_gc()
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
# So the JS will find the smallest one first
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s")
# playwright._impl._api_types.Error: Browser closed.
# @todo show some countdown timer?

View File

@@ -22,7 +22,6 @@
<li class="tab"><a href="#notifications">Notifications</a></li>
<li class="tab"><a href="#fetching">Fetching</a></li>
<li class="tab"><a href="#filters">Global Filters</a></li>
<li class="tab"><a href="#ui-options">UI Options</a></li>
<li class="tab"><a href="#api">API</a></li>
<li class="tab"><a href="#timedate">Time &amp Date</a></li>
<li class="tab"><a href="#proxies">CAPTCHA &amp; Proxies</a></li>
@@ -218,7 +217,7 @@ nav
<a id="chrome-extension-link"
title="Try our new Chrome Extension!"
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" alt="Chrome">
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
Chrome Webstore
</a>
</p>
@@ -241,12 +240,6 @@ nav
</p>
</div>
</div>
<div class="tab-pane-inner" id="ui-options">
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
<span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
</div>
</div>
<div class="tab-pane-inner" id="proxies">
<div id="recommended-proxy">
<div>

View File

@@ -13,7 +13,6 @@
/*const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');*/
/*{% endif %}*/
{% set has_tag_filters_extra='' %}
</script>
@@ -47,12 +46,59 @@
</div>
<div class="tab-pane-inner" id="filters-and-triggers">
<p>These settings are <strong><i>added</i></strong> to any existing watch configurations.</p>
{% include "edit/include_subtract.html" %}
<div class="text-filtering border-fieldset">
<h3>Text filtering</h3>
{% include "edit/text-options.html" %}
</div>
<div class="pure-control-group">
{% set field = render_field(form.include_filters,
rows=5,
placeholder="#example
xpath://body/div/span[contains(@class, 'example-class')]",
class="m-d")
%}
{{ field }}
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul id="advanced-help-selectors">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}
</ul>
</li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code>
<ul>
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a
href="http://xpather.com/" target="new">test your XPath here</a></li>
<li>Example: Get all titles from an RSS feed <code>//title/text()</code></li>
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul>
</li>
</ul>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</span>
</div>
<fieldset class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
footer
nav
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
</div>
{# rendered sub Template #}

View File

@@ -125,10 +125,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
else:
# Recheck all, including muted
# Get most overdue first
for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
watch_uuid = k[0]
watch = k[1]
for watch_uuid, watch in datastore.data['watching'].items():
if not watch['paused']:
if watch_uuid not in running_uuids:
if with_errors and not watch.get('last_error'):
@@ -143,7 +140,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
if i == 1:
flash("Queued 1 watch for rechecking.")
if i > 1:
flash(f"Queued {i} watches for rechecking.")
flash("Queued {} watches for rechecking.".format(i))
if i == 0:
flash("No watches available to recheck.")

View File

@@ -17,13 +17,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Watch_uuid could be unset in the case it`s used in tag editor, global settings
import apprise
from changedetectionio.notification.handler import process_notification
from changedetectionio.notification.apprise_plugin.assets import apprise_asset
from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler
apobj = apprise.Apprise(asset=apprise_asset)
from changedetectionio.apprise_asset import asset
apobj = apprise.Apprise(asset=asset)
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
@@ -92,6 +90,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
n_object['as_async'] = False
n_object.update(watch.extra_notification_token_values())
from changedetectionio.notification import process_notification
sent_obj = process_notification(n_object, datastore)
except Exception as e:

View File

@@ -130,7 +130,7 @@
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
or "extra_browser_" in watch.get_fetch_backend
%}
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
{% endif %}
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
@@ -209,18 +209,15 @@
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
{% if watch.history_n >= 2 %}
{% set open_diff_in_new_tab = datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') %}
{% set target_attr = ' target="' ~ watch.uuid ~ '"' if open_diff_in_new_tab else '' %}
{% if is_unviewed %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
{% else %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
{% endif %}
{% else %}
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary">Preview</a>
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary">Preview</a>
{% endif %}
{% endif %}
</td>
@@ -244,7 +241,7 @@
all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a>
</li>
<li>
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='Generic_Feed-icon.svg')}}" height="15"></a>
</li>
</ul>
{{ pagination.links }}

View File

@@ -8,7 +8,7 @@ from . import default_plugin
# List of all supported JSON Logic operators
operator_choices = [
(None, "Choose one - Operator"),
(None, "Choose one"),
(">", "Greater Than"),
("<", "Less Than"),
(">=", "Greater Than or Equal To"),
@@ -21,7 +21,7 @@ operator_choices = [
# Fields available in the rules
field_choices = [
(None, "Choose one - Field"),
(None, "Choose one"),
]
# The data we will feed the JSON Rules to see if it passes the test/conditions or not
@@ -116,7 +116,8 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat
if not jsonLogic(logic=ruleset, data=EXECUTE_DATA, operations=CUSTOM_OPERATIONS):
result = False
return {'executed_data': EXECUTE_DATA, 'result': result}
return result
# Load plugins dynamically
for plugin in plugin_manager.get_plugins():

View File

@@ -67,8 +67,7 @@ def construct_blueprint(datastore):
return jsonify({
'status': 'success',
'result': result.get('result'),
'data': result.get('executed_data'),
'result': result,
'message': 'Condition passes' if result else 'Condition does not pass'
})

View File

@@ -19,7 +19,7 @@ class ConditionFormRow(Form):
validators=[validators.Optional()]
)
value = StringField("Value", validators=[validators.Optional()], render_kw={"placeholder": "A value"})
value = StringField("Value", validators=[validators.Optional()])
def validate(self, extra_validators=None):
# First, run the default validators

View File

@@ -7,29 +7,11 @@ import os
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000
SCREENSHOT_DEFAULT_QUALITY = 40
# Maximum total height for the final image (When in stitch mode).
# We limit this to 16000px due to the huge amount of RAM that was being used
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
# The size at which we will switch to stitching method, when below this (and
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
# screenshot method.
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
# available_fetchers() will scan this implementation looking for anything starting with html_
# this information is used in the form selections
from changedetectionio.content_fetchers.requests import fetcher as html_requests
import importlib.resources
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
def available_fetchers():
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
import inspect

View File

@@ -63,6 +63,11 @@ class Fetcher():
# Time ONTOP of the system defined env minimum time
render_extract_delay = 0
def __init__(self):
import importlib.resources
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
@abstractmethod
def get_error(self):
return self.error
@@ -82,7 +87,7 @@ class Fetcher():
pass
@abstractmethod
def quit(self, watch=None):
def quit(self):
return
@abstractmethod
@@ -138,7 +143,6 @@ class Fetcher():
logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
self.screenshot_step("before-" + str(step_n))
self.save_step_html("before-" + str(step_n))
try:
optional_value = step['optional_value']
selector = step['selector']

View File

@@ -0,0 +1,104 @@
# Pages with a vertical height longer than this will use the 'stitch together' method.
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
# - If a page is taller than ~800010000px, it risks exceeding GPU memory limits.
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
# The size at which we will switch to stitching method
SCREENSHOT_SIZE_STITCH_THRESHOLD=8000
from loguru import logger
def capture_stitched_together_full_page(page):
import io
import os
import time
from PIL import Image, ImageDraw, ImageFont
MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4 # Maximum total height for the final image (When in stitch mode)
MAX_CHUNK_HEIGHT = 4000 # Height per screenshot chunk
WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay
# Save the original viewport size
original_viewport = page.viewport_size
now = time.time()
try:
viewport = page.viewport_size
page_height = page.evaluate("document.documentElement.scrollHeight")
# Limit the total capture height
capture_height = min(page_height, MAX_TOTAL_HEIGHT)
images = []
total_captured_height = 0
for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
# Ensure we do not exceed the total height limit
chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)
# Adjust viewport size for this chunk
page.set_viewport_size({"width": viewport["width"], "height": chunk_height})
# Scroll to the correct position
page.evaluate(f"window.scrollTo(0, {offset})")
# Capture screenshot chunk
screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
images.append(Image.open(io.BytesIO(screenshot_bytes)))
total_captured_height += chunk_height
# Stop if we reached the maximum total height
if total_captured_height >= MAX_TOTAL_HEIGHT:
break
# Create the final stitched image
stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
y_offset = 0
# Stitch the screenshot chunks together
for img in images:
stitched_image.paste(img, (0, y_offset))
y_offset += img.height
logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")
# Overlay warning text if the screenshot was trimmed
if page_height > MAX_TOTAL_HEIGHT:
draw = ImageDraw.Draw(stitched_image)
warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
# Load font (default system font if Arial is unavailable)
try:
font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT) # Arial (Windows/Mac)
except IOError:
font = ImageFont.load_default() # Default font if Arial not found
# Get text bounding box (correct method for newer Pillow versions)
text_bbox = draw.textbbox((0, 0), warning_text, font=font)
text_width = text_bbox[2] - text_bbox[0] # Calculate text width
text_height = text_bbox[3] - text_bbox[1] # Calculate text height
# Define background rectangle (top of the image)
draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")
# Center text horizontally within the warning area
text_x = (viewport["width"] - text_width) // 2
text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
# Draw the warning text in red
draw.text((text_x, text_y), warning_text, fill="red", font=font)
# Save or return the final image
output = io.BytesIO()
stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
screenshot = output.getvalue()
finally:
# Restore the original viewport size
page.set_viewport_size(original_viewport)
return screenshot

View File

@@ -4,71 +4,10 @@ from urllib.parse import urlparse
from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
def capture_full_page(page):
import os
import time
from multiprocessing import Process, Pipe
start = time.time()
page_height = page.evaluate("document.documentElement.scrollHeight")
page_width = page.evaluate("document.documentElement.scrollWidth")
original_viewport = page.viewport_size
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
# Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
screenshot_chunks = []
y = 0
# If page height is larger than current viewport, use a larger viewport for better capturing
if page_height > page.viewport_size['height']:
# Set viewport to a larger size to capture more content at once
page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
# Capture screenshots in chunks up to the max total height
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
page.request_gc()
page.evaluate(f"window.scrollTo(0, {y})")
page.request_gc()
screenshot_chunks.append(page.screenshot(
type="jpeg",
full_page=False,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
))
y += step_size
page.request_gc()
# Restore original viewport size
page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
# If we have multiple chunks, stitch them together
if len(screenshot_chunks) > 1:
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
parent_conn, child_conn = Pipe()
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
p.start()
screenshot = parent_conn.recv_bytes()
p.join()
logger.debug(
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
screenshot_chunks = None
return screenshot
logger.debug(
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
return screenshot_chunks[0]
class fetcher(Fetcher):
fetcher_description = "Playwright {}/Javascript".format(
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
@@ -121,8 +60,7 @@ class fetcher(Fetcher):
def screenshot_step(self, step_n=''):
super().screenshot_step(step_n=step_n)
screenshot = capture_full_page(page=self.page)
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
if self.browser_steps_screenshot_path is not None:
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
@@ -151,6 +89,7 @@ class fetcher(Fetcher):
from playwright.sync_api import sync_playwright
import playwright._impl._errors
from changedetectionio.content_fetchers import visualselector_xpath_selectors
import time
self.delete_browser_steps_screenshots()
response = None
@@ -225,7 +164,9 @@ class fetcher(Fetcher):
raise PageUnloadable(url=url, status_code=None, message=str(e))
if self.status_code != 200 and not ignore_status_codes:
screenshot = capture_full_page(self.page)
screenshot = self.page.screenshot(type='jpeg', full_page=True,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
@@ -246,23 +187,13 @@ class fetcher(Fetcher):
self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
else:
self.page.evaluate("var include_filters=''")
self.page.request_gc()
# request_gc before and after evaluate to free up memory
# @todo browsersteps etc
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
self.xpath_data = self.page.evaluate(XPATH_ELEMENT_JS, {
"visualselector_xpath_selectors": visualselector_xpath_selectors,
"max_height": MAX_TOTAL_HEIGHT
})
self.page.request_gc()
self.instock_data = self.page.evaluate(INSTOCK_DATA_JS)
self.page.request_gc()
self.xpath_data = self.page.evaluate(
"async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
self.content = self.page.content()
self.page.request_gc()
logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
logger.debug(f"Time to scrape xpath element data in browser {time.time() - now:.2f}s")
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
@@ -273,25 +204,18 @@ class fetcher(Fetcher):
# acceptable screenshot quality here
try:
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
self.screenshot = capture_full_page(page=self.page)
full_height = self.page.evaluate("document.documentElement.scrollHeight")
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
logger.warning(
f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
self.screenshot = capture_stitched_together_full_page(self.page)
else:
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
except Exception as e:
# It's likely the screenshot was too long/big and something crashed
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
finally:
# Request garbage collection one more time before closing
try:
self.page.request_gc()
except:
pass
# Clean up resources properly
context.close()
context = None
self.page.close()
self.page = None
browser.close()
borwser = None

View File

@@ -6,76 +6,8 @@ from urllib.parse import urlparse
from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \
SCREENSHOT_MAX_TOTAL_HEIGHT
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, \
BrowserConnectError
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
async def capture_full_page(page):
import os
import time
from multiprocessing import Process, Pipe
start = time.time()
page_height = await page.evaluate("document.documentElement.scrollHeight")
page_width = await page.evaluate("document.documentElement.scrollWidth")
original_viewport = page.viewport
logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
# JPEG is better here because the screenshots can be very very large
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Something that will not cause the GPU to overflow when taking the screenshot
screenshot_chunks = []
y = 0
if page_height > page.viewport['height']:
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
await page.evaluate(f"window.scrollTo(0, {y})")
screenshot_chunks.append(await page.screenshot(type_='jpeg',
fullPage=False,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
y += step_size
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
if len(screenshot_chunks) > 1:
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
parent_conn, child_conn = Pipe()
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
p.start()
screenshot = parent_conn.recv_bytes()
p.join()
logger.debug(
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
screenshot_chunks = None
return screenshot
logger.debug(
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
return screenshot_chunks[0]
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
class fetcher(Fetcher):
fetcher_description = "Puppeteer/direct {}/Javascript".format(
@@ -147,6 +79,7 @@ class fetcher(Fetcher):
empty_pages_are_a_change
):
from changedetectionio.content_fetchers import visualselector_xpath_selectors
self.delete_browser_steps_screenshots()
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
@@ -248,10 +181,11 @@ class fetcher(Fetcher):
raise PageUnloadable(url=url, status_code=None, message=str(e))
if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page(page=self.page)
screenshot = await self.page.screenshot(type_='jpeg',
fullPage=True,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
content = await self.page.content
if not empty_pages_are_a_change and len(content.strip()) == 0:
@@ -269,31 +203,46 @@ class fetcher(Fetcher):
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
# Setup the xPath/VisualSelector scraper
if current_include_filters:
if current_include_filters is not None:
js = json.dumps(current_include_filters)
await self.page.evaluate(f"var include_filters={js}")
else:
await self.page.evaluate(f"var include_filters=''")
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
"visualselector_xpath_selectors": visualselector_xpath_selectors,
"max_height": MAX_TOTAL_HEIGHT
})
if not self.xpath_data:
raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)")
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
self.xpath_data = await self.page.evaluate(
"async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
self.instock_data = await self.page.evaluate("async () => {" + self.instock_data_js + "}")
self.content = await self.page.content
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
# JPEG is better here because the screenshots can be very very large
self.screenshot = await capture_full_page(page=self.page)
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
logger.success(f"Fetching '{url}' complete, closing page")
await self.page.close()
logger.success(f"Fetching '{url}' complete, closing browser")
await browser.close()
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
try:
self.screenshot = await self.page.screenshot(type_='jpeg',
fullPage=True,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
except Exception as e:
logger.error("Error fetching screenshot")
# // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
# // @ todo after text extract, we can place some overlay text with red background to say 'croppped'
logger.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot')
try:
self.screenshot = await self.page.screenshot(type_='jpeg',
fullPage=False,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
except Exception as e:
logger.error('ERROR: Failed to get viewport-only reduced screenshot :(')
pass
finally:
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
logger.success(f"Fetching '{url}' complete, closing page")
await self.page.close()
logger.success(f"Fetching '{url}' complete, closing browser")
await browser.close()
logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.")
async def main(self, **kwargs):

View File

@@ -96,17 +96,3 @@ class fetcher(Fetcher):
self.raw_content = r.content
def quit(self, watch=None):
# In case they switched to `requests` fetcher from something else
# Then the screenshot could be old, in any case, it's not used here.
# REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing
if strtobool(os.getenv("REMOVE_REQUESTS_OLD_SCREENSHOTS", 'true')):
screenshot = watch.get_screenshot()
if screenshot:
try:
os.unlink(screenshot)
except Exception as e:
logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}")

View File

@@ -0,0 +1,190 @@
module.exports = async ({page, context}) => {
var {
url,
execute_js,
user_agent,
extra_wait_ms,
req_headers,
include_filters,
xpath_element_js,
screenshot_quality,
proxy_username,
proxy_password,
disk_cache_dir,
no_cache_list,
block_url_list,
} = context;
await page.setBypassCSP(true)
await page.setExtraHTTPHeaders(req_headers);
if (user_agent) {
await page.setUserAgent(user_agent);
}
// https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
await page.setDefaultNavigationTimeout(0);
if (proxy_username) {
// Setting Proxy-Authentication header is deprecated, and doing so can trigger header change errors from Puppeteer
// https://github.com/puppeteer/puppeteer/issues/676 ?
// https://help.brightdata.com/hc/en-us/articles/12632549957649-Proxy-Manager-How-to-Guides#h_01HAKWR4Q0AFS8RZTNYWRDFJC2
// https://cri.dev/posts/2020-03-30-How-to-solve-Puppeteer-Chrome-Error-ERR_INVALID_ARGUMENT/
await page.authenticate({
username: proxy_username,
password: proxy_password
});
}
await page.setViewport({
width: 1024,
height: 768,
deviceScaleFactor: 1,
});
await page.setRequestInterception(true);
if (disk_cache_dir) {
console.log(">>>>>>>>>>>>>>> LOCAL DISK CACHE ENABLED <<<<<<<<<<<<<<<<<<<<<");
}
const fs = require('fs');
const crypto = require('crypto');
function file_is_expired(file_path) {
if (!fs.existsSync(file_path)) {
return true;
}
var stats = fs.statSync(file_path);
const now_date = new Date();
const expire_seconds = 300;
if ((now_date / 1000) - (stats.mtime.getTime() / 1000) > expire_seconds) {
console.log("CACHE EXPIRED: " + file_path);
return true;
}
return false;
}
page.on('request', async (request) => {
// General blocking of requests that waste traffic
if (block_url_list.some(substring => request.url().toLowerCase().includes(substring))) return request.abort();
if (disk_cache_dir) {
const url = request.url();
const key = crypto.createHash('md5').update(url).digest("hex");
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
// https://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js
if (fs.existsSync(dir_path + key)) {
console.log("* CACHE HIT , using - " + dir_path + key + " - " + url);
const cached_data = fs.readFileSync(dir_path + key);
// @todo headers can come from dir_path+key+".meta" json file
request.respond({
status: 200,
//contentType: 'text/html', //@todo
body: cached_data
});
return;
}
}
request.continue();
});
if (disk_cache_dir) {
page.on('response', async (response) => {
const url = response.url();
// Basic filtering for sane responses
if (response.request().method() != 'GET' || response.request().resourceType() == 'xhr' || response.request().resourceType() == 'document' || response.status() != 200) {
console.log("Skipping (not useful) - Status:" + response.status() + " Method:" + response.request().method() + " ResourceType:" + response.request().resourceType() + " " + url);
return;
}
if (no_cache_list.some(substring => url.toLowerCase().includes(substring))) {
console.log("Skipping (no_cache_list) - " + url);
return;
}
if (url.toLowerCase().includes('data:')) {
console.log("Skipping (embedded-data) - " + url);
return;
}
response.buffer().then(buffer => {
if (buffer.length > 100) {
console.log("Cache - Saving " + response.request().method() + " - " + url + " - " + response.request().resourceType());
const key = crypto.createHash('md5').update(url).digest("hex");
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
if (!fs.existsSync(dir_path)) {
fs.mkdirSync(dir_path, {recursive: true})
}
if (fs.existsSync(dir_path + key)) {
if (file_is_expired(dir_path + key)) {
fs.writeFileSync(dir_path + key, buffer);
}
} else {
fs.writeFileSync(dir_path + key, buffer);
}
}
});
});
}
const r = await page.goto(url, {
waitUntil: 'load'
});
await page.waitForTimeout(1000);
await page.waitForTimeout(extra_wait_ms);
if (execute_js) {
await page.evaluate(execute_js);
await page.waitForTimeout(200);
}
var xpath_data;
var instock_data;
try {
// Not sure the best way here, in the future this should be a new package added to npm then run in evaluatedCode
// (Once the old playwright is removed)
xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters);
instock_data = await page.evaluate(() => {%instock_scrape_code%});
} catch (e) {
console.log(e);
}
// Protocol error (Page.captureScreenshot): Cannot take screenshot with 0 width can come from a proxy auth failure
// Wrap it here (for now)
var b64s = false;
try {
b64s = await page.screenshot({encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg'});
} catch (e) {
console.log(e);
}
// May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
if (!b64s) {
// @todo after text extract, we can place some overlay text with red background to say 'croppped'
console.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot');
try {
b64s = await page.screenshot({encoding: "base64", quality: screenshot_quality, type: 'jpeg'});
} catch (e) {
console.log(e);
}
}
var html = await page.content();
return {
data: {
'content': html,
'headers': r.headers(),
'instock_data': instock_data,
'screenshot': b64s,
'status_code': r.status(),
'xpath_data': xpath_data
},
type: 'application/json',
};
};

View File

@@ -1,220 +1,223 @@
async () => {
// Restock Detector
// (c) Leigh Morresi dgtlmoon@gmail.com
//
// Assumes the product is in stock to begin with, unless the following appears above the fold ;
// - outOfStockTexts appears above the fold (out of stock)
// - negateOutOfStockRegex (really is in stock)
function isItemInStock() {
// @todo Pass these in so the same list can be used in non-JS fetchers
const outOfStockTexts = [
' أخبرني عندما يتوفر',
'0 in stock',
'actuellement indisponible',
'agotado',
'article épuisé',
'artikel zurzeit vergriffen',
'as soon as stock is available',
'ausverkauft', // sold out
'available for back order',
'awaiting stock',
'back in stock soon',
'back-order or out of stock',
'backordered',
'benachrichtigt mich', // notify me
'brak na stanie',
'brak w magazynie',
'coming soon',
'currently have any tickets for this',
'currently unavailable',
'dieser artikel ist bald wieder verfügbar',
'dostępne wkrótce',
'en rupture',
'en rupture de stock',
'épuisé',
'esgotado',
'indisponible',
'indisponível',
'isn\'t in stock right now',
'isnt in stock right now',
'isnt in stock right now',
'item is no longer available',
'let me know when it\'s available',
'mail me when available',
'message if back in stock',
'mevcut değil',
'nachricht bei',
'nicht auf lager',
'nicht lagernd',
'nicht lieferbar',
'nicht verfügbar',
'nicht vorrätig',
'nicht zur verfügung',
'nie znaleziono produktów',
'niet beschikbaar',
'niet leverbaar',
'niet op voorraad',
'no disponible',
'non disponibile',
'non disponible',
'no longer in stock',
'no tickets available',
'not available',
'not currently available',
'not in stock',
'notify me when available',
'notify me',
'notify when available',
'não disponível',
'não estamos a aceitar encomendas',
'out of stock',
'out-of-stock',
'plus disponible',
'prodotto esaurito',
'produkt niedostępny',
'rupture',
'sold out',
'sold-out',
'stok habis',
'stok kosong',
'stok varian ini habis',
'stokta yok',
'temporarily out of stock',
'temporarily unavailable',
'there were no search results for',
'this item is currently unavailable',
'tickets unavailable',
'tidak dijual',
'tidak tersedia',
'tijdelijk uitverkocht',
'tiket tidak tersedia',
'tükendi',
'unavailable nearby',
'unavailable tickets',
'vergriffen',
'vorbestellen',
'vorbestellung ist bald möglich',
'we don\'t currently have any',
'we couldn\'t find any products that match',
'we do not currently have an estimate of when this product will be back in stock.',
'we don\'t know when or if this item will be back in stock.',
'we were not able to find a match',
'when this arrives in stock',
'zur zeit nicht an lager',
'品切れ',
'已售',
'已售完',
'품절'
];
function isItemInStock() {
// @todo Pass these in so the same list can be used in non-JS fetchers
const outOfStockTexts = [
' أخبرني عندما يتوفر',
'0 in stock',
'actuellement indisponible',
'agotado',
'article épuisé',
'artikel zurzeit vergriffen',
'as soon as stock is available',
'ausverkauft', // sold out
'available for back order',
'awaiting stock',
'back in stock soon',
'back-order or out of stock',
'backordered',
'benachrichtigt mich', // notify me
'brak na stanie',
'brak w magazynie',
'coming soon',
'currently have any tickets for this',
'currently unavailable',
'dieser artikel ist bald wieder verfügbar',
'dostępne wkrótce',
'en rupture',
'en rupture de stock',
'épuisé',
'esgotado',
'indisponible',
'indisponível',
'isn\'t in stock right now',
'isnt in stock right now',
'isnt in stock right now',
'item is no longer available',
'let me know when it\'s available',
'mail me when available',
'message if back in stock',
'mevcut değil',
'nachricht bei',
'nicht auf lager',
'nicht lagernd',
'nicht lieferbar',
'nicht verfügbar',
'nicht vorrätig',
'nicht zur verfügung',
'nie znaleziono produktów',
'niet beschikbaar',
'niet leverbaar',
'niet op voorraad',
'no disponible',
'non disponibile',
'non disponible',
'no longer in stock',
'no tickets available',
'not available',
'not currently available',
'not in stock',
'notify me when available',
'notify me',
'notify when available',
'não disponível',
'não estamos a aceitar encomendas',
'out of stock',
'out-of-stock',
'plus disponible',
'prodotto esaurito',
'produkt niedostępny',
'rupture',
'sold out',
'sold-out',
'stokta yok',
'temporarily out of stock',
'temporarily unavailable',
'there were no search results for',
'this item is currently unavailable',
'tickets unavailable',
'tijdelijk uitverkocht',
'tükendi',
'unavailable nearby',
'unavailable tickets',
'vergriffen',
'vorbestellen',
'vorbestellung ist bald möglich',
'we don\'t currently have any',
'we couldn\'t find any products that match',
'we do not currently have an estimate of when this product will be back in stock.',
'we don\'t know when or if this item will be back in stock.',
'we were not able to find a match',
'when this arrives in stock',
'zur zeit nicht an lager',
'品切れ',
'已售',
'已售完',
'품절'
];
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
function getElementBaseText(element) {
// .textContent can include text from children which may give the wrong results
// scan only immediate TEXT_NODEs, which will be a child of the element
var text = "";
for (var i = 0; i < element.childNodes.length; ++i)
if (element.childNodes[i].nodeType === Node.TEXT_NODE)
text += element.childNodes[i].textContent;
return text.toLowerCase().trim();
}
function getElementBaseText(element) {
// .textContent can include text from children which may give the wrong results
// scan only immediate TEXT_NODEs, which will be a child of the element
var text = "";
for (var i = 0; i < element.childNodes.length; ++i)
if (element.childNodes[i].nodeType === Node.TEXT_NODE)
text += element.childNodes[i].textContent;
return text.toLowerCase().trim();
}
const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
// The out-of-stock or in-stock-text is generally always above-the-fold
// and often below-the-fold is a list of related products that may or may not contain trigger text
// so it's good to filter to just the 'above the fold' elements
// and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
// The out-of-stock or in-stock-text is generally always above-the-fold
// and often below-the-fold is a list of related products that may or may not contain trigger text
// so it's good to filter to just the 'above the fold' elements
// and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
// @todo - if it's SVG or IMG, go into image diff mode
// %ELEMENTS% replaced at injection time because different interfaces use it with different settings
function collectVisibleElements(parent, visibleElements) {
if (!parent) return; // Base case: if parent is null or undefined, return
console.log("Scanning %ELEMENTS%");
// Add the parent itself to the visible elements array if it's of the specified types
visibleElements.push(parent);
function collectVisibleElements(parent, visibleElements) {
if (!parent) return; // Base case: if parent is null or undefined, return
// Iterate over the parent's children
const children = parent.children;
for (let i = 0; i < children.length; i++) {
const child = children[i];
if (
child.nodeType === Node.ELEMENT_NODE &&
window.getComputedStyle(child).display !== 'none' &&
window.getComputedStyle(child).visibility !== 'hidden' &&
child.offsetWidth >= 0 &&
child.offsetHeight >= 0 &&
window.getComputedStyle(child).contentVisibility !== 'hidden'
) {
// If the child is an element and is visible, recursively collect visible elements
collectVisibleElements(child, visibleElements);
}
// Add the parent itself to the visible elements array if it's of the specified types
visibleElements.push(parent);
// Iterate over the parent's children
const children = parent.children;
for (let i = 0; i < children.length; i++) {
const child = children[i];
if (
child.nodeType === Node.ELEMENT_NODE &&
window.getComputedStyle(child).display !== 'none' &&
window.getComputedStyle(child).visibility !== 'hidden' &&
child.offsetWidth >= 0 &&
child.offsetHeight >= 0 &&
window.getComputedStyle(child).contentVisibility !== 'hidden'
) {
// If the child is an element and is visible, recursively collect visible elements
collectVisibleElements(child, visibleElements);
}
}
}
const elementsToScan = [];
collectVisibleElements(document.body, elementsToScan);
const elementsToScan = [];
collectVisibleElements(document.body, elementsToScan);
var elementText = "";
var elementText = "";
// REGEXS THAT REALLY MEAN IT'S IN STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i];
// REGEXS THAT REALLY MEAN IT'S IN STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue
}
elementText = "";
try {
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase().trim();
} else {
elementText = getElementBaseText(element);
}
} catch (e) {
console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
}
if (elementText.length) {
// try which ones could mean its in stock
if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
return 'Possibly in stock';
}
}
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue
}
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
// Note: theres also an automated test that places the 'out of stock' text fairly low down
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue
}
elementText = "";
elementText = "";
try {
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase().trim();
} else {
elementText = getElementBaseText(element);
}
} catch (e) {
console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
}
if (elementText.length) {
// and these mean its out of stock
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
return outOfStockText; // item is out of stock
}
if (elementText.length) {
// try which ones could mean its in stock
if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
return 'Possibly in stock';
}
}
}
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
for (let i = elementsToScan.length - 1; i >= 0; i--) {
const element = elementsToScan[i];
// outside the 'fold' or some weird text in the heading area
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
// Note: theres also an automated test that places the 'out of stock' text fairly low down
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
continue
}
elementText = "";
if (element.tagName.toLowerCase() === "input") {
elementText = element.value.toLowerCase().trim();
} else {
elementText = getElementBaseText(element);
}
if (elementText.length) {
// and these mean its out of stock
for (const outOfStockText of outOfStockTexts) {
if (elementText.includes(outOfStockText)) {
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
return outOfStockText; // item is out of stock
}
}
}
console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`)
return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
}
// returns the element text that makes it think it's out of stock
return isItemInStock().trim()
console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`)
return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
}
// returns the element text that makes it think it's out of stock
return isItemInStock().trim()

View File

@@ -1,285 +1,285 @@
async (options) => {
// Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
// All rights reserved.
let visualselector_xpath_selectors = options.visualselector_xpath_selectors
let max_height = options.max_height
// @file Scrape the page looking for elements of concern (%ELEMENTS%)
// http://matatk.agrip.org.uk/tests/position-and-width/
// https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate
//
// Some pages like https://www.londonstockexchange.com/stock/NCCL/ncondezi-energy-limited/analysis
// will automatically force a scroll somewhere, so include the position offset
// Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing
var scroll_y = 0;
try {
scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
} catch (e) {
console.log(e);
}
var scroll_y = 0;
try {
scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
} catch (e) {
console.log(e);
}
// Include the getXpath script directly, easier than fetching
function getxpath(e) {
var n = e;
if (n && n.id) return '//*[@id="' + n.id + '"]';
for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
for (d = n.nextSibling; d;) {
if (d.nodeName === n.nodeName) {
r = !0;
break
}
d = d.nextSibling
function getxpath(e) {
var n = e;
if (n && n.id) return '//*[@id="' + n.id + '"]';
for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
for (d = n.nextSibling; d;) {
if (d.nodeName === n.nodeName) {
r = !0;
break
}
o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
d = d.nextSibling
}
return o.length ? "/" + o.reverse().join("/") : ""
o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
}
return o.length ? "/" + o.reverse().join("/") : ""
}
const findUpTag = (el) => {
let r = el
chained_css = [];
depth = 0;
const findUpTag = (el) => {
let r = el
chained_css = [];
depth = 0;
// Strategy 1: If it's an input, with name, and there's only one, prefer that
if (el.name !== undefined && el.name.length) {
var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
var proposed_element = window.document.querySelectorAll(proposed);
if (proposed_element.length) {
if (proposed_element.length === 1) {
return proposed;
} else {
// Some sites change ID but name= stays the same, we can hit it if we know the index
// Find all the elements that match and work out the input[n]
var n = Array.from(proposed_element).indexOf(el);
// Return a Playwright selector for nthinput[name=zipcode]
return proposed + " >> nth=" + n;
}
}
}
// Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
while (r.parentNode) {
if (depth === 5) {
break;
}
if ('' !== r.id) {
chained_css.unshift("#" + CSS.escape(r.id));
final_selector = chained_css.join(' > ');
// Be sure theres only one, some sites have multiples of the same ID tag :-(
if (window.document.querySelectorAll(final_selector).length === 1) {
return final_selector;
}
return null;
// Strategy 1: If it's an input, with name, and there's only one, prefer that
if (el.name !== undefined && el.name.length) {
var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
var proposed_element = window.document.querySelectorAll(proposed);
if (proposed_element.length) {
if (proposed_element.length === 1) {
return proposed;
} else {
chained_css.unshift(r.tagName.toLowerCase());
// Some sites change ID but name= stays the same, we can hit it if we know the index
// Find all the elements that match and work out the input[n]
var n = Array.from(proposed_element).indexOf(el);
// Return a Playwright selector for nthinput[name=zipcode]
return proposed + " >> nth=" + n;
}
r = r.parentNode;
depth += 1;
}
return null;
}
// Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
while (r.parentNode) {
if (depth === 5) {
break;
}
if ('' !== r.id) {
chained_css.unshift("#" + CSS.escape(r.id));
final_selector = chained_css.join(' > ');
// Be sure theres only one, some sites have multiples of the same ID tag :-(
if (window.document.querySelectorAll(final_selector).length === 1) {
return final_selector;
}
return null;
} else {
chained_css.unshift(r.tagName.toLowerCase());
}
r = r.parentNode;
depth += 1;
}
return null;
}
// @todo - if it's SVG or IMG, go into image diff mode
// %ELEMENTS% replaced at injection time because different interfaces use it with different settings
var size_pos = [];
var size_pos = [];
// after page fetch, inject this JS
// build a map of all elements and their positions (maybe that only include text?)
var bbox;
console.log(`Scanning for "${visualselector_xpath_selectors}"`);
var bbox;
console.log("Scanning %ELEMENTS%");
function collectVisibleElements(parent, visibleElements) {
if (!parent) return; // Base case: if parent is null or undefined, return
function collectVisibleElements(parent, visibleElements) {
if (!parent) return; // Base case: if parent is null or undefined, return
// Add the parent itself to the visible elements array if it's of the specified types
const tagName = parent.tagName.toLowerCase();
if (visualselector_xpath_selectors.split(',').includes(tagName)) {
visibleElements.push(parent);
// Add the parent itself to the visible elements array if it's of the specified types
const tagName = parent.tagName.toLowerCase();
if ("%ELEMENTS%".split(',').includes(tagName)) {
visibleElements.push(parent);
}
// Iterate over the parent's children
const children = parent.children;
for (let i = 0; i < children.length; i++) {
const child = children[i];
const computedStyle = window.getComputedStyle(child);
if (
child.nodeType === Node.ELEMENT_NODE &&
computedStyle.display !== 'none' &&
computedStyle.visibility !== 'hidden' &&
child.offsetWidth >= 0 &&
child.offsetHeight >= 0 &&
computedStyle.contentVisibility !== 'hidden'
) {
// If the child is an element and is visible, recursively collect visible elements
collectVisibleElements(child, visibleElements);
}
}
}
// Iterate over the parent's children
const children = parent.children;
for (let i = 0; i < children.length; i++) {
const child = children[i];
const computedStyle = window.getComputedStyle(child);
// Create an array to hold the visible elements
const visibleElementsArray = [];
if (
child.nodeType === Node.ELEMENT_NODE &&
computedStyle.display !== 'none' &&
computedStyle.visibility !== 'hidden' &&
child.offsetWidth >= 0 &&
child.offsetHeight >= 0 &&
computedStyle.contentVisibility !== 'hidden'
) {
// If the child is an element and is visible, recursively collect visible elements
collectVisibleElements(child, visibleElements);
}
// Call collectVisibleElements with the starting parent element
collectVisibleElements(document.body, visibleElementsArray);
visibleElementsArray.forEach(function (element) {
bbox = element.getBoundingClientRect();
// Skip really small ones, and where width or height ==0
if (bbox['width'] * bbox['height'] < 10) {
return
}
// Don't include elements that are offset from canvas
if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) {
return
}
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
// it should not traverse when we know we can anchor off just an ID one level up etc..
// maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
xpath_result = false;
try {
var d = findUpTag(element);
if (d) {
xpath_result = d;
}
} catch (e) {
console.log(e);
}
// You could swap it and default to getXpath and then try the smarter one
// default back to the less intelligent one
if (!xpath_result) {
try {
// I've seen on FB and eBay that this doesnt work
// ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
xpath_result = getxpath(element);
} catch (e) {
console.log(e);
return
}
}
// Create an array to hold the visible elements
const visibleElementsArray = [];
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
// Call collectVisibleElements with the starting parent element
collectVisibleElements(document.body, visibleElementsArray);
let text = element.textContent.trim().slice(0, 30).trim();
while (/\n{2,}|\t{2,}/.test(text)) {
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
}
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,)/.test(text) ;
const computedStyle = window.getComputedStyle(element);
visibleElementsArray.forEach(function (element) {
bbox = element.getBoundingClientRect();
// Skip really small ones, and where width or height ==0
if (bbox['width'] * bbox['height'] < 10) {
return
}
// Don't include elements that are offset from canvas
if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) {
return
}
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
// it should not traverse when we know we can anchor off just an ID one level up etc..
// maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
xpath_result = false;
try {
var d = findUpTag(element);
if (d) {
xpath_result = d;
}
} catch (e) {
console.log(e);
}
// You could swap it and default to getXpath and then try the smarter one
// default back to the less intelligent one
if (!xpath_result) {
try {
// I've seen on FB and eBay that this doesnt work
// ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
xpath_result = getxpath(element);
} catch (e) {
console.log(e);
return
}
}
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
let text = element.textContent.trim().slice(0, 30).trim();
while (/\n{2,}|\t{2,}/.test(text)) {
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
}
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6))) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,)/.test(text);
const computedStyle = window.getComputedStyle(element);
if (Math.floor(bbox['top']) + scroll_y > max_height) {
return
}
size_pos.push({
xpath: xpath_result,
width: Math.round(bbox['width']),
height: Math.round(bbox['height']),
left: Math.floor(bbox['left']),
top: Math.floor(bbox['top']) + scroll_y,
// tagName used by Browser Steps
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
// tagtype used by Browser Steps
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
isClickable: computedStyle.cursor === "pointer",
// Used by the keras trainer
fontSize: computedStyle.getPropertyValue('font-size'),
fontWeight: computedStyle.getPropertyValue('font-weight'),
hasDigitCurrency: hasDigitCurrency,
label: label,
});
size_pos.push({
xpath: xpath_result,
width: Math.round(bbox['width']),
height: Math.round(bbox['height']),
left: Math.floor(bbox['left']),
top: Math.floor(bbox['top']) + scroll_y,
// tagName used by Browser Steps
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
// tagtype used by Browser Steps
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
isClickable: computedStyle.cursor === "pointer",
// Used by the keras trainer
fontSize: computedStyle.getPropertyValue('font-size'),
fontWeight: computedStyle.getPropertyValue('font-weight'),
hasDigitCurrency: hasDigitCurrency,
label: label,
});
});
// Inject the current one set in the include_filters, which may be a CSS rule
// used for displaying the current one in VisualSelector, where its not one we generated.
if (include_filters.length) {
let results;
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again
for (const f of include_filters) {
bbox = false;
q = false;
if (include_filters.length) {
let results;
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again
for (const f of include_filters) {
bbox = false;
q = false;
if (!f.length) {
console.log("xpath_element_scraper: Empty filter, skipping");
continue;
}
if (!f.length) {
console.log("xpath_element_scraper: Empty filter, skipping");
continue;
}
try {
// is it xpath?
if (f.startsWith('/') || f.startsWith('xpath')) {
var qry_f = f.replace(/xpath(:|\d:)/, '')
console.log("[xpath] Scanning for included filter " + qry_f)
let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
results = [];
for (let i = 0; i < xpathResult.snapshotLength; i++) {
results.push(xpathResult.snapshotItem(i));
}
} else {
console.log("[css] Scanning for included filter " + f)
console.log("[css] Scanning for included filter " + f);
results = document.querySelectorAll(f);
try {
// is it xpath?
if (f.startsWith('/') || f.startsWith('xpath')) {
var qry_f = f.replace(/xpath(:|\d:)/, '')
console.log("[xpath] Scanning for included filter " + qry_f)
let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
results = [];
for (let i = 0; i < xpathResult.snapshotLength; i++) {
results.push(xpathResult.snapshotItem(i));
}
} catch (e) {
// Maybe catch DOMException and alert?
console.log("xpath_element_scraper: Exception selecting element from filter " + f);
console.log(e);
} else {
console.log("[css] Scanning for included filter " + f)
console.log("[css] Scanning for included filter " + f);
results = document.querySelectorAll(f);
}
} catch (e) {
// Maybe catch DOMException and alert?
console.log("xpath_element_scraper: Exception selecting element from filter " + f);
console.log(e);
}
if (results != null && results.length) {
if (results != null && results.length) {
// Iterate over the results
results.forEach(node => {
// Try to resolve //something/text() back to its /something so we can atleast get the bounding box
// Iterate over the results
results.forEach(node => {
// Try to resolve //something/text() back to its /something so we can atleast get the bounding box
try {
if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
node = node.parentElement
}
} catch (e) {
console.log(e)
console.log("xpath_element_scraper: #text resolver")
}
// #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
if (typeof node.getBoundingClientRect == 'function') {
bbox = node.getBoundingClientRect();
console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
} else {
try {
if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
node = node.parentElement
}
// Try and see we can find its ownerElement
bbox = node.ownerElement.getBoundingClientRect();
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
} catch (e) {
console.log(e)
console.log("xpath_element_scraper: #text resolver")
console.log("xpath_element_scraper: error looking up q.ownerElement")
}
}
// #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
if (typeof node.getBoundingClientRect == 'function') {
bbox = node.getBoundingClientRect();
console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
} else {
try {
// Try and see we can find its ownerElement
bbox = node.ownerElement.getBoundingClientRect();
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
} catch (e) {
console.log(e)
console.log("xpath_element_scraper: error looking up q.ownerElement")
}
}
if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
size_pos.push({
xpath: f,
width: parseInt(bbox['width']),
height: parseInt(bbox['height']),
left: parseInt(bbox['left']),
top: parseInt(bbox['top']) + scroll_y,
highlight_as_custom_filter: true
});
}
});
}
if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
size_pos.push({
xpath: f,
width: parseInt(bbox['width']),
height: parseInt(bbox['height']),
left: parseInt(bbox['left']),
top: parseInt(bbox['top']) + scroll_y,
highlight_as_custom_filter: true
});
}
});
}
}
}
// Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area
// so that we dont select the wrapping element by mistake and be unable to select what we want
size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1)
// browser_width required for proper scaling in the frontend
// Return as a string to save playwright for juggling thousands of objects
return JSON.stringify({'size_pos': size_pos, 'browser_width': window.innerWidth});
}
size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1)
// Window.width required for proper scaling in the frontend
return {'size_pos': size_pos, 'browser_width': window.innerWidth};

View File

@@ -1,73 +0,0 @@
# Pages with a vertical height longer than this will use the 'stitch together' method.
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
# - If a page is taller than ~800010000px, it risks exceeding GPU memory limits.
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
import os
import io
from PIL import Image, ImageDraw, ImageFont
try:
# Load images from byte chunks
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
total_height = sum(im.height for im in images)
max_width = max(im.width for im in images)
# Create stitched image
stitched = Image.new('RGB', (max_width, total_height))
y_offset = 0
for im in images:
stitched.paste(im, (0, y_offset))
y_offset += im.height
# Draw caption on top (overlaid, not extending canvas)
draw = ImageDraw.Draw(stitched)
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
padding = 10
font_size = 35
font_color = (255, 0, 0)
background_color = (255, 255, 255)
# Try to load a proper font
try:
font = ImageFont.truetype("arial.ttf", font_size)
except IOError:
font = ImageFont.load_default()
bbox = draw.textbbox((0, 0), caption_text, font=font)
text_width = bbox[2] - bbox[0]
text_height = bbox[3] - bbox[1]
# Draw white rectangle background behind text
rect_top = 0
rect_bottom = text_height + 2 * padding
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
# Draw text centered horizontally, 10px padding from top of the rectangle
text_x = (max_width - text_width) // 2
text_y = padding
draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
# Encode and send image
output = io.BytesIO()
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)))
pipe_conn.send_bytes(output.getvalue())
stitched.close()
except Exception as e:
pipe_conn.send(f"error:{e}")
finally:
pipe_conn.close()

View File

@@ -65,7 +65,6 @@ class fetcher(Fetcher):
# request_body, request_method unused for now, until some magic in the future happens.
options = ChromeOptions()
options.add_argument("--headless")
if self.proxy:
options.proxy = self.proxy
@@ -113,9 +112,9 @@ class fetcher(Fetcher):
self.quit()
return True
def quit(self, watch=None):
def quit(self):
if self.driver:
try:
self.driver.quit()
except Exception as e:
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")

View File

@@ -233,8 +233,7 @@ def changedetection_app(config=None, datastore_o=None):
if has_password_enabled and not flask_login.current_user.is_authenticated:
# Permitted
if request.endpoint and request.endpoint == 'static_content' and request.view_args:
# Handled by static_content handler
if request.endpoint and request.endpoint == 'static_content' and request.view_args and request.view_args.get('group') in ['styles', 'js', 'images', 'favicons']:
return None
# Permitted
elif request.endpoint and 'login' in request.endpoint:
@@ -352,15 +351,11 @@ def changedetection_app(config=None, datastore_o=None):
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
def static_content(group, filename):
from flask import make_response
import re
group = re.sub(r'[^\w.-]+', '', group.lower())
filename = re.sub(r'[^\w.-]+', '', filename.lower())
if group == 'screenshot':
# Could be sensitive, follow password requirements
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
if not datastore.data['settings']['application'].get('shared_diff_access'):
abort(403)
abort(403)
screenshot_filename = "last-screenshot.png" if not request.args.get('error_screenshot') else "last-error-screenshot.png"
@@ -394,7 +389,7 @@ def changedetection_app(config=None, datastore_o=None):
response.headers['Content-Type'] = 'application/json'
response.headers['Content-Encoding'] = 'deflate'
else:
logger.error(f'Request elements.deflate at "{watch_directory}" but was not found.')
logger.error(f'Request elements.deflate at "{watch_directory}" but was notfound.')
abort(404)
if response:
@@ -409,7 +404,7 @@ def changedetection_app(config=None, datastore_o=None):
# These files should be in our subdirectory
try:
return send_from_directory(f"static/{group}", path=filename)
return send_from_directory("static/{}".format(group), path=filename)
except FileNotFoundError:
abort(404)
@@ -447,16 +442,6 @@ def changedetection_app(config=None, datastore_o=None):
import changedetectionio.blueprint.watchlist as watchlist
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
# Memory cleanup endpoint
@app.route('/gc-cleanup', methods=['GET'])
@login_optionally_required
def gc_cleanup():
from changedetectionio.gc_cleanup import memory_cleanup
from flask import jsonify
result = memory_cleanup(app)
return jsonify({"status": "success", "message": "Memory cleanup completed", "result": result})
# @todo handle ctrl break
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
@@ -514,8 +499,7 @@ def notification_runner():
sent_obj = None
try:
from changedetectionio.notification.handler import process_notification
from changedetectionio import notification
# Fallback to system config if not set
if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
@@ -525,8 +509,8 @@ def notification_runner():
if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'):
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
if n_object.get('notification_urls', {}):
sent_obj = process_notification(n_object, datastore)
sent_obj = notification.process_notification(n_object, datastore)
except Exception as e:
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")

View File

@@ -306,10 +306,10 @@ class ValidateAppRiseServers(object):
def __call__(self, form, field):
import apprise
from .notification.apprise_plugin.assets import apprise_asset
from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler # noqa: F401
apobj = apprise.Apprise()
apobj = apprise.Apprise(asset=apprise_asset)
# so that the custom endpoints are registered
from .apprise_asset import asset
for server_url in field.data:
url = server_url.strip()
@@ -586,7 +586,7 @@ class processor_text_json_diff_form(commonSettingsForm):
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
filter_text_removed = BooleanField('Removed lines', default=True)
trigger_text = StringListField('Keyword triggers - Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
@@ -721,8 +721,6 @@ class globalSettingsRequestForm(Form):
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
return False
class globalSettingsApplicationUIForm(Form):
open_diff_in_new_tab = BooleanField('Open diff page in a new tab', default=True, validators=[validators.Optional()])
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):
@@ -754,7 +752,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,
message="Should contain zero or more attempts")])
ui = FormField(globalSettingsApplicationUIForm)
class globalSettingsForm(Form):

View File

@@ -1,162 +0,0 @@
#!/usr/bin/env python3
import ctypes
import gc
import re
import psutil
import sys
import threading
import importlib
from loguru import logger
def memory_cleanup(app=None):
"""
Perform comprehensive memory cleanup operations and log memory usage
at each step with nicely formatted numbers.
Args:
app: Optional Flask app instance for clearing Flask-specific caches
Returns:
str: Status message
"""
# Get current process
process = psutil.Process()
# Log initial memory usage with nicely formatted numbers
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"Memory cleanup started - Current memory usage: {current_memory:,.2f} MB")
# 1. Standard garbage collection - force full collection on all generations
gc.collect(0) # Collect youngest generation
gc.collect(1) # Collect middle generation
gc.collect(2) # Collect oldest generation
# Run full collection again to ensure maximum cleanup
gc.collect()
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"After full gc.collect() - Memory usage: {current_memory:,.2f} MB")
# 3. Call libc's malloc_trim to release memory back to the OS
libc = ctypes.CDLL("libc.so.6")
libc.malloc_trim(0)
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"After malloc_trim(0) - Memory usage: {current_memory:,.2f} MB")
# 4. Clear Python's regex cache
re.purge()
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"After re.purge() - Memory usage: {current_memory:,.2f} MB")
# 5. Reset thread-local storage
# Create a new thread local object to encourage cleanup of old ones
threading.local()
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"After threading.local() - Memory usage: {current_memory:,.2f} MB")
# 6. Clear sys.intern cache if Python version supports it
try:
sys.intern.clear()
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"After sys.intern.clear() - Memory usage: {current_memory:,.2f} MB")
except (AttributeError, TypeError):
logger.debug("sys.intern.clear() not supported in this Python version")
# 7. Clear XML/lxml caches if available
try:
# Check if lxml.etree is in use
lxml_etree = sys.modules.get('lxml.etree')
if lxml_etree:
# Clear module-level caches
if hasattr(lxml_etree, 'clear_error_log'):
lxml_etree.clear_error_log()
# Check for _ErrorLog and _RotatingErrorLog objects and clear them
for obj in gc.get_objects():
if hasattr(obj, '__class__') and hasattr(obj.__class__, '__name__'):
class_name = obj.__class__.__name__
if class_name in ('_ErrorLog', '_RotatingErrorLog', '_DomainErrorLog') and hasattr(obj, 'clear'):
try:
obj.clear()
except (AttributeError, TypeError):
pass
# Clear Element objects which can hold references to documents
elif class_name in ('_Element', 'ElementBase') and hasattr(obj, 'clear'):
try:
obj.clear()
except (AttributeError, TypeError):
pass
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"After lxml.etree cleanup - Memory usage: {current_memory:,.2f} MB")
# Check if lxml.html is in use
lxml_html = sys.modules.get('lxml.html')
if lxml_html:
# Clear HTML-specific element types
for obj in gc.get_objects():
if hasattr(obj, '__class__') and hasattr(obj.__class__, '__name__'):
class_name = obj.__class__.__name__
if class_name in ('HtmlElement', 'FormElement', 'InputElement',
'SelectElement', 'TextareaElement', 'CheckboxGroup',
'RadioGroup', 'MultipleSelectOptions', 'FieldsDict') and hasattr(obj, 'clear'):
try:
obj.clear()
except (AttributeError, TypeError):
pass
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"After lxml.html cleanup - Memory usage: {current_memory:,.2f} MB")
except (ImportError, AttributeError):
logger.debug("lxml cleanup not applicable")
# 8. Clear JSON parser caches if applicable
try:
# Check if json module is being used and try to clear its cache
json_module = sys.modules.get('json')
if json_module and hasattr(json_module, '_default_encoder'):
json_module._default_encoder.markers.clear()
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"After JSON parser cleanup - Memory usage: {current_memory:,.2f} MB")
except (AttributeError, KeyError):
logger.debug("JSON cleanup not applicable")
# 9. Force Python's memory allocator to release unused memory
try:
if hasattr(sys, 'pypy_version_info'):
# PyPy has different memory management
gc.collect()
else:
# CPython - try to release unused memory
ctypes.pythonapi.PyGC_Collect()
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"After PyGC_Collect - Memory usage: {current_memory:,.2f} MB")
except (AttributeError, TypeError):
logger.debug("PyGC_Collect not supported")
# 10. Clear Flask-specific caches if applicable
if app:
try:
# Clear Flask caches if they exist
for key in list(app.config.get('_cache', {}).keys()):
app.config['_cache'].pop(key, None)
# Clear Jinja2 template cache if available
if hasattr(app, 'jinja_env') and hasattr(app.jinja_env, 'cache'):
app.jinja_env.cache.clear()
current_memory = process.memory_info().rss / 1024 / 1024
logger.debug(f"After Flask cache clear - Memory usage: {current_memory:,.2f} MB")
except (AttributeError, KeyError):
logger.debug("No Flask cache to clear")
# Final garbage collection pass
gc.collect()
libc.malloc_trim(0)
# Log final memory usage
final_memory = process.memory_info().rss / 1024 / 1024
logger.info(f"Memory cleanup completed - Final memory usage: {final_memory:,.2f} MB")
return "cleaned"

View File

@@ -366,41 +366,22 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
# wordlist - list of regex's (str) or words (str)
# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
def strip_ignore_text(content, wordlist, mode="content"):
i = 0
output = []
ignore_text = []
ignore_regex = []
ignore_regex_multiline = []
ignored_lines = []
ignored_line_numbers = []
for k in wordlist:
# Is it a regex?
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
if res:
res = re.compile(perl_style_slash_enclosed_regex_to_options(k))
if res.flags & re.DOTALL or res.flags & re.MULTILINE:
ignore_regex_multiline.append(res)
else:
ignore_regex.append(res)
ignore_regex.append(re.compile(perl_style_slash_enclosed_regex_to_options(k)))
else:
ignore_text.append(k.strip())
for r in ignore_regex_multiline:
for match in r.finditer(content):
content_lines = content[:match.end()].splitlines(keepends=True)
match_lines = content[match.start():match.end()].splitlines(keepends=True)
end_line = len(content_lines)
start_line = end_line - len(match_lines)
if end_line - start_line <= 1:
# Match is empty or in the middle of the line
ignored_lines.append(start_line)
else:
for i in range(start_line, end_line):
ignored_lines.append(i)
line_index = 0
lines = content.splitlines(keepends=True)
for line in lines:
for line in content.splitlines(keepends=True):
i += 1
# Always ignore blank lines in this mode. (when this function gets called)
got_match = False
for l in ignore_text:
@@ -412,19 +393,17 @@ def strip_ignore_text(content, wordlist, mode="content"):
if r.search(line):
got_match = True
if got_match:
ignored_lines.append(line_index)
line_index += 1
ignored_lines = set([i for i in ignored_lines if i >= 0 and i < len(lines)])
if not got_match:
# Not ignored, and should preserve "keepends"
output.append(line)
else:
ignored_line_numbers.append(i)
# Used for finding out what to highlight
if mode == "line numbers":
return [i + 1 for i in ignored_lines]
return ignored_line_numbers
output_lines = set(range(len(lines))) - ignored_lines
return ''.join([lines[i] for i in output_lines])
return ''.join(output)
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
from xml.sax.saxutils import escape as xml_escape
@@ -477,10 +456,8 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
def has_ldjson_product_info(content):
try:
# Better than .lower() which can use a lot of ram
if (re.search(r'application/ld\+json', content, re.IGNORECASE) and
re.search(r'"price"', content, re.IGNORECASE) and
re.search(r'"pricecurrency"', content, re.IGNORECASE)):
lc = content.lower()
if 'application/ld+json' in lc and lc.count('"price"') == 1 and '"pricecurrency"' in lc:
return True
# On some pages this is really terribly expensive when they dont really need it

View File

@@ -60,9 +60,6 @@ class model(dict):
'webdriver_delay': None , # Extra delay in seconds before extracting text
'tags': {}, #@todo use Tag.model initialisers
'timezone': None, # Default IANA timezone name
'ui': {
'open_diff_in_new_tab': True,
},
}
}
}

View File

@@ -553,10 +553,7 @@ class model(watch_base):
self.ensure_data_dir_exists()
with open(target_path, 'wb') as f:
if not isinstance(data, str):
f.write(zlib.compress(json.dumps(data).encode()))
else:
f.write(zlib.compress(data.encode()))
f.write(zlib.compress(json.dumps(data).encode()))
f.close()
# Save as PNG, PNG is larger but better for doing visual diff in the future
@@ -578,7 +575,7 @@ class model(watch_base):
import brotli
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
if not os.path.isfile(filepath):
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
dates = list(self.history.keys())
if len(dates):

View File

@@ -2,7 +2,7 @@ import os
import uuid
from changedetectionio import strtobool
default_notification_format_for_watch = 'System default'
from changedetectionio.notification import default_notification_format_for_watch
class watch_base(dict):

View File

@@ -1,17 +1,48 @@
import time
from apprise import NotifyFormat
import apprise
from loguru import logger
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
valid_tokens = {
'base_url': '',
'current_snapshot': '',
'diff': '',
'diff_added': '',
'diff_full': '',
'diff_patch': '',
'diff_removed': '',
'diff_url': '',
'preview_url': '',
'triggered_text': '',
'watch_tag': '',
'watch_title': '',
'watch_url': '',
'watch_uuid': '',
}
default_notification_format_for_watch = 'System default'
default_notification_format = 'HTML Color'
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
valid_notification_formats = {
'Text': NotifyFormat.TEXT,
'Markdown': NotifyFormat.MARKDOWN,
'HTML': NotifyFormat.HTML,
'HTML Color': 'htmlcolor',
# Used only for editing a watch (not for global)
default_notification_format_for_watch: default_notification_format_for_watch
}
def process_notification(n_object, datastore):
from changedetectionio.safe_jinja import render as jinja_render
from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats
# be sure its registered
from .apprise_plugin.custom_handlers import apprise_http_custom_handler
# so that the custom endpoints are registered
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
from .safe_jinja import render as jinja_render
now = time.time()
if n_object.get('notification_timestamp'):
logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s")
@@ -28,18 +59,19 @@ def process_notification(n_object, datastore):
# Initially text or whatever
n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format])
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.2f}s")
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.3f}s")
# https://github.com/caronc/apprise/wiki/Development_LogCapture
# Anything higher than or equal to WARNING (which covers things like Connection errors)
# raise it as an exception
sent_objs = []
from .apprise_asset import asset
if 'as_async' in n_object:
apprise_asset.async_mode = n_object.get('as_async')
asset.async_mode = n_object.get('as_async')
apobj = apprise.Apprise(debug=True, asset=apprise_asset)
apobj = apprise.Apprise(debug=True, asset=asset)
if not n_object.get('notification_urls'):
return None
@@ -80,7 +112,7 @@ def process_notification(n_object, datastore):
and not url.startswith('get') \
and not url.startswith('delete') \
and not url.startswith('put'):
url += k + f"avatar_url={APPRISE_AVATAR_URL}"
url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
if url.startswith('tgram://'):
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
@@ -145,7 +177,6 @@ def process_notification(n_object, datastore):
# ( Where we prepare the tokens in the notification to be replaced with actual values )
def create_notification_parameters(n_object, datastore):
from copy import deepcopy
from . import valid_tokens
# in the case we send a test notification from the main settings, there is no UUID.
uuid = n_object['uuid'] if 'uuid' in n_object else ''

View File

@@ -1,35 +0,0 @@
from changedetectionio.model import default_notification_format_for_watch
ult_notification_format_for_watch = 'System default'
default_notification_format = 'HTML Color'
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
# The values (markdown etc) are from apprise NotifyFormat,
# But to avoid importing the whole heavy module just use the same strings here.
valid_notification_formats = {
'Text': 'text',
'Markdown': 'markdown',
'HTML': 'html',
'HTML Color': 'htmlcolor',
# Used only for editing a watch (not for global)
default_notification_format_for_watch: default_notification_format_for_watch
}
valid_tokens = {
'base_url': '',
'current_snapshot': '',
'diff': '',
'diff_added': '',
'diff_full': '',
'diff_patch': '',
'diff_removed': '',
'diff_url': '',
'preview_url': '',
'triggered_text': '',
'watch_tag': '',
'watch_title': '',
'watch_url': '',
'watch_uuid': '',
}

View File

@@ -1,16 +0,0 @@
from apprise import AppriseAsset
# Refer to:
# https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object
APPRISE_APP_ID = "changedetection.io"
APPRISE_APP_DESC = "ChangeDetection.io best and simplest website monitoring and change detection"
APPRISE_APP_URL = "https://changedetection.io"
APPRISE_AVATAR_URL = "https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png"
apprise_asset = AppriseAsset(
app_id=APPRISE_APP_ID,
app_desc=APPRISE_APP_DESC,
app_url=APPRISE_APP_URL,
image_url_logo=APPRISE_AVATAR_URL,
)

View File

@@ -1,112 +0,0 @@
import json
import re
from urllib.parse import unquote_plus
import requests
from apprise.decorators import notify
from apprise.utils.parse import parse_url as apprise_parse_url
from loguru import logger
from requests.structures import CaseInsensitiveDict
SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"}
def notify_supported_methods(func):
for method in SUPPORTED_HTTP_METHODS:
func = notify(on=method)(func)
# Add support for https, for each supported http method
func = notify(on=f"{method}s")(func)
return func
def _get_auth(parsed_url: dict) -> str | tuple[str, str]:
user: str | None = parsed_url.get("user")
password: str | None = parsed_url.get("password")
if user is not None and password is not None:
return (unquote_plus(user), unquote_plus(password))
if user is not None:
return unquote_plus(user)
return ""
def _get_headers(parsed_url: dict, body: str) -> CaseInsensitiveDict:
headers = CaseInsensitiveDict(
{unquote_plus(k).title(): unquote_plus(v) for k, v in parsed_url["qsd+"].items()}
)
# If Content-Type is not specified, guess if the body is a valid JSON
if headers.get("Content-Type") is None:
try:
json.loads(body)
headers["Content-Type"] = "application/json; charset=utf-8"
except Exception:
pass
return headers
def _get_params(parsed_url: dict) -> CaseInsensitiveDict:
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
# but here we are making straight requests, so we need todo convert this against apprise's logic
params = CaseInsensitiveDict(
{
unquote_plus(k): unquote_plus(v)
for k, v in parsed_url["qsd"].items()
if k.strip("-") not in parsed_url["qsd-"]
and k.strip("+") not in parsed_url["qsd+"]
}
)
return params
@notify_supported_methods
def apprise_http_custom_handler(
body: str,
title: str,
notify_type: str,
meta: dict,
*args,
**kwargs,
) -> bool:
url: str = meta.get("url")
schema: str = meta.get("schema")
method: str = re.sub(r"s$", "", schema).upper()
# Convert /foobar?+some-header=hello to proper header dictionary
parsed_url: dict[str, str | dict | None] | None = apprise_parse_url(url)
if parsed_url is None:
return False
auth = _get_auth(parsed_url=parsed_url)
headers = _get_headers(parsed_url=parsed_url, body=body)
params = _get_params(parsed_url=parsed_url)
url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))
try:
response = requests.request(
method=method,
url=url,
auth=auth,
headers=headers,
params=params,
data=body.encode("utf-8") if isinstance(body, str) else body,
)
response.raise_for_status()
logger.info(f"Successfully sent custom notification to {url}")
return True
except requests.RequestException as e:
logger.error(f"Remote host error while sending custom notification to {url}: {e}")
return False
except Exception as e:
logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}")
return False

View File

@@ -159,7 +159,7 @@ class difference_detection_processor():
)
#@todo .quit here could go on close object, so we can run JS if change-detected
self.fetcher.quit(watch=self.watch)
self.fetcher.quit()
# After init, call run_changedetection() which will do the actual change-detection

View File

@@ -252,7 +252,6 @@ class perform_site_check(difference_detection_processor):
# 615 Extract text by regex
extract_text = watch.get('extract_text', [])
extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text')
if len(extract_text) > 0:
regex_matched_output = []
for s_re in extract_text:
@@ -297,8 +296,6 @@ class perform_site_check(difference_detection_processor):
### CALCULATE MD5
# If there's text to ignore
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text')
text_for_checksuming = stripped_text_from_html
if text_to_ignore:
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
@@ -311,8 +308,8 @@ class perform_site_check(difference_detection_processor):
############ Blocking rules, after checksum #################
blocked = False
trigger_text = watch.get('trigger_text', [])
trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text')
if len(trigger_text):
# Assume blocked
blocked = True
@@ -327,7 +324,6 @@ class perform_site_check(difference_detection_processor):
blocked = False
text_should_not_be_present = watch.get('text_should_not_be_present', [])
text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present')
if len(text_should_not_be_present):
# If anything matched, then we should block a change from happening
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
@@ -338,14 +334,12 @@ class perform_site_check(difference_detection_processor):
# And check if 'conditions' will let this pass through
if watch.get('conditions') and watch.get('conditions_match_logic'):
conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
application_datastruct=self.datastore.data,
ephemeral_data={
'text': stripped_text_from_html
}
)
if not conditions_result.get('result'):
if not execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
application_datastruct=self.datastore.data,
ephemeral_data={
'text': stripped_text_from_html
}
):
# Conditions say "Condition not met" so we block it.
blocked = True

View File

@@ -14,8 +14,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
find tests/test_*py -type f|while read test_name
do
echo "TEST RUNNING $test_name"
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name
pytest $test_name
done
echo "RUNNING WITH BASE_URL SET"
@@ -23,7 +22,7 @@ echo "RUNNING WITH BASE_URL SET"
# Now re-run some tests with BASE_URL enabled
# Re #65 - Ability to include a link back to the installation, in the notification.
export BASE_URL="https://really-unique-domain.io"
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
pytest tests/test_notification.py
# Re-run with HIDE_REFERER set - could affect login
@@ -33,7 +32,7 @@ pytest tests/test_access_control.py
# Re-run a few tests that will trigger brotli based storage
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
pytest tests/test_access_control.py
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
pytest tests/test_notification.py
pytest tests/test_backend.py
pytest tests/test_rss.py
pytest tests/test_unique_lines.py

View File

Before

Width:  |  Height:  |  Size: 569 B

After

Width:  |  Height:  |  Size: 569 B

View File

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

View File

Before

Width:  |  Height:  |  Size: 6.2 KiB

After

Width:  |  Height:  |  Size: 6.2 KiB

View File

@@ -8,7 +8,7 @@ $(document).ready(function () {
$(".addRuleRow").on("click", function(e) {
e.preventDefault();
let currentRow = $(this).closest(".fieldlist-row");
let currentRow = $(this).closest("tr");
// Clone without events
let newRow = currentRow.clone(false);
@@ -29,8 +29,8 @@ $(document).ready(function () {
e.preventDefault();
// Only remove if there's more than one row
if ($("#rulesTable .fieldlist-row").length > 1) {
$(this).closest(".fieldlist-row").remove();
if ($("#rulesTable tbody tr").length > 1) {
$(this).closest("tr").remove();
reindexRules();
}
});
@@ -39,7 +39,7 @@ $(document).ready(function () {
$(".verifyRuleRow").on("click", function(e) {
e.preventDefault();
let row = $(this).closest(".fieldlist-row");
let row = $(this).closest("tr");
let field = row.find("select[name$='field']").val();
let operator = row.find("select[name$='operator']").val();
let value = row.find("input[name$='value']").val();
@@ -52,7 +52,7 @@ $(document).ready(function () {
// Create a rule object
let rule = {
const rule = {
field: field,
operator: operator,
value: value
@@ -96,10 +96,6 @@ $(document).ready(function () {
contentType: false, // Let the browser set the correct content type
success: function (response) {
if (response.status === "success") {
if(rule['field'] !== "page_filtered_text") {
// A little debug helper for the user
$('#verify-state-text').text(`${rule['field']} was value "${response.data[rule['field']]}"`)
}
if (response.result) {
alert("✅ Condition PASSES verification against current snapshot!");
} else {
@@ -128,7 +124,7 @@ $(document).ready(function () {
$(".addRuleRow, .removeRuleRow, .verifyRuleRow").off("click");
// Reindex all form elements
$("#rulesTable .fieldlist-row").each(function(index) {
$("#rulesTable tbody tr").each(function(index) {
$(this).find("select, input").each(function() {
let oldName = $(this).attr("name");
let oldId = $(this).attr("id");

View File

@@ -1,135 +0,0 @@
/* Styles for the flexbox-based table replacement for conditions */
.fieldlist_formfields {
width: 100%;
background-color: var(--color-background, #fff);
border-radius: 4px;
border: 1px solid var(--color-border-table-cell, #cbcbcb);
/* Header row */
.fieldlist-header {
display: flex;
background-color: var(--color-background-table-thead, #e0e0e0);
font-weight: bold;
border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb);
}
.fieldlist-header-cell {
flex: 1;
padding: 0.5em 1em;
text-align: left;
&:last-child {
flex: 0 0 120px; /* Fixed width for actions column */
}
}
/* Body rows */
.fieldlist-body {
display: flex;
flex-direction: column;
}
.fieldlist-row {
display: flex;
border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb);
&:last-child {
border-bottom: none;
}
&:nth-child(2n-1) {
background-color: var(--color-table-stripe, #f2f2f2);
}
&.error-row {
background-color: var(--color-error-input, #ffdddd);
}
}
.fieldlist-cell {
flex: 1;
padding: 0.5em 1em;
display: flex;
flex-direction: column;
justify-content: center;
/* Make inputs take up full width of their cell */
input, select {
width: 100%;
}
&.fieldlist-actions {
flex: 0 0 120px; /* Fixed width for actions column */
display: flex;
flex-direction: row;
align-items: center;
gap: 4px;
}
}
/* Error styling */
ul.errors {
margin-top: 0.5em;
margin-bottom: 0;
padding: 0.5em;
background-color: var(--color-error-background-snapshot-age, #ffdddd);
border-radius: 4px;
list-style-position: inside;
}
/* Responsive styles */
@media only screen and (max-width: 760px) {
.fieldlist-header, .fieldlist-row {
flex-direction: column;
}
.fieldlist-header-cell {
display: none;
}
.fieldlist-row {
padding: 0.5em 0;
border-bottom: 2px solid var(--color-border-table-cell, #cbcbcb);
}
.fieldlist-cell {
padding: 0.25em 0.5em;
&.fieldlist-actions {
flex: 1;
justify-content: flex-start;
padding-top: 0.5em;
}
}
/* Add some spacing between fields on mobile */
.fieldlist-cell:not(:last-child) {
margin-bottom: 0.5em;
}
/* Label each cell on mobile view */
.fieldlist-cell::before {
content: attr(data-label);
font-weight: bold;
margin-bottom: 0.25em;
}
}
}
/* Button styling */
.fieldlist_formfields {
.addRuleRow, .removeRuleRow, .verifyRuleRow {
cursor: pointer;
border: none;
padding: 4px 8px;
border-radius: 3px;
font-weight: bold;
background-color: #aaa;
color: var(--color-foreground-text, #fff);
&:hover {
background-color: #999;
}
}
}

View File

@@ -14,7 +14,6 @@
@import "parts/_love";
@import "parts/preview_text_filter";
@import "parts/_edit";
@import "parts/_conditions_table";
body {
color: var(--color-text);

View File

@@ -530,99 +530,6 @@ ul#conditions_match_logic {
ul#conditions_match_logic li {
padding-right: 1em; }
/* Styles for the flexbox-based table replacement for conditions */
.fieldlist_formfields {
width: 100%;
background-color: var(--color-background, #fff);
border-radius: 4px;
border: 1px solid var(--color-border-table-cell, #cbcbcb);
/* Header row */
/* Body rows */
/* Error styling */
/* Responsive styles */ }
.fieldlist_formfields .fieldlist-header {
display: flex;
background-color: var(--color-background-table-thead, #e0e0e0);
font-weight: bold;
border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb); }
.fieldlist_formfields .fieldlist-header-cell {
flex: 1;
padding: 0.5em 1em;
text-align: left; }
.fieldlist_formfields .fieldlist-header-cell:last-child {
flex: 0 0 120px;
/* Fixed width for actions column */ }
.fieldlist_formfields .fieldlist-body {
display: flex;
flex-direction: column; }
.fieldlist_formfields .fieldlist-row {
display: flex;
border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb); }
.fieldlist_formfields .fieldlist-row:last-child {
border-bottom: none; }
.fieldlist_formfields .fieldlist-row:nth-child(2n-1) {
background-color: var(--color-table-stripe, #f2f2f2); }
.fieldlist_formfields .fieldlist-row.error-row {
background-color: var(--color-error-input, #ffdddd); }
.fieldlist_formfields .fieldlist-cell {
flex: 1;
padding: 0.5em 1em;
display: flex;
flex-direction: column;
justify-content: center;
/* Make inputs take up full width of their cell */ }
.fieldlist_formfields .fieldlist-cell input, .fieldlist_formfields .fieldlist-cell select {
width: 100%; }
.fieldlist_formfields .fieldlist-cell.fieldlist-actions {
flex: 0 0 120px;
/* Fixed width for actions column */
display: flex;
flex-direction: row;
align-items: center;
gap: 4px; }
.fieldlist_formfields ul.errors {
margin-top: 0.5em;
margin-bottom: 0;
padding: 0.5em;
background-color: var(--color-error-background-snapshot-age, #ffdddd);
border-radius: 4px;
list-style-position: inside; }
@media only screen and (max-width: 760px) {
.fieldlist_formfields {
/* Add some spacing between fields on mobile */
/* Label each cell on mobile view */ }
.fieldlist_formfields .fieldlist-header, .fieldlist_formfields .fieldlist-row {
flex-direction: column; }
.fieldlist_formfields .fieldlist-header-cell {
display: none; }
.fieldlist_formfields .fieldlist-row {
padding: 0.5em 0;
border-bottom: 2px solid var(--color-border-table-cell, #cbcbcb); }
.fieldlist_formfields .fieldlist-cell {
padding: 0.25em 0.5em; }
.fieldlist_formfields .fieldlist-cell.fieldlist-actions {
flex: 1;
justify-content: flex-start;
padding-top: 0.5em; }
.fieldlist_formfields .fieldlist-cell:not(:last-child) {
margin-bottom: 0.5em; }
.fieldlist_formfields .fieldlist-cell::before {
content: attr(data-label);
font-weight: bold;
margin-bottom: 0.25em; } }
/* Button styling */
.fieldlist_formfields .addRuleRow, .fieldlist_formfields .removeRuleRow, .fieldlist_formfields .verifyRuleRow {
cursor: pointer;
border: none;
padding: 4px 8px;
border-radius: 3px;
font-weight: bold;
background-color: #aaa;
color: var(--color-foreground-text, #fff); }
.fieldlist_formfields .addRuleRow:hover, .fieldlist_formfields .removeRuleRow:hover, .fieldlist_formfields .verifyRuleRow:hover {
background-color: #999; }
body {
color: var(--color-text);
background: var(--color-background-page);

View File

@@ -61,20 +61,21 @@
{{ field(**kwargs)|safe }}
{% endmacro %}
{% macro render_conditions_fieldlist_of_formfields_as_table(fieldlist, table_id="rulesTable") %}
<div class="fieldlist_formfields" id="{{ table_id }}">
<div class="fieldlist-header">
{% for subfield in fieldlist[0] %}
<div class="fieldlist-header-cell">{{ subfield.label }}</div>
{% endfor %}
<div class="fieldlist-header-cell">Actions</div>
</div>
<div class="fieldlist-body">
{% macro render_fieldlist_of_formfields_as_table(fieldlist, table_id="rulesTable") %}
<table class="fieldlist_formfields pure-table" id="{{ table_id }}">
<thead>
<tr>
{% for subfield in fieldlist[0] %}
<th>{{ subfield.label }}</th>
{% endfor %}
<th>Actions</th>
</tr>
</thead>
<tbody>
{% for form_row in fieldlist %}
<div class="fieldlist-row {% if form_row.errors %}error-row{% endif %}">
<tr {% if form_row.errors %} class="error-row" {% endif %}>
{% for subfield in form_row %}
<div class="fieldlist-cell">
<td>
{{ subfield()|safe }}
{% if subfield.errors %}
<ul class="errors">
@@ -83,17 +84,17 @@
{% endfor %}
</ul>
{% endif %}
</div>
</td>
{% endfor %}
<div class="fieldlist-cell fieldlist-actions">
<button type="button" class="addRuleRow" title="Add a row/rule after">+</button>
<button type="button" class="removeRuleRow" title="Remove this row/rule">-</button>
<td>
<button type="button" class="addRuleRow">+</button>
<button type="button" class="removeRuleRow">-</button>
<button type="button" class="verifyRuleRow" title="Verify this rule against current snapshot"></button>
</div>
</div>
</td>
</tr>
{% endfor %}
</div>
</div>
</tbody>
</table>
{% endmacro %}

View File

@@ -159,7 +159,7 @@
<a id="chrome-extension-link"
title="Chrome Extension - Web Page Change Detection with changedetection.io!"
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
<img alt="Chrome store icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}">
<img alt="Chrome store icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}">
Chrome Webstore
</a>
</p>

View File

@@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning, render_fieldlist_of_formfields_as_table %}
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
@@ -289,13 +289,25 @@ Math: {{ 1 + 1 }}") }}
<script>
const verify_condition_rule_url="{{url_for('conditions.verify_condition_single_rule', watch_uuid=uuid)}}";
</script>
<style>
.verifyRuleRow {
background-color: #4caf50;
color: white;
border: none;
cursor: pointer;
font-weight: bold;
}
.verifyRuleRow:hover {
background-color: #45a049;
}
</style>
<div class="pure-control-group">
{{ render_field(form.conditions_match_logic) }}
{{ render_conditions_fieldlist_of_formfields_as_table(form.conditions) }}
{{ render_fieldlist_of_formfields_as_table(form.conditions) }}
<div class="pure-form-message-inline">
<p id="verify-state-text">Use the verify (✓) button to test if a condition passes against the current snapshot.</p>
Read a quick tutorial about <a href="https://changedetection.io/tutorial/conditional-actions-web-page-changes">using conditional web page changes here</a>.<br>
<br>
Use the verify (✓) button to test if a condition passes against the current snapshot.<br><br>
Did you know that <strong>conditions</strong> can be extended with your own custom plugin? tutorials coming soon!<br>
</div>
</div>
</div>
@@ -314,8 +326,61 @@ Math: {{ 1 + 1 }}") }}
</li>
</ul>
</div>
<div class="pure-control-group">
{% set field = render_field(form.include_filters,
rows=5,
placeholder=has_tag_filters_extra+"#example
xpath://body/div/span[contains(@class, 'example-class')]",
class="m-d")
%}
{{ field }}
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</span><br>
<ul id="advanced-help-selectors" style="display: none;">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}
</ul>
</li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code>
<ul>
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a
href="http://xpather.com/" target="new">test your XPath here</a></li>
<li>Example: Get all titles from an RSS feed <code>//title/text()</code></li>
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul>
</li>
<li>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</li>
</ul>
{% include "edit/include_subtract.html" %}
</span>
</div>
<fieldset class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header
footer
nav
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
<div class="text-filtering border-fieldset">
<fieldset class="pure-group" id="text-filtering-type-options">
<h3>Text filtering</h3>
@@ -343,9 +408,76 @@ Math: {{ 1 + 1 }}") }}
{{ render_checkbox_field(form.trim_text_whitespace) }}
<span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span>
</fieldset>
{% include "edit/text-options.html" %}
<fieldset>
<div class="pure-control-group">
{{ render_field(form.trigger_text, rows=5, placeholder="Some text to wait for in a line
/some.regex\d{2}/ for case-INsensitive regex
") }}
<span class="pure-form-message-inline">
<ul>
<li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
<li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>Each line is processed separately (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
/some.regex\d{2}/ for case-INsensitive regex
") }}
<span class="pure-form-message-inline">
<ul>
<li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
</ul>
</span>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock
Sold out
Not in stock
Unavailable") }}
<span class="pure-form-message-inline">
<ul>
<li>Block change-detection while this text is on the page, all text and regex are tested <i>case-insensitive</i>, good for waiting for when a product is available again</li>
<li>Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>All lines here must not exist (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
or
keyword") }}
<span class="pure-form-message-inline">
<ul>
<li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
<ul>
<li>Regular expression &dash; example <code>/reports.+?2022/i</code></li>
<li>Don't forget to consider the white-space at the start of a line <code>/.+?reports.+?2022/i</code></li>
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
<li>Keyword example &dash; example <code>Out of stock</code></li>
<li>Use groups to extract just that text &dash; example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
<li>Example - match lines containing a keyword <code>/.*icecream.*/</code></li>
</ul>
</li>
<li>One line per regular-expression/string match</li>
</ul>
</span>
</div>
</fieldset>
</div>
</div>
</div>
<div id="text-preview" style="display: none;" >
<script>
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";

View File

@@ -1,55 +0,0 @@
<div class="pure-control-group">
{% set field = render_field(form.include_filters,
rows=5,
placeholder=has_tag_filters_extra+"#example
xpath://body/div/span[contains(@class, 'example-class')]",
class="m-d")
%}
{{ field }}
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath 1 &amp; 2, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</span><br>
<ul id="advanced-help-selectors" style="display: none;">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}
</ul>
</li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code>
<ul>
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a
href="http://xpather.com/" target="new">test your XPath here</a></li>
<li>Example: Get all titles from an RSS feed <code>//title/text()</code></li>
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul>
</li>
<li>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</li>
</ul>
</span>
</div>
<fieldset class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header
footer
nav
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>

View File

@@ -1,69 +0,0 @@
<fieldset>
<div class="pure-control-group">
{{ render_field(form.trigger_text, rows=5, placeholder="Some text to wait for in a line
/some.regex\d{2}/ for case-INsensitive regex
") }}
<span class="pure-form-message-inline">
<ul>
<li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
<li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>Each line is processed separately (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
/some.regex\d{2}/ for case-INsensitive regex
") }}
<span class="pure-form-message-inline">
<ul>
<li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
</ul>
</span>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock
Sold out
Not in stock
Unavailable") }}
<span class="pure-form-message-inline">
<ul>
<li>Block change-detection while this text is on the page, all text and regex are tested <i>case-insensitive</i>, good for waiting for when a product is available again</li>
<li>Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>All lines here must not exist (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
or
keyword") }}
<span class="pure-form-message-inline">
<ul>
<li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
<ul>
<li>Regular expression &dash; example <code>/reports.+?2022/i</code></li>
<li>Don't forget to consider the white-space at the start of a line <code>/.+?reports.+?2022/i</code></li>
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
<li>Keyword example &dash; example <code>Out of stock</code></li>
<li>Use groups to extract just that text &dash; example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
<li>Example - match lines containing a keyword <code>/.*icecream.*/</code></li>
</ul>
</li>
<li>One line per regular-expression/string match</li>
</ul>
</span>
</div>
</fieldset>

View File

@@ -1,24 +0,0 @@
import pytest
from apprise import AppriseAsset
from changedetectionio.apprise_asset import (
APPRISE_APP_DESC,
APPRISE_APP_ID,
APPRISE_APP_URL,
APPRISE_AVATAR_URL,
)
@pytest.fixture(scope="function")
def apprise_asset() -> AppriseAsset:
from changedetectionio.apprise_asset import apprise_asset
return apprise_asset
def test_apprise_asset_init(apprise_asset: AppriseAsset):
assert isinstance(apprise_asset, AppriseAsset)
assert apprise_asset.app_id == APPRISE_APP_ID
assert apprise_asset.app_desc == APPRISE_APP_DESC
assert apprise_asset.app_url == APPRISE_APP_URL
assert apprise_asset.image_url_logo == APPRISE_AVATAR_URL

View File

@@ -1,211 +0,0 @@
import json
from unittest.mock import patch
import pytest
import requests
from apprise.utils.parse import parse_url as apprise_parse_url
from ...apprise_plugin.custom_handlers import (
_get_auth,
_get_headers,
_get_params,
apprise_http_custom_handler,
SUPPORTED_HTTP_METHODS,
)
@pytest.mark.parametrize(
"url,expected_auth",
[
("get://user:pass@localhost:9999", ("user", "pass")),
("get://user@localhost:9999", "user"),
("get://localhost:9999", ""),
("get://user%20name:pass%20word@localhost:9999", ("user name", "pass word")),
],
)
def test_get_auth(url, expected_auth):
"""Test authentication extraction with various URL formats."""
parsed_url = apprise_parse_url(url)
assert _get_auth(parsed_url) == expected_auth
@pytest.mark.parametrize(
"url,body,expected_content_type",
[
(
"get://localhost:9999?+content-type=application/xml",
"test",
"application/xml",
),
("get://localhost:9999", '{"key": "value"}', "application/json; charset=utf-8"),
("get://localhost:9999", "plain text", None),
("get://localhost:9999?+content-type=text/plain", "test", "text/plain"),
],
)
def test_get_headers(url, body, expected_content_type):
"""Test header extraction and content type detection."""
parsed_url = apprise_parse_url(url)
headers = _get_headers(parsed_url, body)
if expected_content_type:
assert headers.get("Content-Type") == expected_content_type
@pytest.mark.parametrize(
"url,expected_params",
[
("get://localhost:9999?param1=value1", {"param1": "value1"}),
("get://localhost:9999?param1=value1&-param2=ignored", {"param1": "value1"}),
("get://localhost:9999?param1=value1&+header=test", {"param1": "value1"}),
(
"get://localhost:9999?encoded%20param=encoded%20value",
{"encoded param": "encoded value"},
),
],
)
def test_get_params(url, expected_params):
"""Test parameter extraction with URL encoding and exclusion logic."""
parsed_url = apprise_parse_url(url)
params = _get_params(parsed_url)
assert dict(params) == expected_params
@pytest.mark.parametrize(
"url,schema,method",
[
("get://localhost:9999", "get", "GET"),
("post://localhost:9999", "post", "POST"),
("delete://localhost:9999", "delete", "DELETE"),
],
)
@patch("requests.request")
def test_apprise_custom_api_call_success(mock_request, url, schema, method):
"""Test successful API calls with different HTTP methods and schemas."""
mock_request.return_value.raise_for_status.return_value = None
meta = {"url": url, "schema": schema}
result = apprise_http_custom_handler(
body="test body", title="Test Title", notify_type="info", meta=meta
)
assert result is True
mock_request.assert_called_once()
call_args = mock_request.call_args
assert call_args[1]["method"] == method.upper()
assert call_args[1]["url"].startswith("http")
@patch("requests.request")
def test_apprise_custom_api_call_with_auth(mock_request):
"""Test API call with authentication."""
mock_request.return_value.raise_for_status.return_value = None
url = "get://user:pass@localhost:9999/secure"
meta = {"url": url, "schema": "get"}
result = apprise_http_custom_handler(
body=json.dumps({"key": "value"}),
title="Secure Test",
notify_type="info",
meta=meta,
)
assert result is True
mock_request.assert_called_once()
call_args = mock_request.call_args
assert call_args[1]["auth"] == ("user", "pass")
@pytest.mark.parametrize(
"exception_type,expected_result",
[
(requests.RequestException, False),
(requests.HTTPError, False),
(Exception, False),
],
)
@patch("requests.request")
def test_apprise_custom_api_call_failure(mock_request, exception_type, expected_result):
"""Test various failure scenarios."""
url = "get://localhost:9999/error"
meta = {"url": url, "schema": "get"}
# Simulate different types of exceptions
mock_request.side_effect = exception_type("Error occurred")
result = apprise_http_custom_handler(
body="error body", title="Error Test", notify_type="error", meta=meta
)
assert result == expected_result
def test_invalid_url_parsing():
"""Test handling of invalid URL parsing."""
meta = {"url": "invalid://url", "schema": "invalid"}
result = apprise_http_custom_handler(
body="test", title="Invalid URL", notify_type="info", meta=meta
)
assert result is False
@pytest.mark.parametrize(
"schema,expected_method",
[
(http_method, http_method.upper())
for http_method in SUPPORTED_HTTP_METHODS
],
)
@patch("requests.request")
def test_http_methods(mock_request, schema, expected_method):
"""Test all supported HTTP methods."""
mock_request.return_value.raise_for_status.return_value = None
url = f"{schema}://localhost:9999"
result = apprise_http_custom_handler(
body="test body",
title="Test Title",
notify_type="info",
meta={"url": url, "schema": schema},
)
assert result is True
mock_request.assert_called_once()
call_args = mock_request.call_args
assert call_args[1]["method"] == expected_method
@pytest.mark.parametrize(
"input_schema,expected_method",
[
(f"{http_method}s", http_method.upper())
for http_method in SUPPORTED_HTTP_METHODS
],
)
@patch("requests.request")
def test_https_method_conversion(
mock_request, input_schema, expected_method
):
"""Validate that methods ending with 's' use HTTPS and correct HTTP method."""
mock_request.return_value.raise_for_status.return_value = None
url = f"{input_schema}://localhost:9999"
result = apprise_http_custom_handler(
body="test body",
title="Test Title",
notify_type="info",
meta={"url": url, "schema": input_schema},
)
assert result is True
mock_request.assert_called_once()
call_args = mock_request.call_args
assert call_args[1]["method"] == expected_method
assert call_args[1]["url"].startswith("https")

View File

@@ -25,6 +25,7 @@ def test_setup(live_server):
def get_last_message_from_smtp_server():
import socket
global smtp_test_server
port = 11080 # socket server port number
client_socket = socket.socket() # instantiate
@@ -43,6 +44,7 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas
# live_server_setup(live_server)
set_original_response()
global smtp_test_server
notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com'
#####################
@@ -97,6 +99,7 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
# https://github.com/caronc/apprise/issues/633
set_original_response()
global smtp_test_server
notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com'
notification_body = f"""<!DOCTYPE html>
<html lang="en">

View File

@@ -60,11 +60,6 @@ def test_check_access_control(app, client, live_server):
res = c.get(url_for('static_content', group='styles', filename='404-testetest.css'))
assert res.status_code == 404
# Access to screenshots should be limited by 'shared_diff_access'
path = url_for('static_content', group='screenshot', filename='random-uuid-that-will-404.png', _external=True)
res = c.get(path)
assert res.status_code == 404
# Check wrong password does not let us in
res = c.post(
url_for("login"),
@@ -168,7 +163,7 @@ def test_check_access_control(app, client, live_server):
url_for("settings.settings_page"),
data={"application-password": "foobar",
# Should be disabled
"application-shared_diff_access": "",
# "application-shared_diff_access": "True",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
@@ -181,10 +176,6 @@ def test_check_access_control(app, client, live_server):
# Should be logged out
assert b"Login" in res.data
# Access to screenshots should be limited by 'shared_diff_access'
res = c.get(url_for('static_content', group='screenshot', filename='random-uuid-that-will-403.png'))
assert res.status_code == 403
# The diff page should return something valid when logged out
res = c.get(url_for("ui.ui_views.diff_history_page", uuid="first"))
assert b'Random content' not in res.data

View File

@@ -1,6 +1,6 @@
#!/usr/bin/env python3
import json
import time
import urllib
from flask import url_for
from .util import live_server_setup, wait_for_all_checks
@@ -113,7 +113,6 @@ def test_conditions_with_text_and_number(client, live_server):
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
time.sleep(2)
# 75 is > 20 and < 100 and contains "5"
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data

View File

@@ -32,6 +32,7 @@ def test_strip_regex_text_func():
]
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert "but 1 lines" in stripped_content
assert "igNORe-cAse text" not in stripped_content
assert "but 1234 lines" not in stripped_content
@@ -41,46 +42,6 @@ def test_strip_regex_text_func():
# Check line number reporting
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines, mode="line numbers")
assert stripped_content == [2, 5, 6, 7, 8, 10]
stripped_content = html_tools.strip_ignore_text(test_content, ['/but 1.+5 lines/s'])
assert "but 1 lines" not in stripped_content
assert "skip 5 lines" not in stripped_content
stripped_content = html_tools.strip_ignore_text(test_content, ['/but 1.+5 lines/s'], mode="line numbers")
assert stripped_content == [4, 5]
stripped_content = html_tools.strip_ignore_text(test_content, ['/.+/s'])
assert stripped_content == ""
stripped_content = html_tools.strip_ignore_text(test_content, ['/.+/s'], mode="line numbers")
assert stripped_content == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+but.+\\n.+lines$/m'])
assert "but 1 lines" not in stripped_content
assert "skip 5 lines" not in stripped_content
stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+but.+\\n.+lines$/m'], mode="line numbers")
assert stripped_content == [4, 5]
stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+?\.$/m'])
assert "but sometimes we want to remove the lines." not in stripped_content
assert "but not always." not in stripped_content
stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+?\.$/m'], mode="line numbers")
assert stripped_content == [2, 11]
stripped_content = html_tools.strip_ignore_text(test_content, ['/but.+?but/ms'])
assert "but sometimes we want to remove the lines." not in stripped_content
assert "but 1 lines" not in stripped_content
assert "but 1234 lines" not in stripped_content
assert "igNORe-cAse text we dont want to keep" not in stripped_content
assert "but not always." not in stripped_content
stripped_content = html_tools.strip_ignore_text(test_content, ['/but.+?but/ms'], mode="line numbers")
assert stripped_content == [2, 3, 4, 9, 10, 11]
stripped_content = html_tools.strip_ignore_text("\n\ntext\n\ntext\n\n", ['/^$/ms'], mode="line numbers")
assert stripped_content == [1, 2, 4, 6]
# Check that linefeeds are preserved when there are is no matching ignores
content = "some text\n\nand other text\n"

View File

@@ -167,10 +167,7 @@ def test_check_notification(client, live_server, measure_memory_usage):
assert ':-)' in notification_submission
# Check the attachment was added, and that it is a JPEG from the original PNG
notification_submission_object = json.loads(notification_submission)
assert notification_submission_object
# We keep PNG screenshots for now
# IF THIS FAILS YOU SHOULD BE TESTING WITH ENV VAR REMOVE_REQUESTS_OLD_SCREENSHOTS=False
assert notification_submission_object['attachments'][0]['filename'] == 'last-screenshot.png'
assert len(notification_submission_object['attachments'][0]['base64'])
assert notification_submission_object['attachments'][0]['mimetype'] == 'image/png'

View File

@@ -1,80 +0,0 @@
#!/usr/bin/env python3
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
def test_checkbox_open_diff_in_new_tab(client, live_server):
set_original_response()
live_server_setup(live_server)
# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Make a change
set_modified_response()
# Test case 1 - checkbox is enabled in settings
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-open_diff_in_new_tab": "1"},
follow_redirects=True
)
assert b'Settings updated' in res.data
# Force recheck
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
lines = res.data.decode().split("\n")
# Find link to diff page
target_line = None
for line in lines:
if '/diff' in line:
target_line = line.strip()
break
assert target_line != None
assert 'target=' in target_line
# Test case 2 - checkbox is disabled in settings
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-open_diff_in_new_tab": ""},
follow_redirects=True
)
assert b'Settings updated' in res.data
# Force recheck
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
lines = res.data.decode().split("\n")
# Find link to diff page
target_line = None
for line in lines:
if '/diff' in line:
target_line = line.strip()
break
assert target_line != None
assert 'target=' not in target_line
# Cleanup everything
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -75,7 +75,7 @@ class TestTriggerConditions(unittest.TestCase):
ephemeral_data={'text': "I saw 500 people at a rock show"})
# @todo - now we can test that 'Extract number' increased more than X since last time
self.assertTrue(result.get('result'))
self.assertTrue(result)
if __name__ == '__main__':

View File

@@ -109,6 +109,7 @@ class update_worker(threading.Thread):
default_notification_title
)
# Would be better if this was some kind of Object where Watch can reference the parent datastore etc
v = watch.get(var_name)
if v and not watch.get('notification_muted'):
@@ -591,7 +592,6 @@ class update_worker(threading.Thread):
self.current_uuid = None # Done
self.q.task_done()
update_handler = None
logger.debug(f"Watch {uuid} done in {time.time()-fetch_start_time:.2f}s")
# Give the CPU time to interrupt

View File

@@ -63,10 +63,6 @@ services:
#
# A valid timezone name to run as (for scheduling watch checking) see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# - TZ=America/Los_Angeles
#
# Maximum height of screenshots, default is 16000 px, screenshots will be clipped to this if exceeded.
# RAM usage will be higher if you increase this.
# - SCREENSHOT_MAX_HEIGHT=16000
# Comment out ports: when using behind a reverse proxy , enable networks: etc.
ports:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 104 KiB