Compare commits

..

2 Commits

Author SHA1 Message Date
dgtlmoon
bfa4482fb8 Adding delay random fail 2025-04-01 10:57:30 +02:00
dgtlmoon
a00e69abed Regession - Shared history/diff page with anonymous access turned on should allow screenshot access 2025-04-01 10:49:35 +02:00
42 changed files with 326 additions and 895 deletions

View File

@@ -28,6 +28,7 @@ jobs:
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.11'
skip-pypuppeteer: true
test-application-3-12:
needs: lint-code

View File

@@ -7,7 +7,7 @@ on:
description: 'Python version to use'
required: true
type: string
default: '3.11'
default: '3.10'
skip-pypuppeteer:
description: 'Skip PyPuppeteer (not supported in 3.11/3.12)'
required: false

View File

@@ -1,5 +1,8 @@
# pip dependencies install stage
# @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py
# If you know how to fix it, please do! and test it for both 3.10 and 3.11
ARG PYTHON_VERSION=3.11
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder

View File

@@ -89,7 +89,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
#### Key Features
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
- Target elements with xPath 1 and xPath 2, CSS Selectors, Easily monitor complex JSON with JSONPath or jq
- Target elements with xPath(1.0) and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
- Switch between fast non-JS and Chrome JS based "fetchers"
- Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums)
- Easily specify how often a site should be checked
@@ -105,12 +105,6 @@ We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) glob
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
### Conditional web page changes
Easily [configure conditional actions](https://changedetection.io/tutorial/conditional-actions-web-page-changes), for example, only trigger when a price is above or below a preset amount, or [when a web page includes (or does not include) a keyword](https://changedetection.io/tutorial/how-monitor-keywords-any-website)
<img src="./docs/web-page-change-conditions.png" style="max-width:80%;" alt="Conditional web page changes" title="Conditional web page changes" />
### Schedule web page watches in any timezone, limit by day of week and time.
Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours.

View File

@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.49.12'
__version__ = '0.49.9'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -11,7 +11,6 @@ os.environ['EVENTLET_NO_GREENDNS'] = 'yes'
import eventlet
import eventlet.wsgi
import getopt
import platform
import signal
import socket
import sys
@@ -145,19 +144,6 @@ def main():
signal.signal(signal.SIGTERM, sigshutdown_handler)
signal.signal(signal.SIGINT, sigshutdown_handler)
# Custom signal handler for memory cleanup
def sigusr_clean_handler(_signo, _stack_frame):
from changedetectionio.gc_cleanup import memory_cleanup
logger.info('SIGUSR1 received: Running memory cleanup')
return memory_cleanup(app)
# Register the SIGUSR1 signal handler
# Only register the signal handler if running on Linux
if platform.system() == "Linux":
signal.signal(signal.SIGUSR1, sigusr_clean_handler)
else:
logger.info("SIGUSR1 handler only registered on Linux, skipped.")
# Go into cleanup mode
if do_cleanup:

View File

@@ -4,7 +4,7 @@ import re
from random import randint
from loguru import logger
from changedetectionio.content_fetchers.helpers import capture_full_page
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
from changedetectionio.content_fetchers.base import manage_user_agent
from changedetectionio.safe_jinja import render as jinja_render
@@ -298,7 +298,14 @@ class browsersteps_live_ui(steppable_browser_interface):
now = time.time()
self.page.wait_for_timeout(1 * 1000)
screenshot = capture_full_page(self.page)
full_height = self.page.evaluate("document.documentElement.scrollHeight")
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
screenshot = capture_stitched_together_full_page(self.page)
else:
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")

View File

@@ -22,7 +22,6 @@
<li class="tab"><a href="#notifications">Notifications</a></li>
<li class="tab"><a href="#fetching">Fetching</a></li>
<li class="tab"><a href="#filters">Global Filters</a></li>
<li class="tab"><a href="#ui-options">UI Options</a></li>
<li class="tab"><a href="#api">API</a></li>
<li class="tab"><a href="#timedate">Time &amp Date</a></li>
<li class="tab"><a href="#proxies">CAPTCHA &amp; Proxies</a></li>
@@ -218,7 +217,7 @@ nav
<a id="chrome-extension-link"
title="Try our new Chrome Extension!"
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" alt="Chrome">
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
Chrome Webstore
</a>
</p>
@@ -241,12 +240,6 @@ nav
</p>
</div>
</div>
<div class="tab-pane-inner" id="ui-options">
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
<span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
</div>
</div>
<div class="tab-pane-inner" id="proxies">
<div id="recommended-proxy">
<div>

View File

@@ -13,7 +13,6 @@
/*const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');*/
/*{% endif %}*/
{% set has_tag_filters_extra='' %}
</script>
@@ -47,12 +46,59 @@
</div>
<div class="tab-pane-inner" id="filters-and-triggers">
<p>These settings are <strong><i>added</i></strong> to any existing watch configurations.</p>
{% include "edit/include_subtract.html" %}
<div class="text-filtering border-fieldset">
<h3>Text filtering</h3>
{% include "edit/text-options.html" %}
</div>
<div class="pure-control-group">
{% set field = render_field(form.include_filters,
rows=5,
placeholder="#example
xpath://body/div/span[contains(@class, 'example-class')]",
class="m-d")
%}
{{ field }}
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
<div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
<ul id="advanced-help-selectors">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}
</ul>
</li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code>
<ul>
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a
href="http://xpather.com/" target="new">test your XPath here</a></li>
<li>Example: Get all titles from an RSS feed <code>//title/text()</code></li>
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul>
</li>
</ul>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</span>
</div>
<fieldset class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
footer
nav
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
</div>
{# rendered sub Template #}

View File

@@ -125,10 +125,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
else:
# Recheck all, including muted
# Get most overdue first
for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
watch_uuid = k[0]
watch = k[1]
for watch_uuid, watch in datastore.data['watching'].items():
if not watch['paused']:
if watch_uuid not in running_uuids:
if with_errors and not watch.get('last_error'):
@@ -143,7 +140,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
if i == 1:
flash("Queued 1 watch for rechecking.")
if i > 1:
flash(f"Queued {i} watches for rechecking.")
flash("Queued {} watches for rechecking.".format(i))
if i == 0:
flash("No watches available to recheck.")

View File

@@ -130,7 +130,7 @@
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
or "extra_browser_" in watch.get_fetch_backend
%}
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
{% endif %}
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
@@ -209,18 +209,15 @@
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
{% if watch.history_n >= 2 %}
{% set open_diff_in_new_tab = datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') %}
{% set target_attr = ' target="' ~ watch.uuid ~ '"' if open_diff_in_new_tab else '' %}
{% if is_unviewed %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
{% else %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
{% endif %}
{% else %}
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary">Preview</a>
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary">Preview</a>
{% endif %}
{% endif %}
</td>
@@ -244,7 +241,7 @@
all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a>
</li>
<li>
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='Generic_Feed-icon.svg')}}" height="15"></a>
</li>
</ul>
{{ pagination.links }}

View File

@@ -8,7 +8,7 @@ from . import default_plugin
# List of all supported JSON Logic operators
operator_choices = [
(None, "Choose one - Operator"),
(None, "Choose one"),
(">", "Greater Than"),
("<", "Less Than"),
(">=", "Greater Than or Equal To"),
@@ -21,7 +21,7 @@ operator_choices = [
# Fields available in the rules
field_choices = [
(None, "Choose one - Field"),
(None, "Choose one"),
]
# The data we will feed the JSON Rules to see if it passes the test/conditions or not

View File

@@ -19,7 +19,7 @@ class ConditionFormRow(Form):
validators=[validators.Optional()]
)
value = StringField("Value", validators=[validators.Optional()], render_kw={"placeholder": "A value"})
value = StringField("Value", validators=[validators.Optional()])
def validate(self, extra_validators=None):
# First, run the default validators

View File

@@ -87,7 +87,7 @@ class Fetcher():
pass
@abstractmethod
def quit(self, watch=None):
def quit(self):
return
@abstractmethod

View File

@@ -1,107 +1,79 @@
# Pages with a vertical height longer than this will use the 'stitch together' method.
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
# The size at which we will switch to stitching method
SCREENSHOT_SIZE_STITCH_THRESHOLD=8000
from loguru import logger
def capture_full_page(page):
def capture_stitched_together_full_page(page):
import io
import os
import time
from PIL import Image, ImageDraw, ImageFont
# Maximum total height for the final image (When in stitch mode).
# We limit this to 16000px due to the huge amount of RAM that was being used
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", 16000))
# The size at which we will switch to stitching method, when below this (and
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
# screenshot method.
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4 # Maximum total height for the final image (When in stitch mode)
MAX_CHUNK_HEIGHT = 4000 # Height per screenshot chunk
WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay
# Save the original viewport size
original_viewport = page.viewport_size
start = time.time()
stitched_image = None
now = time.time()
try:
viewport_width = original_viewport["width"]
viewport_height = original_viewport["height"]
viewport = page.viewport_size
page_height = page.evaluate("document.documentElement.scrollHeight")
# Optimization to avoid unnecessary stitching if we can avoid it
# Use the default screenshot method for smaller pages to take advantage
# of GPU and native playwright screenshot optimizations
if (
page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD
and page_height < MAX_TOTAL_HEIGHT
):
logger.debug("Using default screenshot method")
screenshot = page.screenshot(
type="jpeg",
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
full_page=True,
)
logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
return screenshot
logger.debug(
"Using stitching method for large screenshot because page height exceeds threshold"
)
# Limit the total capture height
capture_height = min(page_height, MAX_TOTAL_HEIGHT)
# Calculate number of chunks needed using ORIGINAL viewport height
num_chunks = (capture_height + viewport_height - 1) // viewport_height
images = []
total_captured_height = 0
# Create the final image upfront to avoid holding all chunks in memory
stitched_image = Image.new("RGB", (viewport_width, capture_height))
for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
# Ensure we do not exceed the total height limit
chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)
# Track cumulative paste position
# Adjust viewport size for this chunk
page.set_viewport_size({"width": viewport["width"], "height": chunk_height})
# Scroll to the correct position
page.evaluate(f"window.scrollTo(0, {offset})")
# Capture screenshot chunk
screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
images.append(Image.open(io.BytesIO(screenshot_bytes)))
total_captured_height += chunk_height
# Stop if we reached the maximum total height
if total_captured_height >= MAX_TOTAL_HEIGHT:
break
# Create the final stitched image
stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
y_offset = 0
for _ in range(num_chunks):
# Scroll to position (no viewport resizing)
page.evaluate(f"window.scrollTo(0, {y_offset})")
# Stitch the screenshot chunks together
for img in images:
stitched_image.paste(img, (0, y_offset))
y_offset += img.height
# Capture only the visible area using clip
with io.BytesIO(
page.screenshot(
type="jpeg",
clip={
"x": 0,
"y": 0,
"width": viewport_width,
"height": min(viewport_height, capture_height - y_offset),
},
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
)
) as buf:
with Image.open(buf) as img:
img.load()
stitched_image.paste(img, (0, y_offset))
y_offset += img.height
logger.debug(f"Screenshot stitched together in {time.time() - start:.2f}s")
logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")
# Overlay warning text if the screenshot was trimmed
if capture_height < page_height:
if page_height > MAX_TOTAL_HEIGHT:
draw = ImageDraw.Draw(stitched_image)
warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
# Load font (default system font if Arial is unavailable)
try:
font = ImageFont.truetype(
"arial.ttf", WARNING_TEXT_HEIGHT
) # Arial (Windows/Mac)
font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT) # Arial (Windows/Mac)
except IOError:
font = ImageFont.load_default() # Default font if Arial not found
@@ -111,28 +83,22 @@ def capture_full_page(page):
text_height = text_bbox[3] - text_bbox[1] # Calculate text height
# Define background rectangle (top of the image)
draw.rectangle(
[(0, 0), (viewport_width, WARNING_TEXT_HEIGHT)], fill="white"
)
draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")
# Center text horizontally within the warning area
text_x = (viewport_width - text_width) // 2
text_x = (viewport["width"] - text_width) // 2
text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
# Draw the warning text in red
draw.text((text_x, text_y), warning_text, fill="red", font=font)
# Save final image
with io.BytesIO() as output:
stitched_image.save(
output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30))
)
screenshot = output.getvalue()
# Save or return the final image
output = io.BytesIO()
stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
screenshot = output.getvalue()
finally:
# Restore the original viewport size
page.set_viewport_size(original_viewport)
if stitched_image is not None:
stitched_image.close()
return screenshot

View File

@@ -4,7 +4,7 @@ from urllib.parse import urlparse
from loguru import logger
from changedetectionio.content_fetchers.helpers import capture_full_page
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
@@ -60,7 +60,7 @@ class fetcher(Fetcher):
def screenshot_step(self, step_n=''):
super().screenshot_step(step_n=step_n)
screenshot = capture_full_page(self.page)
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
if self.browser_steps_screenshot_path is not None:
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
@@ -164,7 +164,9 @@ class fetcher(Fetcher):
raise PageUnloadable(url=url, status_code=None, message=str(e))
if self.status_code != 200 and not ignore_status_codes:
screenshot = capture_full_page(self.page)
screenshot = self.page.screenshot(type='jpeg', full_page=True,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
@@ -202,7 +204,14 @@ class fetcher(Fetcher):
# acceptable screenshot quality here
try:
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
self.screenshot = capture_full_page(self.page)
full_height = self.page.evaluate("document.documentElement.scrollHeight")
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
logger.warning(
f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
self.screenshot = capture_stitched_together_full_page(self.page)
else:
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
except Exception as e:
# It's likely the screenshot was too long/big and something crashed

View File

@@ -96,17 +96,3 @@ class fetcher(Fetcher):
self.raw_content = r.content
def quit(self, watch=None):
# In case they switched to `requests` fetcher from something else
# Then the screenshot could be old, in any case, it's not used here.
# REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing
if strtobool(os.getenv("REMOVE_REQUESTS_OLD_SCREENSHOTS", 'true')):
screenshot = watch.get_screenshot()
if screenshot:
try:
os.unlink(screenshot)
except Exception as e:
logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}")

View File

@@ -75,19 +75,13 @@ function isItemInStock() {
'rupture',
'sold out',
'sold-out',
'stok habis',
'stok kosong',
'stok varian ini habis',
'stokta yok',
'temporarily out of stock',
'temporarily unavailable',
'there were no search results for',
'this item is currently unavailable',
'tickets unavailable',
'tidak dijual',
'tidak tersedia',
'tijdelijk uitverkocht',
'tiket tidak tersedia',
'tükendi',
'unavailable nearby',
'unavailable tickets',

View File

@@ -65,7 +65,6 @@ class fetcher(Fetcher):
# request_body, request_method unused for now, until some magic in the future happens.
options = ChromeOptions()
options.add_argument("--headless")
if self.proxy:
options.proxy = self.proxy
@@ -113,9 +112,9 @@ class fetcher(Fetcher):
self.quit()
return True
def quit(self, watch=None):
def quit(self):
if self.driver:
try:
self.driver.quit()
except Exception as e:
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")

View File

@@ -447,16 +447,6 @@ def changedetection_app(config=None, datastore_o=None):
import changedetectionio.blueprint.watchlist as watchlist
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
# Memory cleanup endpoint
@app.route('/gc-cleanup', methods=['GET'])
@login_optionally_required
def gc_cleanup():
from changedetectionio.gc_cleanup import memory_cleanup
from flask import jsonify
result = memory_cleanup(app)
return jsonify({"status": "success", "message": "Memory cleanup completed", "result": result})
# @todo handle ctrl break
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()

View File

@@ -586,7 +586,7 @@ class processor_text_json_diff_form(commonSettingsForm):
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
filter_text_removed = BooleanField('Removed lines', default=True)
trigger_text = StringListField('Keyword triggers - Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
@@ -721,8 +721,6 @@ class globalSettingsRequestForm(Form):
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
return False
class globalSettingsApplicationUIForm(Form):
open_diff_in_new_tab = BooleanField('Open diff page in a new tab', default=True, validators=[validators.Optional()])
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):
@@ -754,7 +752,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,
message="Should contain zero or more attempts")])
ui = FormField(globalSettingsApplicationUIForm)
class globalSettingsForm(Form):

View File

@@ -1,162 +0,0 @@
#!/usr/bin/env python3
import ctypes
import gc
import re
import psutil
import sys
import threading
import importlib
from loguru import logger
def memory_cleanup(app=None):
    """
    Perform comprehensive memory cleanup operations and log memory usage
    at each step with nicely formatted numbers.

    The glibc-specific ``malloc_trim(0)`` call is best-effort: on platforms
    where ``libc.so.6`` cannot be loaded (or does not export ``malloc_trim``)
    the step is skipped instead of raising, so this function can be called
    safely from any platform.

    Args:
        app: Optional Flask app instance for clearing Flask-specific caches

    Returns:
        str: Status message (always ``"cleaned"``)
    """
    # Get current process
    process = psutil.Process()

    # Log initial memory usage with nicely formatted numbers
    logger.debug(f"Memory cleanup started - Current memory usage: {_rss_mb(process):,.2f} MB")

    # 1. Standard garbage collection - force full collection on all generations
    gc.collect(0)  # Collect youngest generation
    gc.collect(1)  # Collect middle generation
    gc.collect(2)  # Collect oldest generation
    # Run full collection again to ensure maximum cleanup
    gc.collect()
    _log_memory(process, "full gc.collect()")

    # 2. Call libc's malloc_trim to release memory back to the OS (glibc only)
    if _malloc_trim():
        _log_memory(process, "malloc_trim(0)")

    # 3. Clear Python's regex cache
    re.purge()
    _log_memory(process, "re.purge()")

    # 4. Reset thread-local storage
    # Create a new thread local object to encourage cleanup of old ones
    threading.local()
    _log_memory(process, "threading.local()")

    # 5. Clear sys.intern cache if Python version supports it
    # NOTE(review): sys.intern is a plain builtin function, so this is expected
    # to take the AttributeError branch on CPython - kept for compatibility.
    try:
        sys.intern.clear()
        _log_memory(process, "sys.intern.clear()")
    except (AttributeError, TypeError):
        logger.debug("sys.intern.clear() not supported in this Python version")

    # 6. Clear XML/lxml caches if available
    # NOTE(review): clear() is called on every gc-tracked lxml element, which
    # also empties elements that are still referenced elsewhere - confirm no
    # document is being processed while this runs.
    try:
        # Only touch lxml when it has already been imported by something else
        lxml_etree = sys.modules.get('lxml.etree')
        if lxml_etree:
            # Clear module-level caches
            if hasattr(lxml_etree, 'clear_error_log'):
                lxml_etree.clear_error_log()

            # Error logs, plus Element objects which can hold references to documents
            _clear_gc_objects_by_class_name(
                ('_ErrorLog', '_RotatingErrorLog', '_DomainErrorLog', '_Element', 'ElementBase')
            )
            _log_memory(process, "lxml.etree cleanup")

        lxml_html = sys.modules.get('lxml.html')
        if lxml_html:
            # Clear HTML-specific element types
            _clear_gc_objects_by_class_name(
                ('HtmlElement', 'FormElement', 'InputElement',
                 'SelectElement', 'TextareaElement', 'CheckboxGroup',
                 'RadioGroup', 'MultipleSelectOptions', 'FieldsDict')
            )
            _log_memory(process, "lxml.html cleanup")
    except (ImportError, AttributeError):
        logger.debug("lxml cleanup not applicable")

    # 7. Clear JSON parser caches if applicable
    try:
        json_module = sys.modules.get('json')
        if json_module and hasattr(json_module, '_default_encoder'):
            json_module._default_encoder.markers.clear()
            _log_memory(process, "JSON parser cleanup")
    except (AttributeError, KeyError):
        logger.debug("JSON cleanup not applicable")

    # 8. Force Python's memory allocator to release unused memory
    try:
        if hasattr(sys, 'pypy_version_info'):
            # PyPy has different memory management
            gc.collect()
        else:
            # CPython - run the collector through the C API
            ctypes.pythonapi.PyGC_Collect()
        _log_memory(process, "PyGC_Collect")
    except (AttributeError, TypeError):
        logger.debug("PyGC_Collect not supported")

    # 9. Clear Flask-specific caches if applicable
    if app:
        try:
            # Clear Flask caches if they exist
            for key in list(app.config.get('_cache', {}).keys()):
                app.config['_cache'].pop(key, None)

            # Clear Jinja2 template cache if available
            if hasattr(app, 'jinja_env') and hasattr(app.jinja_env, 'cache'):
                app.jinja_env.cache.clear()

            _log_memory(process, "Flask cache clear")
        except (AttributeError, KeyError):
            logger.debug("No Flask cache to clear")

    # Final garbage collection pass
    gc.collect()
    _malloc_trim()

    # Log final memory usage
    logger.info(f"Memory cleanup completed - Final memory usage: {_rss_mb(process):,.2f} MB")
    return "cleaned"


def _rss_mb(process):
    """Return the resident set size reported for *process*, in megabytes."""
    return process.memory_info().rss / 1024 / 1024


def _log_memory(process, step):
    """Log the current memory usage of *process* after the cleanup step *step*."""
    logger.debug(f"After {step} - Memory usage: {_rss_mb(process):,.2f} MB")


def _malloc_trim():
    """Call glibc's malloc_trim(0); return False (and log) when it is unavailable."""
    try:
        ctypes.CDLL("libc.so.6").malloc_trim(0)
    except (OSError, AttributeError) as e:
        # OSError: libc.so.6 could not be loaded; AttributeError: no malloc_trim symbol
        logger.debug(f"malloc_trim(0) not available on this platform, skipped: {e}")
        return False
    return True


def _clear_gc_objects_by_class_name(class_names):
    """Call clear() on every gc-tracked object whose class name is in *class_names*."""
    for obj in gc.get_objects():
        if hasattr(obj, '__class__') and hasattr(obj.__class__, '__name__'):
            if obj.__class__.__name__ in class_names and hasattr(obj, 'clear'):
                try:
                    obj.clear()
                except (AttributeError, TypeError):
                    # Best-effort: some objects expose an incompatible clear()
                    pass

View File

@@ -366,41 +366,22 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
# wordlist - list of regex's (str) or words (str)
# Preserves all linefeeds and other whitespace; it's not the job of this function to remove them
def strip_ignore_text(content, wordlist, mode="content"):
i = 0
output = []
ignore_text = []
ignore_regex = []
ignore_regex_multiline = []
ignored_lines = []
ignored_line_numbers = []
for k in wordlist:
# Is it a regex?
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
if res:
res = re.compile(perl_style_slash_enclosed_regex_to_options(k))
if res.flags & re.DOTALL or res.flags & re.MULTILINE:
ignore_regex_multiline.append(res)
else:
ignore_regex.append(res)
ignore_regex.append(re.compile(perl_style_slash_enclosed_regex_to_options(k)))
else:
ignore_text.append(k.strip())
for r in ignore_regex_multiline:
for match in r.finditer(content):
content_lines = content[:match.end()].splitlines(keepends=True)
match_lines = content[match.start():match.end()].splitlines(keepends=True)
end_line = len(content_lines)
start_line = end_line - len(match_lines)
if end_line - start_line <= 1:
# Match is empty or in the middle of the line
ignored_lines.append(start_line)
else:
for i in range(start_line, end_line):
ignored_lines.append(i)
line_index = 0
lines = content.splitlines(keepends=True)
for line in lines:
for line in content.splitlines(keepends=True):
i += 1
# Always ignore blank lines in this mode. (when this function gets called)
got_match = False
for l in ignore_text:
@@ -412,19 +393,17 @@ def strip_ignore_text(content, wordlist, mode="content"):
if r.search(line):
got_match = True
if got_match:
ignored_lines.append(line_index)
line_index += 1
ignored_lines = set([i for i in ignored_lines if i >= 0 and i < len(lines)])
if not got_match:
# Not ignored, and should preserve "keepends"
output.append(line)
else:
ignored_line_numbers.append(i)
# Used for finding out what to highlight
if mode == "line numbers":
return [i + 1 for i in ignored_lines]
return ignored_line_numbers
output_lines = set(range(len(lines))) - ignored_lines
return ''.join([lines[i] for i in output_lines])
return ''.join(output)
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
from xml.sax.saxutils import escape as xml_escape

View File

@@ -60,9 +60,6 @@ class model(dict):
'webdriver_delay': None , # Extra delay in seconds before extracting text
'tags': {}, #@todo use Tag.model initialisers
'timezone': None, # Default IANA timezone name
'ui': {
'open_diff_in_new_tab': True,
},
}
}
}

View File

@@ -159,7 +159,7 @@ class difference_detection_processor():
)
#@todo .quit here could go on close object, so we can run JS if change-detected
self.fetcher.quit(watch=self.watch)
self.fetcher.quit()
# After init, call run_changedetection() which will do the actual change-detection

View File

@@ -252,7 +252,6 @@ class perform_site_check(difference_detection_processor):
# 615 Extract text by regex
extract_text = watch.get('extract_text', [])
extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text')
if len(extract_text) > 0:
regex_matched_output = []
for s_re in extract_text:
@@ -297,8 +296,6 @@ class perform_site_check(difference_detection_processor):
### CALCULATE MD5
# If there's text to ignore
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text')
text_for_checksuming = stripped_text_from_html
if text_to_ignore:
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
@@ -311,8 +308,8 @@ class perform_site_check(difference_detection_processor):
############ Blocking rules, after checksum #################
blocked = False
trigger_text = watch.get('trigger_text', [])
trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text')
if len(trigger_text):
# Assume blocked
blocked = True
@@ -327,7 +324,6 @@ class perform_site_check(difference_detection_processor):
blocked = False
text_should_not_be_present = watch.get('text_should_not_be_present', [])
text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present')
if len(text_should_not_be_present):
# If anything matched, then we should block a change from happening
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),

View File

@@ -14,8 +14,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
find tests/test_*py -type f|while read test_name
do
echo "TEST RUNNING $test_name"
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name
pytest $test_name
done
echo "RUNNING WITH BASE_URL SET"
@@ -23,7 +22,7 @@ echo "RUNNING WITH BASE_URL SET"
# Now re-run some tests with BASE_URL enabled
# Re #65 - Ability to include a link back to the installation, in the notification.
export BASE_URL="https://really-unique-domain.io"
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
pytest tests/test_notification.py
# Re-run with HIDE_REFERER set - could affect login
@@ -33,7 +32,7 @@ pytest tests/test_access_control.py
# Re-run a few tests that will trigger brotli based storage
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
pytest tests/test_access_control.py
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
pytest tests/test_notification.py
pytest tests/test_backend.py
pytest tests/test_rss.py
pytest tests/test_unique_lines.py

View File

Before

Width:  |  Height:  |  Size: 569 B

After

Width:  |  Height:  |  Size: 569 B

View File

Before

Width:  |  Height:  |  Size: 14 KiB

After

Width:  |  Height:  |  Size: 14 KiB

View File

Before

Width:  |  Height:  |  Size: 6.2 KiB

After

Width:  |  Height:  |  Size: 6.2 KiB

View File

@@ -8,7 +8,7 @@ $(document).ready(function () {
$(".addRuleRow").on("click", function(e) {
e.preventDefault();
let currentRow = $(this).closest(".fieldlist-row");
let currentRow = $(this).closest("tr");
// Clone without events
let newRow = currentRow.clone(false);
@@ -29,8 +29,8 @@ $(document).ready(function () {
e.preventDefault();
// Only remove if there's more than one row
if ($("#rulesTable .fieldlist-row").length > 1) {
$(this).closest(".fieldlist-row").remove();
if ($("#rulesTable tbody tr").length > 1) {
$(this).closest("tr").remove();
reindexRules();
}
});
@@ -39,7 +39,7 @@ $(document).ready(function () {
$(".verifyRuleRow").on("click", function(e) {
e.preventDefault();
let row = $(this).closest(".fieldlist-row");
let row = $(this).closest("tr");
let field = row.find("select[name$='field']").val();
let operator = row.find("select[name$='operator']").val();
let value = row.find("input[name$='value']").val();
@@ -128,7 +128,7 @@ $(document).ready(function () {
$(".addRuleRow, .removeRuleRow, .verifyRuleRow").off("click");
// Reindex all form elements
$("#rulesTable .fieldlist-row").each(function(index) {
$("#rulesTable tbody tr").each(function(index) {
$(this).find("select, input").each(function() {
let oldName = $(this).attr("name");
let oldId = $(this).attr("id");

View File

@@ -1,135 +0,0 @@
/* Styles for the flexbox-based table replacement for conditions */
/*
 * Layout overview (all rules are nested under .fieldlist_formfields):
 *   .fieldlist-header  > .fieldlist-header-cell   - one flex row of column titles
 *   .fieldlist-body    > .fieldlist-row           - rows stacked vertically
 *   .fieldlist-row     > .fieldlist-cell          - cells laid out horizontally
 * Every var(--x, fallback) uses the fallback value when the custom property
 * is not defined.
 */
.fieldlist_formfields {
  width: 100%;
  background-color: var(--color-background, #fff);
  border-radius: 4px;
  border: 1px solid var(--color-border-table-cell, #cbcbcb);
  /* Header row */
  .fieldlist-header {
    display: flex;
    background-color: var(--color-background-table-thead, #e0e0e0);
    font-weight: bold;
    border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb);
  }
  /* Header cells share the available width equally ... */
  .fieldlist-header-cell {
    flex: 1;
    padding: 0.5em 1em;
    text-align: left;
    /* ... except the last one, which neither grows nor shrinks */
    &:last-child {
      flex: 0 0 120px; /* Fixed width for actions column */
    }
  }
  /* Body rows */
  .fieldlist-body {
    display: flex;
    flex-direction: column;
  }
  .fieldlist-row {
    display: flex;
    border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb);
    /* No separator line under the final row */
    &:last-child {
      border-bottom: none;
    }
    /* 2n-1 selects the odd children (1st, 3rd, ...) for zebra striping */
    &:nth-child(2n-1) {
      background-color: var(--color-table-stripe, #f2f2f2);
    }
    /* Declared after the stripe rule so it wins for rows carrying both states */
    &.error-row {
      background-color: var(--color-error-input, #ffdddd);
    }
  }
  /* Each cell is itself a vertical flex container with centred content */
  .fieldlist-cell {
    flex: 1;
    padding: 0.5em 1em;
    display: flex;
    flex-direction: column;
    justify-content: center;
    /* Make inputs take up full width of their cell */
    input, select {
      width: 100%;
    }
    /* Actions cell: same fixed 120px as the last header cell, laid out as a row */
    &.fieldlist-actions {
      flex: 0 0 120px; /* Fixed width for actions column */
      display: flex;
      flex-direction: row;
      align-items: center;
      gap: 4px;
    }
  }
  /* Error styling */
  ul.errors {
    margin-top: 0.5em;
    margin-bottom: 0;
    padding: 0.5em;
    background-color: var(--color-error-background-snapshot-age, #ffdddd);
    border-radius: 4px;
    list-style-position: inside;
  }
  /* Responsive styles */
  /* At viewport widths of 760px or less: stack cells vertically and hide the header titles */
  @media only screen and (max-width: 760px) {
    .fieldlist-header, .fieldlist-row {
      flex-direction: column;
    }
    .fieldlist-header-cell {
      display: none;
    }
    /* Thicker separator between the now-stacked rows */
    .fieldlist-row {
      padding: 0.5em 0;
      border-bottom: 2px solid var(--color-border-table-cell, #cbcbcb);
    }
    .fieldlist-cell {
      padding: 0.25em 0.5em;
      /* Drop the fixed 120px basis so the actions cell can flex like the others */
      &.fieldlist-actions {
        flex: 1;
        justify-content: flex-start;
        padding-top: 0.5em;
      }
    }
    /* Add some spacing between fields on mobile */
    .fieldlist-cell:not(:last-child) {
      margin-bottom: 0.5em;
    }
    /* Label each cell on mobile view */
    /* The label text is read from the cell's data-label attribute. */
    /* NOTE(review): nothing is rendered unless the markup sets data-label on each .fieldlist-cell - confirm the template does. */
    .fieldlist-cell::before {
      content: attr(data-label);
      font-weight: bold;
      margin-bottom: 0.25em;
    }
  }
}
/* Button styling */
/* Shared appearance for the per-row add (+), remove (-) and verify buttons. */
.fieldlist_formfields {
  .addRuleRow, .removeRuleRow, .verifyRuleRow {
    cursor: pointer;
    border: none;
    padding: 4px 8px;
    border-radius: 3px;
    font-weight: bold;
    background-color: #aaa;
    /* Falls back to white when --color-foreground-text is not defined */
    color: var(--color-foreground-text, #fff);
    /* Slightly darker grey as hover feedback */
    &:hover {
      background-color: #999;
    }
  }
}

View File

@@ -14,7 +14,6 @@
@import "parts/_love";
@import "parts/preview_text_filter";
@import "parts/_edit";
@import "parts/_conditions_table";
body {
color: var(--color-text);

View File

@@ -530,99 +530,6 @@ ul#conditions_match_logic {
ul#conditions_match_logic li {
padding-right: 1em; }
/* Styles for the flexbox-based table replacement for conditions */
.fieldlist_formfields {
width: 100%;
background-color: var(--color-background, #fff);
border-radius: 4px;
border: 1px solid var(--color-border-table-cell, #cbcbcb);
/* Header row */
/* Body rows */
/* Error styling */
/* Responsive styles */ }
.fieldlist_formfields .fieldlist-header {
display: flex;
background-color: var(--color-background-table-thead, #e0e0e0);
font-weight: bold;
border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb); }
.fieldlist_formfields .fieldlist-header-cell {
flex: 1;
padding: 0.5em 1em;
text-align: left; }
.fieldlist_formfields .fieldlist-header-cell:last-child {
flex: 0 0 120px;
/* Fixed width for actions column */ }
.fieldlist_formfields .fieldlist-body {
display: flex;
flex-direction: column; }
.fieldlist_formfields .fieldlist-row {
display: flex;
border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb); }
.fieldlist_formfields .fieldlist-row:last-child {
border-bottom: none; }
.fieldlist_formfields .fieldlist-row:nth-child(2n-1) {
background-color: var(--color-table-stripe, #f2f2f2); }
.fieldlist_formfields .fieldlist-row.error-row {
background-color: var(--color-error-input, #ffdddd); }
.fieldlist_formfields .fieldlist-cell {
flex: 1;
padding: 0.5em 1em;
display: flex;
flex-direction: column;
justify-content: center;
/* Make inputs take up full width of their cell */ }
.fieldlist_formfields .fieldlist-cell input, .fieldlist_formfields .fieldlist-cell select {
width: 100%; }
.fieldlist_formfields .fieldlist-cell.fieldlist-actions {
flex: 0 0 120px;
/* Fixed width for actions column */
display: flex;
flex-direction: row;
align-items: center;
gap: 4px; }
.fieldlist_formfields ul.errors {
margin-top: 0.5em;
margin-bottom: 0;
padding: 0.5em;
background-color: var(--color-error-background-snapshot-age, #ffdddd);
border-radius: 4px;
list-style-position: inside; }
@media only screen and (max-width: 760px) {
.fieldlist_formfields {
/* Add some spacing between fields on mobile */
/* Label each cell on mobile view */ }
.fieldlist_formfields .fieldlist-header, .fieldlist_formfields .fieldlist-row {
flex-direction: column; }
.fieldlist_formfields .fieldlist-header-cell {
display: none; }
.fieldlist_formfields .fieldlist-row {
padding: 0.5em 0;
border-bottom: 2px solid var(--color-border-table-cell, #cbcbcb); }
.fieldlist_formfields .fieldlist-cell {
padding: 0.25em 0.5em; }
.fieldlist_formfields .fieldlist-cell.fieldlist-actions {
flex: 1;
justify-content: flex-start;
padding-top: 0.5em; }
.fieldlist_formfields .fieldlist-cell:not(:last-child) {
margin-bottom: 0.5em; }
.fieldlist_formfields .fieldlist-cell::before {
content: attr(data-label);
font-weight: bold;
margin-bottom: 0.25em; } }
/* Button styling */
.fieldlist_formfields .addRuleRow, .fieldlist_formfields .removeRuleRow, .fieldlist_formfields .verifyRuleRow {
cursor: pointer;
border: none;
padding: 4px 8px;
border-radius: 3px;
font-weight: bold;
background-color: #aaa;
color: var(--color-foreground-text, #fff); }
.fieldlist_formfields .addRuleRow:hover, .fieldlist_formfields .removeRuleRow:hover, .fieldlist_formfields .verifyRuleRow:hover {
background-color: #999; }
body {
color: var(--color-text);
background: var(--color-background-page);

View File

@@ -61,20 +61,21 @@
{{ field(**kwargs)|safe }}
{% endmacro %}
{% macro render_conditions_fieldlist_of_formfields_as_table(fieldlist, table_id="rulesTable") %}
<div class="fieldlist_formfields" id="{{ table_id }}">
<div class="fieldlist-header">
{% for subfield in fieldlist[0] %}
<div class="fieldlist-header-cell">{{ subfield.label }}</div>
{% endfor %}
<div class="fieldlist-header-cell">Actions</div>
</div>
<div class="fieldlist-body">
{% macro render_fieldlist_of_formfields_as_table(fieldlist, table_id="rulesTable") %}
<table class="fieldlist_formfields pure-table" id="{{ table_id }}">
<thead>
<tr>
{% for subfield in fieldlist[0] %}
<th>{{ subfield.label }}</th>
{% endfor %}
<th>Actions</th>
</tr>
</thead>
<tbody>
{% for form_row in fieldlist %}
<div class="fieldlist-row {% if form_row.errors %}error-row{% endif %}">
<tr {% if form_row.errors %} class="error-row" {% endif %}>
{% for subfield in form_row %}
<div class="fieldlist-cell">
<td>
{{ subfield()|safe }}
{% if subfield.errors %}
<ul class="errors">
@@ -83,17 +84,17 @@
{% endfor %}
</ul>
{% endif %}
</div>
</td>
{% endfor %}
<div class="fieldlist-cell fieldlist-actions">
<button type="button" class="addRuleRow" title="Add a row/rule after">+</button>
<button type="button" class="removeRuleRow" title="Remove this row/rule">-</button>
<td>
<button type="button" class="addRuleRow">+</button>
<button type="button" class="removeRuleRow">-</button>
<button type="button" class="verifyRuleRow" title="Verify this rule against current snapshot"></button>
</div>
</div>
</td>
</tr>
{% endfor %}
</div>
</div>
</tbody>
</table>
{% endmacro %}

View File

@@ -159,7 +159,7 @@
<a id="chrome-extension-link"
title="Chrome Extension - Web Page Change Detection with changedetection.io!"
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
<img alt="Chrome store icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}">
<img alt="Chrome store icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}">
Chrome Webstore
</a>
</p>

View File

@@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning, render_fieldlist_of_formfields_as_table %}
{% from '_common_fields.html' import render_common_settings_form %}
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
@@ -289,9 +289,21 @@ Math: {{ 1 + 1 }}") }}
<script>
const verify_condition_rule_url="{{url_for('conditions.verify_condition_single_rule', watch_uuid=uuid)}}";
</script>
<style>
.verifyRuleRow {
background-color: #4caf50;
color: white;
border: none;
cursor: pointer;
font-weight: bold;
}
.verifyRuleRow:hover {
background-color: #45a049;
}
</style>
<div class="pure-control-group">
{{ render_field(form.conditions_match_logic) }}
{{ render_conditions_fieldlist_of_formfields_as_table(form.conditions) }}
{{ render_fieldlist_of_formfields_as_table(form.conditions) }}
<div class="pure-form-message-inline">
<p id="verify-state-text">Use the verify (✓) button to test if a condition passes against the current snapshot.</p>
@@ -314,8 +326,61 @@ Math: {{ 1 + 1 }}") }}
</li>
</ul>
</div>
<div class="pure-control-group">
{% set field = render_field(form.include_filters,
rows=5,
placeholder=has_tag_filters_extra+"#example
xpath://body/div/span[contains(@class, 'example-class')]",
class="m-d")
%}
{{ field }}
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath 1 &amp; 2, JSON Path/JQ selector per line, <i>any</i> rule that matches will be used.<br>
<span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</span><br>
<ul id="advanced-help-selectors" style="display: none;">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}
</ul>
</li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code>
<ul>
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a
href="http://xpather.com/" target="new">test your XPath here</a></li>
<li>Example: Get all titles from an RSS feed <code>//title/text()</code></li>
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul>
</li>
<li>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</li>
</ul>
{% include "edit/include_subtract.html" %}
</span>
</div>
<fieldset class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header
footer
nav
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
<div class="text-filtering border-fieldset">
<fieldset class="pure-group" id="text-filtering-type-options">
<h3>Text filtering</h3>
@@ -343,9 +408,76 @@ Math: {{ 1 + 1 }}") }}
{{ render_checkbox_field(form.trim_text_whitespace) }}
<span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span>
</fieldset>
{% include "edit/text-options.html" %}
<fieldset>
<div class="pure-control-group">
{{ render_field(form.trigger_text, rows=5, placeholder="Some text to wait for in a line
/some.regex\d{2}/ for case-INsensitive regex
") }}
<span class="pure-form-message-inline">
<ul>
<li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
<li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>Each line is processed separately (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
/some.regex\d{2}/ for case-INsensitive regex
") }}
<span class="pure-form-message-inline">
<ul>
<li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it won't trigger a change)</li>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
</ul>
</span>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock
Sold out
Not in stock
Unavailable") }}
<span class="pure-form-message-inline">
<ul>
<li>Block change-detection while this text is on the page, all text and regex are tested <i>case-insensitive</i>, good for waiting for when a product is available again</li>
<li>Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>All lines here must not exist (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
or
keyword") }}
<span class="pure-form-message-inline">
<ul>
<li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
<ul>
<li>Regular expression &dash; example <code>/reports.+?2022/i</code></li>
<li>Don't forget to consider the white-space at the start of a line <code>/.+?reports.+?2022/i</code></li>
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
<li>Keyword example &dash; example <code>Out of stock</code></li>
<li>Use groups to extract just that text &dash; example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
<li>Example - match lines containing a keyword <code>/.*icecream.*/</code></li>
</ul>
</li>
<li>One line per regular-expression/string match</li>
</ul>
</span>
</div>
</fieldset>
</div>
</div>
</div>
<div id="text-preview" style="display: none;" >
<script>
const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}";

View File

@@ -1,55 +0,0 @@
<div class="pure-control-group">
{% set field = render_field(form.include_filters,
rows=5,
placeholder=has_tag_filters_extra+"#example
xpath://body/div/span[contains(@class, 'example-class')]",
class="m-d")
%}
{{ field }}
{% if '/text()' in field %}
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the &lt;element&gt; contains &lt;![CDATA[]]&gt;</strong></span><br>
{% endif %}
<span class="pure-form-message-inline">One CSS, xPath 1 &amp; 2, JSON Path/JQ selector per line, <i>any</i> rule that matches will be used.<br>
<span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</span><br>
<ul id="advanced-help-selectors" style="display: none;">
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
<ul>
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
{% if jq_support %}
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
{% else %}
<li>jq support not installed</li>
{% endif %}
</ul>
</li>
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code>
<ul>
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a
href="http://xpather.com/" target="new">test your XPath here</a></li>
<li>Example: Get all titles from an RSS feed <code>//title/text()</code></li>
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
</ul>
</li>
<li>
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</li>
</ul>
</span>
</div>
<fieldset class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header
footer
nav
.stockticker
//*[contains(text(), 'Advertisement')]") }}
<span class="pure-form-message-inline">
<ul>
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>

View File

@@ -1,69 +0,0 @@
<fieldset>
<div class="pure-control-group">
{{ render_field(form.trigger_text, rows=5, placeholder="Some text to wait for in a line
/some.regex\d{2}/ for case-INsensitive regex
") }}
<span class="pure-form-message-inline">
<ul>
<li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li>
<li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>Each line is processed separately (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>
</fieldset>
<fieldset class="pure-group">
{{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line
/some.regex\d{2}/ for case-INsensitive regex
") }}
<span class="pure-form-message-inline">
<ul>
<li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it won't trigger a change)</li>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
</ul>
</span>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock
Sold out
Not in stock
Unavailable") }}
<span class="pure-form-message-inline">
<ul>
<li>Block change-detection while this text is on the page, all text and regex are tested <i>case-insensitive</i>, good for waiting for when a product is available again</li>
<li>Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>All lines here must not exist (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
or
keyword") }}
<span class="pure-form-message-inline">
<ul>
<li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;
<ul>
<li>Regular expression &dash; example <code>/reports.+?2022/i</code></li>
<li>Don't forget to consider the white-space at the start of a line <code>/.+?reports.+?2022/i</code></li>
<li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li>
<li>Keyword example &dash; example <code>Out of stock</code></li>
<li>Use groups to extract just that text &dash; example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
<li>Example - match lines containing a keyword <code>/.*icecream.*/</code></li>
</ul>
</li>
<li>One line per regular-expression/string match</li>
</ul>
</span>
</div>
</fieldset>

View File

@@ -32,6 +32,7 @@ def test_strip_regex_text_func():
]
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert "but 1 lines" in stripped_content
assert "igNORe-cAse text" not in stripped_content
assert "but 1234 lines" not in stripped_content
@@ -41,46 +42,6 @@ def test_strip_regex_text_func():
# Check line number reporting
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines, mode="line numbers")
assert stripped_content == [2, 5, 6, 7, 8, 10]
stripped_content = html_tools.strip_ignore_text(test_content, ['/but 1.+5 lines/s'])
assert "but 1 lines" not in stripped_content
assert "skip 5 lines" not in stripped_content
stripped_content = html_tools.strip_ignore_text(test_content, ['/but 1.+5 lines/s'], mode="line numbers")
assert stripped_content == [4, 5]
stripped_content = html_tools.strip_ignore_text(test_content, ['/.+/s'])
assert stripped_content == ""
stripped_content = html_tools.strip_ignore_text(test_content, ['/.+/s'], mode="line numbers")
assert stripped_content == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]
stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+but.+\\n.+lines$/m'])
assert "but 1 lines" not in stripped_content
assert "skip 5 lines" not in stripped_content
stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+but.+\\n.+lines$/m'], mode="line numbers")
assert stripped_content == [4, 5]
stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+?\.$/m'])
assert "but sometimes we want to remove the lines." not in stripped_content
assert "but not always." not in stripped_content
stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+?\.$/m'], mode="line numbers")
assert stripped_content == [2, 11]
stripped_content = html_tools.strip_ignore_text(test_content, ['/but.+?but/ms'])
assert "but sometimes we want to remove the lines." not in stripped_content
assert "but 1 lines" not in stripped_content
assert "but 1234 lines" not in stripped_content
assert "igNORe-cAse text we dont want to keep" not in stripped_content
assert "but not always." not in stripped_content
stripped_content = html_tools.strip_ignore_text(test_content, ['/but.+?but/ms'], mode="line numbers")
assert stripped_content == [2, 3, 4, 9, 10, 11]
stripped_content = html_tools.strip_ignore_text("\n\ntext\n\ntext\n\n", ['/^$/ms'], mode="line numbers")
assert stripped_content == [1, 2, 4, 6]
# Check that linefeeds are preserved when there are no matching ignores
content = "some text\n\nand other text\n"

View File

@@ -1,80 +0,0 @@
#!/usr/bin/env python3
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
def _set_open_diff_in_new_tab(client, value):
    """Post the 'open diff in new tab' UI setting and check it was saved.

    :param client: Flask test client used to submit the settings form.
    :param value: Raw form value for ``application-ui-open_diff_in_new_tab``
        ("1" to tick the checkbox, "" to leave it unticked).
    """
    res = client.post(
        url_for("settings.settings_page"),
        data={"application-ui-open_diff_in_new_tab": value},
        follow_redirects=True
    )
    assert b'Settings updated' in res.data


def _recheck_and_find_diff_link(client):
    """Force a recheck, then return the first watchlist line containing '/diff'.

    :param client: Flask test client used to trigger the recheck and fetch the watchlist.
    :return: The stripped text of the first line of the watchlist page that
        contains ``/diff``.
    """
    # Force recheck
    res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    assert b'Queued 1 watch for rechecking.' in res.data

    wait_for_all_checks(client)

    res = client.get(url_for("watchlist.index"))
    lines = res.data.decode().split("\n")

    # Find link to diff page (first match wins, same as the original loop + break)
    target_line = next((line.strip() for line in lines if '/diff' in line), None)
    assert target_line is not None, "No line containing '/diff' found in the watchlist page"
    return target_line


def test_checkbox_open_diff_in_new_tab(client, live_server):
    """Check that the 'open diff in new tab' setting controls ``target=`` on the diff link.

    With the setting enabled, the watchlist line that links to the diff page must
    contain ``target=``; with it disabled, it must not.
    """
    set_original_response()
    live_server_setup(live_server)

    # Add our URL to the import page
    res = client.post(
        url_for("imports.import_page"),
        data={"urls": url_for('test_endpoint', _external=True)},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    wait_for_all_checks(client)

    # Make a change so the watch has something to diff against
    set_modified_response()

    # Test case 1 - checkbox is enabled in settings
    _set_open_diff_in_new_tab(client, "1")
    assert 'target=' in _recheck_and_find_diff_link(client)

    # Test case 2 - checkbox is disabled in settings
    _set_open_diff_in_new_tab(client, "")
    assert 'target=' not in _recheck_and_find_diff_link(client)

    # Cleanup everything
    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

View File

@@ -63,10 +63,6 @@ services:
#
# A valid timezone name to run as (for scheduling watch checking) see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# - TZ=America/Los_Angeles
#
# Maximum height of screenshots, default is 16000 px, screenshots will be clipped to this if exceeded.
# RAM usage will be higher if you increase this.
# - SCREENSHOT_MAX_HEIGHT=16000
# Comment out ports: when running behind a reverse proxy, enable networks: etc.
ports:

Binary file not shown.

Before

Width:  |  Height:  |  Size: 104 KiB