Compare commits

..

21 Commits

Author SHA1 Message Date
dgtlmoon ca63dad896 Add extra test 2025-10-28 21:31:25 +01:00
dgtlmoon bd9b72dbfa fix socks proxy test 2025-10-28 21:24:56 +01:00
dgtlmoon 8473da4bdb these are prolly needed 2025-10-28 21:16:50 +01:00
dgtlmoon 762e2dacb2 tweaks 2025-10-28 21:07:39 +01:00
dgtlmoon 62ea1f9b24 Fix paths 2025-10-28 21:02:41 +01:00
dgtlmoon 14a6ced8f4 adding proxy id info 2025-10-28 20:19:22 +01:00
dgtlmoon 465e5e2ecc more out 2025-10-28 19:05:06 +01:00
dgtlmoon ada63a3200 more debug 2025-10-28 18:56:38 +01:00
dgtlmoon eef5425908 more debug 2025-10-28 18:52:11 +01:00
dgtlmoon 096bd21663 m,ore debug 2025-10-28 18:47:11 +01:00
dgtlmoon 0f53233272 more debug 2025-10-28 18:24:28 +01:00
dgtlmoon faaa9937d6 More caching more debug 2025-10-28 18:17:28 +01:00
dgtlmoon 950d59ccfa Give a startup delay 2025-10-28 18:08:20 +01:00
dgtlmoon bd3f0360e4 maybe not needed? 2025-10-28 18:07:38 +01:00
dgtlmoon 57347fd55c Show output 2025-10-28 18:01:07 +01:00
dgtlmoon 8ef782760a remove debug 2025-10-28 17:53:20 +01:00
dgtlmoon 4e20fce82c tweaks 2025-10-28 17:47:32 +01:00
dgtlmoon 7d8c127e1f WIP 2025-10-28 17:30:17 +01:00
dgtlmoon 0ca2acd38c pytest-xdist 2025-10-28 16:35:07 +01:00
dgtlmoon 2a0131d0f4 WIP 2025-10-28 16:33:26 +01:00
dgtlmoon 9ed236434e WIP 2025-10-28 16:14:57 +01:00
58 changed files with 634 additions and 1558 deletions
+1 -1
View File
@@ -82,5 +82,5 @@ jobs:
file: ${{ matrix.dockerfile }}
platforms: ${{ matrix.platform }}
cache-from: type=gha
cache-to: type=gha,mode=max
cache-to: type=gha,mode=min
-7
View File
@@ -21,8 +21,6 @@ jobs:
python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))"
test-application-3-10:
# Only run on push to master (including PR merges)
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
needs: lint-code
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
@@ -30,15 +28,12 @@ jobs:
test-application-3-11:
# Always run
needs: lint-code
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
python-version: '3.11'
test-application-3-12:
# Only run on push to master (including PR merges)
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
needs: lint-code
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
@@ -46,8 +41,6 @@ jobs:
skip-pypuppeteer: true
test-application-3-13:
# Only run on push to master (including PR merges)
if: github.event_name == 'push' && github.ref == 'refs/heads/master'
needs: lint-code
uses: ./.github/workflows/test-stack-reusable-workflow.yml
with:
@@ -69,7 +69,7 @@ jobs:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -96,7 +96,7 @@ jobs:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -135,7 +135,7 @@ jobs:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -177,7 +177,7 @@ jobs:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -217,7 +217,7 @@ jobs:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -253,7 +253,7 @@ jobs:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -282,7 +282,7 @@ jobs:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -322,7 +322,7 @@ jobs:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -353,7 +353,7 @@ jobs:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
@@ -398,7 +398,7 @@ jobs:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v6
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
-1
View File
@@ -21,7 +21,6 @@ venv/
# IDEs
.idea
.vscode/settings.json
*~
# Datastore files
datastore/
+15 -17
View File
@@ -34,27 +34,25 @@ ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf"
ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl"
# Additional environment variables for cryptography Rust build
ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
pip install \
--prefer-binary \
--extra-index-url https://www.piwheels.org/simple \
--extra-index-url https://pypi.anaconda.org/ARM-software/simple \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
-r /requirements.txt
RUN --mount=type=cache,target=/tmp/pip-cache \
pip install \
--prefer-binary \
--extra-index-url https://www.piwheels.org/simple \
--extra-index-url https://pypi.anaconda.org/ARM-software/simple \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
-r /requirements.txt
# Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
pip install \
--prefer-binary \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
playwright~=1.48.0 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
RUN --mount=type=cache,target=/tmp/pip-cache \
pip install \
--prefer-binary \
--cache-dir=/tmp/pip-cache \
--target=/dependencies \
playwright~=1.48.0 \
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
# Final image stage
FROM python:${PYTHON_VERSION}-slim-bookworm
+5 -4
View File
@@ -14,7 +14,7 @@ Ideal for monitoring price changes, content edits, conditional changes and more.
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes" title="Self-hosted web page change monitoring, list of websites with changes" />](https://changedetection.io)
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
@@ -31,7 +31,7 @@ Available when connected to a <a href="https://github.com/dgtlmoon/changedetecti
### Perform interactive browser steps
Fill in text boxes, click buttons and more, setup your changedetection scenario.
Fill in text boxes, click buttons and more, setup your changedetection scenario.
Using the **Browser Steps** configuration, add basic steps before performing change detection, such as logging into websites, adding a product to a cart, accept cookie logins, entering dates and refining searches.
@@ -54,7 +54,7 @@ Requires Playwright to be enabled.
- Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
- COVID related news from government websites
- University/organisation news from their website
- Detect and monitor changes in JSON API responses
- Detect and monitor changes in JSON API responses
- JSON API monitoring and alerting
- Changes in legal and other documents
- Trigger API calls via notifications when text appears on a website
@@ -86,7 +86,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residential, ISP, Rotating and many other proxy types to suit your project.
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residental, ISP, Rotating and many other proxy types to suit your project.
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
@@ -106,3 +106,4 @@ $ changedetection.io -d /path/to/empty/data/dir -p 5000
Then visit http://127.0.0.1:5000 , You should now be able to access the UI.
See https://changedetection.io for more information.
+1 -1
View File
@@ -64,7 +64,7 @@ def count_words_in_history(watch):
return 0
latest_key = list(watch.history.keys())[-1]
latest_content = watch.get_history_snapshot(timestamp=latest_key)
latest_content = watch.get_history_snapshot(latest_key)
return len(latest_content.split())
except Exception as e:
logger.error(f"Error counting words: {str(e)}")
+6 -17
View File
@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.50.43'
__version__ = '0.50.33'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -74,12 +74,6 @@ def main():
datastore_path = None
do_cleanup = False
# Optional URL to watch since start
default_url = None
# Set a default logger level
logger_level = 'DEBUG'
include_default_watches = True
host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
port = int(os.environ.get('PORT', 5000))
ssl_mode = False
@@ -93,13 +87,15 @@ def main():
datastore_path = os.path.join(os.getcwd(), "../datastore")
try:
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:u:", "port")
opts, args = getopt.getopt(sys.argv[1:], "6Ccsd:h:p:l:", "port")
except getopt.GetoptError:
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -u [default URL to watch] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
print('backend.py -s SSL enable -h [host] -p [port] -d [datastore path] -l [debug level - TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL]')
sys.exit(2)
create_datastore_dir = False
# Set a default logger level
logger_level = 'DEBUG'
# Set a logger level via shell env variable
# Used: Dockerfile for CICD
# To set logger level for pytest, see the app function in tests/conftest.py
@@ -120,10 +116,6 @@ def main():
if opt == '-d':
datastore_path = arg
if opt == '-u':
default_url = arg
include_default_watches = False
# Cleanup (remove text files that arent in the index)
if opt == '-c':
do_cleanup = True
@@ -180,16 +172,13 @@ def main():
sys.exit(2)
try:
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__, include_default_watches=include_default_watches)
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], version_tag=__version__)
except JSONDecodeError as e:
# Dont' start if the JSON DB looks corrupt
logger.critical(f"ERROR: JSON DB or Proxy List JSON at '{app_config['datastore_path']}' appears to be corrupt, aborting.")
logger.critical(str(e))
return
if default_url:
datastore.add_watch(url = default_url)
app = changedetection_app(app_config, datastore)
# Get the SocketIO instance from the Flask app (created in flask_app.py)
+1 -1
View File
@@ -175,7 +175,7 @@ class WatchSingleHistory(Resource):
response = make_response("No content found", 404)
response.mimetype = "text/plain"
else:
content = watch.get_history_snapshot(timestamp=timestamp)
content = watch.get_history_snapshot(timestamp)
response = make_response(content, 200)
response.mimetype = "text/plain"
+1 -1
View File
@@ -41,7 +41,7 @@ def get_openapi_spec():
# Possibly for pip3 packages
spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
with open(spec_path, 'r', encoding='utf-8') as f:
with open(spec_path, 'r') as f:
spec_dict = yaml.safe_load(f)
_openapi_spec = OpenAPI.from_dict(spec_dict)
return _openapi_spec
+1 -4
View File
@@ -96,10 +96,7 @@ def build_watch_json_schema(d):
"enum": ["html_requests", "html_webdriver"]
})
schema['properties']['processor'] = {"anyOf": [
{"type": "string", "enum": ["restock_diff", "text_json_diff"]},
{"type": "null"}
]}
# All headers must be key/value type dict
schema['properties']['headers'] = {
+6 -9
View File
@@ -353,15 +353,12 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
count = watch.get('check_count', 0) + 1
# Always record page title (used in notifications, and can change even when the content is the same)
if update_obj.get('content-type') and 'html' in update_obj.get('content-type'):
try:
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
if page_title:
page_title = page_title.strip()[:2000]
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
except Exception as e:
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
try:
page_title = html_tools.extract_title(data=update_handler.fetcher.content)
logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
except Exception as e:
logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
# Record server header
try:
+1 -17
View File
@@ -1,17 +1 @@
from copy import deepcopy
from loguru import logger
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from changedetectionio.notification import valid_notification_formats
RSS_CONTENT_FORMAT_DEFAULT = 'text'
# Some stuff not related
RSS_FORMAT_TYPES = deepcopy(valid_notification_formats)
if RSS_FORMAT_TYPES.get('markdown'):
del RSS_FORMAT_TYPES['markdown']
if RSS_FORMAT_TYPES.get(USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH):
del RSS_FORMAT_TYPES[USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH]
if not RSS_FORMAT_TYPES.get(RSS_CONTENT_FORMAT_DEFAULT):
logger.critical(f"RSS_CONTENT_FORMAT_DEFAULT not in the acceptable list {RSS_CONTENT_FORMAT_DEFAULT}")
RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')]
-95
View File
@@ -1,95 +0,0 @@
"""
Utility functions for RSS feed generation.
"""
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.notification.handler import apply_service_tweaks
from loguru import logger
import re
BAD_CHARS_REGEX = r'[\x00-\x08\x0B\x0C\x0E-\x1F]'
def scan_invalid_chars_in_rss(content):
"""
Scan for invalid characters in RSS content.
Returns True if invalid characters are found.
"""
for match in re.finditer(BAD_CHARS_REGEX, content):
i = match.start()
bad_char = content[i]
hex_value = f"0x{ord(bad_char):02x}"
# Grab context
start = max(0, i - 20)
end = min(len(content), i + 21)
context = content[start:end].replace('\n', '\\n').replace('\r', '\\r')
logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...")
# First match is enough
return True
return False
def clean_entry_content(content):
"""
Remove invalid characters from RSS content.
"""
cleaned = re.sub(BAD_CHARS_REGEX, '', content)
return cleaned
def generate_watch_guid(watch):
"""
Generate a unique GUID for a watch RSS entry.
"""
return f"{watch['uuid']}/{watch.last_changed}"
def generate_watch_diff_content(watch, dates, rss_content_format, datastore):
"""
Generate HTML diff content for a watch given its history dates.
Returns tuple of (content, watch_label).
Args:
watch: The watch object
dates: List of history snapshot dates
rss_content_format: Format for RSS content (html or text)
datastore: The ChangeDetectionStore instance
Returns:
Tuple of (content, watch_label) - the rendered HTML content and watch label
"""
from changedetectionio import diff
# Same logic as watch-overview.html
if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
watch_label = watch.label
else:
watch_label = watch.get('url')
try:
html_diff = diff.render_diff(
previous_version_file_contents=watch.get_history_snapshot(timestamp=dates[-2]),
newest_version_file_contents=watch.get_history_snapshot(timestamp=dates[-1]),
include_equal=False
)
requested_output_format = datastore.data['settings']['application'].get('rss_content_format')
url, html_diff, n_title = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format)
except FileNotFoundError as e:
html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."
# @note: We use <pre> because nearly all RSS readers render only HTML (Thunderbird for example cant do just plaintext)
rss_template = "<pre>{{watch_label}} had a change.\n\n{{html_diff}}\n</pre>"
if 'html' in rss_content_format:
rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_label}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
content = jinja_render(template_str=rss_template, watch_label=watch_label, html_diff=html_diff, watch_url=watch.link)
# Out of range chars could also break feedgen
if scan_invalid_chars_in_rss(content):
content = clean_entry_content(content)
return content, watch_label
+146 -17
View File
@@ -1,26 +1,155 @@
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.notification.handler import apply_service_tweaks
from changedetectionio.store import ChangeDetectionStore
from flask import Blueprint
from feedgen.feed import FeedGenerator
from flask import Blueprint, make_response, request, url_for, redirect
from loguru import logger
import datetime
import pytz
import re
import time
from . import tag as tag_routes
from . import main_feed
from . import single_watch
BAD_CHARS_REGEX=r'[\x00-\x08\x0B\x0C\x0E-\x1F]'
# Anything that is not text/UTF-8 should be stripped before it breaks feedgen (such as binary data etc)
def scan_invalid_chars_in_rss(content):
for match in re.finditer(BAD_CHARS_REGEX, content):
i = match.start()
bad_char = content[i]
hex_value = f"0x{ord(bad_char):02x}"
# Grab context
start = max(0, i - 20)
end = min(len(content), i + 21)
context = content[start:end].replace('\n', '\\n').replace('\r', '\\r')
logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...")
# First match is enough
return True
return False
def clean_entry_content(content):
cleaned = re.sub(BAD_CHARS_REGEX, '', content)
return cleaned
def construct_blueprint(datastore: ChangeDetectionStore):
"""
Construct and configure the RSS blueprint with all routes.
Args:
datastore: The ChangeDetectionStore instance
Returns:
The configured Flask blueprint
"""
rss_blueprint = Blueprint('rss', __name__)
# Register all route modules
main_feed.construct_main_feed_routes(rss_blueprint, datastore)
single_watch.construct_single_watch_routes(rss_blueprint, datastore)
tag_routes.construct_tag_routes(rss_blueprint, datastore)
# Some RSS reader situations ended up with rss/ (forward slash after RSS) due
# to some earlier blueprint rerouting work, it should goto feed.
@rss_blueprint.route("/", methods=['GET'])
def extraslash():
return redirect(url_for('rss.feed'))
# Import the login decorator if needed
# from changedetectionio.auth_decorator import login_optionally_required
@rss_blueprint.route("", methods=['GET'])
def feed():
now = time.time()
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
if rss_url_token != app_rss_token:
return "Access denied, bad token", 403
from changedetectionio import diff
limit_tag = request.args.get('tag', '').lower().strip()
# Be sure limit_tag is a uuid
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
if limit_tag == tag.get('title', '').lower().strip():
limit_tag = uuid
# Sort by last_changed and add the uuid which is usually the key..
sorted_watches = []
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
for uuid, watch in datastore.data['watching'].items():
# @todo tag notification_muted skip also (improve Watch model)
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
continue
if limit_tag and not limit_tag in watch['tags']:
continue
watch['uuid'] = uuid
sorted_watches.append(watch)
sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
fg = FeedGenerator()
fg.title('changedetection.io')
fg.description('Feed description')
fg.link(href='https://changedetection.io')
html_colour_enable = False
if datastore.data['settings']['application'].get('rss_content_format') == 'html':
html_colour_enable = True
for watch in sorted_watches:
dates = list(watch.history.keys())
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
if len(dates) < 2:
continue
if not watch.viewed:
# Re #239 - GUID needs to be individual for each event
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
guid = "{}/{}".format(watch['uuid'], watch.last_changed)
fe = fg.add_entry()
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
# Description is the page you watch, link takes you to the diff JS UI page
# Dict val base_url will get overriden with the env var if it is set.
ext_base_url = datastore.data['settings']['application'].get('active_base_url')
# @todo fix
# Because we are called via whatever web server, flask should figure out the right path (
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
fe.link(link=diff_link)
# Same logic as watch-overview.html
if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
watch_label = watch.label
else:
watch_label = watch.get('url')
fe.title(title=watch_label)
try:
html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
include_equal=False,
line_feed_sep="<br>"
)
requested_output_format = 'htmlcolor' if html_colour_enable else 'html'
html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format)
except FileNotFoundError as e:
html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."
# @todo Make this configurable and also consider html-colored markup
# @todo User could decide if <link> goes to the diff page, or to the watch link
rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link)
# Out of range chars could also break feedgen
if scan_invalid_chars_in_rss(content):
content = clean_entry_content(content)
fe.content(content=content, type='CDATA')
fe.guid(guid, permalink=False)
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
dt = dt.replace(tzinfo=pytz.UTC)
fe.pubDate(dt)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
return response
return rss_blueprint
@@ -1,101 +0,0 @@
from flask import make_response, request, url_for, redirect
from feedgen.feed import FeedGenerator
from loguru import logger
import datetime
import pytz
import time
from ._util import generate_watch_guid, generate_watch_diff_content
def construct_main_feed_routes(rss_blueprint, datastore):
"""
Construct the main RSS feed routes.
Args:
rss_blueprint: The Flask blueprint to add routes to
datastore: The ChangeDetectionStore instance
"""
# Some RSS reader situations ended up with rss/ (forward slash after RSS) due
# to some earlier blueprint rerouting work, it should goto feed.
@rss_blueprint.route("/", methods=['GET'])
def extraslash():
return redirect(url_for('rss.feed'))
# Import the login decorator if needed
# from changedetectionio.auth_decorator import login_optionally_required
@rss_blueprint.route("", methods=['GET'])
def feed():
now = time.time()
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
if rss_url_token != app_rss_token:
return "Access denied, bad token", 403
limit_tag = request.args.get('tag', '').lower().strip()
# Be sure limit_tag is a uuid
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
if limit_tag == tag.get('title', '').lower().strip():
limit_tag = uuid
# Sort by last_changed and add the uuid which is usually the key..
sorted_watches = []
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
for uuid, watch in datastore.data['watching'].items():
# @todo tag notification_muted skip also (improve Watch model)
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
continue
if limit_tag and not limit_tag in watch['tags']:
continue
watch['uuid'] = uuid
sorted_watches.append(watch)
sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
fg = FeedGenerator()
fg.title('changedetection.io')
fg.description('Feed description')
fg.link(href='https://changedetection.io')
for watch in sorted_watches:
dates = list(watch.history.keys())
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
if len(dates) < 2:
continue
if not watch.viewed:
# Re #239 - GUID needs to be individual for each event
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
guid = generate_watch_guid(watch)
fe = fg.add_entry()
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
# Description is the page you watch, link takes you to the diff JS UI page
# Dict val base_url will get overriden with the env var if it is set.
ext_base_url = datastore.data['settings']['application'].get('active_base_url')
# @todo fix
# Because we are called via whatever web server, flask should figure out the right path (
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
fe.link(link=diff_link)
content, watch_label = generate_watch_diff_content(watch, dates, rss_content_format, datastore)
fe.title(title=watch_label)
fe.content(content=content, type='CDATA')
fe.guid(guid, permalink=False)
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
dt = dt.replace(tzinfo=pytz.UTC)
fe.pubDate(dt)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
return response
@@ -1,71 +0,0 @@
from flask import make_response, request, url_for
from feedgen.feed import FeedGenerator
import datetime
import pytz
from ._util import generate_watch_guid, generate_watch_diff_content
def construct_single_watch_routes(rss_blueprint, datastore):
"""
Construct RSS feed routes for single watches.
Args:
rss_blueprint: The Flask blueprint to add routes to
datastore: The ChangeDetectionStore instance
"""
@rss_blueprint.route("/watch/<string:uuid>", methods=['GET'])
def rss_single_watch(uuid):
"""
Display the most recent change for a single watch as RSS feed.
Returns RSS XML with a single entry showing the diff between the last two snapshots.
"""
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
if rss_url_token != app_rss_token:
return "Access denied, bad token", 403
# Get the watch by UUID
watch = datastore.data['watching'].get(uuid)
if not watch:
return f"Watch with UUID {uuid} not found", 404
# Check if watch has at least 2 history snapshots
dates = list(watch.history.keys())
if len(dates) < 2:
return f"Watch {uuid} does not have enough history snapshots to show changes (need at least 2)", 400
# Add uuid to watch for proper functioning
watch['uuid'] = uuid
# Generate the diff content using the shared helper function
content, watch_label = generate_watch_diff_content(watch, dates, rss_content_format, datastore)
# Create RSS feed with single entry
fg = FeedGenerator()
fg.title(f'changedetection.io - {watch.label}')
fg.description('Changes')
fg.link(href='https://changedetection.io')
# Add single entry for this watch
guid = generate_watch_guid(watch)
fe = fg.add_entry()
# Include a link to the diff page
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
fe.link(link=diff_link)
fe.title(title=watch_label)
fe.content(content=content, type='CDATA')
fe.guid(guid, permalink=False)
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
dt = dt.replace(tzinfo=pytz.UTC)
fe.pubDate(dt)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
return response
-86
View File
@@ -1,86 +0,0 @@
from flask import make_response, request, url_for
from feedgen.feed import FeedGenerator
import datetime
import pytz
from ._util import generate_watch_guid, generate_watch_diff_content
def construct_tag_routes(rss_blueprint, datastore):
"""
Construct RSS feed routes for tags.
Args:
rss_blueprint: The Flask blueprint to add routes to
datastore: The ChangeDetectionStore instance
"""
@rss_blueprint.route("/tag/<string:tag_uuid>", methods=['GET'])
def rss_tag_feed(tag_uuid):
"""
Display an RSS feed for all unviewed watches that belong to a specific tag.
Returns RSS XML with entries for each unviewed watch with sufficient history.
"""
# Always requires token set
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
rss_url_token = request.args.get('token')
rss_content_format = datastore.data['settings']['application'].get('rss_content_format')
if rss_url_token != app_rss_token:
return "Access denied, bad token", 403
# Verify tag exists
tag = datastore.data['settings']['application'].get('tags', {}).get(tag_uuid)
if not tag:
return f"Tag with UUID {tag_uuid} not found", 404
tag_title = tag.get('title', 'Unknown Tag')
# Create RSS feed
fg = FeedGenerator()
fg.title(f'changedetection.io - {tag_title}')
fg.description(f'Changes for watches tagged with {tag_title}')
fg.link(href='https://changedetection.io')
# Find all watches with this tag
for uuid, watch in datastore.data['watching'].items():
# Skip if watch doesn't have this tag
if tag_uuid not in watch.get('tags', []):
continue
# Skip muted watches if configured
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
continue
# Check if watch has at least 2 history snapshots
dates = list(watch.history.keys())
if len(dates) < 2:
continue
# Only include unviewed watches
if not watch.viewed:
# Add uuid to watch for proper functioning
watch['uuid'] = uuid
# Generate GUID for this entry
guid = generate_watch_guid(watch)
fe = fg.add_entry()
# Include a link to the diff page
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
fe.link(link=diff_link)
# Generate diff content
content, watch_label = generate_watch_diff_content(watch, dates, rss_content_format, datastore)
fe.title(title=watch_label)
fe.content(content=content, type='CDATA')
fe.guid(guid, permalink=False)
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
dt = dt.replace(tzinfo=pytz.UTC)
fe.pubDate(dt)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
return response
@@ -43,6 +43,10 @@
</div>
</div>
</div>
<div class="pure-control-group">
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
</div>
<div class="pure-control-group">
{{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
<span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
@@ -82,7 +86,7 @@
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
<span class="pure-form-message-inline">When watching RSS/Atom feeds, convert them into clean text for better change detection.</span>
<span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
</div>
</div>
</fieldset>
@@ -127,10 +131,6 @@
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
</div>
<div class="pure-control-group">
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
</div>
<div class="pure-control-group">
{{ render_field(form.requests.form.timeout) }}
<span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br>
+3 -4
View File
@@ -21,10 +21,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags'])
output = render_template("groups-overview.html",
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
available_tags=sorted_tags,
form=add_form,
tag_count=tag_count,
tag_count=tag_count
)
return output
@@ -150,9 +149,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
included_content = template.render(**template_args)
output = render_template("edit-tag.html",
extra_form_content=included_content,
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
settings_application=datastore.data['settings']['application'],
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
extra_form_content=included_content,
**template_args
)
@@ -52,7 +52,6 @@
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">Edit</a>&nbsp;
<a class="pure-button pure-button-primary" href="{{ url_for('tags.delete', uuid=uuid) }}" title="Deletes and removes tag">Delete</a>
<a class="pure-button pure-button-primary" href="{{ url_for('tags.unlink', uuid=uuid) }}" title="Keep the tag but unlink any watches">Unlink</a>
<a href="{{ url_for('rss.rss_tag_feed', tag_uuid=uuid, token=app_rss_token)}}"><img alt="RSS Feed for this watch" style="padding-left: 1em;" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
</td>
</tr>
{% endfor %}
+1 -6
View File
@@ -236,7 +236,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# Import the global plugin system
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras
app_rss_token = datastore.data['settings']['application'].get('rss_access_token'),
template_args = {
'available_processors': processors.available_processors(),
'available_timezones': sorted(available_timezones()),
@@ -252,11 +252,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
'jq_support': jq_support,
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
'app_rss_token': app_rss_token,
'rss_uuid_feed' : {
'label': watch.label,
'url': url_for('rss.rss_single_watch', uuid=watch['uuid'], token=app_rss_token)
},
'settings_application': datastore.data['settings']['application'],
'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
+7 -42
View File
@@ -2,7 +2,7 @@ from flask import Blueprint, request, make_response
import random
from loguru import logger
from changedetectionio.notification_service import NotificationContextData, set_basic_notification_vars
from changedetectionio.notification_service import NotificationContextData
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
@@ -39,7 +39,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400)
watch = datastore.data['watching'].get(watch_uuid)
notification_urls = request.form.get('notification_urls','').strip().splitlines()
notification_urls = None
if request.form.get('notification_urls'):
notification_urls = request.form['notification_urls'].strip().splitlines()
if not notification_urls:
logger.debug("Test notification - Trying by group/tag in the edit form if available")
@@ -77,8 +81,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
# Only use if present, if not set in n_object it should use the default system value
if 'notification_format' in request.form and request.form['notification_format'].strip():
n_object['notification_format'] = request.form.get('notification_format', '').strip()
else:
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
if 'notification_title' in request.form and request.form['notification_title'].strip():
n_object['notification_title'] = request.form.get('notification_title', '').strip()
@@ -95,44 +97,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
n_object['notification_body'] = "Test body"
n_object['as_async'] = False
# Same like in notification service, should be refactored
dates = []
trigger_text = ''
snapshot_contents = ''
if watch:
watch_history = watch.history
dates = list(watch_history.keys())
trigger_text = watch.get('trigger_text', [])
# Add text that was triggered
if len(dates):
snapshot_contents = watch.get_history_snapshot(timestamp=dates[-1])
else:
snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
if len(trigger_text):
from . import html_tools
triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
if triggered_text:
triggered_text = '\n'.join(triggered_text)
# Could be called as a 'test notification' with only 1 snapshot available
prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
if len(dates) > 1:
prev_snapshot = watch.get_history_snapshot(timestamp=dates[-2])
current_snapshot = watch.get_history_snapshot(timestamp=dates[-1])
n_object.update(set_basic_notification_vars(snapshot_contents=snapshot_contents,
current_snapshot=current_snapshot,
prev_snapshot=prev_snapshot,
watch=watch,
triggered_text=trigger_text))
n_object.update(watch.extra_notification_token_values())
sent_obj = process_notification(n_object, datastore)
except Exception as e:
@@ -476,7 +476,6 @@ Math: {{ 1 + 1 }}") }}
class="pure-button button-error">Clear History</a>{% endif %}
<a href="{{url_for('ui.form_clone', uuid=uuid)}}"
class="pure-button">Clone &amp; Edit</a>
<a href="{{ url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token)}}"><img alt="RSS Feed for this watch" style="padding: .5em 1em;" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
</div>
</div>
</form>
+1 -1
View File
@@ -47,7 +47,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
try:
versions = list(watch.history.keys())
content = watch.get_history_snapshot(timestamp=timestamp)
content = watch.get_history_snapshot(timestamp)
triggered_line_numbers = html_tools.strip_ignore_text(content=content,
wordlist=watch['trigger_text'],
@@ -14,7 +14,7 @@ def count_words_in_history(watch, incoming_text=None):
elif watch.history.keys():
# When called from UI extras to count latest snapshot
latest_key = list(watch.history.keys())[-1]
latest_content = watch.get_history_snapshot(timestamp=latest_key)
latest_content = watch.get_history_snapshot(latest_key)
return len(latest_content.split())
return 0
except Exception as e:
@@ -139,7 +139,7 @@ class fetcher(Fetcher):
content = await self.page.content()
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
logger.debug(f"Saving step HTML to {destination}")
with open(destination, 'w', encoding='utf-8') as f:
with open(destination, 'w') as f:
f.write(content)
async def run(self,
+4 -9
View File
@@ -101,12 +101,12 @@ def init_app_secret(datastore_path):
path = os.path.join(datastore_path, "secret.txt")
try:
with open(path, "r", encoding='utf-8') as f:
with open(path, "r") as f:
secret = f.read()
except FileNotFoundError:
import secrets
with open(path, "w", encoding='utf-8') as f:
with open(path, "w") as f:
secret = secrets.token_hex(32)
f.write(secret)
@@ -794,19 +794,15 @@ def ticker_thread_check_time_launch_checks():
# @todo - Maybe make this a hook?
# Time schedule limit - Decide between watch or global settings
scheduler_source = None
if watch.get('time_between_check_use_default'):
time_schedule_limit = datastore.data['settings']['requests'].get('time_schedule_limit', {})
scheduler_source = 'system/global settings'
logger.trace(f"{uuid} Time scheduler - Using system/global settings")
else:
time_schedule_limit = watch.get('time_schedule_limit')
scheduler_source = 'watch'
logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)")
tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
if time_schedule_limit and time_schedule_limit.get('enabled'):
logger.trace(f"{uuid} Time scheduler - Using scheduler settings from {scheduler_source}")
try:
result = is_within_schedule(time_schedule_limit=time_schedule_limit,
default_tz=tz_name
@@ -818,7 +814,6 @@ def ticker_thread_check_time_launch_checks():
logger.error(
f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}")
return False
# If they supplied an individual entry minutes to threshold.
threshold = recheck_time_system_seconds if watch.get('time_between_check_use_default') else watch.threshold_seconds()
+2 -4
View File
@@ -503,9 +503,7 @@ class ValidateJinja2Template(object):
jinja2_env = create_jinja_env(loader=BaseLoader)
# Add notification tokens for validation
static_token_placeholders = NotificationContextData()
static_token_placeholders.set_random_for_validation()
jinja2_env.globals.update(static_token_placeholders)
jinja2_env.globals.update(NotificationContextData())
if hasattr(field, 'extra_notification_tokens'):
jinja2_env.globals.update(field.extra_notification_tokens)
@@ -1000,7 +998,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
validators=[validators.NumberRange(min=0,
message="Should be atleast zero (disabled)")])
rss_content_format = SelectField('RSS Content format', choices=list(RSS_FORMAT_TYPES.items()))
rss_content_format = SelectField('RSS Content format', choices=RSS_FORMAT_TYPES)
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
+3 -3
View File
@@ -1,7 +1,7 @@
from os import getenv
from copy import deepcopy
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES, RSS_CONTENT_FORMAT_DEFAULT
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
from changedetectionio.notification import (
default_notification_body,
@@ -54,7 +54,7 @@ class model(dict):
'password': False,
'render_anchor_tag_content': False,
'rss_access_token': None,
'rss_content_format': RSS_CONTENT_FORMAT_DEFAULT,
'rss_content_format': RSS_FORMAT_TYPES[0][0],
'rss_hide_muted_watches': True,
'rss_reader_mode': False,
'scheduler_timezone_default': None, # Default IANA timezone name
@@ -81,7 +81,7 @@ class model(dict):
def parse_headers_from_text_file(filepath):
headers = {}
with open(filepath, 'r', encoding='utf-8') as f:
with open(filepath, 'r') as f:
for l in f.readlines():
l = l.strip()
if not l.startswith('#') and ':' in l:
+7 -15
View File
@@ -188,7 +188,7 @@ class model(watch_base):
fname = os.path.join(self.watch_data_dir, "history.txt")
if os.path.isfile(fname):
logger.debug(f"Reading watch history index for {self.get('uuid')}")
with open(fname, "r", encoding='utf-8') as f:
with open(fname, "r") as f:
for i in f.readlines():
if ',' in i:
k, v = i.strip().split(',', 2)
@@ -276,17 +276,9 @@ class model(watch_base):
# When the 'last viewed' timestamp is less than the oldest snapshot, return oldest
return sorted_keys[-1]
def get_history_snapshot(self, timestamp=None, filepath=None):
"""
Accepts either timestamp or filepath
:param timestamp:
:param filepath:
:return:
"""
def get_history_snapshot(self, timestamp):
import brotli
if not filepath:
filepath = self.history[timestamp]
filepath = self.history[timestamp]
# See if a brotli versions exists and switch to that
if not filepath.endswith('.br') and os.path.isfile(f"{filepath}.br"):
@@ -390,7 +382,7 @@ class model(watch_base):
# Compare each lines (set) against each history text file (set) looking for something new..
existing_history = set({})
for k, v in self.history.items():
content = self.get_history_snapshot(filepath=v)
content = self.get_history_snapshot(k)
if ignore_whitespace:
alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()])
@@ -594,7 +586,7 @@ class model(watch_base):
"""Return the text saved from a previous request that resulted in a non-200 error"""
fname = os.path.join(self.watch_data_dir, "last-error.txt")
if os.path.isfile(fname):
with open(fname, 'r', encoding='utf-8') as f:
with open(fname, 'r') as f:
return f.read()
return False
@@ -647,7 +639,7 @@ class model(watch_base):
for k, fname in self.history.items():
if os.path.isfile(fname):
if True:
contents = self.get_history_snapshot(timestamp=k)
contents = self.get_history_snapshot(k)
res = re.findall(regex, contents, re.MULTILINE)
if res:
if not csv_writer:
@@ -740,7 +732,7 @@ class model(watch_base):
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
dates = list(self.history.keys())
if len(dates):
return self.get_history_snapshot(timestamp=dates[-1])
return self.get_history_snapshot(dates[-1])
else:
return ''
@@ -35,7 +35,7 @@ def as_monospaced_html_email(content: str, title: str) -> str:
</head>
<body style="-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;">
<pre role="article" aria-roledescription="email" lang="en"
style="font-family: monospace, 'Courier New', Courier; font-size: 0.9rem;
style="font-family: monospace, 'Courier New', Courier; font-size: 0.8em;
white-space: pre-wrap; word-break: break-word;">{content}</pre>
</body>
</html>"""
+63 -91
View File
@@ -5,15 +5,13 @@ from apprise import NotifyFormat
from loguru import logger
from urllib.parse import urlparse
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
from .apprise_plugin.custom_handlers import SUPPORTED_HTTP_METHODS
from .email_helpers import as_monospaced_html_email
from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, ADDED_PLACEMARKER_OPEN, HTML_ADDED_STYLE, \
ADDED_PLACEMARKER_CLOSED, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, CHANGED_PLACEMARKER_OPEN, \
CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE
import re
from ..notification_service import NotificationContextData, CUSTOM_LINEBREAK_PLACEHOLDER
from ..notification_service import NotificationContextData
newline_re = re.compile(r'\r\n|\r|\n')
def markup_text_links_to_html(body):
@@ -129,66 +127,8 @@ def apply_standard_markdown_to_body(n_body):
return n_body
def replace_placemarkers_in_text(text, url, requested_output_format):
"""
Replace diff placemarkers in text based on the URL service type and requested output format.
Used for both notification title and body to ensure consistent placeholder replacement.
:param text: The text to process
:param url: The notification URL (to detect service type)
:param requested_output_format: The output format (html, htmlcolor, markdown, text, etc.)
:return: Processed text with placemarkers replaced
"""
if not text:
return text
if url.startswith('tgram://'):
# Telegram only supports a limited subset of HTML
# Use strikethrough for removed content, bold for added content
text = text.replace(REMOVED_PLACEMARKER_OPEN, '<s>')
text = text.replace(REMOVED_PLACEMARKER_CLOSED, '</s>')
text = text.replace(ADDED_PLACEMARKER_OPEN, '<b>')
text = text.replace(ADDED_PLACEMARKER_CLOSED, '</b>')
# Handle changed/replaced lines (old → new)
text = text.replace(CHANGED_PLACEMARKER_OPEN, '<s>')
text = text.replace(CHANGED_PLACEMARKER_CLOSED, '</s>')
text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>')
text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>')
elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
or url.startswith('https://discord.com/api')) and requested_output_format == 'html':
# Discord doesn't support HTML, use Discord markdown
text = apply_discord_markdown_to_body(n_body=text)
elif requested_output_format == 'htmlcolor':
# https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
text = text.replace(REMOVED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">')
text = text.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
text = text.replace(ADDED_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">')
text = text.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
# Handle changed/replaced lines (old → new)
text = text.replace(CHANGED_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">')
text = text.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">')
text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
elif requested_output_format == 'markdown':
# Markdown to HTML - Apprise will convert this to HTML
text = apply_standard_markdown_to_body(n_body=text)
else:
# plaintext, html, and default - use simple text markers
text = text.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
text = text.replace(REMOVED_PLACEMARKER_CLOSED, '')
text = text.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
text = text.replace(ADDED_PLACEMARKER_CLOSED, '')
text = text.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
text = text.replace(CHANGED_PLACEMARKER_CLOSED, f'')
text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
return text
def apply_service_tweaks(url, n_body, n_title, requested_output_format):
logger.debug(f"Applying markup in '{requested_output_format}' mode")
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
# Because different notifications may require different pre-processing, run each sequentially :(
# 2000 bytes minus -
@@ -198,12 +138,6 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
if not n_body or not n_body.strip():
return url, n_body, n_title
# Normalize URL scheme to lowercase to prevent case-sensitivity issues
# e.g., "Discord://webhook" -> "discord://webhook", "TGRAM://bot123" -> "tgram://bot123"
scheme_separator_pos = url.find('://')
if scheme_separator_pos > 0:
url = url[:scheme_separator_pos].lower() + url[scheme_separator_pos:]
# So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
parsed = urlparse(url)
k = '?' if not parsed.query else '&'
@@ -215,22 +149,24 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
and not url.startswith('put'):
url += k + f"avatar_url={APPRISE_AVATAR_URL}"
# Replace placemarkers in title first (this was the missing piece causing the bug)
# Titles are ALWAYS plain text across all notification services (Discord embeds, Slack attachments,
# email Subject headers, etc.), so we always use 'text' format for title placemarker replacement
# Looking over apprise library it seems that all plugins only expect plain-text.
n_title = replace_placemarkers_in_text(n_title, url, 'text')
if url.startswith('tgram://'):
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
# re https://github.com/dgtlmoon/changedetection.io/issues/555
# @todo re-use an existing library we have already imported to strip all non-allowed tags
n_body = n_body.replace('<br>', '\n')
n_body = n_body.replace('</br>', '\n')
n_body = newline_re.sub('\n', n_body)
n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n')
# Replace placemarkers for body
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
# Use strikethrough for removed content, bold for added content
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '<s>')
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '</s>')
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '<b>')
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '</b>')
# Handle changed/replaced lines (old → new)
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, '<s>')
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, '</s>')
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>')
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>')
# real limit is 4096, but minus some for extra metadata
payload_max_size = 3600
@@ -244,7 +180,7 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
# Discord doesn't support HTML, replace <br> with newlines
n_body = n_body.strip().replace('<br>', '\n')
n_body = n_body.replace('</br>', '\n')
n_body = newline_re.sub('\n', n_body)
n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n')
# Don't replace placeholders or truncate here - let the custom Discord plugin handle it
# The plugin will use embeds (6000 char limit across all embeds) if placeholders are present,
@@ -254,7 +190,7 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
if requested_output_format == 'html':
# No diff placeholders, use Discord markdown for any other formatting
# Use Discord markdown: strikethrough for removed, bold for added
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
n_body = apply_discord_markdown_to_body(n_body=n_body)
# Apply 2000 char limit for plain content
payload_max_size = 1700
@@ -265,17 +201,40 @@ def apply_service_tweaks(url, n_body, n_title, requested_output_format):
# Is not discord/tgram and they want htmlcolor
elif requested_output_format == 'htmlcolor':
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
n_body = newline_re.sub('<br>\n', n_body)
# https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">')
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">')
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
# Handle changed/replaced lines (old → new)
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">')
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">')
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n')
elif requested_output_format == 'html':
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
n_body = newline_re.sub('<br>\n', n_body)
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '')
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '')
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'')
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n')
elif requested_output_format == 'markdown':
# Markdown to HTML - Apprise will convert this to HTML
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
n_body = apply_standard_markdown_to_body(n_body=n_body)
else: #plaintext etc default
n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '')
n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '')
n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'')
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
return url, n_body, n_title
@@ -336,18 +295,24 @@ def process_notification(n_object: NotificationContextData, datastore):
with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs):
for url in n_object['notification_urls']:
# Get the notification body from datastore
n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters)
n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters)
if n_object.get('markup_text_links_to_html_links'):
n_body = markup_text_links_to_html(body=n_body)
n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters)
url = url.strip()
if not url or url.startswith('#'):
logger.debug(f"Skipping commented out or empty notification URL - '{url}'")
if url.startswith('#'):
logger.trace(f"Skipping commented out notification URL - {url}")
continue
logger.info(f">> Process Notification: AppRise start notifying '{url}'")
if not url:
logger.warning(f"Process Notification: skipping empty notification URL.")
continue
logger.info(f">> Process Notification: AppRise notifying {url}")
url = jinja_render(template_str=url, **notification_parameters)
# If it's a plaintext document, and they want HTML type email/alerts, so it needs to be escaped
@@ -388,18 +353,25 @@ def process_notification(n_object: NotificationContextData, datastore):
requested_output_format = NotifyFormat.HTML.value
apprise_input_format = NotifyFormat.HTML.value # Changed from MARKDOWN to HTML
# Could have arrived at any stage, so we dont end up running .escape on it
if 'html' in requested_output_format:
n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n')
else:
# texty types
n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n')
else:
# ?format was IN the apprise URL, they are kind of on their own here, we will try our best
if 'format=html' in url:
n_body = newline_re.sub('<br>\r\n', n_body)
n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n')
# This will also prevent apprise from doing conversion
apprise_input_format = NotifyFormat.HTML.value
requested_output_format = NotifyFormat.HTML.value
elif 'format=text' in url:
n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n')
apprise_input_format = NotifyFormat.TEXT.value
requested_output_format = NotifyFormat.TEXT.value
sent_objs.append({'title': n_title,
'body': n_body,
'url': url})
+27 -45
View File
@@ -9,8 +9,11 @@ for both sync and async workers
from loguru import logger
import time
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from changedetectionio.notification import default_notification_format, valid_notification_formats
# This gets modified on notification time (handler.py) depending on the required notification output
CUSTOM_LINEBREAK_PLACEHOLDER='@BR@'
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
@@ -20,14 +23,10 @@ class NotificationContextData(dict):
'base_url': None,
'current_snapshot': None,
'diff': None,
'diff_clean': None,
'diff_added': None,
'diff_added_clean': None,
'diff_full': None,
'diff_full_clean': None,
'diff_patch': None,
'diff_removed': None,
'diff_removed_clean': None,
'diff_url': None,
'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
'notification_timestamp': time.time(),
@@ -72,38 +71,6 @@ class NotificationContextData(dict):
super().__setitem__(key, value)
def set_basic_notification_vars(snapshot_contents, current_snapshot, prev_snapshot, watch, triggered_text):
now = time.time()
from changedetectionio import diff
n_object = {
'current_snapshot': snapshot_contents,
'diff': diff.render_diff(prev_snapshot, current_snapshot),
'diff_clean': diff.render_diff(prev_snapshot, current_snapshot, include_change_type_prefix=False),
'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False),
'diff_added_clean': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, include_change_type_prefix=False),
'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True),
'diff_full_clean': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, include_change_type_prefix=False),
'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, patch_format=True),
'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False),
'diff_removed_clean': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, include_change_type_prefix=False),
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
'triggered_text': triggered_text,
'uuid': watch.get('uuid') if watch else None,
'watch_url': watch.get('url') if watch else None,
'watch_uuid': watch.get('uuid') if watch else None,
'watch_mime_type': watch.get('content-type')
}
# The \n's in the content from the above will get converted to <br> etc depending on the notification format
if watch:
n_object.update(watch.extra_notification_token_values())
logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time() - now:.3f}s")
return n_object
class NotificationService:
"""
Standalone notification service that handles all notification functionality
@@ -118,6 +85,7 @@ class NotificationService:
"""
Queue a notification for a watch with full diff rendering and template variables
"""
from changedetectionio import diff
from changedetectionio.notification import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
if not isinstance(n_object, NotificationContextData):
@@ -126,6 +94,8 @@ class NotificationService:
dates = []
trigger_text = ''
now = time.time()
if watch:
watch_history = watch.history
dates = list(watch_history.keys())
@@ -133,7 +103,7 @@ class NotificationService:
# Add text that was triggered
if len(dates):
snapshot_contents = watch.get_history_snapshot(timestamp=dates[-1])
snapshot_contents = watch.get_history_snapshot(dates[-1])
else:
snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
@@ -147,23 +117,35 @@ class NotificationService:
from . import html_tools
triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
if triggered_text:
triggered_text = '\n'.join(triggered_text)
triggered_text = CUSTOM_LINEBREAK_PLACEHOLDER.join(triggered_text)
# Could be called as a 'test notification' with only 1 snapshot available
prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
if len(dates) > 1:
prev_snapshot = watch.get_history_snapshot(timestamp=dates[-2])
current_snapshot = watch.get_history_snapshot(timestamp=dates[-1])
prev_snapshot = watch.get_history_snapshot(dates[-2])
current_snapshot = watch.get_history_snapshot(dates[-1])
n_object.update({
'current_snapshot': snapshot_contents,
'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER),
'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER),
'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER),
'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER, patch_format=True),
'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER),
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
'triggered_text': triggered_text,
'uuid': watch.get('uuid') if watch else None,
'watch_url': watch.get('url') if watch else None,
'watch_uuid': watch.get('uuid') if watch else None,
'watch_mime_type': watch.get('content-type')
})
n_object.update(set_basic_notification_vars(snapshot_contents=snapshot_contents,
current_snapshot=current_snapshot,
prev_snapshot=prev_snapshot,
watch=watch,
triggered_text=triggered_text))
if watch:
n_object.update(watch.extra_notification_token_values())
logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s")
logger.debug("Queued notification for sending")
self.notification_q.put(n_object)
@@ -280,7 +280,7 @@ class ContentProcessor:
# Sort JSON to avoid false alerts from reordering
try:
content = json.dumps(json.loads(content), sort_keys=True, indent=2, ensure_ascii=False)
content = json.dumps(json.loads(content), sort_keys=True, indent=4)
except Exception:
# Might be malformed JSON, continue anyway
pass
@@ -37,6 +37,18 @@ class SignalHandler:
notification_event_signal.connect(self.handle_notification_event, weak=False)
logger.info("SignalHandler: Connected to notification_event signal")
# Create and start the queue update thread using standard threading
import threading
self.polling_emitter_thread = threading.Thread(
target=self.polling_emit_running_or_queued_watches_threaded,
daemon=True
)
self.polling_emitter_thread.start()
logger.info("Started polling thread using threading (eventlet-free)")
# Store the thread reference in socketio for clean shutdown
self.socketio_instance.polling_emitter_thread = self.polling_emitter_thread
def handle_signal(self, *args, **kwargs):
logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs")
# Safely extract the watch UUID from kwargs
@@ -112,6 +124,74 @@ class SignalHandler:
except Exception as e:
logger.error(f"Socket.IO error in handle_notification_event: {str(e)}")
def polling_emit_running_or_queued_watches_threaded(self):
"""Threading version of polling for Windows compatibility"""
import time
import threading
logger.info("Queue update thread started (threading mode)")
# Import here to avoid circular imports
from changedetectionio.flask_app import app
from changedetectionio import worker_handler
watch_check_update = signal('watch_check_update')
# Track previous state to avoid unnecessary emissions
previous_running_uuids = set()
# Run until app shutdown - check exit flag more frequently for fast shutdown
exit_event = getattr(app.config, 'exit', threading.Event())
while not exit_event.is_set():
try:
# Get current running UUIDs from async workers
running_uuids = set(worker_handler.get_running_uuids())
# Only send updates for UUIDs that changed state
newly_running = running_uuids - previous_running_uuids
no_longer_running = previous_running_uuids - running_uuids
# Send updates for newly running UUIDs (but exit fast if shutdown requested)
for uuid in newly_running:
if exit_event.is_set():
break
logger.trace(f"Threading polling: UUID {uuid} started processing")
with app.app_context():
watch_check_update.send(app_context=app, watch_uuid=uuid)
time.sleep(0.01) # Small yield
# Send updates for UUIDs that finished processing (but exit fast if shutdown requested)
if not exit_event.is_set():
for uuid in no_longer_running:
if exit_event.is_set():
break
logger.trace(f"Threading polling: UUID {uuid} finished processing")
with app.app_context():
watch_check_update.send(app_context=app, watch_uuid=uuid)
time.sleep(0.01) # Small yield
# Update tracking for next iteration
previous_running_uuids = running_uuids
# Sleep between polling cycles, but check exit flag every 0.5 seconds for fast shutdown
for _ in range(20): # 20 * 0.5 = 10 seconds total
if exit_event.is_set():
break
time.sleep(0.5)
except Exception as e:
logger.error(f"Error in threading polling: {str(e)}")
# Even during error recovery, check for exit quickly
for _ in range(1): # 1 * 0.5 = 0.5 seconds
if exit_event.is_set():
break
time.sleep(0.5)
# Check if we're in pytest environment - if so, be more gentle with logging
import sys
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
if not in_pytest:
logger.info("Queue update thread stopped (threading mode)")
def handle_watch_update(socketio, **kwargs):
@@ -303,6 +383,19 @@ def init_socketio(app, datastore):
"""Shutdown the SocketIO server fast and aggressively"""
try:
logger.info("Socket.IO: Fast shutdown initiated...")
# For threading mode, give the thread a very short time to exit gracefully
if hasattr(socketio, 'polling_emitter_thread'):
if socketio.polling_emitter_thread.is_alive():
logger.info("Socket.IO: Waiting 1 second for polling thread to stop...")
socketio.polling_emitter_thread.join(timeout=1.0) # Only 1 second timeout
if socketio.polling_emitter_thread.is_alive():
logger.info("Socket.IO: Polling thread still running after timeout - continuing with shutdown")
else:
logger.info("Socket.IO: Polling thread stopped quickly")
else:
logger.info("Socket.IO: Polling thread already stopped")
logger.info("Socket.IO: Fast shutdown complete")
except Exception as e:
logger.error(f"Socket.IO error during shutdown: {str(e)}")
-53
View File
@@ -11,56 +11,6 @@ set -e
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
# Since theres no curl installed lets roll with python3
check_sanity() {
local port="$1"
if [ -z "$port" ]; then
echo "Usage: check_sanity <port>" >&2
return 1
fi
python3 - "$port" <<'PYCODE'
import sys, time, urllib.request, socket
port = sys.argv[1]
url = f'http://localhost:{port}'
ok = False
for _ in range(6): # --retry 6
try:
r = urllib.request.urlopen(url, timeout=3).read().decode()
if 'est-url-is-sanity' in r:
ok = True
break
except (urllib.error.URLError, ConnectionRefusedError, socket.error):
time.sleep(1)
sys.exit(0 if ok else 1)
PYCODE
}
data_sanity_test () {
# Restart data sanity test
cd ..
TMPDIR=$(mktemp -d)
PORT_N=$((5000 + RANDOM % (6501 - 5000)))
./changedetection.py -p $PORT_N -d $TMPDIR -u "https://localhost?test-url-is-sanity=1" &
PID=$!
sleep 5
kill $PID
sleep 2
./changedetection.py -p $PORT_N -d $TMPDIR &
PID=$!
sleep 5
# On a restart the URL should still be there
check_sanity $PORT_N || exit 1
kill $PID
cd $OLDPWD
# datastore looks alright, continue
}
data_sanity_test
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -n 30 --dist load tests/test_*.py
@@ -91,6 +41,3 @@ FETCH_WORKERS=130 pytest tests/test_history_consistency.py -v -l
# Check file:// will pickup a file when enabled
echo "Hello world" > /tmp/test-file.txt
ALLOW_FILE_URI=yes pytest tests/test_security.py
+4 -4
View File
@@ -14,10 +14,10 @@ $(document).ready(function () {
e.preventDefault();
data = {
notification_urls: $('textarea.notification-urls').val(),
notification_title: $('input.notification-title').val(),
notification_body: $('textarea.notification-body').val(),
notification_format: $('select.notification-format').val(),
notification_body: $('#notification_body').val(),
notification_format: $('#notification_format').val(),
notification_title: $('#notification_title').val(),
notification_urls: $('.notification-urls').val(),
tags: $('#tags').val(),
window_url: window.location.href,
}
@@ -329,18 +329,12 @@ a.pure-button-selected {
.notifications-wrapper {
padding-top: 0.5rem;
#notification-test-log {
margin-top: 1rem;
padding: 1rem;
padding-top: 1rem;
white-space: pre-wrap;
word-break: break-word;
overflow-wrap: break-word;
max-width: 100%;
box-sizing: border-box;
max-height: 12rem;
overflow-y: scroll;
border: 1px solid var(--color-border-notification);
border-radius: 5px;
}
}
File diff suppressed because one or more lines are too long
+76 -148
View File
@@ -6,7 +6,6 @@ from flask import (
flash
)
from .blueprint.rss import RSS_CONTENT_FORMAT_DEFAULT
from .html_tools import TRANSLATE_WHITESPACE_TABLE
from .model import App, Watch, USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from copy import deepcopy, copy
@@ -23,13 +22,6 @@ import uuid as uuid_builder
from loguru import logger
from blinker import signal
# Try to import orjson for faster JSON serialization
try:
import orjson
HAS_ORJSON = True
except ImportError:
HAS_ORJSON = False
from .processors import get_custom_watch_obj_for_processor
from .processors.restock_diff import Restock
@@ -45,7 +37,7 @@ class ChangeDetectionStore:
lock = Lock()
# For general updates/writes that can wait a few seconds
needs_write = False
datastore_path = None
# For when we edit, we should write to disk
needs_write_urgent = False
@@ -55,30 +47,18 @@ class ChangeDetectionStore:
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
# Should only be active for docker
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
self.datastore_path = datastore_path
self.needs_write = False
self.start_time = time.time()
self.stop_thread = False
self.save_version_copy_json_db(version_tag)
self.reload_state(datastore_path=datastore_path, include_default_watches=include_default_watches, version_tag=version_tag)
def save_version_copy_json_db(self, version_tag):
import re
version_text = re.sub(r'\D+', '-', version_tag)
db_path = os.path.join(self.datastore_path, "url-watches.json")
db_path_version_backup = os.path.join(self.datastore_path, f"url-watches-{version_text}.json")
if not os.path.isfile(db_path_version_backup) and os.path.isfile(db_path):
from shutil import copyfile
logger.info(f"Backing up JSON DB due to new version to '{db_path_version_backup}'.")
copyfile(db_path, db_path_version_backup)
def reload_state(self, datastore_path, include_default_watches, version_tag):
logger.info(f"Datastore path is '{datastore_path}'")
self.__data = App.model()
self.datastore_path = datastore_path
self.json_store_path = os.path.join(self.datastore_path, "url-watches.json")
# Base definition for all watchers
# deepcopy part of #569 - not sure why its needed exactly
@@ -91,46 +71,37 @@ class ChangeDetectionStore:
self.__data['build_sha'] = f.read()
try:
if HAS_ORJSON:
# orjson.loads() expects UTF-8 encoded bytes #3611
with open(self.json_store_path, 'rb') as json_file:
from_disk = orjson.loads(json_file.read())
else:
with open(self.json_store_path, encoding='utf-8') as json_file:
from_disk = json.load(json_file)
# @todo retest with ", encoding='utf-8'"
with open(self.json_store_path) as json_file:
from_disk = json.load(json_file)
if not from_disk:
# No FileNotFound exception was thrown but somehow the JSON was empty - abort for safety.
logger.critical(f"JSON DB existed but was empty on load - empty JSON file? '{self.json_store_path}' Aborting")
raise Exception('JSON DB existed but was empty on load - Aborting')
# @todo isnt there a way todo this dict.update recursively?
# Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore.
if 'watching' in from_disk:
self.__data['watching'].update(from_disk['watching'])
# @todo isnt there a way todo this dict.update recursively?
# Problem here is if the one on the disk is missing a sub-struct, it wont be present anymore.
if 'watching' in from_disk:
self.__data['watching'].update(from_disk['watching'])
if 'app_guid' in from_disk:
self.__data['app_guid'] = from_disk['app_guid']
if 'app_guid' in from_disk:
self.__data['app_guid'] = from_disk['app_guid']
if 'settings' in from_disk:
if 'headers' in from_disk['settings']:
self.__data['settings']['headers'].update(from_disk['settings']['headers'])
if 'settings' in from_disk:
if 'headers' in from_disk['settings']:
self.__data['settings']['headers'].update(from_disk['settings']['headers'])
if 'requests' in from_disk['settings']:
self.__data['settings']['requests'].update(from_disk['settings']['requests'])
if 'requests' in from_disk['settings']:
self.__data['settings']['requests'].update(from_disk['settings']['requests'])
if 'application' in from_disk['settings']:
self.__data['settings']['application'].update(from_disk['settings']['application'])
if 'application' in from_disk['settings']:
self.__data['settings']['application'].update(from_disk['settings']['application'])
# Convert each existing watch back to the Watch.model object
for uuid, watch in self.__data['watching'].items():
self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch)
logger.info(f"Watching: {uuid} {watch['url']}")
# Convert each existing watch back to the Watch.model object
for uuid, watch in self.__data['watching'].items():
self.__data['watching'][uuid] = self.rehydrate_entity(uuid, watch)
logger.info(f"Watching: {uuid} {watch['url']}")
# And for Tags also, should be Restock type because it has extra settings
for uuid, tag in self.__data['settings']['application']['tags'].items():
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff')
logger.info(f"Tag: {uuid} {tag['title']}")
# And for Tags also, should be Restock type because it has extra settings
for uuid, tag in self.__data['settings']['application']['tags'].items():
self.__data['settings']['application']['tags'][uuid] = self.rehydrate_entity(uuid, tag, processor_override='restock_diff')
logger.info(f"Tag: {uuid} {tag['title']}")
# First time ran, Create the datastore.
except (FileNotFoundError):
@@ -290,8 +261,7 @@ class ChangeDetectionStore:
self.__data['watching'] = {}
time.sleep(1) # Mainly used for testing to allow all items to flush before running next test
for uuid in self.data['watching']:
path = pathlib.Path(
os.path.join(self.datastore_path, uuid))
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
if os.path.exists(path):
self.delete(uuid)
@@ -455,15 +425,9 @@ class ChangeDetectionStore:
# Re #286 - First write to a temp file, then confirm it looks OK and rename it
# This is a fairly basic strategy to deal with the case that the file is corrupted,
# system was out of memory, out of RAM etc
if HAS_ORJSON:
# Use orjson for faster serialization
# orjson.dumps() always returns UTF-8 encoded bytes #3611
with open(self.json_store_path+".tmp", 'wb') as json_file:
json_file.write(orjson.dumps(data, option=orjson.OPT_INDENT_2))
else:
# Fallback to standard json module
with open(self.json_store_path+".tmp", 'w', encoding='utf-8') as json_file:
json.dump(data, json_file, indent=2, ensure_ascii=False)
with open(self.json_store_path+".tmp", 'w') as json_file:
# Use compact JSON in production for better performance
json.dump(data, json_file, indent=2)
os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e:
logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")
@@ -525,13 +489,8 @@ class ChangeDetectionStore:
# Load from external config file
if path.isfile(proxy_list_file):
if HAS_ORJSON:
# orjson.loads() expects UTF-8 encoded bytes #3611
with open(os.path.join(self.datastore_path, "proxies.json"), 'rb') as f:
proxy_list = orjson.loads(f.read())
else:
with open(os.path.join(self.datastore_path, "proxies.json"), encoding='utf-8') as f:
proxy_list = json.load(f)
with open(os.path.join(self.datastore_path, "proxies.json")) as f:
proxy_list = json.load(f)
# Mapping from UI config if available
extras = self.data['settings']['requests'].get('extra_proxies')
@@ -776,28 +735,6 @@ class ChangeDetectionStore:
return updates_available
def add_notification_url(self, notification_url):
logger.debug(f">>> Adding new notification_url - '{notification_url}'")
notification_urls = self.data['settings']['application'].get('notification_urls', [])
if notification_url in notification_urls:
return notification_url
with self.lock:
notification_urls = self.__data['settings']['application'].get('notification_urls', [])
if notification_url in notification_urls:
return notification_url
# Append and update the datastore
notification_urls.append(notification_url)
self.__data['settings']['application']['notification_urls'] = notification_urls
self.needs_write = True
return notification_url
# Run all updates
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
# So therefor - each `update_n` should be very careful about checking if it needs to actually run
@@ -810,16 +747,7 @@ class ChangeDetectionStore:
logger.critical(f"Applying update_{update_n}")
# Wont exist on fresh installs
if os.path.exists(self.json_store_path):
i = 0
while True:
i+=1
dest = os.path.join(self.datastore_path, f"url-watches-before-{update_n}-{i}.json")
if not os.path.exists(dest):
logger.debug(f"Copying url-watches.json DB to '{dest}' backup.")
shutil.copyfile(self.json_store_path, dest)
break
else:
logger.warning(f"Backup of url-watches.json '{dest}', DB already exists, trying {i+1}.. ")
shutil.copyfile(self.json_store_path, os.path.join(self.datastore_path, f"url-watches-before-{update_n}.json"))
try:
update_method = getattr(self, f"update_{update_n}")()
@@ -1077,48 +1005,48 @@ class ChangeDetectionStore:
# Some notification formats got the wrong name type
def update_23(self):
def re_run(formats):
sys_n_format = self.data['settings']['application'].get('notification_format')
key_exists_as_value = next((k for k, v in formats.items() if v == sys_n_format), None)
if key_exists_as_value: # key of "Plain text"
logger.success(f"['settings']['application']['notification_format'] '{sys_n_format}' -> '{key_exists_as_value}'")
self.data['settings']['application']['notification_format'] = key_exists_as_value
for uuid, watch in self.data['watching'].items():
n_format = self.data['watching'][uuid].get('notification_format')
key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: # key of "Plain text"
logger.success(f"['watching'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
self.data['watching'][uuid]['notification_format'] = key_exists_as_value # should be 'text' or whatever
for uuid, tag in self.data['settings']['application']['tags'].items():
n_format = self.data['settings']['application']['tags'][uuid].get('notification_format')
key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: # key of "Plain text"
logger.success(
f"['settings']['application']['tags'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
self.data['settings']['application']['tags'][uuid][
'notification_format'] = key_exists_as_value # should be 'text' or whatever
def update_22(self):
from .notification import valid_notification_formats
formats = deepcopy(valid_notification_formats)
re_run(formats)
# And in previous versions, it was "text" instead of Plain text, Markdown instead of "Markdown to HTML"
formats['text'] = 'Text'
formats['markdown'] = 'Markdown'
re_run(formats)
sys_n_format = self.data['settings']['application'].get('notification_format')
key_exists_as_value = next((k for k, v in valid_notification_formats.items() if v == sys_n_format), None)
if key_exists_as_value: # key of "Plain text"
logger.success(f"['settings']['application']['notification_format'] '{sys_n_format}' -> '{key_exists_as_value}'")
self.data['settings']['application']['notification_format'] = key_exists_as_value
for uuid, watch in self.data['watching'].items():
n_format = self.data['watching'][uuid].get('notification_format')
key_exists_as_value = next((k for k, v in valid_notification_formats.items() if v == n_format), None)
if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: # key of "Plain text"
logger.success(f"['watching'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
self.data['watching'][uuid]['notification_format'] = key_exists_as_value # should be 'text' or whatever
for uuid, tag in self.data['settings']['application']['tags'].items():
n_format = self.data['settings']['application']['tags'][uuid].get('notification_format')
key_exists_as_value = next((k for k, v in valid_notification_formats.items() if v == n_format), None)
if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: # key of "Plain text"
logger.success(f"['settings']['application']['tags'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
self.data['settings']['application']['tags'][uuid]['notification_format'] = key_exists_as_value # should be 'text' or whatever
def add_notification_url(self, notification_url):
logger.debug(f">>> Adding new notification_url - '{notification_url}'")
notification_urls = self.data['settings']['application'].get('notification_urls', [])
if notification_url in notification_urls:
return notification_url
with self.lock:
notification_urls = self.__data['settings']['application'].get('notification_urls', [])
if notification_url in notification_urls:
return notification_url
# Append and update the datastore
notification_urls.append(notification_url)
self.__data['settings']['application']['notification_urls'] = notification_urls
self.needs_write = True
return notification_url
# RSS types should be inline with the same names as notification types
def update_24(self):
rss_format = self.data['settings']['application'].get('rss_content_format')
if not rss_format or 'text' in rss_format:
# might have been 'plaintext, 'plain text' or something
self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT
elif 'html' in rss_format:
self.data['settings']['application']['rss_content_format'] = 'htmlcolor'
else:
# safe fallback to text
self.data['settings']['application']['rss_content_format'] = RSS_CONTENT_FORMAT_DEFAULT
@@ -87,35 +87,19 @@
<tr>
<td><code>{{ '{{diff}}' }}</code></td>
<td>The diff output - only changes, additions, and removals</td>
</tr>
<tr>
<td><code>{{ '{{diff_clean}}' }}</code></td>
<td>The diff output - only changes, additions, and removals &dash; <i>Without (added) prefix or colors</i></td>
</tr>
<tr>
<td><code>{{ '{{diff_added}}' }}</code></td>
<td>The diff output - only changes and additions</td>
</tr>
<tr>
<td><code>{{ '{{diff_added_clean}}' }}</code></td>
<td>The diff output - only changes and additions &dash; <i>Without (added) prefix or colors</i></td>
</tr>
<tr>
<td><code>{{ '{{diff_removed}}' }}</code></td>
<td>The diff output - only changes and removals</td>
</tr>
<tr>
<td><code>{{ '{{diff_removed_clean}}' }}</code></td>
<td>The diff output - only changes and removals &dash; <i>Without (added) prefix or colors</i></td>
</tr>
<tr>
<td><code>{{ '{{diff_full}}' }}</code></td>
<td>The diff output - full difference output</td>
</tr>
<tr>
<td><code>{{ '{{diff_full_clean}}' }}</code></td>
<td>The diff output - full difference output &dash; <i>Without (added) prefix or colors</i></td>
</tr>
<tr>
<td><code>{{ '{{diff_patch}}' }}</code></td>
<td>The diff output - patch in unified format</td>
+2 -7
View File
@@ -8,13 +8,8 @@
<meta name="robots" content="noindex">
<title>Change Detection{{extra_title}}</title>
{% if app_rss_token %}
<link rel="alternate" type="application/rss+xml" title="Changedetection.io » Feed{% if active_tag_uuid %}- {{active_tag.title}}{% endif %}" href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token, _external=True )}}" >
{% if rss_uuid_feed %}
<link rel="alternate" type="application/rss+xml" title="Feed » {{ rss_uuid_feed['label'] }}" href="{{ rss_uuid_feed['url'] }}" >
{%- endif -%}
{%- endif -%}
<link rel="alternate" type="application/rss+xml" title="Changedetection.io » Feed{% if active_tag_uuid %}- {{active_tag.title}}{% endif %}" href="{{ url_for('rss.feed', tag=active_tag_uuid , token=app_rss_token)}}" >
{% endif %}
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='pure-min.css')}}" >
<link rel="stylesheet" href="{{url_for('static_content', group='styles', filename='styles.css')}}?v={{ get_css_version() }}" >
{% if extra_stylesheets %}
@@ -3,9 +3,8 @@ from flask import url_for
from email import message_from_string
from email.policy import default as email_policy
from changedetectionio.diff import HTML_REMOVED_STYLE, HTML_ADDED_STYLE, HTML_CHANGED_STYLE, REMOVED_PLACEMARKER_OPEN, \
CHANGED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_OPEN
from changedetectionio.notification_service import NotificationContextData
from changedetectionio.diff import HTML_REMOVED_STYLE, HTML_ADDED_STYLE, HTML_CHANGED_STYLE
from changedetectionio.notification_service import NotificationContextData, CUSTOM_LINEBREAK_PLACEHOLDER
from changedetectionio.tests.util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup, \
wait_for_all_checks, \
set_longer_modified_response, delete_all_watches
@@ -101,6 +100,7 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas
text_content = text_part.get_content()
assert '(added) So let\'s see what happens.\r\n' in text_content # The plaintext part
assert 'fallback-body\r\n' in text_content # The plaintext part
assert CUSTOM_LINEBREAK_PLACEHOLDER not in text_content
# Second part should be text/html
html_part = parts[1]
@@ -109,6 +109,7 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas
assert 'some text<br>' in html_content # We converted \n from the notification body
assert 'fallback-body<br>' in html_content # kept the original <br>
assert '(added) So let\'s see what happens.<br>' in html_content # the html part
assert CUSTOM_LINEBREAK_PLACEHOLDER not in html_content
delete_all_watches(client)
@@ -123,8 +124,8 @@ def test_check_notification_plaintext_format(client, live_server, measure_memory
res = client.post(
url_for("settings.settings_page"),
data={"application-notification_urls": notification_url,
"application-notification_title": "fallback-title {{watch_title}} {{ diff_added.splitlines()[0] if diff_added else 'diff added didnt split' }} " + default_notification_title,
"application-notification_body": f"some text\n" + default_notification_body + f"\nMore output test\n{ALL_MARKUP_TOKENS}",
"application-notification_title": "fallback-title " + default_notification_title,
"application-notification_body": "some text\n" + default_notification_body,
"application-notification_format": 'text',
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
@@ -147,18 +148,9 @@ def test_check_notification_plaintext_format(client, live_server, measure_memory
msg_raw = get_last_message_from_smtp_server()
assert len(msg_raw) >= 1
#time.sleep(60)
# Parse the email properly using Python's email library
msg = message_from_string(msg_raw, policy=email_policy)
# Subject/title got marked up
subject = msg['subject']
# Subject should always be plaintext and never marked up to anything else
assert REMOVED_PLACEMARKER_OPEN not in subject
assert CHANGED_PLACEMARKER_OPEN not in subject
assert ADDED_PLACEMARKER_OPEN not in subject
assert 'diff added didnt split' not in subject
assert '(changed) Which is across' in subject
assert 'PLACEMARKER' not in subject
# The email should be plain text only (not multipart)
assert not msg.is_multipart()
@@ -185,7 +177,7 @@ def test_check_notification_html_color_format(client, live_server, measure_memor
res = client.post(
url_for("settings.settings_page"),
data={"application-notification_urls": notification_url,
"application-notification_title": "fallback-title {{watch_title}} - diff_added_lines_test : '{{ diff_added.splitlines()[0] if diff_added else 'diff added didnt split' }}' " + default_notification_title,
"application-notification_title": "fallback-title " + default_notification_title,
"application-notification_body": f"some text\n{default_notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}",
"application-notification_format": 'htmlcolor',
"requests-time_between_check-minutes": 180,
@@ -219,19 +211,6 @@ def test_check_notification_html_color_format(client, live_server, measure_memor
# Parse the email properly using Python's email library
msg = message_from_string(msg_raw, policy=email_policy)
# Subject/title got marked up
subject = msg['subject']
# Subject should always be plaintext and never marked up to anything else
assert REMOVED_PLACEMARKER_OPEN not in subject
assert CHANGED_PLACEMARKER_OPEN not in subject
assert ADDED_PLACEMARKER_OPEN not in subject
assert 'diff added didnt split' not in subject
assert '(changed) Which is across' in subject
assert 'PLACEMARKER' not in subject
assert 'head title' in subject
assert "span" not in subject
assert 'background-color' not in subject
# The email should have two bodies (multipart/alternative with text/plain and text/html)
assert msg.is_multipart()
@@ -270,7 +249,7 @@ def test_check_notification_markdown_format(client, live_server, measure_memory_
res = client.post(
url_for("settings.settings_page"),
data={"application-notification_urls": notification_url,
"application-notification_title": "fallback-title diff_added_lines_test : '{{ diff_added.splitlines()[0] if diff_added else 'diff added didnt split' }}' " + default_notification_title,
"application-notification_title": "fallback-title " + default_notification_title,
"application-notification_body": "*header*\n\nsome text\n" + default_notification_body,
"application-notification_format": 'markdown',
"requests-time_between_check-minutes": 180,
@@ -308,14 +287,6 @@ def test_check_notification_markdown_format(client, live_server, measure_memory_
# The email should have two bodies (multipart/alternative with text/plain and text/html)
assert msg.is_multipart()
assert msg.get_content_type() == 'multipart/alternative'
subject = msg['subject']
# Subject should always be plaintext and never marked up to anything else
assert REMOVED_PLACEMARKER_OPEN not in subject
assert CHANGED_PLACEMARKER_OPEN not in subject
assert ADDED_PLACEMARKER_OPEN not in subject
assert 'diff added didnt split' not in subject
assert '(changed) Which is across' in subject
# Get the parts
parts = list(msg.iter_parts())
@@ -334,10 +305,7 @@ def test_check_notification_markdown_format(client, live_server, measure_memory_
assert html_part.get_content_type() == 'text/html'
html_content = html_part.get_content()
assert '<p><em>header</em></p>' in html_content
assert '<strong>So let\'s see what happens.</strong><br />' in html_content # Additions are <strong> in markdown
# the '<br />' will come from apprises conversion, not from our code, we would rather use '<br>' correctly
# the '<br />' is actually a nice way to know if apprise done the conversion.
assert '<strong>So let\'s see what happens.</strong><br>' in html_content # Additions are <strong> in markdown
delete_all_watches(client)
# Custom notification body with HTML, that is either sent as HTML or rendered to plaintext and sent
@@ -585,7 +553,6 @@ def test_check_plaintext_document_html_notifications(client, live_server, measur
# Should be the HTML, but not HTML Color
assert 'background-color' not in html_content
assert '<br>(added) And let&#39;s talk about &lt;title&gt; tags<br>' in html_content
assert 'PLACEMARKER' not in html_content
assert '&lt;br' not in html_content
assert '<pre role="article"' in html_content # Should have got wrapped nicely in email_helpers.py
@@ -716,7 +683,6 @@ def test_check_html_document_plaintext_notification(client, live_server, measure
assert '<tag>' in body # Should have got converted from original HTML to plaintext
assert '(changed) some stuff\r\n' in body
assert 'PLACEMARKER' not in body
assert '(into) sxome stuff\r\n' in body
assert '(added) lets slip this in\r\n' in body
assert '(added) and this in\r\n' in body
@@ -786,6 +752,7 @@ def test_check_html_notification_with_apprise_format_is_html(client, live_server
text_content = text_part.get_content()
assert '(added) So let\'s see what happens.\r\n' in text_content # The plaintext part
assert 'fallback-body\r\n' in text_content # The plaintext part
assert CUSTOM_LINEBREAK_PLACEHOLDER not in text_content
# Second part should be text/html
html_part = parts[1]
@@ -794,4 +761,5 @@ def test_check_html_notification_with_apprise_format_is_html(client, live_server
assert 'some text<br>' in html_content # We converted \n from the notification body
assert 'fallback-body<br>' in html_content # kept the original <br>
assert '(added) So let\'s see what happens.<br>' in html_content # the html part
assert CUSTOM_LINEBREAK_PLACEHOLDER not in html_content
delete_all_watches(client)
+30 -59
View File
@@ -19,9 +19,18 @@ def test_inscriptus():
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage, datastore_path):
set_original_response(datastore_path=datastore_path)
# live_server_setup(live_server) # Setup on conftest per function
uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)
# Do this a few times.. ensures we dont accidently set the status
for n in range(3):
@@ -77,9 +86,10 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert b'<rss' in res.data
# re #16 should have the diff in here too
assert b'which has this one new line' in res.data
assert b'(into) which has this one new line' in res.data
assert b'CDATA' in res.data
assert expected_url.encode('utf-8') in res.data
#
# Following the 'diff' link, it should no longer display as 'has-unread-changes' even after we recheck it a few times
res = client.get(url_for("ui.ui_views.diff_history_page", uuid=uuid))
@@ -105,6 +115,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' not in res.data
assert b'class="has-unread-changes' not in res.data
assert b'head title' in res.data # Should be ON by default
@@ -118,6 +129,23 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
res = client.get(url_for("watchlist.index"))
assert b'head title and more' in res.data
# disable <title> pickup
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-use_page_title_in_list": "", "requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'has-unread-changes' in res.data
assert b'class="has-unread-changes' in res.data
assert b'head title' not in res.data # should now be off
# Be sure the last_viewed is going to be greater than the last snapshot
time.sleep(1)
@@ -138,63 +166,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# Cleanup everything
delete_all_watches(client)
def test_title_scraper(client, live_server, measure_memory_usage, datastore_path):
set_original_response(datastore_path=datastore_path)
uuid = client.application.config.get('DATASTORE').add_watch(url=url_for('test_endpoint', _external=True))
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks()
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'head title' in res.data # Should be ON by default
# Recheck it but only with a title change, content wasnt changed
set_original_response(datastore_path=datastore_path, extra_title=" and more")
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'head title and more' in res.data
# disable <title> pickup
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-use_page_title_in_list": "",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
set_original_response(datastore_path=datastore_path, extra_title=" SHOULD NOT APPEAR")
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'SHOULD NOT APPEAR' not in res.data
delete_all_watches(client)
def test_title_scraper_html_only(client, live_server, measure_memory_usage, datastore_path):
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
f.write('"My text document\nWhere I talk about <title>\nwhich should not get registered\n</title>')
test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks()
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'which should not get registered' not in res.data # Should be ON by default
assert not live_server.app.config['DATASTORE'].data['watching'][uuid].get('title')
# Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that
def test_requests_timeout(client, live_server, measure_memory_usage, datastore_path):
@@ -40,7 +40,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
json_obj = None
with open(json_db_file, 'r', encoding='utf-8') as f:
with open(json_db_file, 'r') as f:
json_obj = json.load(f)
# assert the right amount of watches was found in the JSON
@@ -76,7 +76,7 @@ def test_consistent_history(client, live_server, measure_memory_usage, datastore
assert len(files_in_watch_dir) == 3, "Should be just three files in the dir, html.br snapshot, history.txt and the extracted text snapshot"
json_db_file = os.path.join(live_server.app.config['DATASTORE'].datastore_path, 'url-watches.json')
with open(json_db_file, 'r', encoding='utf-8') as f:
with open(json_db_file, 'r') as f:
assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"
@@ -353,7 +353,7 @@ def check_json_ext_filter(json_filter, client, live_server, datastore_path):
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
dates = list(watch.history.keys())
snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])
snapshot_contents = watch.get_history_snapshot(dates[0])
assert snapshot_contents[0] == '['
@@ -439,15 +439,16 @@ def test_correct_header_detect(client, live_server, measure_memory_usage, datast
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
dates = list(watch.history.keys())
snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])
snapshot_contents = watch.get_history_snapshot(dates[0])
assert b'&#34;hello&#34;: 123,' in res.data # properly html escaped in the front end
import json
data = json.loads(snapshot_contents)
keys = list(data.keys())
# Should be correctly formatted and sorted, ("world" goes to end)
assert keys == ["hello", "world"]
assert snapshot_contents == """{
"hello": 123,
"world": 123
}"""
delete_all_watches(client)
def test_check_jsonpath_ext_filter(client, live_server, measure_memory_usage, datastore_path):
+5 -58
View File
@@ -302,20 +302,15 @@ def test_notification_urls_jinja2_apprise_integration(client, live_server, measu
data={
"application-fetch_backend": "html_requests",
"application-minutes_between_check": 180,
"application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了", "another": "{{diff|truncate(1500)}}" }',
"application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了" }',
"application-notification_format": default_notification_format,
"application-notification_urls": test_notification_url,
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} {{diff|truncate(200)}} ",
"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} ",
},
follow_redirects=True
)
assert b'Settings updated' in res.data
assert '网站监测'.encode() in res.data
assert b'{{diff|truncate(1500)}}' in res.data
assert b'{{diff|truncate(200)}}' in res.data
def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_memory_usage, datastore_path):
@@ -409,15 +404,15 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
#2510
#@todo run it again as text, html, htmlcolor
def test_global_send_test_notification(client, live_server, measure_memory_usage, datastore_path):
set_original_response(datastore_path=datastore_path)
if os.path.isfile(os.path.join(datastore_path, "notification.txt")):
os.unlink(os.path.join(datastore_path, "notification.txt")) \
# 1995 UTF-8 content should be encoded
test_body = 'change detection is cool 网站监测 内容更新了 - {{diff_full}}'
test_body = 'change detection is cool 网站监测 内容更新了'
# otherwise other settings would have already existed from previous tests in this file
res = client.post(
@@ -457,14 +452,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
with open(os.path.join(datastore_path, "notification.txt"), 'r') as f:
x = f.read()
assert 'change detection is cool 网站监测 内容更新了' in x
if 'html' in default_notification_format:
# this should come from default text when in global/system mode here changedetectionio/notification_service.py
assert 'title="Changed into">Example text:' in x
else:
assert 'title="Changed into">Example text:' not in x
assert 'span' not in x
assert 'Example text:' in x
assert test_body in x
os.unlink(os.path.join(datastore_path, "notification.txt"))
@@ -521,47 +509,6 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
assert b"Error: You must have atleast one watch configured for 'test notification' to work" in res.data
#2510
def test_single_send_test_notification_on_watch(client, live_server, measure_memory_usage, datastore_path):
set_original_response(datastore_path=datastore_path)
if os.path.isfile(os.path.join(datastore_path, "notification.txt")):
os.unlink(os.path.join(datastore_path, "notification.txt")) \
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?xxx={{ watch_url }}&+custom-header=123"
# 1995 UTF-8 content should be encoded
test_body = 'change detection is cool 网站监测 内容更新了 - {{diff_full}}'
######### Test global/system settings
res = client.post(
url_for("ui.ui_notification.ajax_callback_send_notification_test")+f"/{uuid}",
data={"notification_urls": test_notification_url,
"notification_body": test_body,
"notification_format": default_notification_format,
"notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
},
follow_redirects=True
)
assert res.status_code != 400
assert res.status_code != 500
with open(os.path.join(datastore_path, "notification.txt"), 'r') as f:
x = f.read()
assert 'change detection is cool 网站监测 内容更新了' in x
if 'html' in default_notification_format:
# this should come from default text when in global/system mode here changedetectionio/notification_service.py
assert 'title="Changed into">Example text:' in x
else:
assert 'title="Changed into">Example text:' not in x
assert 'span' not in x
assert 'Example text:' in x
os.unlink(os.path.join(datastore_path, "notification.txt"))
def _test_color_notifications(client, notification_body_token, datastore_path):
+2 -2
View File
@@ -22,7 +22,7 @@ def test_fetch_pdf(client, live_server, measure_memory_usage, datastore_path):
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
dates = list(watch.history.keys())
snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])
snapshot_contents = watch.get_history_snapshot(dates[0])
# PDF header should not be there (it was converted to text)
assert 'PDF' not in snapshot_contents
@@ -75,7 +75,7 @@ def test_fetch_pdf(client, live_server, measure_memory_usage, datastore_path):
dates = list(watch.history.keys())
# new snapshot was also OK, no HTML
snapshot_contents = watch.get_history_snapshot(timestamp=dates[1])
snapshot_contents = watch.get_history_snapshot(dates[1])
assert 'html' not in snapshot_contents.lower()
assert f'Original file size - {os.path.getsize(os.path.join(datastore_path, "endpoint-test.pdf"))}' in snapshot_contents
assert f'here is a change' in snapshot_contents
+2 -2
View File
@@ -142,7 +142,7 @@ def test_body_in_request(client, live_server, measure_memory_usage, datastore_pa
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
watches_with_body = 0
with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f:
with open(os.path.join(datastore_path, 'url-watches.json')) as f:
app_struct = json.load(f)
for uuid in app_struct['watching']:
if app_struct['watching'][uuid]['body']==body_value:
@@ -225,7 +225,7 @@ def test_method_in_request(client, live_server, measure_memory_usage, datastore_
wait_for_all_checks(client)
watches_with_method = 0
with open(os.path.join(datastore_path, 'url-watches.json'), encoding='utf-8') as f:
with open(os.path.join(datastore_path, 'url-watches.json')) as f:
app_struct = json.load(f)
for uuid in app_struct['watching']:
if app_struct['watching'][uuid]['method'] == 'PATCH':
-73
View File
@@ -4,8 +4,6 @@ import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
extract_UUID_from_client, delete_all_watches
from loguru import logger
from ..blueprint.rss import RSS_FORMAT_TYPES
def set_original_cdata_xml(datastore_path):
@@ -240,77 +238,6 @@ def test_rss_bad_chars_breaking(client, live_server, measure_memory_usage, datas
#assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n == 2
def test_rss_single_watch_feed(client, live_server, measure_memory_usage, datastore_path):
app_rss_token = live_server.app.config['DATASTORE'].data['settings']['application'].get('rss_access_token')
rss_content_format = live_server.app.config['DATASTORE'].data['settings']['application'].get('rss_content_format')
set_original_response(datastore_path=datastore_path)
test_url = url_for('test_endpoint', _external=True)
uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(
url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token),
follow_redirects=False
)
assert res.status_code == 400
assert b'not have enough history' in res.data
set_modified_response(datastore_path=datastore_path)
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(
url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token),
follow_redirects=False
)
assert res.status_code == 200
import xml.etree.ElementTree as ET
root = ET.fromstring(res.data)
def check_formatting(expected_type, content, url):
logger.debug(f"Checking formatting type {expected_type}")
if expected_type == 'text':
assert '<p>' not in content
assert 'body' not in content
assert '(changed) Which is across multiple lines\n'
assert 'modified head title had a change.' # Because it picked it up <title> as watch_title in default template
elif expected_type == 'html':
assert '<p>' in content
assert '<body>' in content
assert '<p>(changed) Which is across multiple lines<br>' in content
assert f'href="{url}">modified head title had a change.</a>'
elif expected_type == 'htmlcolor':
assert '<body>' in content
assert ' role="note" aria-label="Changed text" title="Changed text">Which is across multiple lines</span>' in content
assert f'href="{url}">modified head title had a change.</a>'
else:
raise Exception(f"Unknown type {expected_type}")
item = root.findall('.//item')[0].findtext('description')
check_formatting(expected_type=rss_content_format, content=item, url=test_url)
# Now the default one is over, lets try all the others
for k in list(RSS_FORMAT_TYPES.keys()):
res = client.post(
url_for("settings.settings_page"),
data={"application-rss_content_format": k},
follow_redirects=True
)
assert b'Settings updated' in res.data
res = client.get(
url_for('rss.rss_single_watch', uuid=uuid, token=app_rss_token),
follow_redirects=False
)
assert res.status_code == 200
root = ET.fromstring(res.data)
item = root.findall('.//item')[0].findtext('description')
check_formatting(expected_type=k, content=item, url=test_url)
-254
View File
@@ -1,254 +0,0 @@
#!/usr/bin/env python3
import time
from flask import url_for
from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, delete_all_watches
import os
def set_original_response(datastore_path):
test_return_data = """<html>
<body>
Some initial text<br>
<p>Watch 1 content</p>
<p>Watch 2 content</p>
</body>
</html>
"""
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
f.write(test_return_data)
return None
def set_modified_response(datastore_path):
test_return_data = """<html>
<body>
Some initial text<br>
<p>Watch 1 content MODIFIED</p>
<p>Watch 2 content CHANGED</p>
</body>
</html>
"""
with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f:
f.write(test_return_data)
return None
def test_rss_group(client, live_server, measure_memory_usage, datastore_path):
"""
Test that RSS feed for a specific tag/group shows only watches in that group
and displays changes correctly.
"""
set_original_response(datastore_path=datastore_path)
# Create a tag/group
res = client.post(
url_for("tags.form_tag_add"),
data={"name": "test-rss-group"},
follow_redirects=True
)
assert b"Tag added" in res.data
assert b"test-rss-group" in res.data
# Get the tag UUID
tag_uuid = get_UUID_for_tag_name(client, name="test-rss-group")
assert tag_uuid is not None
# Add first watch with the tag
test_url_1 = url_for('test_endpoint', _external=True) + "?watch=1"
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url_1, "tags": 'test-rss-group'},
follow_redirects=True
)
assert b"Watch added" in res.data
# Add second watch with the tag
test_url_2 = url_for('test_endpoint', _external=True) + "?watch=2"
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url_2, "tags": 'test-rss-group'},
follow_redirects=True
)
assert b"Watch added" in res.data
# Add a third watch WITHOUT the tag (should not appear in RSS)
test_url_3 = url_for('test_endpoint', _external=True) + "?watch=3"
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url_3, "tags": 'other-tag'},
follow_redirects=True
)
assert b"Watch added" in res.data
# Wait for initial checks to complete
wait_for_all_checks(client)
# Trigger a change
set_modified_response(datastore_path=datastore_path)
# Recheck all watches
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# Get RSS token
rss_token = extract_rss_token_from_UI(client)
assert rss_token is not None
# Request RSS feed for the specific tag/group using the new endpoint
res = client.get(
url_for("rss.rss_tag_feed", tag_uuid=tag_uuid, token=rss_token, _external=True),
follow_redirects=True
)
# Verify response is successful
assert res.status_code == 200
assert b"<?xml" in res.data or b"<rss" in res.data
# Verify the RSS feed contains the tag name in the title
assert b"test-rss-group" in res.data
# Verify watch 1 and watch 2 are in the RSS feed (they have the tag)
assert b"watch=1" in res.data
assert b"watch=2" in res.data
# Verify watch 3 is NOT in the RSS feed (it doesn't have the tag)
assert b"watch=3" not in res.data
# Verify the changes are shown in the RSS feed
assert b"MODIFIED" in res.data or b"CHANGED" in res.data
# Verify it's actual RSS/XML format
assert b"<rss" in res.data or b"<feed" in res.data
# Test with invalid tag UUID - should return 404
res = client.get(
url_for("rss.rss_tag_feed", tag_uuid="invalid-uuid-12345", token=rss_token, _external=True),
follow_redirects=True
)
assert res.status_code == 404
assert b"not found" in res.data
# Test with invalid token - should return 403
res = client.get(
url_for("rss.rss_tag_feed", tag_uuid=tag_uuid, token="wrong-token", _external=True),
follow_redirects=True
)
assert res.status_code == 403
assert b"Access denied" in res.data
# Clean up
delete_all_watches(client)
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
assert b'All tags deleted' in res.data
def test_rss_group_empty_tag(client, live_server, measure_memory_usage, datastore_path):
"""
Test that RSS feed for a tag with no watches returns valid but empty RSS.
"""
# Create a tag with no watches
res = client.post(
url_for("tags.form_tag_add"),
data={"name": "empty-tag"},
follow_redirects=True
)
assert b"Tag added" in res.data
tag_uuid = get_UUID_for_tag_name(client, name="empty-tag")
assert tag_uuid is not None
# Get RSS token
rss_token = extract_rss_token_from_UI(client)
# Request RSS feed for empty tag
res = client.get(
url_for("rss.rss_tag_feed", tag_uuid=tag_uuid, token=rss_token, _external=True),
follow_redirects=True
)
# Should still return 200 with valid RSS
assert res.status_code == 200
assert b"<?xml" in res.data or b"<rss" in res.data
assert b"empty-tag" in res.data
# Clean up
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
assert b'All tags deleted' in res.data
def test_rss_group_only_unviewed(client, live_server, measure_memory_usage, datastore_path):
"""
Test that RSS feed for a tag only shows unviewed watches.
"""
set_original_response(datastore_path=datastore_path)
# Create a tag
res = client.post(
url_for("tags.form_tag_add"),
data={"name": "unviewed-test"},
follow_redirects=True
)
assert b"Tag added" in res.data
tag_uuid = get_UUID_for_tag_name(client, name="unviewed-test")
# Add two watches with the tag
test_url_1 = url_for('test_endpoint', _external=True) + "?unviewed=1"
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url_1, "tags": 'unviewed-test'},
follow_redirects=True
)
assert b"Watch added" in res.data
test_url_2 = url_for('test_endpoint', _external=True) + "?unviewed=2"
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url_2, "tags": 'unviewed-test'},
follow_redirects=True
)
assert b"Watch added" in res.data
wait_for_all_checks(client)
# Trigger changes
set_modified_response(datastore_path=datastore_path)
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# Get RSS token
rss_token = extract_rss_token_from_UI(client)
# Request RSS feed - should show both watches (both unviewed)
res = client.get(
url_for("rss.rss_tag_feed", tag_uuid=tag_uuid, token=rss_token, _external=True),
follow_redirects=True
)
assert res.status_code == 200
assert b"unviewed=1" in res.data
assert b"unviewed=2" in res.data
# Mark all as viewed
res = client.get(url_for('ui.mark_all_viewed'), follow_redirects=True)
wait_for_all_checks(client)
# Request RSS feed again - should be empty now (no unviewed watches)
res = client.get(
url_for("rss.rss_tag_feed", tag_uuid=tag_uuid, token=rss_token, _external=True),
follow_redirects=True
)
assert res.status_code == 200
# Should not contain the watch URLs anymore since they're viewed
assert b"unviewed=1" not in res.data
assert b"unviewed=2" not in res.data
# Clean up
delete_all_watches(client)
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
assert b'All tags deleted' in res.data
@@ -65,7 +65,7 @@ def test_rss_reader_mode(client, live_server, measure_memory_usage, datastore_pa
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
dates = list(watch.history.keys())
snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])
snapshot_contents = watch.get_history_snapshot(dates[0])
assert 'Wet noodles escape' in snapshot_contents
assert '<br>' not in snapshot_contents
assert '&lt;' not in snapshot_contents
@@ -91,7 +91,7 @@ def test_rss_reader_mode_with_css_filters(client, live_server, measure_memory_us
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]
dates = list(watch.history.keys())
snapshot_contents = watch.get_history_snapshot(timestamp=dates[0])
snapshot_contents = watch.get_history_snapshot(dates[0])
assert 'Wet noodles escape' not in snapshot_contents
assert '<br>' not in snapshot_contents
assert '&lt;' not in snapshot_contents
@@ -55,8 +55,8 @@ class TestTriggerConditions(unittest.TestCase):
self.assertEqual(len(history), 2)
# Retrieve and check snapshots
#snapshot1 = watch.get_history_snapshot(timestamp=str(timestamp1))
#snapshot2 = watch.get_history_snapshot(timestamp=str(timestamp2))
#snapshot1 = watch.get_history_snapshot(str(timestamp1))
#snapshot2 = watch.get_history_snapshot(str(timestamp2))
self.store.data['watching'][self.watch_uuid].update(
{
+1
View File
@@ -14,6 +14,7 @@ class Weekday(IntEnum):
Saturday = 5
Sunday = 6
@lru_cache(maxsize=100)
def am_i_inside_time(
day_of_week: str,
time_str: str,
+10 -31
View File
@@ -28,7 +28,7 @@ info:
For example: `x-api-key: YOUR_API_KEY`
version: 0.1.3
version: 0.1.2
contact:
name: ChangeDetection.io
url: https://github.com/dgtlmoon/changedetection.io
@@ -65,17 +65,13 @@ tags:
- name: Watch History
description: |
Get a list of timestamps of all changes detected for a watch.
Access historical snapshots and change data for your watches. View the complete timeline of detected changes
and retrieve specific versions of monitored content for comparison and analysis.
- name: Snapshots
description: |
Retrieve individual text snapshot of monitored content according to the `timestamp`. The text snapshot is the HTML
to Text at page check time.
Set the query argument `html` to any value to retrieve the last HTML fetched, the system only keeps the last two
(2) HTML files fetched.
Use the Watch History API endpoint to get a list of timestamps to pass to this query.
Retrieve individual snapshots of monitored content. Access both the processed change detection data and
the raw HTML content that was captured during monitoring checks.
- name: Favicon
description: |
@@ -232,11 +228,6 @@ components:
maxLength: 5000
required: [operation, selector, optional_value]
description: Browser automation steps
processor:
type: string
enum: [restock_diff, text_json_diff]
default: text_json_diff
description: Optional processor mode to use for change detection. Defaults to `text_json_diff` if not specified.
Watch:
allOf:
@@ -437,15 +428,7 @@ paths:
operationId: createWatch
tags: [Watch Management]
summary: Create a new watch
description: |
Create a single web page change monitor (watch). Requires at least `url` to be set.
Every watch can be configured with:
- **Processor mode**: `processor` field (`restock_diff` or `text_json_diff` - default)
- **Notification settings**: `notification_urls` (array), `notification_title`, `notification_body`, `notification_format`, `notification_muted`
- **Tags/Groups**: `tag` (UUID string) or `tags` (array of UUIDs)
- **Check settings**: `time_between_check`, `paused`, `method`, `fetch_backend`
- **Advanced options**: `headers`, `body`, `proxy`, `browser_steps`, and more
description: Create a single web page change monitor (watch). Requires at least 'url' to be set.
x-code-samples:
- lang: 'curl'
source: |
@@ -463,7 +446,7 @@ paths:
source: |
import requests
import json
headers = {
'x-api-key': 'YOUR_API_KEY',
'Content-Type': 'application/json'
@@ -475,7 +458,7 @@ paths:
'hours': 1
}
}
response = requests.post('http://localhost:5000/api/v1/watch',
response = requests.post('http://localhost:5000/api/v1/watch',
headers=headers, json=data)
print(response.text)
requestBody:
@@ -665,9 +648,7 @@ paths:
operationId: getWatchHistory
tags: [Watch History]
summary: Get watch history
description: |
Get a list of all historical snapshots available for a web page change monitor (watch), use the key `timestamp`
as the query argument for fetching a single watch history snapshot.
description: Get a list of all historical snapshots available for a web page change monitor (watch)
x-code-samples:
- lang: 'curl'
source: |
@@ -707,9 +688,7 @@ paths:
operationId: getWatchSnapshot
tags: [Snapshots]
summary: Get single snapshot
description: |
Get single snapshot from web page change monitor (watch). Use 'latest' for the most recent snapshot.
Use the Watch History API to get a list of timestamps to pass.
description: Get single snapshot from web page change monitor (watch). Use 'latest' for the most recent snapshot.
x-code-samples:
- lang: 'curl'
source: |
+57 -89
View File
File diff suppressed because one or more lines are too long
+3 -6
View File
@@ -12,7 +12,7 @@ janus # Thread-safe async/sync queue bridge
flask_wtf~=1.2
flask~=3.1
flask-socketio~=5.5.1
python-socketio~=5.14.3
python-socketio~=5.14.2
python-engineio~=4.12.3
inscriptis~=2.2
pytz
@@ -31,10 +31,7 @@ requests-file
chardet>2.3.0
wtforms~=3.2
jsonpath-ng~=1.7.0
# Fast JSON serialization for better performance
orjson~=3.11
jsonpath-ng~=1.5.3
# dnspython - Used by paho-mqtt for MQTT broker resolution
# Version pin removed since eventlet (which required the specific 2.6.1 pin) has been eliminated
@@ -90,7 +87,7 @@ pyppeteerstealth>=0.0.4
# Include pytest, so if theres a support issue we can ask them to run these tests on their setup
pytest ~=7.2
pytest-flask ~=1.3
pytest-flask ~=1.2
pytest-mock ~=3.15
# Anything 4.0 and up but not 5.0