Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-11-13 13:06:10 +00:00)

Compare commits: API-adding...0.50.18 (24 commits)
Commits in this compare (SHA1):
6ad4acc9fc, 3e59521f48, 0970c087c8, 676c550e6e, 78fa47f6f8, 4aa5bb6da3,
f7dfc9bbb8, 584b6e378d, 754febfd33, 0c9c475f32, e4baca1127, bb61a35a54,
4b9ae5a97c, c8caa0662d, f4e8d1963f, 45d5e961dc, 45f2863966, 01c1ac4c0c,
b2f9aec383, a95aa67aef, cbeefeccbb, 2b72d38235, 8fe7aec3c6, 6e1f5a8503
.github/dependabot.yml (vendored, 8 lines changed)
@@ -4,11 +4,11 @@ updates:
directory: /
schedule:
interval: "weekly"
"caronc/apprise":
versioning-strategy: "increase"
schedule:
interval: "daily"
groups:
all:
patterns:
- "*"
- package-ecosystem: pip
directory: /
schedule:
interval: "weekly"
.github/workflows/codeql-analysis.yml (vendored, 6 lines changed)
@@ -34,7 +34,7 @@ jobs:
# Initializes the CodeQL tools for scanning.
- name: Initialize CodeQL
uses: github/codeql-action/init@v3
uses: github/codeql-action/init@v4
with:
languages: ${{ matrix.language }}
# If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,7 +45,7 @@ jobs:
# Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
# If this step fails, then you should remove it and run the build manually (see below)
- name: Autobuild
uses: github/codeql-action/autobuild@v3
uses: github/codeql-action/autobuild@v4

# ℹ️ Command-line programs to run using the OS shell.
# 📚 https://git.io/JvXDl
@@ -59,4 +59,4 @@ jobs:
# make release

- name: Perform CodeQL Analysis
uses: github/codeql-action/analyze@v3
uses: github/codeql-action/analyze@v4
.github/workflows/containers.yml (vendored, 4 lines changed)
@@ -95,7 +95,7 @@ jobs:
push: true
tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
cache-from: type=gha
cache-to: type=gha,mode=max
@@ -133,7 +133,7 @@ jobs:
file: ./Dockerfile
push: true
tags: ${{ steps.meta.outputs.tags }}
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
cache-from: type=gha
cache-to: type=gha,mode=max
# Looks like this was disabled
.github/workflows/test-container-build.yml (vendored, 4 lines changed)
@@ -38,8 +38,6 @@ jobs:
dockerfile: ./Dockerfile
- platform: linux/arm/v8
dockerfile: ./Dockerfile
- platform: linux/arm64/v8
dockerfile: ./Dockerfile
# Alpine Dockerfile platforms (musl via alpine check)
- platform: linux/amd64
dockerfile: ./.github/test/Dockerfile-alpine
@@ -76,5 +74,5 @@ jobs:
file: ${{ matrix.dockerfile }}
platforms: ${{ matrix.platform }}
cache-from: type=gha
cache-to: type=gha,mode=max
cache-to: type=gha,mode=min
@@ -2,7 +2,7 @@
# Read more https://github.com/dgtlmoon/changedetection.io/wiki

__version__ = '0.50.13'
__version__ = '0.50.18'

from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -88,6 +88,8 @@ class Watch(Resource):
# attr .last_changed will check for the last written text snapshot on change
watch['last_changed'] = watch.last_changed
watch['viewed'] = watch.viewed
watch['link'] = watch.link,

return watch

@auth.check_token
@@ -273,6 +275,8 @@ class CreateWatch(Resource):
'last_changed': watch.last_changed,
'last_checked': watch['last_checked'],
'last_error': watch['last_error'],
'link': watch.link,
'page_title': watch['page_title'],
'title': watch['title'],
'url': watch['url'],
'viewed': watch.viewed
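The two new keys surface in the API responses above; a minimal sketch of reading them follows. It is not part of this changeset: the /api/v1/watch/<uuid> endpoint path, base URL, token and UUID are placeholders assumed for illustration, while the x-api-key header matches the one used in the tests further below.

```python
# Hedged sketch: fetch one watch and read the newly exposed fields.
# BASE, API_KEY and WATCH_UUID are hypothetical values, not from this changeset.
import requests

BASE = "http://localhost:5000"
API_KEY = "your-api-access-token"            # from Settings > API
WATCH_UUID = "00000000-0000-0000-0000-000000000000"

resp = requests.get(f"{BASE}/api/v1/watch/{WATCH_UUID}", headers={"x-api-key": API_KEY})
watch = resp.json()
print(watch.get("viewed"), watch.get("link"))  # 'viewed' and 'link' are the keys added above
```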
@@ -191,6 +191,12 @@ nav
</ul>
</span>
</fieldset>
<fieldset class="pure-group">
{{ render_checkbox_field(form.application.form.strip_ignored_lines) }}
<span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br>
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
</span>
</fieldset>
</div>

<div class="tab-pane-inner" id="api">
@@ -87,7 +87,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
form=form,
guid=datastore.data['app_guid'],
has_proxies=datastore.proxy_list,
has_unviewed=datastore.has_unviewed,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
now_time_server=round(time.time()),
pagination=pagination,
@@ -97,6 +96,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
tags=sorted_tags,
unread_changes_count=datastore.unread_changes_count,
watches=sorted_watches
)
@@ -82,8 +82,11 @@ document.addEventListener('DOMContentLoaded', function() {
{%- set cols_required = cols_required + 1 -%}
{%- endif -%}
{%- set ui_settings = datastore.data['settings']['application']['ui'] -%}

<div id="watch-table-wrapper">
{%- set wrapper_classes = [
'has-unread-changes' if unread_changes_count else '',
'has-error' if errored_count else '',
] -%}
<div id="watch-table-wrapper" class="{{ wrapper_classes | reject('equalto', '') | join(' ') }}">
{%- set table_classes = [
'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled',
] -%}
@@ -158,9 +161,9 @@ document.addEventListener('DOMContentLoaded', function() {
<div>
<span class="watch-title">
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
{{watch.label}}
{{ watch.label }}
{% else %}
{{watch.url}}
{{ watch.get('title') or watch.link }}
{% endif %}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
</span>
@@ -241,10 +244,10 @@ document.addEventListener('DOMContentLoaded', function() {
</tbody>
</table>
<ul id="post-list-buttons">
<li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" >
<li id="post-list-with-errors" style="display: none;" >
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a>
</li>
<li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" >
<li id="post-list-mark-views" style="display: none;" >
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a>
</li>
{%- if active_tag_uuid -%}
@@ -252,8 +255,8 @@ document.addEventListener('DOMContentLoaded', function() {
<a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a>
</li>
{%- endif -%}
<li id="post-list-unread" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" >
<a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread</a>
<li id="post-list-unread" style="display: none;" >
<a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a>
</li>
<li>
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck
@@ -220,14 +220,16 @@ def validate_time_between_check_has_values(form):
Custom validation function for TimeBetweenCheckForm.
Returns True if at least one time interval field has a value > 0.
"""
return any([
form.weeks.data and form.weeks.data > 0,
form.days.data and form.days.data > 0,
form.hours.data and form.hours.data > 0,
form.minutes.data and form.minutes.data > 0,
form.seconds.data and form.seconds.data > 0
res = any([
form.weeks.data and int(form.weeks.data) > 0,
form.days.data and int(form.days.data) > 0,
form.hours.data and int(form.hours.data) > 0,
form.minutes.data and int(form.minutes.data) > 0,
form.seconds.data and int(form.seconds.data) > 0
])

return res


class RequiredTimeInterval(object):
"""
@@ -757,6 +759,7 @@ class processor_text_json_diff_form(commonSettingsForm):
check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None)
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)

filter_text_added = BooleanField('Added lines', default=True)
@@ -934,6 +937,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()])
strip_ignored_lines = BooleanField('Strip ignored lines')
rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
validators=[validators.Optional()])
filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
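A small illustration (mine, not part of the diff) of why the added int() casts matter: when the sub-form's values arrive as numeric strings, a bare comparison such as '5' > 0 raises TypeError in Python 3, whereas casting first keeps the check working for both numbers and numeric strings.

```python
# Hedged sketch of the same validation idea, standalone; the field names mirror
# TimeBetweenCheckForm but the function itself is hypothetical.
def has_any_interval(data: dict) -> bool:
    fields = ('weeks', 'days', 'hours', 'minutes', 'seconds')
    # Skip empty/None values, cast the rest before comparing
    return any(data.get(f) and int(data[f]) > 0 for f in fields)

print(has_any_interval({'weeks': '', 'minutes': '5'}))    # True
print(has_any_interval({'weeks': None, 'minutes': '0'}))  # False
```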
@@ -57,6 +57,7 @@ class model(dict):
'rss_hide_muted_watches': True,
'schema_version' : 0,
'shared_diff_access': False,
'strip_ignored_lines': False,
'tags': {}, #@todo use Tag.model initialisers
'timezone': None, # Default IANA timezone name
'webdriver_delay': None , # Extra delay in seconds before extracting text
@@ -170,7 +170,7 @@ class model(watch_base):
@property
def label(self):
# Used for sorting, display, etc
return self.get('title') or self.get('page_title') or self.get('url')
return self.get('title') or self.get('page_title') or self.link

@property
def last_changed(self):
@@ -58,6 +58,7 @@ class watch_base(dict):
'proxy': None, # Preferred proxy connection
'remote_server_reply': None, # From 'server' reply header
'sort_text_alphabetically': False,
'strip_ignored_lines': None,
'subtractive_selectors': [],
'tag': '', # Old system of text name for a tag, to be removed
'tags': [], # list of UUIDs to App.Tags
changedetectionio/processors/magic.py (new file, 126 lines)
@@ -0,0 +1,126 @@
"""
Content Type Detection and Stream Classification

This module provides intelligent content-type detection for changedetection.io.
It addresses the common problem where HTTP Content-Type headers are missing, incorrect,
or too generic, which would otherwise cause the wrong processor to be used.

The guess_stream_type class combines:
1. HTTP Content-Type headers (when available and reliable)
2. Python-magic library for MIME detection (analyzing actual file content)
3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.)

This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF,
plain text, CSV, YAML, and XML formats - even when servers provide misleading headers.

Used by: processors/text_json_diff/processor.py and other content processors
"""

# When to apply the 'cdata to real HTML' hack
RSS_XML_CONTENT_TYPES = [
    "application/rss+xml",
    "application/rdf+xml",
    "application/atom+xml",
    "text/rss+xml",  # rare, non-standard
    "application/x-rss+xml",  # legacy (older feed software)
    "application/x-atom+xml",  # legacy (older Atom)
]

# JSON Content-types
JSON_CONTENT_TYPES = [
    "application/activity+json",
    "application/feed+json",
    "application/json",
    "application/ld+json",
    "application/vnd.api+json",
]

# Generic XML Content-types (non-RSS/Atom)
XML_CONTENT_TYPES = [
    "text/xml",
    "application/xml",
]

HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div']

import re
import magic
from loguru import logger


class guess_stream_type():
    is_pdf = False
    is_json = False
    is_html = False
    is_plaintext = False
    is_rss = False
    is_csv = False
    is_xml = False  # Generic XML, not RSS/Atom
    is_yaml = False

    def __init__(self, http_content_header, content):

        magic_content_header = http_content_header
        test_content = content[:200].lower().strip()

        # Remove whitespace between < and tag name for robust detection (handles '< html', '<\nhtml', etc.)
        test_content_normalized = re.sub(r'<\s+', '<', test_content)

        # Magic will sometimes call text/plain as text/html!
        magic_result = None
        try:
            mime = magic.from_buffer(content[:200], mime=True)  # Send the original content
            logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'")
            if mime and "/" in mime:
                magic_result = mime
                # Ignore generic/fallback mime types from magic
                if mime in ['application/octet-stream', 'application/x-empty', 'binary']:
                    logger.debug(f"Ignoring generic mime type '{mime}' from magic library")
                # Trust magic for non-text types immediately
                elif mime not in ['text/html', 'text/plain']:
                    magic_content_header = mime

        except Exception as e:
            logger.error(f"Error getting a more precise mime type from 'magic' library ({str(e)}), using content-based detection")

        # Content-based detection (most reliable for text formats)
        # Check for HTML patterns first - if found, override magic's text/plain
        has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS)

        # Always trust headers first
        if 'text/plain' in http_content_header:
            self.is_plaintext = True
        if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
            self.is_rss = True
        elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
            self.is_json = True
        elif any(s in http_content_header for s in XML_CONTENT_TYPES):
            # Only mark as generic XML if not already detected as RSS
            if not self.is_rss:
                self.is_xml = True
        elif 'pdf' in magic_content_header:
            self.is_pdf = True
        ###
        elif has_html_patterns or http_content_header == 'text/html':
            self.is_html = True
        # If magic says text/plain and we found no HTML patterns, trust it
        elif magic_result == 'text/plain':
            self.is_plaintext = True
            logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)")
        elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
            self.is_json = True
        # magic will call a rss document 'xml'
        elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES):
            self.is_rss = True
        elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES):
            # Generic XML that's not RSS/Atom (RSS/Atom checked above)
            self.is_xml = True
        elif '%pdf-1' in test_content:
            self.is_pdf = True
        elif http_content_header.startswith('text/'):
            self.is_plaintext = True
        # Only trust magic for 'text' if no other patterns matched
        elif 'text' in magic_content_header:
            self.is_plaintext = True
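To make the intent of the new module concrete, here is a small usage sketch (mine, not part of the diff); the feed body is made up, and it assumes the python-magic dependency is importable exactly as the module above does.

```python
# Hedged sketch: the server header lies (generic octet-stream), but content
# sniffing still classifies the body as an RSS feed.
from changedetectionio.processors.magic import guess_stream_type

body = '<?xml version="1.0"?><rss version="2.0"><channel><title>demo</title></channel></rss>'
stream = guess_stream_type(http_content_header='application/octet-stream', content=body)

assert stream.is_rss          # matched via the '<rss' content pattern
assert not stream.is_html     # no HTML_PATTERNS hit, so not treated as HTML
```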
@@ -13,6 +13,8 @@ from changedetectionio import html_tools, content_fetchers
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from loguru import logger

from changedetectionio.processors.magic import guess_stream_type

urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

name = 'Webpage Text/HTML, JSON and PDF changes'
@@ -20,6 +22,9 @@ description = 'Detects all text changes where possible'

json_filter_prefixes = ['json:', 'jq:', 'jqraw:']

# Assume it's this type if the server says nothing on content-type
DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER = 'text/html'

class FilterNotFoundInResponse(ValueError):
def __init__(self, msg, screenshot=None, xpath_data=None):
self.screenshot = screenshot
@@ -45,6 +50,9 @@ class perform_site_check(difference_detection_processor):
if not watch:
raise Exception("Watch no longer exists.")

ctype_header = self.fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower()
stream_content_type = guess_stream_type(http_content_header=ctype_header, content=self.fetcher.content)

# Unset any existing notification error
update_obj = {'last_notification_error': False, 'last_error': False}

@@ -54,7 +62,7 @@ class perform_site_check(difference_detection_processor):
self.xpath_data = self.fetcher.xpath_data

# Track the content type
update_obj['content_type'] = self.fetcher.get_all_headers().get('content-type', '').lower()
update_obj['content_type'] = ctype_header

# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU
@@ -69,24 +77,12 @@ class perform_site_check(difference_detection_processor):
# https://stackoverflow.com/questions/41817578/basic-method-chaining ?
# return content().textfilter().jsonextract().checksumcompare() ?

is_json = 'application/json' in self.fetcher.get_all_headers().get('content-type', '').lower()
is_html = not is_json
is_rss = False

ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
# Go into RSS preprocess for converting CDATA/comment to usable text
if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
if '<rss' in self.fetcher.content[:100].lower():
self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
is_rss = True
if stream_content_type.is_rss:
self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)

# source: support, basically treat it as plaintext
if watch.is_source_type_url:
is_html = False
is_json = False

inline_pdf = self.fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in self.fetcher.content[:10]
if watch.is_pdf or 'application/pdf' in self.fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf:
if watch.is_pdf or stream_content_type.is_pdf:
from shutil import which
tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
if not which(tool):
@@ -130,11 +126,12 @@ class perform_site_check(difference_detection_processor):
has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip())
has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip())

if is_json and not has_filter_rule:
include_filters_rule.append("json:$")
has_filter_rule = True
if stream_content_type.is_json:
if not has_filter_rule:
# Force a reformat
include_filters_rule.append("json:$")
has_filter_rule = True

if is_json:
# Sort the JSON so we dont get false alerts when the content is just re-ordered
try:
self.fetcher.content = json.dumps(json.loads(self.fetcher.content), sort_keys=True)
@@ -142,23 +139,28 @@ class perform_site_check(difference_detection_processor):
# Might have just been a snippet, or otherwise bad JSON, continue
pass

if has_filter_rule:
for filter in include_filters_rule:
if any(prefix in filter for prefix in json_filter_prefixes):
stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter)
is_html = False
if has_filter_rule:
for filter in include_filters_rule:
if any(prefix in filter for prefix in json_filter_prefixes):
stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter)
if stripped_text_from_html:
stream_content_type.is_json = True
stream_content_type.is_html = False

if is_html or watch.is_source_type_url:
# We have 'watch.is_source_type_url' because we should be able to use selectors on the raw HTML but return just that selected HTML
if stream_content_type.is_html or watch.is_source_type_url or stream_content_type.is_plaintext or stream_content_type.is_rss or stream_content_type.is_xml or stream_content_type.is_pdf:

# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
html_content = self.fetcher.content

# If not JSON, and if it's not text/plain..
if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower():
# Some kind of "text" but definitely not RSS looking
if stream_content_type.is_plaintext:
# Don't run get_text or xpath/css filters on plaintext
# We are not HTML, we are not any kind of RSS, doesnt even look like HTML
stripped_text_from_html = html_content
else:
# If not JSON, and if it's not text/plain..
# Does it have some ld+json price data? used for easier monitoring
update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(self.fetcher.content)

@@ -172,13 +174,13 @@ class perform_site_check(difference_detection_processor):
html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url,
is_rss=is_rss)
is_rss=stream_content_type.is_rss)

elif filter_rule.startswith('xpath1:'):
html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
html_content=self.fetcher.content,
append_pretty_line_formatting=not watch.is_source_type_url,
is_rss=is_rss)
is_rss=stream_content_type.is_rss)
else:
html_content += html_tools.include_filters(include_filters=filter_rule,
html_content=self.fetcher.content,
@@ -197,7 +199,7 @@ class perform_site_check(difference_detection_processor):
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
render_anchor_tag_content=do_anchor,
is_rss=is_rss) # 1874 activate the <title workaround hack
is_rss=stream_content_type.is_rss) # 1874 activate the <title workaround hack

if watch.get('trim_text_whitespace'):
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
@@ -236,7 +238,7 @@ class perform_site_check(difference_detection_processor):

# Treat pages with no renderable text content as a change? No by default
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
status_code=self.fetcher.get_last_status_code(),
screenshot=self.fetcher.screenshot,
@@ -301,6 +303,11 @@ class perform_site_check(difference_detection_processor):
text_for_checksuming = stripped_text_from_html
if text_to_ignore:
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
# Some people prefer to also completely remove it
strip_ignored_lines = watch.get('strip_ignored_lines') if watch.get('strip_ignored_lines') is not None else self.datastore.data['settings']['application'].get('strip_ignored_lines')
if strip_ignored_lines:
# @todo add test in the 'preview' mode, check the widget works? compare to datastruct
stripped_text_from_html = text_for_checksuming

# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
@@ -243,14 +243,15 @@ def handle_watch_update(socketio, **kwargs):

general_stats = {
'count_errors': errored_count,
'has_unviewed': datastore.has_unviewed
'unread_changes_count': datastore.unread_changes_count
}

# Debug what's being emitted
# logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}")

# Emit to all clients (no 'broadcast' parameter needed - it's the default behavior)
socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats})
socketio.emit("watch_update", {'watch': watch_data})
socketio.emit("general_stats_update", general_stats)

# Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues
logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}")
@@ -9,7 +9,7 @@ set -x
# SOCKS5 related - start simple Socks5 proxy server
# SOCKSTEST=xyz should show in the logs of this service to confirm it fetched
docker run --network changedet-network -d --hostname socks5proxy --rm --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy
docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth serjs/go-socks5-proxy
docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth -e REQUIRE_AUTH=false serjs/go-socks5-proxy

echo "---------------------------------- SOCKS5 -------------------"
# SOCKS5 related - test from proxies.json
@@ -117,15 +117,16 @@ $(document).ready(function () {
}
})

socket.on('general_stats_update', function (general_stats) {
// Tabs at bottom of list
$('#watch-table-wrapper').toggleClass("has-unread-changes", general_stats.unread_changes_count !==0)
$('#watch-table-wrapper').toggleClass("has-error", general_stats.count_errors !== 0)
$('#post-list-with-errors a').text(`With errors (${ new Intl.NumberFormat(navigator.language).format(general_stats.count_errors) })`);
$('#unread-tab-counter').text(new Intl.NumberFormat(navigator.language).format(general_stats.unread_changes_count));
});

socket.on('watch_update', function (data) {
const watch = data.watch;
const general_stats = data.general_stats;

// Log the entire watch object for debugging
console.log('!!! WATCH UPDATE EVENT RECEIVED !!!');
console.log(`${watch.event_timestamp} - Watch update ${watch.uuid} - Checking now - ${watch.checking_now} - UUID in URL ${window.location.href.includes(watch.uuid)}`);
console.log('Watch data:', watch);
console.log('General stats:', general_stats);

// Updating watch table rows
const $watchRow = $('tr[data-watch-uuid="' + watch.uuid + '"]');
@@ -150,13 +151,6 @@ $(document).ready(function () {

console.log('Updated UI for watch:', watch.uuid);
}

// Tabs at bottom of list
$('#post-list-mark-views').toggleClass("has-unviewed", general_stats.has_unviewed);
$('#post-list-unread').toggleClass("has-unviewed", general_stats.has_unviewed);
$('#post-list-with-errors').toggleClass("has-error", general_stats.count_errors !== 0)
$('#post-list-with-errors a').text(`With errors (${ general_stats.count_errors })`);

$('body').toggleClass('checking-now', watch.checking_now && window.location.href.includes(watch.uuid));
});
@@ -17,18 +17,6 @@ body.checking-now {
position: fixed;
}

#post-list-buttons {
#post-list-with-errors.has-error {
display: inline-block !important;
}
#post-list-mark-views.has-unviewed {
display: inline-block !important;
}
#post-list-unread.has-unviewed {
display: inline-block !important;
}
}
@@ -127,5 +127,44 @@
display: inline-block !important;
}
}

}

#watch-table-wrapper {
/* general styling */
#post-list-buttons {
text-align: right;
padding: 0px;
margin: 0px;

li {
display: inline-block;
}

a {
border-top-left-radius: initial;
border-top-right-radius: initial;
border-bottom-left-radius: 5px;
border-bottom-right-radius: 5px;
}
}

/* post list dynamically on/off stuff */

&.has-error {
#post-list-buttons {
#post-list-with-errors {
display: inline-block !important;
}
}
}

&.has-unread-changes {
#post-list-buttons {
#post-list-unread, #post-list-mark-views, #post-list-unread {
display: inline-block !important;
}
}
}
}
@@ -34,7 +34,6 @@
transition: all 0.2s ease;
cursor: pointer;
display: block;
min-width: 60px;
text-align: center;
}
@@ -203,24 +203,6 @@ code {
}

#post-list-buttons {
text-align: right;
padding: 0px;
margin: 0px;

li {
display: inline-block;
}

a {
border-top-left-radius: initial;
border-top-right-radius: initial;
border-bottom-left-radius: 5px;
border-bottom-right-radius: 5px;
}
}

body:after {
content: "";
background: linear-gradient(130deg, var(--color-background-gradient-first), var(--color-background-gradient-second) 41.07%, var(--color-background-gradient-third) 84.05%);
File diff suppressed because one or more lines are too long
@@ -202,14 +202,13 @@ class ChangeDetectionStore:
return seconds

@property
def has_unviewed(self):
if not self.__data.get('watching'):
return None

def unread_changes_count(self):
unread_changes_count = 0
for uuid, watch in self.__data['watching'].items():
if watch.history_n >= 2 and watch.viewed == False:
return True
return False
unread_changes_count += 1

return unread_changes_count

@property
def data(self):
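In plain terms, the datastore property changes from a short-circuiting boolean ("is there at least one unread watch?") into a full count, which is what the new Unread (n) tab and the general_stats_update socket payload need. A tiny standalone sketch of the same counting idea (hypothetical classes, not the project's real ones):

```python
# Hedged sketch: counting unread watches instead of returning a bare boolean.
class FakeWatch(dict):
    history_n = 2      # at least two snapshots exist
    viewed = False     # the latest change has not been looked at yet

watching = {'uuid-a': FakeWatch(), 'uuid-b': FakeWatch()}

unread_changes_count = sum(
    1 for w in watching.values() if w.history_n >= 2 and w.viewed == False
)
print(unread_changes_count)  # 2, the number the template renders inside #unread-tab-counter
```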
@@ -1,21 +1,29 @@
{% macro render_field(field) %}
<div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field.label }}</div>
<div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }}
{% if field.top_errors %}
<ul class="errors top-errors">
{% for error in field.top_errors %}
<li>{{ error }}</li>
{% endfor %}
</ul>
{% endif %}
{% if field.errors %}
<ul class=errors>
{% for error in field.errors %}
<li>{{ error }}</li>
{% endfor %}
</ul>
{% endif %}
</div>
<div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field.label }}</div>
<div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }}
{% if field.top_errors %}
top
<ul class="errors top-errors">
{% for error in field.top_errors %}
<li>{{ error }}</li>
{% endfor %}
</ul>
{% endif %}
{% if field.errors %}
<ul class=errors>
{% if field.errors is mapping and 'form' in field.errors %}
{# and subfield form errors, such as used in RequiredFormField() for TimeBetweenCheckForm sub form #}
{% set errors = field.errors['form'] %}
{% else %}
{# regular list of errors with this field #}
{% set errors = field.errors %}
{% endif %}
{% for error in errors %}
<li>{{ error }}</li>
{% endfor %}
</ul>
{% endif %}
</div>
{% endmacro %}

{% macro render_checkbox_field(field) %}
@@ -26,7 +26,10 @@
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
</ul>
</span>

<br><br>
<div class="pure-control-group">
{{ render_ternary_field(form.strip_ignored_lines) }}
</div>
</fieldset>

<fieldset>
@@ -75,7 +75,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
wait_for_all_checks(client)
time.sleep(0.5)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# The trigger line is REMOVED, this should trigger
set_original(excluding='The golden line')
@@ -84,7 +84,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

time.sleep(1)

@@ -98,14 +98,14 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
wait_for_all_checks(client)
time.sleep(1)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# Remove it again, and we should get a trigger
set_original(excluding='The golden line')
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
@@ -169,7 +169,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa

wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# The trigger line is ADDED, this should trigger
set_original(add_line='<p>Oh yes please</p>')
@@ -177,7 +177,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))

assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

# Takes a moment for apprise to fire
wait_for_notification_endpoint_output()
@@ -38,9 +38,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert b'test-endpoint' in res.data

# Default no password set, this stuff should be always available.
@@ -74,9 +74,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
res = client.get(url_for("ui.ui_edit.watch_get_latest_html", uuid=uuid))
assert b'which has this one new line' in res.data

# Now something should be ready, indicated by having a 'unviewed' class
# Now something should be ready, indicated by having a 'has-unread-changes' class
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

# #75, and it should be in the RSS feed
rss_token = extract_rss_token_from_UI(client)
@@ -90,7 +90,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure

assert expected_url.encode('utf-8') in res.data
#
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
# Following the 'diff' link, it should no longer display as 'has-unread-changes' even after we recheck it a few times
res = client.get(url_for("ui.ui_views.diff_history_page", uuid=uuid))
assert b'selected=""' in res.data, "Confirm diff history page loaded"

@@ -111,12 +111,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))

assert b'unviewed' not in res.data
assert b'class="has-unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert b'class="has-unread-changes' not in res.data
assert b'head title' in res.data # Should be ON by default
assert b'test-endpoint' in res.data

@@ -140,8 +140,8 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
wait_for_all_checks(client)

res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'class="has-unviewed' in res.data
assert b'has-unread-changes' in res.data
assert b'class="has-unread-changes' in res.data
assert b'head title' not in res.data # should now be off

@@ -151,8 +151,8 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# hit the mark all viewed link
res = client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)

assert b'class="has-unviewed' not in res.data
assert b'unviewed' not in res.data
assert b'class="has-unread-changes' not in res.data
assert b'has-unread-changes' not in res.data

# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
client.get(url_for("ui.clear_watch_history", uuid=uuid))
@@ -165,3 +165,166 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# Cleanup everything
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_non_text_mime_or_downloads(client, live_server, measure_memory_usage):
"""

https://github.com/dgtlmoon/changedetection.io/issues/3434
I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8,
but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog),
changedetection somehow ignores all line breaks and treats the document file as if everything is on one line.

WHAT THIS DOES - makes the system rely on 'magic' to determine what is it

:param client:
:param live_server:
:param measure_memory_usage:
:return:
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""some random text that should be split by line
and not parsed with html_to_text
this way we know that it correctly parsed as plain text
\r\n
ok\r\n
got it\r\n
""")

test_url = url_for('test_endpoint', content_type="application/octet-stream", _external=True)

# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)

assert b"1 Imported" in res.data

wait_for_all_checks(client)

### check the front end
res = client.get(
url_for("ui.ui_views.preview_page", uuid="first"),
follow_redirects=True
)
assert b"some random text that should be split by line\n" in res.data
####

# Check the snapshot by API that it has linefeeds too
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
res = client.get(
url_for("watchhistory", uuid=watch_uuid),
headers={'x-api-key': api_key},
)

# Fetch a snapshot by timestamp, check the right one was found
res = client.get(
url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]),
headers={'x-api-key': api_key},
)
assert b"some random text that should be split by line\n" in res.data


res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)


def test_standard_text_plain(client, live_server, measure_memory_usage):
"""

https://github.com/dgtlmoon/changedetection.io/issues/3434
I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8,
but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog),
changedetection somehow ignores all line breaks and treats the document file as if everything is on one line.

The real bug here can be that it will try to process plain-text as HTML, losing <etc>

:param client:
:param live_server:
:param measure_memory_usage:
:return:
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""some random text that should be split by line
and not parsed with html_to_text
<title>Even this title should stay because we are just plain text</title>
this way we know that it correctly parsed as plain text
\r\n
ok\r\n
got it\r\n
""")

test_url = url_for('test_endpoint', content_type="text/plain", _external=True)

# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)

assert b"1 Imported" in res.data

wait_for_all_checks(client)

### check the front end
res = client.get(
url_for("ui.ui_views.preview_page", uuid="first"),
follow_redirects=True
)

assert b"some random text that should be split by line\n" in res.data
####

# Check the snapshot by API that it has linefeeds too
watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
res = client.get(
url_for("watchhistory", uuid=watch_uuid),
headers={'x-api-key': api_key},
)

# Fetch a snapshot by timestamp, check the right one was found
res = client.get(
url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]),
headers={'x-api-key': api_key},
)
assert b"some random text that should be split by line\n" in res.data
assert b"<title>Even this title should stay because we are just plain text</title>" in res.data

res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)

# Server says its plaintext, we should always treat it as plaintext
def test_plaintext_even_if_xml_content(client, live_server, measure_memory_usage):

with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("""<?xml version="1.0" encoding="utf-8"?>
<resources xmlns:tools="http://schemas.android.com/tools">
<!--Activity and fragment titles-->
<string name="feed_update_receiver_name">Abonnementen bijwerken</string>
</resources>
""")

test_url = url_for('test_endpoint', content_type="text/plain", _external=True)

# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)

assert b"1 Imported" in res.data

wait_for_all_checks(client)

res = client.get(
url_for("ui.ui_views.preview_page", uuid="first"),
follow_redirects=True
)

assert b'<string name="feed_update_receiver_name"' in res.data

res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
@@ -58,6 +58,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""):

has_watch_update = False
has_unviewed_update = False
got_general_stats_update = False

for i in range(10):
# Get received events
@@ -65,15 +66,11 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""):

if received:
logger.info(f"Received {len(received)} events after {i+1} seconds")

# Check for watch_update events with unviewed=True
for event in received:
if event['name'] == 'watch_update':
has_watch_update = True
if event['args'][0]['watch'].get('unviewed', False):
has_unviewed_update = True
logger.info("Found unviewed update event!")
break
if event['name'] == 'general_stats_update':
got_general_stats_update = True

if has_unviewed_update:
break
@@ -92,7 +89,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""):
assert has_watch_update, "No watch_update events received"

# Verify we received an unviewed event
assert has_unviewed_update, "No watch_update event with unviewed=True received"
assert got_general_stats_update, "Got general stats update event"

# Alternatively, check directly if the watch in the datastore is marked as unviewed
from changedetectionio.flask_app import app
@@ -107,9 +107,9 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert b'/test-endpoint' in res.data

# The page changed, BUT the text is still there, just the rest of it changes, we should not see a change
@@ -120,9 +120,9 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert b'/test-endpoint' in res.data

# 2548
@@ -131,7 +131,7 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data


# Now we set a change where the text is gone AND its different content, it should now trigger
@@ -139,7 +139,7 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data
@@ -125,7 +125,7 @@ def test_conditions_with_text_and_number(client, live_server):
time.sleep(2)
# 75 is > 20 and < 100 and contains "5"
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data


# Case 2: Change with one condition violated
@@ -141,7 +141,7 @@ def test_conditions_with_text_and_number(client, live_server):

# Should NOT be marked as having changes since not all conditions are met
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
@@ -299,7 +299,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):

wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# Check the content saved initially, even tho a condition was set - this is the first snapshot so shouldnt be affected by conditions
res = client.get(
@@ -326,7 +326,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
wait_for_all_checks(client)

res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data #because this will be like 0.90 not 0.8 threshold
assert b'has-unread-changes' not in res.data #because this will be like 0.90 not 0.8 threshold

############### Now change it a MORE THAN 50%
test_return_data = """<html>
@@ -345,7 +345,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
assert b'Queued 1 watch for rechecking.' in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data
# cleanup for the next
client.get(
url_for("ui.form_delete", uuid="all"),
@@ -116,10 +116,10 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)

# It should have 'unviewed' still
# It should have 'has-unread-changes' still
# Because it should be looking at only that 'sametext' id
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data


# Tests the whole stack works with the CSS Filter

@@ -190,7 +190,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage):

wait_for_all_checks(client)

# so that we set the state to 'unviewed' after all the edits
# so that we set the state to 'has-unread-changes' after all the edits
client.get(url_for("ui.ui_views.diff_history_page", uuid="first"))

# Make a change to header/footer/nav

@@ -31,7 +31,7 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text):

res = client.get(url_for("watchlist.index"))
# no change
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert bytes(expected_text.encode('utf-8')) in res.data
@@ -174,10 +174,10 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should have 'unviewed' still
# It should have 'has-unread-changes' still
# Because it should be looking at only that 'sametext' id
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

# Check HTML conversion detected and workd
res = client.get(

@@ -264,8 +264,6 @@ def test_limit_tag_ui(client, live_server, measure_memory_usage):
client.get(url_for('ui.mark_all_viewed', tag=tag_uuid), follow_redirects=True)
wait_for_all_checks(client)

with open('/tmp/fuck.html', 'wb') as f:
f.write(res.data)
# Should be only 1 unviewed
res = client.get(url_for("watchlist.index"))
assert res.data.count(b' unviewed ') == 1
@@ -3,9 +3,8 @@
|
||||
import time
|
||||
import os
|
||||
import json
|
||||
import logging
|
||||
from flask import url_for
|
||||
from .util import live_server_setup, wait_for_all_checks
|
||||
from .util import wait_for_all_checks
|
||||
from urllib.parse import urlparse, parse_qs
|
||||
|
||||
def test_consistent_history(client, live_server, measure_memory_usage):
|
||||
|
||||
@@ -58,3 +58,39 @@ def test_ignore(client, live_server, measure_memory_usage):
# Should be in base.html
assert b'csrftoken' in res.data

def test_strip_ignore_lines(client, live_server, measure_memory_usage):
# live_server_setup(live_server) # Setup on conftest per function
set_original_ignore_response()

# Goto the settings page, add our ignore text
res = client.post(
url_for("settings.settings_page"),
data={
"requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-strip_ignored_lines": "y",
"application-global_ignore_text": "Which is across multiple",
'application-fetch_backend': "html_requests"
},
follow_redirects=True
)
assert b"Settings updated." in res.data

test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data

# Give the thread time to pick it up
wait_for_all_checks(client)
uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))

# It should not be in the preview anymore
res = client.get(url_for("ui.ui_views.preview_page", uuid=uuid))
assert b'<div class="ignored">' not in res.data
assert b'Which is across multiple' not in res.data
@@ -128,9 +128,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert b'/test-endpoint' in res.data

# Make a change

@@ -141,9 +141,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert b'/test-endpoint' in res.data

@@ -154,7 +154,7 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa
wait_for_all_checks(client)

res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

res = client.get(url_for("ui.ui_views.preview_page", uuid="first"))
@@ -222,9 +222,9 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
# Trigger a check
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
# It should report nothing found (no new 'unviewed' class), adding random ignore text should not cause a change
# It should report nothing found (no new 'has-unread-changes' class), adding random ignore text should not cause a change
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert b'/test-endpoint' in res.data
#####

@@ -238,10 +238,10 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))

assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert b'/test-endpoint' in res.data

# Just to be sure.. set a regular modified change that will trigger it

@@ -249,7 +249,7 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
@@ -111,7 +111,7 @@ def test_render_anchor_tag_content_true(client, live_server, measure_memory_usag
assert '(/modified_link)' in res.data.decode()

# since the link has changed, and we chose to render anchor tag content,
# we should detect a change (new 'unviewed' class)
# we should detect a change (new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b"unviewed" in res.data
assert b"/test-endpoint" in res.data

@@ -77,9 +77,9 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server, me
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data
assert b'/test-endpoint' in res.data

@@ -124,8 +124,8 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server, measu
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should have 'unviewed' still
# It should have 'has-unread-changes' still
# Because it should be looking at only that 'sametext' id
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data
@@ -89,7 +89,7 @@ def test_check_ignore_whitespace(client, live_server, measure_memory_usage):
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert b'/test-endpoint' in res.data

@@ -26,7 +26,7 @@ def test_jinja2_in_url_query(client, live_server, measure_memory_usage):
assert b"Watch added" in res.data
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(
url_for("ui.ui_views.preview_page", uuid="first"),
follow_redirects=True

@@ -51,7 +51,7 @@ def test_jinja2_security_url_query(client, live_server, measure_memory_usage):
assert b"Watch added" in res.data
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'is invalid and cannot be used' in res.data
# Some of the spewed output from the subclasses
@@ -280,9 +280,9 @@ def check_json_filter(json_filter, client, live_server):
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should have 'unviewed' still
# It should have 'has-unread-changes' still
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

# Should not see this, because its not in the JSONPath we entered
res = client.get(url_for("ui.ui_views.diff_history_page", uuid="first"))

@@ -418,14 +418,14 @@ def check_json_ext_filter(json_filter, client, live_server):
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should have 'unviewed'
# It should have 'has-unread-changes'
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

res = client.get(url_for("ui.ui_views.preview_page", uuid="first"))

# We should never see 'ForSale' because we are selecting on 'Sold' in the rule,
# But we should know it triggered ('unviewed' assert above)
# But we should know it triggered ('has-unread-changes' assert above)
assert b'ForSale' not in res.data
assert b'Sold' in res.data

@@ -465,7 +465,7 @@ def test_ignore_json_order(client, live_server, measure_memory_usage):
wait_for_all_checks(client)

res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# Just to be sure it still works
with open("test-datastore/endpoint-content.txt", "w") as f:

@@ -476,7 +476,7 @@ def test_ignore_json_order(client, live_server, measure_memory_usage):
wait_for_all_checks(client)

res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
@@ -40,9 +40,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

#####################

@@ -62,9 +62,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
watch = live_server.app.config['DATASTORE'].data['watching'][uuid]

@@ -92,9 +92,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
time.sleep(0.2)

@@ -108,7 +108,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON
assert b'has-unread-changes' in res.data # A change should have registered because empty_pages_are_a_change is ON
assert b'fetch-error' not in res.data

#
@@ -49,9 +49,9 @@ def test_fetch_pdf(client, live_server, measure_memory_usage):
wait_for_all_checks(client)

# Now something should be ready, indicated by having a 'unviewed' class
# Now something should be ready, indicated by having a 'has-unread-changes' class
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

# The original checksum should not be here anymore (cdio adds it to the bottom of the text)

@@ -47,9 +47,9 @@ def test_fetch_pdf(client, live_server, measure_memory_usage):
wait_for_all_checks(client)

# Now something should be ready, indicated by having a 'unviewed' class
# Now something should be ready, indicated by having a 'has-unread-changes' class
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

# The original checksum should not be here anymore (cdio adds it to the bottom of the text)
@@ -112,7 +112,7 @@ def test_itemprop_price_change(client, live_server):
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'180.45' in res.data
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
time.sleep(0.2)

@@ -129,7 +129,7 @@ def test_itemprop_price_change(client, live_server):
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'120.45' in res.data
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
@@ -178,7 +178,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
assert b'more than one price detected' not in res.data
# BUT the new price should show, even tho its within limits
assert b'1,000.45' in res.data or b'1000.45' in res.data #depending on locale
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# price changed to something LESS than min (900), SHOULD be a change
set_original_response(props_markup=instock_props[0], price='890.45')
@@ -188,7 +188,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'890.45' in res.data
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

client.get(url_for("ui.mark_all_viewed"))

@@ -200,7 +200,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'820.45' in res.data
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data
client.get(url_for("ui.mark_all_viewed"))

# price changed to something MORE than max (1100.10), SHOULD be a change

@@ -210,7 +210,7 @@ def _run_test_minmax_limit(client, extra_watch_edit_form):
res = client.get(url_for("watchlist.index"))
# Depending on the LOCALE it may be either of these (generally for US/default/etc)
assert b'1,890.45' in res.data or b'1890.45' in res.data
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

@@ -294,7 +294,7 @@ def test_itemprop_percent_threshold(client, live_server):
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'960.45' in res.data
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# Bigger INCREASE change than the threshold should trigger
set_original_response(props_markup=instock_props[0], price='1960.45')
@@ -302,7 +302,7 @@ def test_itemprop_percent_threshold(client, live_server):
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'1,960.45' in res.data or b'1960.45' in res.data #depending on locale
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

# Small decrease should NOT trigger

@@ -312,7 +312,7 @@ def test_itemprop_percent_threshold(client, live_server):
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'1,950.45' in res.data or b'1950.45' in res.data #depending on locale
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
@@ -111,7 +111,7 @@ def test_basic_cdata_rss_markup(client, live_server, measure_memory_usage):
set_original_cdata_xml()

test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
test_url = url_for('test_endpoint', content_type="application/atom+xml; charset=UTF-8", _external=True)

# Add our URL to the import page
res = client.post(

@@ -139,7 +139,7 @@ def test_rss_xpath_filtering(client, live_server, measure_memory_usage):
set_original_cdata_xml()

test_url = url_for('test_endpoint', content_type="application/xml", _external=True)
test_url = url_for('test_endpoint', content_type="application/atom+xml; charset=UTF-8", _external=True)

res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),

@@ -43,9 +43,9 @@ def test_check_basic_change_detection_functionality_source(client, live_server,
wait_for_all_checks(client)

# Now something should be ready, indicated by having a 'unviewed' class
# Now something should be ready, indicated by having a 'has-unread-changes' class
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

res = client.get(
url_for("ui.ui_views.diff_history_page", uuid="first"),
@@ -96,7 +96,7 @@ def test_trigger_functionality(client, live_server, measure_memory_usage):
# so that we set the state to 'unviewed' after all the edits
# so that we set the state to 'has-unread-changes' after all the edits
client.get(url_for("ui.ui_views.diff_history_page", uuid="first"))

# Trigger a check

@@ -104,9 +104,9 @@ def test_trigger_functionality(client, live_server, measure_memory_usage):
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
assert b'/test-endpoint' in res.data

# Make a change

@@ -116,9 +116,9 @@ def test_trigger_functionality(client, live_server, measure_memory_usage):
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# Now set the content which contains the trigger text
set_modified_with_trigger_text_response()

@@ -126,7 +126,7 @@ def test_trigger_functionality(client, live_server, measure_memory_usage):
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

# https://github.com/dgtlmoon/changedetection.io/issues/616
# Apparently the actual snapshot that contains the trigger never shows
@@ -42,7 +42,7 @@ def test_trigger_regex_functionality(client, live_server, measure_memory_usage):
# It should report nothing found (just a new one shouldnt have anything)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

### test regex
res = client.post(

@@ -54,7 +54,7 @@ def test_trigger_regex_functionality(client, live_server, measure_memory_usage):
follow_redirects=True
)
wait_for_all_checks(client)
# so that we set the state to 'unviewed' after all the edits
# so that we set the state to 'has-unread-changes' after all the edits
client.get(url_for("ui.ui_views.diff_history_page", uuid="first"))

with open("test-datastore/endpoint-content.txt", "w") as f:

@@ -65,7 +65,7 @@ def test_trigger_regex_functionality(client, live_server, measure_memory_usage):
# It should report nothing found (nothing should match the regex)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("regex test123<br>\nsomething 123")

@@ -73,7 +73,7 @@ def test_trigger_regex_functionality(client, live_server, measure_memory_usage):
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

# Cleanup everything
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
@@ -69,7 +69,7 @@ def test_trigger_regex_functionality_with_filter(client, live_server, measure_me
# It should report nothing found (nothing should match the regex and filter)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# now this should trigger something
with open("test-datastore/endpoint-content.txt", "w") as f:

@@ -78,7 +78,7 @@ def test_trigger_regex_functionality_with_filter(client, live_server, measure_me
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

# Cleanup everything
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
@@ -2,12 +2,107 @@
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
from ..forms import REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT, REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT

def test_recheck_time_field_validation_global_settings(client, live_server):
"""
Tests that the global settings time field has atleast one value for week/day/hours/minute/seconds etc entered
class globalSettingsRequestForm(Form):
time_between_check = RequiredFormField(TimeBetweenCheckForm)
"""
res = client.post(
url_for("settings.settings_page"),
data={
"requests-time_between_check-weeks": '',
"requests-time_between_check-days": '',
"requests-time_between_check-hours": '',
"requests-time_between_check-minutes": '',
"requests-time_between_check-seconds": '',
},
follow_redirects=True
)

assert REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT.encode('utf-8') in res.data

def test_recheck_time_field_validation_single_watch(client, live_server):
"""
Tests that the global settings time field has atleast one value for week/day/hours/minute/seconds etc entered
class globalSettingsRequestForm(Form):
time_between_check = RequiredFormField(TimeBetweenCheckForm)
"""
test_url = url_for('test_endpoint', _external=True)

# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": test_url},
follow_redirects=True
)

assert b"1 Imported" in res.data

res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={
"url": test_url,
'fetch_backend': "html_requests",
"time_between_check_use_default": "", # OFF
"time_between_check-weeks": '',
"time_between_check-days": '',
"time_between_check-hours": '',
"time_between_check-minutes": '',
"time_between_check-seconds": '',
},
follow_redirects=True
)

assert REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT.encode('utf-8') in res.data

# Now set some time
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={
"url": test_url,
'fetch_backend': "html_requests",
"time_between_check_use_default": "", # OFF
"time_between_check-weeks": '',
"time_between_check-days": '',
"time_between_check-hours": '',
"time_between_check-minutes": '5',
"time_between_check-seconds": '',
},
follow_redirects=True
)

assert b"Updated watch." in res.data
assert REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT.encode('utf-8') not in res.data

# Now set to use defaults
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={
"url": test_url,
'fetch_backend': "html_requests",
"time_between_check_use_default": "y", # ON YES
"time_between_check-weeks": '',
"time_between_check-days": '',
"time_between_check-hours": '',
"time_between_check-minutes": '',
"time_between_check-seconds": '',
},
follow_redirects=True
)

assert b"Updated watch." in res.data
assert REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT.encode('utf-8') not in res.data

def test_checkbox_open_diff_in_new_tab(client, live_server):

set_original_response()
# live_server_setup(live_server) # Setup on conftest per function

# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
@@ -78,3 +78,173 @@ def test_checkbox_open_diff_in_new_tab(client, live_server):
# Cleanup everything
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

def test_page_title_listing_behaviour(client, live_server):

set_original_response(extra_title="custom html")

# either the manually entered title/description or the page link should be visible
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-use_page_title_in_list": "",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Settings updated." in res.data

# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)

assert b"1 Imported" in res.data
wait_for_all_checks(client)

# We see the URL only, no title/description was manually entered
res = client.get(url_for("watchlist.index"))
assert url_for('test_endpoint', _external=True).encode('utf-8') in res.data

# Now 'my title' should override
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={
"url": url_for('test_endpoint', _external=True),
"title": "my title",
"fetch_backend": "html_requests",
"time_between_check_use_default": "y"},
follow_redirects=True
)
assert b"Updated watch." in res.data
res = client.get(url_for("watchlist.index"))
assert b"my title" in res.data

# Now we enable page <title> and unset the override title/description
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-use_page_title_in_list": "y",
"requests-time_between_check-minutes": 180,
'application-fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Settings updated." in res.data

# Page title description override should take precedence
res = client.get(url_for("watchlist.index"))
assert b"my title" in res.data

# Remove page title description override and it should fall back to title
res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first"),
data={
"url": url_for('test_endpoint', _external=True),
"title": "",
"fetch_backend": "html_requests",
"time_between_check_use_default": "y"},
follow_redirects=True
)
assert b"Updated watch." in res.data

# No page title description, and 'use_page_title_in_list' is on, it should show the <title>
res = client.get(url_for("watchlist.index"))
assert b"head titlecustom html" in res.data

def test_ui_viewed_unread_flag(client, live_server):

import time

set_original_response(extra_title="custom html")

# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": url_for('test_endpoint', _external=True)+"\r\n"+url_for('test_endpoint', _external=True)},
follow_redirects=True
)

assert b"2 Imported" in res.data
wait_for_all_checks(client)

set_modified_response()
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 2 watches for rechecking.' in res.data
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'<span id="unread-tab-counter">2</span>' in res.data
assert res.data.count(b'data-watch-uuid') == 2

# one should now be viewed, but two in total still
client.get(url_for("ui.ui_views.diff_history_page", uuid="first"))
res = client.get(url_for("watchlist.index"))
assert b'<span id="unread-tab-counter">1</span>' in res.data
assert res.data.count(b'data-watch-uuid') == 2

# check ?unread=1 works
res = client.get(url_for("watchlist.index")+"?unread=1")
assert res.data.count(b'data-watch-uuid') == 1
assert b'<span id="unread-tab-counter">1</span>' in res.data

# Mark all viewed test again
client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
time.sleep(0.2)
res = client.get(url_for("watchlist.index"))
assert b'<span id="unread-tab-counter">0</span>' in res.data
@@ -97,7 +97,7 @@ def test_unique_lines_functionality(client, live_server, measure_memory_usage):
follow_redirects=True
)
assert b"Updated watch." in res.data
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# Make a change
set_modified_swapped_lines()

@@ -108,16 +108,16 @@ def test_unique_lines_functionality(client, live_server, measure_memory_usage):
# Give the thread time to pick it up
wait_for_all_checks(client)

# It should report nothing found (no new 'unviewed' class)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data

# Now set the content which contains the new text and re-ordered existing text
set_modified_with_trigger_text_response()
client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("watchlist.index"))
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

@@ -157,7 +157,7 @@ def test_sort_lines_functionality(client, live_server, measure_memory_usage):
res = client.get(url_for("watchlist.index"))
# Should be a change registered
assert b'unviewed' in res.data
assert b'has-unread-changes' in res.data

res = client.get(
url_for("ui.ui_views.preview_page", uuid="first"),
@@ -1,12 +1,42 @@
# -*- coding: utf-8 -*-

import time

from flask import url_for
from .util import live_server_setup, wait_for_all_checks

from ..html_tools import *
from .util import wait_for_all_checks
from ..processors.magic import RSS_XML_CONTENT_TYPES


def set_rss_atom_feed_response(header=''):
test_return_data = f"""{header}<!-- Generated on Wed, 08 Oct 2025 08:42:33 -0700, really really honestly -->
<rss xmlns:atom="http://www.w3.org/2005/Atom" version="2.0">
<channel>
<atom:link href="https://store.waterpowered.com/news/collection//" rel="self" type="application/rss+xml"/>
<title>RSS Feed</title>
<link>
<![CDATA[ https://store.waterpowered.com/news/collection// ]]>
</link>
<description>
<![CDATA[ Events and Announcements for ]]>
</description>
<language>en-us</language>
<generator>water News RSS</generator>
<item>
<title> 🍁 Lets go discount</title>
<description><p class="bb_paragraph">ok heres the description</p></description>
<link>
<![CDATA[ https://store.waterpowered.com/news/app/1643320/view/511845698831908921 ]]>
</link>
<pubDate>Wed, 08 Oct 2025 15:28:55 +0000</pubDate>
<guid isPermaLink="true">https://store.waterpowered.com/news/app/1643320/view/511845698831908921</guid>
<enclosure url="https://clan.fastly.waterstatic.com/images/40721482/42822e5f00b2becf520ace9500981bb56f3a89f2.jpg" length="0" type="image/jpeg"/>
</item>
</channel>
</rss>"""

with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)

return None
@@ -208,7 +238,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server, measure_memo
wait_for_all_checks(client)

res = client.get(url_for("watchlist.index"))
assert b'unviewed' not in res.data
assert b'has-unread-changes' not in res.data
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
@@ -575,3 +605,47 @@ def test_xpath_20_function_string_join_matches(client, live_server, measure_memo
client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)

def _subtest_xpath_rss(client, content_type='text/html'):
# Add our URL to the import page
test_url = url_for('test_endpoint', content_type=content_type, _external=True)
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
follow_redirects=True
)

assert b"Watch added in Paused state, saving will unpause" in res.data

res = client.post(
url_for("ui.ui_edit.edit_page", uuid="first", unpause_on_save=1),
data={
"url": test_url,
"include_filters": "xpath://item",
"tags": '',
"fetch_backend": "html_requests",
"time_between_check_use_default": "y",
},
follow_redirects=True
)

assert b"unpaused" in res.data
wait_for_all_checks(client)

res = client.get(
url_for("ui.ui_views.preview_page", uuid="first"),
follow_redirects=True
)

assert b"Lets go discount" in res.data, f"When testing for Lets go discount called with content type '{content_type}'"
assert b"Events and Announcements" not in res.data, f"When testing for Lets go discount called with content type '{content_type}'" # It should not be here because thats not our selector target

client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)

# Be sure all-in-the-wild types of RSS feeds work with xpath
def test_rss_xpath(client, live_server):
for feed_header in ['', '<?xml version="1.0" encoding="utf-8"?>']:
set_rss_atom_feed_response(header=feed_header)
for content_type in RSS_XML_CONTENT_TYPES:
_subtest_xpath_rss(client, content_type=content_type)
@@ -1,4 +1,4 @@
openapi: 3.0.4
openapi: 3.1.0
info:
title: ChangeDetection.io API
description: |

@@ -28,7 +28,7 @@ info:
For example: `x-api-key: YOUR_API_KEY`

version: 0.1.0
version: 0.1.1
contact:
name: ChangeDetection.io
url: https://github.com/dgtlmoon/changedetection.io

@@ -129,7 +129,7 @@ components:
maxLength: 5000
title:
type: string
description: Custom title for the web page change monitor (watch)
description: Custom title for the web page change monitor (watch), not to be confused with page_title
maxLength: 5000
tag:
type: string

@@ -255,6 +255,11 @@ components:
type: integer
description: Unix timestamp in seconds of the last time the watch was viewed. Setting it to a value higher than `last_changed` in the "Update watch" endpoint marks the watch as viewed.
minimum: 0
link:
type: string
format: string
description: The watch URL rendered in case of any Jinja2 markup, always use this for listing.
readOnly: true

CreateWatch:
allOf:
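As a rough, illustrative sketch of the `last_viewed` behaviour described above (not part of this diff): assuming a local instance on port 5000, the `x-api-key` header from this spec, and GET/PUT `/api/v1/watch/{uuid}` routes as defined elsewhere in the full spec, a client could mark a single watch as viewed roughly like this:

import time
import requests  # third-party HTTP client, assumed to be installed

BASE = "http://localhost:5000/api/v1"            # assumed instance URL
HEADERS = {"x-api-key": "YOUR_API_KEY"}          # header name taken from the spec above
uuid = "095be615-a8ad-4c33-8e9c-c7612fbf6c9f"    # example UUID reused from the spec examples

# Read the watch to find its last_changed timestamp
watch = requests.get(f"{BASE}/watch/{uuid}", headers=HEADERS).json()

# Per the description above, any last_viewed value higher than last_changed marks the watch as viewed
payload = {"last_viewed": max(int(time.time()), int(watch.get("last_changed", 0)) + 1)}
requests.put(f"{BASE}/watch/{uuid}", headers=HEADERS, json=payload)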
@@ -395,9 +400,10 @@ paths:
example:
"095be615-a8ad-4c33-8e9c-c7612fbf6c9f":
uuid: "095be615-a8ad-4c33-8e9c-c7612fbf6c9f"
url: "http://example.com"
title: "Example Website Monitor"
tag: "550e8400-e29b-41d4-a716-446655440000"
url: "http://example.com?id={{1+1}} - the raw URL"
link: "http://example.com?id=2 - the rendered URL, always use this for listing."
title: "Example Website Monitor - manually entered title/description"
page_title: "The HTML <title> from the page"
tags: ["550e8400-e29b-41d4-a716-446655440000"]
paused: false
muted: false

@@ -407,9 +413,10 @@ paths:
last_changed: 1640995200
"7c9e6b8d-f2a1-4e5c-9d3b-8a7f6e4c2d1a":
uuid: "7c9e6b8d-f2a1-4e5c-9d3b-8a7f6e4c2d1a"
url: "https://news.example.org"
title: "News Site Tracker"
tag: "330e8400-e29b-41d4-a716-446655440001"
url: "http://example.com?id={{1+1}} - the raw URL"
link: "http://example.com?id=2 - the rendered URL, always use this for listing."
title: "News Site Tracker - manually entered title/description"
page_title: "The HTML <title> from the page"
tags: ["330e8400-e29b-41d4-a716-446655440001"]
paused: false
muted: true
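To make the difference between the raw `url` and the rendered `link` field in the listing examples above concrete, here is a minimal, hedged sketch of a consumer (the port, API key and `/api/v1/watch` path are assumptions based on the rest of this spec):

import requests  # third-party HTTP client, assumed to be installed

BASE = "http://localhost:5000/api/v1"      # assumed instance URL
HEADERS = {"x-api-key": "YOUR_API_KEY"}    # header name taken from the spec above

# The list endpoint returns a mapping of uuid -> watch summary, as in the examples above
watches = requests.get(f"{BASE}/watch", headers=HEADERS).json()

for uuid, watch in watches.items():
    # 'url' may still contain raw Jinja2 markup such as {{1+1}};
    # 'link' is the rendered URL, which the description above says to prefer for listings
    print(uuid, watch.get("page_title") or watch.get("title"), watch.get("link"))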
@@ -1215,7 +1222,6 @@ paths:
uuid: "095be615-a8ad-4c33-8e9c-c7612fbf6c9f"
url: "http://example.com"
title: "Example Website Monitor"
tag: "550e8400-e29b-41d4-a716-446655440000"
tags: ["550e8400-e29b-41d4-a716-446655440000"]
paused: false
muted: false
@@ -12,7 +12,7 @@ flask_wtf~=1.2
flask~=2.3
flask-socketio~=5.5.1
python-socketio~=5.13.0
python-engineio~=4.12.0
python-engineio~=4.12.3
inscriptis~=2.2
pytz
timeago~=1.0

@@ -39,7 +39,7 @@ jsonpath-ng~=1.5.3
# jq not available on Windows so must be installed manually

# Notification library
apprise==1.9.3
apprise==1.9.5

# - Needed for apprise/spush, and maybe others? hopefully doesnt trigger a rust compile.
# - Requires extra wheel for rPi, adds build time for arm/v8 which is not in piwheels

@@ -51,8 +51,8 @@ cryptography==44.0.1
# use any version other than 2.0.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
paho-mqtt!=2.0.*

# Used for CSS filtering
beautifulsoup4>=4.0.0
# Used for CSS filtering, JSON extraction from HTML
beautifulsoup4>=4.0.0,<=4.13.5

# XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
# #2328 - 5.2.0 and 5.2.1 had extra CPU flag CFLAGS set which was not compatible on older hardware

@@ -135,7 +135,7 @@ tzdata
pluggy ~= 1.5

# Needed for testing, cross-platform for process and system monitoring
psutil==7.0.0
psutil==7.1.0

ruff >= 0.11.2
pre_commit >= 4.2.0