mirror of https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-13 21:16:11 +00:00

Compare commits: timezone-i...add-button (1 commit)

| Author | SHA1 | Date |
|---|---|---|
|  | 0db2b76bf1 |  |

.github/ISSUE_TEMPLATE/bug_report.md (vendored): 4 changes
@@ -27,10 +27,6 @@ A clear and concise description of what the bug is.
**Version**
*Exact version* in the top right area: 0....

**How did you install?**

Docker, Pip, from source directly etc

**To Reproduce**

Steps to reproduce the behavior:
.gitignore (vendored): 1 change

@@ -10,6 +10,5 @@ dist
venv
test-datastore/*
test-datastore
test-memory.log
*.egg-info*
.vscode/settings.json
@@ -4,7 +4,7 @@ In any commercial activity involving 'Hosting' (as defined herein), whether in p

# Commercial License Agreement

This Commercial License Agreement ("Agreement") is entered into by and between Web Technologies s.r.o. here-in ("Licensor") and (your company or personal name) _____________ ("Licensee"). This Agreement sets forth the terms and conditions under which Licensor provides its software ("Software") and services to Licensee for the purpose of reselling the software either in part or full, as part of any commercial activity where the activity involves a third party.
This Commercial License Agreement ("Agreement") is entered into by and between Mr Morresi (the original creator of this software) here-in ("Licensor") and (your company or personal name) _____________ ("Licensee"). This Agreement sets forth the terms and conditions under which Licensor provides its software ("Software") and services to Licensee for the purpose of reselling the software either in part or full, as part of any commercial activity where the activity involves a third party.

### Definition of Hosting
@@ -37,7 +37,6 @@ RUN pip install --target=/dependencies playwright~=1.41.2 \

# Final image stage
FROM python:${PYTHON_VERSION}-slim-bookworm
LABEL org.opencontainers.image.source="https://github.com/dgtlmoon/changedetection.io"

RUN apt-get update && apt-get install -y --no-install-recommends \
    libxslt1.1 \
@@ -1,5 +1,4 @@
recursive-include changedetectionio/api *
recursive-include changedetectionio/apprise_plugin *
recursive-include changedetectionio/blueprint *
recursive-include changedetectionio/content_fetchers *
recursive-include changedetectionio/model *
@@ -2,7 +2,7 @@

# Read more https://github.com/dgtlmoon/changedetection.io/wiki

__version__ = '0.47.06'
__version__ = '0.46.04'

from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -58,7 +58,7 @@ class Watch(Resource):
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))

        if request.args.get('recheck'):
            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
            return "OK", 200
        if request.args.get('paused', '') == 'paused':
            self.datastore.data['watching'].get(uuid).pause()
@@ -246,7 +246,7 @@ class CreateWatch(Resource):

        new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
        if new_uuid:
            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
            return {'uuid': new_uuid}, 201
        else:
            return "Invalid or unsupported URL", 400
@@ -303,7 +303,7 @@ class CreateWatch(Resource):

        if request.args.get('recheck_all'):
            for uuid in self.datastore.data['watching'].keys():
                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
            return {'status': "OK"}, 200

        return list, 200
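Throughout this diff, recheck requests are queued as `queuedWatchMetaData.PrioritizedItem` objects. A minimal sketch of how such an item interacts with `queue.PriorityQueue` (the dataclass layout here follows the standard-library pattern and is an assumption, not the project's exact definition):

```python
import queue
from dataclasses import dataclass, field
from typing import Any

@dataclass(order=True)
class PrioritizedItem:
    priority: int
    item: Any = field(compare=False)  # dict payloads aren't orderable, so exclude them from comparison

q = queue.PriorityQueue()
q.put(PrioritizedItem(priority=5, item={'uuid': 'clone-uuid'}))
q.put(PrioritizedItem(priority=1, item={'uuid': 'recheck-uuid', 'skip_when_checksum_same': True}))

assert q.get().item['uuid'] == 'recheck-uuid'  # lower priority number is served first
```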
@@ -1,6 +1,5 @@
# include the decorator
from apprise.decorators import notify
from loguru import logger

@notify(on="delete")
@notify(on="deletes")
@@ -13,7 +12,6 @@ from loguru import logger
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
    import requests
    import json
    from urllib.parse import unquote_plus
    from apprise.utils import parse_url as apprise_parse_url
    from apprise import URLBase

@@ -48,7 +46,7 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
    if results:
        # Add our headers that the user can potentially over-ride if they wish
        # to to our returned result set and tidy entries by unquoting them
        headers = {unquote_plus(x): unquote_plus(y)
        headers = {URLBase.unquote(x): URLBase.unquote(y)
                   for x, y in results['qsd+'].items()}

        # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
@@ -56,22 +54,20 @@
        # but here we are making straight requests, so we need todo convert this against apprise's logic
        for k, v in results['qsd'].items():
            if not k.strip('+-') in results['qsd+'].keys():
                params[unquote_plus(k)] = unquote_plus(v)
                params[URLBase.unquote(k)] = URLBase.unquote(v)

        # Determine Authentication
        auth = ''
        if results.get('user') and results.get('password'):
            auth = (unquote_plus(results.get('user')), unquote_plus(results.get('user')))
            auth = (URLBase.unquote(results.get('user')), URLBase.unquote(results.get('user')))
        elif results.get('user'):
            auth = (unquote_plus(results.get('user')))
            auth = (URLBase.unquote(results.get('user')))

        # Try to auto-guess if it's JSON
        h = 'application/json; charset=utf-8'
        try:
            json.loads(body)
            headers['Content-Type'] = h
            headers['Content-Type'] = 'application/json; charset=utf-8'
        except ValueError as e:
            logger.warning(f"Could not automatically add '{h}' header to the {kwargs['meta'].get('schema')}:// notification because the document failed to parse as JSON: {e}")
            pass

        r(results.get('url'),
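For context, Apprise's `@notify` decorator is how a custom URL scheme like the one patched above gets registered. A minimal sketch (the scheme name and URL rewriting are illustrative; the exact contents of `kwargs['meta']` are an assumption based on the Apprise custom-notification docs):

```python
from apprise.decorators import notify

@notify(on="postjson")  # illustrative scheme: handles postjson:// URLs
def send_json(body, title, notify_type, *args, **kwargs):
    import requests
    # kwargs['meta'] carries the parsed source URL and its schema
    url = kwargs['meta'].get('url', '').replace('postjson://', 'https://', 1)
    r = requests.post(url, data=body,
                      headers={'Content-Type': 'application/json; charset=utf-8'})
    return r.ok  # returning False marks the notification as failed
```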
@@ -1,164 +0,0 @@
import datetime
import glob
import threading

from flask import Blueprint, render_template, send_from_directory, flash, url_for, redirect, abort
import os

from changedetectionio.store import ChangeDetectionStore
from changedetectionio.flask_app import login_optionally_required
from loguru import logger

BACKUP_FILENAME_FORMAT = "changedetection-backup-{}.zip"


def create_backup(datastore_path, watches: dict):
    logger.debug("Creating backup...")
    import zipfile
    from pathlib import Path

    # create a ZipFile object
    timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
    backupname = BACKUP_FILENAME_FORMAT.format(timestamp)
    backup_filepath = os.path.join(datastore_path, backupname)

    with zipfile.ZipFile(backup_filepath.replace('.zip', '.tmp'), "w",
                         compression=zipfile.ZIP_DEFLATED,
                         compresslevel=8) as zipObj:

        # Add the index
        zipObj.write(os.path.join(datastore_path, "url-watches.json"), arcname="url-watches.json")

        # Add the flask app secret
        zipObj.write(os.path.join(datastore_path, "secret.txt"), arcname="secret.txt")

        # Add any data in the watch data directory.
        for uuid, w in watches.items():
            for f in Path(w.watch_data_dir).glob('*'):
                zipObj.write(f,
                             # Use the full path to access the file, but make the file 'relative' in the Zip.
                             arcname=os.path.join(f.parts[-2], f.parts[-1]),
                             compress_type=zipfile.ZIP_DEFLATED,
                             compresslevel=8)

        # Create a list file with just the URLs, so it's easier to port somewhere else in the future
        list_file = "url-list.txt"
        with open(os.path.join(datastore_path, list_file), "w") as f:
            for uuid in watches:
                url = watches[uuid]["url"]
                f.write("{}\r\n".format(url))
        list_with_tags_file = "url-list-with-tags.txt"
        with open(
                os.path.join(datastore_path, list_with_tags_file), "w"
        ) as f:
            for uuid in watches:
                url = watches[uuid].get('url')
                tag = watches[uuid].get('tags', {})
                f.write("{} {}\r\n".format(url, tag))

        # Add it to the Zip
        zipObj.write(
            os.path.join(datastore_path, list_file),
            arcname=list_file,
            compress_type=zipfile.ZIP_DEFLATED,
            compresslevel=8,
        )
        zipObj.write(
            os.path.join(datastore_path, list_with_tags_file),
            arcname=list_with_tags_file,
            compress_type=zipfile.ZIP_DEFLATED,
            compresslevel=8,
        )

    # Now it's done, rename it so it shows up finally and its completed being written.
    os.rename(backup_filepath.replace('.zip', '.tmp'), backup_filepath.replace('.tmp', '.zip'))


def construct_blueprint(datastore: ChangeDetectionStore):
    backups_blueprint = Blueprint('backups', __name__, template_folder="templates")
    backup_threads = []

    @login_optionally_required
    @backups_blueprint.route("/request-backup", methods=['GET'])
    def request_backup():
        if any(thread.is_alive() for thread in backup_threads):
            flash("A backup is already running, check back in a few minutes", "error")
            return redirect(url_for('backups.index'))

        if len(find_backups()) > int(os.getenv("MAX_NUMBER_BACKUPS", 100)):
            flash("Maximum number of backups reached, please remove some", "error")
            return redirect(url_for('backups.index'))

        # Be sure we're written fresh
        datastore.sync_to_json()
        zip_thread = threading.Thread(target=create_backup, args=(datastore.datastore_path, datastore.data.get("watching")))
        zip_thread.start()
        backup_threads.append(zip_thread)
        flash("Backup building in background, check back in a few minutes.")

        return redirect(url_for('backups.index'))

    def find_backups():
        backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
        backups = glob.glob(backup_filepath)
        backup_info = []

        for backup in backups:
            size = os.path.getsize(backup) / (1024 * 1024)
            creation_time = os.path.getctime(backup)
            backup_info.append({
                'filename': os.path.basename(backup),
                'filesize': f"{size:.2f}",
                'creation_time': creation_time
            })

        backup_info.sort(key=lambda x: x['creation_time'], reverse=True)

        return backup_info

    @login_optionally_required
    @backups_blueprint.route("/download/<string:filename>", methods=['GET'])
    def download_backup(filename):
        import re
        filename = filename.strip()
        backup_filename_regex = BACKUP_FILENAME_FORMAT.format("\d+")

        full_path = os.path.join(os.path.abspath(datastore.datastore_path), filename)
        if not full_path.startswith(os.path.abspath(datastore.datastore_path)):
            abort(404)

        if filename == 'latest':
            backups = find_backups()
            filename = backups[0]['filename']

        if not re.match(r"^" + backup_filename_regex + "$", filename):
            abort(400)  # Bad Request if the filename doesn't match the pattern

        logger.debug(f"Backup download request for '{full_path}'")
        return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)

    @login_optionally_required
    @backups_blueprint.route("/", methods=['GET'])
    def index():
        backups = find_backups()
        output = render_template("overview.html",
                                 available_backups=backups,
                                 backup_running=any(thread.is_alive() for thread in backup_threads)
                                 )

        return output

    @login_optionally_required
    @backups_blueprint.route("/remove-backups", methods=['GET'])
    def remove_backups():

        backup_filepath = os.path.join(datastore.datastore_path, BACKUP_FILENAME_FORMAT.format("*"))
        backups = glob.glob(backup_filepath)
        for backup in backups:
            os.unlink(backup)

        flash("Backups were deleted.")

        return redirect(url_for('backups.index'))

    return backups_blueprint
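The `create_backup()` above uses a write-to-temp-then-rename pattern so a download request never sees a half-written archive. The idea in isolation (paths illustrative):

```python
import os
import zipfile

final_path = "changedetection-backup-20250101000000.zip"  # illustrative name
tmp_path = final_path.replace('.zip', '.tmp')

with zipfile.ZipFile(tmp_path, "w", compression=zipfile.ZIP_DEFLATED, compresslevel=8) as z:
    z.write("url-watches.json", arcname="url-watches.json")

# os.rename() is atomic on the same filesystem, so readers see either no file or a complete one
os.rename(tmp_path, final_path)
```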
@@ -1,36 +0,0 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_simple_field, render_field %}
<div class="edit-form">
    <div class="box-wrap inner">
        <h4>Backups</h4>
        {% if backup_running %}
        <p>
            <strong>A backup is running!</strong>
        </p>
        {% endif %}
        <p>
            Here you can download and request a new backup, when a backup is completed you will see it listed below.
        </p>
        <br>
        {% if available_backups %}
        <ul>
            {% for backup in available_backups %}
            <li><a href="{{ url_for('backups.download_backup', filename=backup["filename"]) }}">{{ backup["filename"] }}</a> {{ backup["filesize"] }} Mb</li>
            {% endfor %}
        </ul>
        {% else %}
        <p>
            <strong>No backups found.</strong>
        </p>
        {% endif %}

        <a class="pure-button pure-button-primary" href="{{ url_for('backups.request_backup') }}">Create backup</a>
        {% if available_backups %}
        <a class="pure-button button-small button-error " href="{{ url_for('backups.remove_backups') }}">Remove backups</a>
        {% endif %}
    </div>
</div>

{% endblock %}
@@ -1,7 +1,4 @@
import importlib
from concurrent.futures import ThreadPoolExecutor

from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio.store import ChangeDetectionStore

from functools import wraps
@@ -33,6 +30,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    def long_task(uuid, preferred_proxy):
        import time
        from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
        from changedetectionio.processors.text_json_diff import text_json_diff
        from changedetectionio.safe_jinja import render as jinja_render

        status = {'status': '', 'length': 0, 'text': ''}
@@ -40,12 +38,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        contents = ''
        now = time.time()
        try:
            processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
            update_handler = processor_module.perform_site_check(datastore=datastore,
                                                                 watch_uuid=uuid
                                                                 )

            update_handler.call_browser(preferred_proxy_id=preferred_proxy)
            update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid)
            update_handler.call_browser()
            # title, size is len contents not len xfer
        except content_fetcher_exceptions.Non200ErrorCodeReceived as e:
            if e.status_code == 404:
@@ -54,7 +48,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"{e.status_code} - Access denied"})
            else:
                status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"})
        except FilterNotFoundInResponse:
        except text_json_diff.FilterNotFoundInResponse:
            status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"})
        except content_fetcher_exceptions.EmptyReply as e:
            if e.status_code == 403 or e.status_code == 401:
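One side of this hunk resolves the processor module at runtime with `importlib` rather than a hard import, which keeps the blueprint decoupled from any one processor. The mechanism in isolation (module path taken from the diff, usage illustrative):

```python
import importlib

def load_processor(dotted_path: str):
    # Resolve a module by its dotted path at runtime; raises ModuleNotFoundError if absent
    return importlib.import_module(dotted_path)

processor_module = load_processor("changedetectionio.processors.text_json_diff.processor")
# The module's perform_site_check factory can then be instantiated as shown in the diff
```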
@@ -19,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
        datastore.data['watching'][uuid]['processor'] = 'restock_diff'
        datastore.data['watching'][uuid].clear_watch()
        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
        return redirect(url_for("index"))

    @login_required
@@ -17,6 +17,7 @@
</script>

<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
<!--<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>-->
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>

<div class="edit-form monospaced-textarea">
@@ -75,7 +75,6 @@ class fetcher(Fetcher):
        self.headers = r.headers

        if not r.content or not len(r.content):
            logger.debug(f"Requests returned empty content for '{url}'")
            if not empty_pages_are_a_change:
                raise EmptyReply(url=url, status_code=r.status_code)
            else:
@@ -30,8 +30,6 @@ function isItemInStock() {
        'dieser artikel ist bald wieder verfügbar',
        'dostępne wkrótce',
        'en rupture de stock',
        'esgotado',
        'indisponível',
        'isn\'t in stock right now',
        'isnt in stock right now',
        'isn’t in stock right now',
@@ -59,7 +57,6 @@ function isItemInStock() {
        'notify me when available',
        'notify me',
        'notify when available',
        'não disponível',
        'não estamos a aceitar encomendas',
        'out of stock',
        'out-of-stock',
@@ -157,15 +154,11 @@ function isItemInStock() {
        }

        elementText = "";
        try {
            if (element.tagName.toLowerCase() === "input") {
                elementText = element.value.toLowerCase().trim();
            } else {
                elementText = getElementBaseText(element);
            }
        } catch (e) {
            console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
        }

        if (elementText.length) {
            // try which ones could mean its in stock
@@ -1,6 +1,7 @@
#!/usr/bin/env python3

import datetime
import importlib

import flask_login
import locale
@@ -11,7 +12,9 @@ import threading
import time
import timeago

from .content_fetchers.exceptions import ReplyWithContentButNoText
from .processors import find_processors, get_parent_module, get_custom_watch_obj_for_processor
from .processors.text_json_diff.processor import FilterNotFoundInResponse
from .safe_jinja import render as jinja_render
from changedetectionio.strtobool import strtobool
from copy import deepcopy
@@ -53,7 +56,6 @@ extra_stylesheets = []

update_q = queue.PriorityQueue()
notification_q = queue.Queue()
MAX_QUEUE_SIZE = 2000

app = Flask(__name__,
            static_url_path="",
@@ -68,6 +70,7 @@ FlaskCompress(app)

# Stop browser caching of assets
app.config['SEND_FILE_MAX_AGE_DEFAULT'] = 0

app.config.exit = Event()

app.config['NEW_VERSION_AVAILABLE'] = False
@@ -84,7 +87,7 @@ csrf = CSRFProtect()
csrf.init_app(app)
notification_debug_log=[]

# Locale for correct presentation of prices etc
# get locale ready
default_locale = locale.getdefaultlocale()
logger.info(f"System locale default is {default_locale}")
try:
@@ -159,21 +162,6 @@ def _jinja2_filter_pagination_slice(arr, skip):

    return arr

def app_get_system_time():
    from zoneinfo import ZoneInfo  # Built-in timezone support in Python 3.9+

    system_timezone = datastore.data['settings']['application'].get('timezone')
    if not system_timezone:
        system_timezone = os.environ.get("TZ")

    try:
        system_zone = ZoneInfo(system_timezone)
    except Exception as e:
        logger.warning(f'Warning, unable to use timezone "{system_timezone}" defaulting to UTC- {str(e)}')
        system_zone = ZoneInfo("UTC")  # Fallback to UTC if the timezone is invalid

    return system_zone

@app.template_filter('format_seconds_ago')
def _jinja2_filter_seconds_precise(timestamp):
    if timestamp == False:
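The `app_get_system_time()` helper in this hunk leans on the standard library's `zoneinfo`. A condensed sketch of the same fallback behaviour, shown standalone:

```python
from zoneinfo import ZoneInfo  # Python 3.9+

def resolve_timezone(name):
    # Accept a configured timezone name; fall back to UTC when unset or invalid
    try:
        return ZoneInfo(name) if name else ZoneInfo("UTC")
    except Exception:
        return ZoneInfo("UTC")

print(resolve_timezone("Europe/Prague"))  # zoneinfo.ZoneInfo(key='Europe/Prague')
print(resolve_timezone("Not/AZone"))      # falls back to the UTC zone
```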
@@ -258,9 +246,6 @@ def changedetection_app(config=None, datastore_o=None):
    # (instead of the global var)
    app.config['DATASTORE'] = datastore_o

    # Just to check (it will output some debug if not)
    app_get_system_time()

    login_manager = flask_login.LoginManager(app)
    login_manager.login_view = 'login'
    app.secret_key = init_app_secret(config['datastore_path'])
@@ -551,31 +536,23 @@ def changedetection_app(config=None, datastore_o=None):
    @login_optionally_required
    def ajax_callback_send_notification_test(watch_uuid=None):

        # Watch_uuid could be unset in the case it`s used in tag editor, global settings
        # Watch_uuid could be unset in the case its used in tag editor, global setings
        import apprise
        import random
        from .apprise_asset import asset
        apobj = apprise.Apprise(asset=asset)

        # so that the custom endpoints are registered
        from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
        is_global_settings_form = request.args.get('mode', '') == 'global-settings'
        is_group_settings_form = request.args.get('mode', '') == 'group-settings'

        # Use an existing random one on the global/main settings form
        if not watch_uuid and (is_global_settings_form or is_group_settings_form) \
                and datastore.data.get('watching'):
        if not watch_uuid and (is_global_settings_form or is_group_settings_form):
            logger.debug(f"Send test notification - Choosing random Watch {watch_uuid}")
            watch_uuid = random.choice(list(datastore.data['watching'].keys()))

        if not watch_uuid:
            return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400)

        watch = datastore.data['watching'].get(watch_uuid)

        notification_urls = None

        if request.form.get('notification_urls'):
            notification_urls = request.form['notification_urls'].strip().splitlines()

        if not notification_urls:
@@ -594,12 +571,12 @@ def changedetection_app(config=None, datastore_o=None):


        if not notification_urls:
            return 'Error: No Notification URLs set/found'
            return 'No Notification URLs set/found'

        for n_url in notification_urls:
            if len(n_url.strip()):
                if not apobj.add(n_url):
                    return f'Error: {n_url} is not a valid AppRise URL.'
                    return f'Error - {n_url} is not a valid AppRise URL.'

        try:
            # use the same as when it is triggered, but then override it with the form test values
@@ -618,13 +595,11 @@ def changedetection_app(config=None, datastore_o=None):
            if 'notification_body' in request.form and request.form['notification_body'].strip():
                n_object['notification_body'] = request.form.get('notification_body', '').strip()

            n_object.update(watch.extra_notification_token_values())

            from . import update_worker
            new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
            new_worker.queue_notification_for_watch(notification_q=notification_q, n_object=n_object, watch=watch)
        except Exception as e:
            return make_response(f"Error: str(e)", 400)
            return make_response({'error': str(e)}, 400)

        return 'OK - Sent test notifications'
@@ -816,15 +791,15 @@ def changedetection_app(config=None, datastore_o=None):
            # Recast it if need be to right data Watch handler
            watch_class = get_custom_watch_obj_for_processor(form.data.get('processor'))
            datastore.data['watching'][uuid] = watch_class(datastore_path=datastore_o.datastore_path, default=datastore.data['watching'][uuid])

            flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")

            # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
            # But in the case something is added we should save straight away
            datastore.needs_write_urgent = True

            if not datastore.data['watching'][uuid].get('paused'):
                # Queue the watch for immediate recheck, with a higher priority
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))

            # Diff page [edit] link should go back to diff page
            if request.args.get("next") and request.args.get("next") == 'diff':
@@ -900,7 +875,6 @@ def changedetection_app(config=None, datastore_o=None):
    @login_optionally_required
    def settings_page():
        from changedetectionio import forms
        from datetime import datetime

        default = deepcopy(datastore.data['settings'])
        if datastore.proxy_list is not None:
@@ -968,13 +942,6 @@ def changedetection_app(config=None, datastore_o=None):
            else:
                flash("An error occurred, please see below.", "error")


        system_timezone = app_get_system_time()
        system_time = datetime.now(system_timezone)

        # Fallback for locale formatting
        formatted_system_time = system_time.strftime("%Y-%m-%d %H:%M:%S %Z%z")  # Locale-aware time

        output = render_template("settings.html",
                                 api_key=datastore.data['settings']['application'].get('api_access_token'),
                                 emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
@@ -982,9 +949,7 @@ def changedetection_app(config=None, datastore_o=None):
                                 form=form,
                                 hide_remove_pass=os.getenv("SALTED_PASS", False),
                                 min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
                                 settings_application=datastore.data['settings']['application'],
                                 system_time=formatted_system_time,
                                 timezone_name=system_timezone
                                 settings_application=datastore.data['settings']['application']
                                 )

        return output
@@ -1015,7 +980,7 @@ def changedetection_app(config=None, datastore_o=None):
            importer = import_url_list()
            importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
            for uuid in importer.new_uuids:
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

            if len(importer.remaining_data) == 0:
                return redirect(url_for('index'))
@@ -1028,7 +993,7 @@ def changedetection_app(config=None, datastore_o=None):
            d_importer = import_distill_io_json()
            d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
            for uuid in d_importer.new_uuids:
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

        # XLSX importer
        if request.files and request.files.get('xlsx_file'):
@@ -1052,7 +1017,7 @@ def changedetection_app(config=None, datastore_o=None):
            w_importer.run(data=file, flash=flash, datastore=datastore)

            for uuid in w_importer.new_uuids:
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))

        # Could be some remaining, or we could be on GET
        form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
@@ -1265,6 +1230,78 @@ def changedetection_app(config=None, datastore_o=None):

        return output

    # We're good but backups are even better!
    @app.route("/backup", methods=['GET'])
    @login_optionally_required
    def get_backup():

        import zipfile
        from pathlib import Path

        # Remove any existing backup file, for now we just keep one file

        for previous_backup_filename in Path(datastore_o.datastore_path).rglob('changedetection-backup-*.zip'):
            os.unlink(previous_backup_filename)

        # create a ZipFile object
        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
        backupname = "changedetection-backup-{}.zip".format(timestamp)
        backup_filepath = os.path.join(datastore_o.datastore_path, backupname)

        with zipfile.ZipFile(backup_filepath, "w",
                             compression=zipfile.ZIP_DEFLATED,
                             compresslevel=8) as zipObj:

            # Be sure we're written fresh
            datastore.sync_to_json()

            # Add the index
            zipObj.write(os.path.join(datastore_o.datastore_path, "url-watches.json"), arcname="url-watches.json")

            # Add the flask app secret
            zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt")

            # Add any data in the watch data directory.
            for uuid, w in datastore.data['watching'].items():
                for f in Path(w.watch_data_dir).glob('*'):
                    zipObj.write(f,
                                 # Use the full path to access the file, but make the file 'relative' in the Zip.
                                 arcname=os.path.join(f.parts[-2], f.parts[-1]),
                                 compress_type=zipfile.ZIP_DEFLATED,
                                 compresslevel=8)

            # Create a list file with just the URLs, so it's easier to port somewhere else in the future
            list_file = "url-list.txt"
            with open(os.path.join(datastore_o.datastore_path, list_file), "w") as f:
                for uuid in datastore.data["watching"]:
                    url = datastore.data["watching"][uuid]["url"]
                    f.write("{}\r\n".format(url))
            list_with_tags_file = "url-list-with-tags.txt"
            with open(
                    os.path.join(datastore_o.datastore_path, list_with_tags_file), "w"
            ) as f:
                for uuid in datastore.data["watching"]:
                    url = datastore.data["watching"][uuid].get('url')
                    tag = datastore.data["watching"][uuid].get('tags', {})
                    f.write("{} {}\r\n".format(url, tag))

            # Add it to the Zip
            zipObj.write(
                os.path.join(datastore_o.datastore_path, list_file),
                arcname=list_file,
                compress_type=zipfile.ZIP_DEFLATED,
                compresslevel=8,
            )
            zipObj.write(
                os.path.join(datastore_o.datastore_path, list_with_tags_file),
                arcname=list_with_tags_file,
                compress_type=zipfile.ZIP_DEFLATED,
                compresslevel=8,
            )

        # Send_from_directory needs to be the full absolute path
        return send_from_directory(os.path.abspath(datastore_o.datastore_path), backupname, as_attachment=True)

    @app.route("/static/<string:group>/<string:filename>", methods=['GET'])
    def static_content(group, filename):
        from flask import make_response
@@ -1344,9 +1381,78 @@ def changedetection_app(config=None, datastore_o=None):
    @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
    @login_optionally_required
    def watch_get_preview_rendered(uuid):
        from flask import jsonify
        '''For when viewing the "preview" of the rendered text from inside of Edit'''
        from .processors.text_json_diff import prepare_filter_prevew
        return prepare_filter_prevew(watch_uuid=uuid, datastore=datastore)
        now = time.time()
        import brotli
        from . import forms

        text_after_filter = ''
        tmp_watch = deepcopy(datastore.data['watching'].get(uuid))

        if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
            # Splice in the temporary stuff from the form
            form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
                                                       data=request.form
                                                       )
            # Only update vars that came in via the AJAX post
            p = {k: v for k, v in form.data.items() if k in request.form.keys()}
            tmp_watch.update(p)

            latest_filename = next(reversed(tmp_watch.history))
            html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
            with open(html_fname, 'rb') as f:
                decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')

                # Just like a normal change detection except provide a fake "watch" object and dont call .call_browser()
                processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
                update_handler = processor_module.perform_site_check(datastore=datastore,
                                                                     watch_uuid=uuid  # probably not needed anymore anyway?
                                                                     )
                # Use the last loaded HTML as the input
                update_handler.fetcher.content = decompressed_data
                update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
                try:
                    changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(
                        watch=tmp_watch,
                        skip_when_checksum_same=False,
                    )
                except FilterNotFoundInResponse as e:
                    text_after_filter = f"Filter not found in HTML: {str(e)}"
                except ReplyWithContentButNoText as e:
                    text_after_filter = f"Filter found but no text (empty result)"
                except Exception as e:
                    text_after_filter = f"Error: {str(e)}"

        if not text_after_filter.strip():
            text_after_filter = 'Empty content'

        # because run_changedetection always returns bytes due to saving the snapshots etc
        text_after_filter = text_after_filter.decode('utf-8') if isinstance(text_after_filter, bytes) else text_after_filter

        do_anchor = datastore.data["settings"]["application"].get("render_anchor_tag_content", False)

        trigger_line_numbers = []
        try:
            text_before_filter = html_tools.html_to_text(html_content=decompressed_data,
                                                         render_anchor_tag_content=do_anchor)

            trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
                                                                wordlist=tmp_watch['trigger_text'],
                                                                mode='line numbers'
                                                                )
        except Exception as e:
            text_before_filter = f"Error: {str(e)}"

        logger.trace(f"Parsed in {time.time() - now:.3f}s")

        return jsonify(
            {
                'after_filter': text_after_filter,
                'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
                'trigger_line_numbers': trigger_line_numbers
            }
        )


    @app.route("/form/add/quickwatch", methods=['POST'])
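The preview handler above reads the last snapshot back from a Brotli-compressed `.html.br` file. The round trip in isolation (filename illustrative):

```python
import brotli

payload = "<html><body>example</body></html>"

# Snapshots are stored compressed; MODE_TEXT tunes the encoder for UTF-8 text
compressed = brotli.compress(payload.encode('utf-8'), mode=brotli.MODE_TEXT)

with open("snapshot.html.br", "wb") as f:   # illustrative path
    f.write(compressed)

with open("snapshot.html.br", "rb") as f:
    restored = brotli.decompress(f.read()).decode('utf-8')

assert restored == payload
```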
@@ -1409,7 +1515,7 @@ def changedetection_app(config=None, datastore_o=None):
        new_uuid = datastore.clone(uuid)
        if new_uuid:
            if not datastore.data['watching'].get(uuid).get('paused'):
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True}))
            flash('Cloned.')

        return redirect(url_for('index'))
@@ -1430,7 +1536,7 @@ def changedetection_app(config=None, datastore_o=None):

        if uuid:
            if uuid not in running_uuids:
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
            i = 1

        elif tag:
@@ -1441,7 +1547,7 @@ def changedetection_app(config=None, datastore_o=None):
                    continue
                if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                    update_q.put(
                        queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})
                        queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})
                    )
                    i += 1

@@ -1451,8 +1557,9 @@ def changedetection_app(config=None, datastore_o=None):
                if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']:
                    if with_errors and not watch.get('last_error'):
                        continue
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False}))
                    i += 1

        flash(f"{i} watches queued for rechecking.")
        return redirect(url_for('index', tag=tag))

@@ -1509,7 +1616,7 @@ def changedetection_app(config=None, datastore_o=None):
                uuid = uuid.strip()
                if datastore.data['watching'].get(uuid):
                    # Recheck and require a full reprocessing
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False}))
            flash("{} watches queued for rechecking".format(len(uuids)))

        elif (op == 'clear-errors'):
@@ -1644,16 +1751,13 @@ def changedetection_app(config=None, datastore_o=None):
    import changedetectionio.blueprint.check_proxies as check_proxies
    app.register_blueprint(check_proxies.construct_blueprint(datastore=datastore), url_prefix='/check_proxy')

    import changedetectionio.blueprint.backups as backups
    app.register_blueprint(backups.construct_blueprint(datastore), url_prefix='/backups')


    # @todo handle ctrl break
    ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
    threading.Thread(target=notification_runner).start()

    # Check for new release version, but not when running in test/build or pytest
    if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')):
    if not os.getenv("GITHUB_REF", False) and not config.get('disable_checkver') == True:
        threading.Thread(target=check_for_new_version).start()

    return app
@@ -1737,6 +1841,7 @@ def notification_runner():
def ticker_thread_check_time_launch_checks():
    import random
    from changedetectionio import update_worker

    proxy_last_called_time = {}

    recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
@@ -1770,14 +1875,12 @@ def ticker_thread_check_time_launch_checks():
        except RuntimeError as e:
            # RuntimeError: dictionary changed size during iteration
            time.sleep(0.1)
            watch_uuid_list = []
        else:
            break

        # Re #438 - Don't place more watches in the queue to be checked if the queue is already large
        while update_q.qsize() >= 2000:
            logger.warning(f"Recheck watches queue size limit reached ({MAX_QUEUE_SIZE}), skipping adding more items")
            time.sleep(3)
            time.sleep(1)


        recheck_time_system_seconds = int(datastore.threshold_seconds)
@@ -1837,7 +1940,7 @@ def ticker_thread_check_time_launch_checks():
                                 f"{now - watch['last_checked']:0.2f}s since last checked")

                    # Into the queue with you
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True}))

                    # Reset for next time
                    watch.jitter_seconds = 0
@@ -476,7 +476,7 @@ class processor_text_json_diff_form(commonSettingsForm):

    title = StringField('Title', default='')

    ignore_text = StringListField('Ignore lines containing', [ValidateListRegex()])
    ignore_text = StringListField('Remove lines containing', [ValidateListRegex()])
    headers = StringDictKeyValue('Request headers')
    body = TextAreaField('Request body', [validators.Optional()])
    method = SelectField('Request method', choices=valid_method, default=default_method)
@@ -496,7 +496,7 @@ class processor_text_json_diff_form(commonSettingsForm):
    text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
    webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])

    save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})

    proxy = RadioField('Proxy')
    filter_failure_notification_send = BooleanField(
@@ -515,7 +515,6 @@ class processor_text_json_diff_form(commonSettingsForm):
        if not super().validate():
            return False

        from changedetectionio.safe_jinja import render as jinja_render
        result = True

        # Fail form validation when a body is set for a GET
@@ -525,44 +524,16 @@ class processor_text_json_diff_form(commonSettingsForm):

        # Attempt to validate jinja2 templates in the URL
        try:
            from changedetectionio.safe_jinja import render as jinja_render
            jinja_render(template_str=self.url.data)
        except ModuleNotFoundError as e:
            # incase jinja2_time or others is missing
            logger.error(e)
            self.url.errors.append(f'Invalid template syntax configuration: {e}')
            self.url.errors.append(e)
            result = False
        except Exception as e:
            logger.error(e)
            self.url.errors.append(f'Invalid template syntax: {e}')
            result = False

        # Attempt to validate jinja2 templates in the body
        if self.body.data and self.body.data.strip():
            try:
                jinja_render(template_str=self.body.data)
            except ModuleNotFoundError as e:
                # incase jinja2_time or others is missing
                logger.error(e)
                self.body.errors.append(f'Invalid template syntax configuration: {e}')
                result = False
            except Exception as e:
                logger.error(e)
                self.body.errors.append(f'Invalid template syntax: {e}')
                result = False

        # Attempt to validate jinja2 templates in the headers
        if len(self.headers.data) > 0:
            try:
                for header, value in self.headers.data.items():
                    jinja_render(template_str=value)
            except ModuleNotFoundError as e:
                # incase jinja2_time or others is missing
                logger.error(e)
                self.headers.errors.append(f'Invalid template syntax configuration: {e}')
                result = False
            except Exception as e:
                logger.error(e)
                self.headers.errors.append(f'Invalid template syntax in "{header}" header: {e}')
                self.url.errors.append('Invalid template syntax')
                result = False

        return result
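The validation above simply tries to render each user-supplied template and records the exception text. Assuming `safe_jinja.render` wraps a sandboxed Jinja2 environment (an assumption; only the call signature appears in this diff), the check reduces to something like:

```python
from jinja2.sandbox import ImmutableSandboxedEnvironment

def template_error(template_str: str):
    # Returns None when the template renders, otherwise the error message
    try:
        ImmutableSandboxedEnvironment().from_string(template_str).render()
        return None
    except Exception as e:
        return f'Invalid template syntax: {e}'

assert template_error("{{ 1 + 1 }}") is None
assert template_error("{{ oops") is not None
```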
@@ -645,7 +616,7 @@ class globalSettingsForm(Form):

    requests = FormField(globalSettingsRequestForm)
    application = FormField(globalSettingsApplicationForm)
    save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})


class extractDataForm(Form):
@@ -3,11 +3,11 @@ from lxml import etree
import json
import re


# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')
PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'

PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
# 'price' , 'lowPrice', 'highPrice' are usually under here
# All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
@@ -54,64 +54,29 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
def subtractive_css_selector(css_selector, html_content):
    from bs4 import BeautifulSoup
    soup = BeautifulSoup(html_content, "html.parser")

    # So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM
    elements_to_remove = soup.select(css_selector)

    # Then, remove them in a separate loop
    for item in elements_to_remove:
    for item in soup.select(css_selector):
        item.decompose()

    return str(soup)

def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
    # Parse the HTML content using lxml
def subtractive_xpath_selector(xpath_selector, html_content):
    html_tree = etree.HTML(html_content)
    elements_to_remove = html_tree.xpath(xpath_selector)

    # First, collect all elements to remove
    elements_to_remove = []

    # Iterate over the list of XPath selectors
    for selector in selectors:
        # Collect elements for each selector
        elements_to_remove.extend(html_tree.xpath(selector))

    # Then, remove them in a separate loop
    for element in elements_to_remove:
        if element.getparent() is not None:  # Ensure the element has a parent before removing
            element.getparent().remove(element)

    # Convert the modified HTML tree back to a string
    modified_html = etree.tostring(html_tree, method="html").decode("utf-8")
    return modified_html


def element_removal(selectors: List[str], html_content):
    """Removes elements that match a list of CSS or XPath selectors."""
    """Removes elements that match a list of CSS or xPath selectors."""
    modified_html = html_content
    css_selectors = []
    xpath_selectors = []

    for selector in selectors:
        if selector.startswith(('xpath:', 'xpath1:', '//')):
            # Handle XPath selectors separately
            xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
            xpath_selectors.append(xpath_selector)
            modified_html = subtractive_xpath_selector(xpath_selector, modified_html)
        else:
            # Collect CSS selectors as one "hit", see comment in subtractive_css_selector
            css_selectors.append(selector.strip().strip(","))

    if xpath_selectors:
        modified_html = subtractive_xpath_selector(xpath_selectors, modified_html)

    if css_selectors:
        # Remove duplicates, then combine all CSS selectors into one string, separated by commas
        # This stops the elements index shifting
        unique_selectors = list(set(css_selectors))  # Ensure uniqueness
        combined_css_selector = " , ".join(unique_selectors)
        modified_html = subtractive_css_selector(combined_css_selector, modified_html)


            modified_html = subtractive_css_selector(selector, modified_html)
    return modified_html

def elementpath_tostring(obj):
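The subtractive CSS path removes matched nodes with BeautifulSoup's `decompose()`. A self-contained illustration of the select-then-remove pattern the new code adopts:

```python
from bs4 import BeautifulSoup

html = '<div><p class="ad">buy now</p><p>keep me</p><p class="ad">again</p></div>'
soup = BeautifulSoup(html, "html.parser")

# select() returns a list snapshot, so removing nodes can't disturb the iteration
for node in soup.select("p.ad"):
    node.decompose()

print(str(soup))  # <div><p>keep me</p></div>
```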
@@ -361,7 +326,6 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
# - "line numbers" return a list of line numbers that match (int list)
#
# wordlist - list of regex's (str) or words (str)
# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
def strip_ignore_text(content, wordlist, mode="content"):
    i = 0
    output = []
@@ -377,10 +341,11 @@ def strip_ignore_text(content, wordlist, mode="content"):
        else:
            ignore_text.append(k.strip())

    for line in content.splitlines(keepends=True):
    for line in content.splitlines():
        i += 1
        # Always ignore blank lines in this mode. (when this function gets called)
        got_match = False
        if len(line.strip()):
            for l in ignore_text:
                if l.lower() in line.lower():
                    got_match = True
@@ -391,16 +356,17 @@ def strip_ignore_text(content, wordlist, mode="content"):
                    got_match = True

        if not got_match:
            # Not ignored, and should preserve "keepends"
            output.append(line)
            # Not ignored
            output.append(line.encode('utf8'))
        else:
            ignored_line_numbers.append(i)


    # Used for finding out what to highlight
    if mode == "line numbers":
        return ignored_line_numbers

    return ''.join(output)
    return "\n".encode('utf8').join(output)

def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
    from xml.sax.saxutils import escape as xml_escape
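One side of this hunk keeps line endings via `splitlines(keepends=True)` and returns `str`; the other re-encodes each surviving line to bytes. A small illustration of the keepends variant's behaviour:

```python
def strip_ignore_text_demo(content, wordlist):
    # Keep every line that doesn't contain any ignored word (case-insensitive)
    kept = [line for line in content.splitlines(keepends=True)
            if not any(w.lower() in line.lower() for w in wordlist)]
    return ''.join(kept)  # keepends=True means the original newlines survive

text = "price: 10\nadvert here\nprice: 11\n"
print(strip_ignore_text_demo(text, ["advert"]))  # "price: 10\nprice: 11\n"
```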
@@ -52,8 +52,7 @@ class model(dict):
            'schema_version' : 0,
            'shared_diff_access': False,
            'webdriver_delay': None , # Extra delay in seconds before extracting text
            'tags': {}, #@todo use Tag.model initialisers
            'timezone': None,
            'tags': {} #@todo use Tag.model initialisers
        }
    }
}
@@ -6,8 +6,6 @@ import re
from pathlib import Path
from loguru import logger

from ..html_tools import TRANSLATE_WHITESPACE_TABLE

# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
@@ -38,8 +36,7 @@ class model(watch_base):
    jitter_seconds = 0

    def __init__(self, *arg, **kw):
        self.__datastore_path = kw.get('datastore_path')
        if kw.get('datastore_path'):
            self.__datastore_path = kw['datastore_path']
            del kw['datastore_path']
        super(model, self).__init__(*arg, **kw)
        if kw.get('default'):
@@ -89,10 +86,6 @@ class model(watch_base):

        if ready_url.startswith('source:'):
            ready_url=ready_url.replace('source:', '')

        # Also double check it after any Jinja2 formatting just incase
        if not is_safe_url(ready_url):
            return 'DISABLED'
        return ready_url

    def clear_watch(self):
@@ -178,10 +171,6 @@ class model(watch_base):
        """
        tmp_history = {}

        # In the case we are only using the watch for processing without history
        if not self.watch_data_dir:
            return []

        # Read the history file as a dict
        fname = os.path.join(self.watch_data_dir, "history.txt")
        if os.path.isfile(fname):
@@ -318,13 +307,13 @@ class model(watch_base):
            dest = os.path.join(self.watch_data_dir, snapshot_fname)
            if not os.path.exists(dest):
                with open(dest, 'wb') as f:
                    f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
                    f.write(brotli.compress(contents, mode=brotli.MODE_TEXT))
        else:
            snapshot_fname = f"{snapshot_id}.txt"
            dest = os.path.join(self.watch_data_dir, snapshot_fname)
            if not os.path.exists(dest):
                with open(dest, 'wb') as f:
                    f.write(contents.encode('utf-8'))
                    f.write(contents)

        # Append to index
        # @todo check last char was \n
@@ -356,32 +345,14 @@ class model(watch_base):
        return seconds

    # Iterate over all history texts and see if something new exists
    # Always applying .strip() to start/end but optionally replace any other whitespace
    def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False):
        local_lines = []
        if lines:
            if ignore_whitespace:
                if isinstance(lines[0], str):  # Can be either str or bytes depending on what was on the disk
                    local_lines = set([l.translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines])
                else:
                    local_lines = set([l.decode('utf-8').translate(TRANSLATE_WHITESPACE_TABLE).lower() for l in lines])
            else:
                if isinstance(lines[0], str):  # Can be either str or bytes depending on what was on the disk
                    local_lines = set([l.strip().lower() for l in lines])
                else:
    def lines_contain_something_unique_compared_to_history(self, lines: list):
        local_lines = set([l.decode('utf-8').strip().lower() for l in lines])


        # Compare each lines (set) against each history text file (set) looking for something new..
        existing_history = set({})
        for k, v in self.history.items():
            content = self.get_history_snapshot(k)

            if ignore_whitespace:
                alist = set([line.translate(TRANSLATE_WHITESPACE_TABLE).lower() for line in content.splitlines()])
            else:
                alist = set([line.strip().lower() for line in content.splitlines()])

            existing_history = existing_history.union(alist)

        # Check that everything in local_lines(new stuff) already exists in existing_history - it should
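`TRANSLATE_WHITESPACE_TABLE` (defined in `html_tools` as `str.maketrans('', '', '\r\n\t ')`) deletes every whitespace character in one pass, which is what the `ignore_whitespace` branch relies on. In isolation:

```python
TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')

line = "  Price:\t 10.99 \r\n"
print(line.translate(TRANSLATE_WHITESPACE_TABLE).lower())  # 'price:10.99'
# Two lines differing only in whitespace now normalise to the same string,
# so whitespace-only edits don't count as "something unique"
```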
@@ -425,7 +396,7 @@ class model(watch_base):
    @property
    def watch_data_dir(self):
        # The base dir of the watch data
        return os.path.join(self.__datastore_path, self['uuid']) if self.__datastore_path else None
        return os.path.join(self.__datastore_path, self['uuid'])

    def get_error_text(self):
        """Return the text saved from a previous request that resulted in a non-200 error"""
@@ -18,7 +18,6 @@ class difference_detection_processor():
    screenshot = None
    watch = None
    xpath_data = None
    preferred_proxy = None

    def __init__(self, *args, datastore, watch_uuid, **kwargs):
        super().__init__(*args, **kwargs)
@@ -27,24 +26,23 @@ class difference_detection_processor():
        # Generic fetcher that should be extended (requests, playwright etc)
        self.fetcher = Fetcher()

    def call_browser(self, preferred_proxy_id=None):

    def call_browser(self):
        from requests.structures import CaseInsensitiveDict

        url = self.watch.link

        # Protect against file://, file:/ access, check the real "link" without any meta "source:" etc prepended.
        if re.search(r'^file:/', url.strip(), re.IGNORECASE):
        # Protect against file:// access
        if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
            if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
                raise Exception(
                    "file:// type access is denied for security reasons."
                )

        url = self.watch.link

        # Requests, playwright, other browser via wss:// etc, fetch_extra_something
        prefer_fetch_backend = self.watch.get('fetch_backend', 'system')

        # Proxy ID "key"
        preferred_proxy_id = preferred_proxy_id if preferred_proxy_id else self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))
        preferred_proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=self.watch.get('uuid'))

        # Pluggable content self.fetcher
        if not prefer_fetch_backend or prefer_fetch_backend == 'system':
@@ -102,7 +100,6 @@ class difference_detection_processor():
            self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))

        # Tweak the base config with the per-watch ones
        from changedetectionio.safe_jinja import render as jinja_render
        request_headers = CaseInsensitiveDict()

        ua = self.datastore.data['settings']['requests'].get('default_ua')
@@ -119,15 +116,9 @@ class difference_detection_processor():
        if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
            request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')

        for header_name in request_headers:
            request_headers.update({header_name: jinja_render(template_str=request_headers.get(header_name))})

        timeout = self.datastore.data['settings']['requests'].get('timeout')

        request_body = self.watch.get('body')
        if request_body:
            request_body = jinja_render(template_str=self.watch.get('body'))

        request_method = self.watch.get('method')
        ignore_status_codes = self.watch.get('ignore_status_codes', False)

@@ -164,7 +155,7 @@ class difference_detection_processor():
    # After init, call run_changedetection() which will do the actual change-detection

    @abstractmethod
    def run_changedetection(self, watch):
    def run_changedetection(self, watch, skip_when_checksum_same: bool = True):
        update_obj = {'last_notification_error': False, 'last_error': False}
        some_data = 'xxxxx'
        update_obj["previous_md5"] = hashlib.md5(some_data.encode('utf-8')).hexdigest()
@@ -27,27 +27,22 @@ def _search_prop_by_value(matches, value):

            return prop[1]  # Yield the desired value and exit the function

def _deduplicate_prices(data):
    import re
    seen = set()
    unique_data = []

    '''
    Some price data has multiple entries, OR it has a single entry with ['$159', '159', 159, "$ 159"] or just "159"
    Get all the values, clean it and add it to a set then return the unique values
    '''
    unique_data = set()

    # Return the complete 'datum' where its price was not seen before
    for datum in data:
        # Convert 'value' to float if it can be a numeric string, otherwise leave it as is
        try:
            normalized_value = float(datum.value) if isinstance(datum.value, str) and datum.value.replace('.', '', 1).isdigit() else datum.value
        except ValueError:
            normalized_value = datum.value

        if isinstance(datum.value, list):
            # Process each item in the list
            normalized_value = set([float(re.sub(r'[^\d.]', '', str(item))) for item in datum.value if str(item).strip()])
            unique_data.update(normalized_value)
        else:
            # Process single value
            v = float(re.sub(r'[^\d.]', '', str(datum.value)))
            unique_data.add(v)
        # If the normalized value hasn't been seen yet, add it to unique data
        if normalized_value not in seen:
            unique_data.append(datum)
            seen.add(normalized_value)

    return list(unique_data)
    return unique_data

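Both sides of this hunk reduce messy scraped prices to a comparable collection; the newer version simply strips everything that is not a digit or a dot before converting to float, so textual duplicates collapse into one value. A minimal sketch of that normalisation (standalone, the function name is illustrative):

    import re

    def normalise_prices(values):
        # Reduce ['$159', '159', 159, '$ 159'] style duplicates to a set of floats
        return {float(re.sub(r'[^\d.]', '', str(v))) for v in values if str(v).strip()}

    # normalise_prices(['$159', '159', 159, '$ 159']) -> {159.0}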
# should return Restock()

@@ -88,13 +83,14 @@ def get_itemprop_availability(html_content) -> Restock:

        if price_result:
            # Right now, we just support single product items, maybe we will store the whole actual metadata separately in the future and
            # parse that for the UI?
            if len(price_result) > 1 and len(price_result) > 1:
            prices_found = set(str(item.value).replace('$', '') for item in price_result)
            if len(price_result) > 1 and len(prices_found) > 1:
                # See if all prices are different, in the case that one product has many embedded data types with the same price
                # One might have $121.95 and another 121.95 etc
                logger.warning(f"More than one price found {price_result}, throwing exception, cant use this plugin.")
                logger.warning(f"More than one price found {prices_found}, throwing exception, cant use this plugin.")
                raise MoreThanOnePriceFound()

            value['price'] = price_result[0]
            value['price'] = price_result[0].value

        pricecurrency_result = pricecurrency_parse.find(data)
        if pricecurrency_result:
@@ -144,7 +140,7 @@ class perform_site_check(difference_detection_processor):

    screenshot = None
    xpath_data = None

    def run_changedetection(self, watch):
    def run_changedetection(self, watch, skip_when_checksum_same=True):
        import hashlib

        if not watch:

@@ -224,7 +220,7 @@ class perform_site_check(difference_detection_processor):

            itemprop_availability['original_price'] = itemprop_availability.get('price')
            update_obj['restock']["original_price"] = itemprop_availability.get('price')

        if not self.fetcher.instock_data and not itemprop_availability.get('availability') and not itemprop_availability.get('price'):
        if not self.fetcher.instock_data and not itemprop_availability.get('availability'):
            raise ProcessorException(
                message=f"Unable to extract restock data for this page unfortunately. (Got code {self.fetcher.get_last_status_code()} from server), no embedded stock information was found and nothing interesting in the text, try using this watch with Chrome.",
                url=watch.get('url'),

@@ -241,14 +237,6 @@ class perform_site_check(difference_detection_processor):

            update_obj['restock']["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False
            logger.debug(f"Watch UUID {watch.get('uuid')} restock check returned instock_data - '{self.fetcher.instock_data}' from JS scraper.")

        # Very often websites will lie about the 'availability' in the metadata, so if the scraped version says its NOT in stock, use that.
        if self.fetcher.instock_data and self.fetcher.instock_data != 'Possibly in stock':
            if update_obj['restock'].get('in_stock'):
                logger.warning(
                    f"Lie detected in the availability machine data!! when scraping said its not in stock!! itemprop was '{itemprop_availability}' and scraped from browser was '{self.fetcher.instock_data}' update obj was {update_obj['restock']} ")
                logger.warning(f"Setting instock to FALSE, scraper found '{self.fetcher.instock_data}' in the body but metadata reported not-in-stock")
                update_obj['restock']["in_stock"] = False

        # What we store in the snapshot
        price = update_obj.get('restock').get('price') if update_obj.get('restock').get('price') else ""
        snapshot_content = f"In Stock: {update_obj.get('restock').get('in_stock')} - Price: {price}"

@@ -311,4 +299,4 @@ class perform_site_check(difference_detection_processor):

        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5

        return changed_detected, update_obj, snapshot_content.strip()
        return changed_detected, update_obj, snapshot_content.encode('utf-8').strip()

@@ -1,115 +0,0 @@

from loguru import logger


def _task(watch, update_handler):
    from changedetectionio.content_fetchers.exceptions import ReplyWithContentButNoText
    from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse

    text_after_filter = ''

    try:
        # The slow process (we run 2 of these in parallel)
        changed_detected, update_obj, text_after_filter = update_handler.run_changedetection(watch=watch)
    except FilterNotFoundInResponse as e:
        text_after_filter = f"Filter not found in HTML: {str(e)}"
    except ReplyWithContentButNoText as e:
        text_after_filter = f"Filter found but no text (empty result)"
    except Exception as e:
        text_after_filter = f"Error: {str(e)}"

    if not text_after_filter.strip():
        text_after_filter = 'Empty content'

    # because run_changedetection always returns bytes due to saving the snapshots etc
    text_after_filter = text_after_filter.decode('utf-8') if isinstance(text_after_filter, bytes) else text_after_filter

    return text_after_filter


def prepare_filter_prevew(datastore, watch_uuid):
    '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
    from changedetectionio import forms, html_tools
    from changedetectionio.model.Watch import model as watch_model
    from concurrent.futures import ProcessPoolExecutor
    from copy import deepcopy
    from flask import request, jsonify
    import brotli
    import importlib
    import os
    import time
    now = time.time()

    text_after_filter = ''
    text_before_filter = ''
    trigger_line_numbers = []
    ignore_line_numbers = []

    tmp_watch = deepcopy(datastore.data['watching'].get(watch_uuid))

    if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
        # Splice in the temporary stuff from the form
        form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
                                                   data=request.form
                                                   )

        # Only update vars that came in via the AJAX post
        p = {k: v for k, v in form.data.items() if k in request.form.keys()}
        tmp_watch.update(p)
        blank_watch_no_filters = watch_model()
        blank_watch_no_filters['url'] = tmp_watch.get('url')

        latest_filename = next(reversed(tmp_watch.history))
        html_fname = os.path.join(tmp_watch.watch_data_dir, f"{latest_filename}.html.br")
        with open(html_fname, 'rb') as f:
            decompressed_data = brotli.decompress(f.read()).decode('utf-8') if html_fname.endswith('.br') else f.read().decode('utf-8')

        # Just like a normal change detection except provide a fake "watch" object and don't call .call_browser()
        processor_module = importlib.import_module("changedetectionio.processors.text_json_diff.processor")
        update_handler = processor_module.perform_site_check(datastore=datastore,
                                                             watch_uuid=tmp_watch.get('uuid')  # probably not needed anymore anyway?
                                                             )
        # Use the last loaded HTML as the input
        update_handler.datastore = datastore
        update_handler.fetcher.content = str(decompressed_data)  # str() because playwright/puppeteer/requests return string
        update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')

        # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
        # Do this as a parallel process because it could take some time
        with ProcessPoolExecutor(max_workers=2) as executor:
            future1 = executor.submit(_task, tmp_watch, update_handler)
            future2 = executor.submit(_task, blank_watch_no_filters, update_handler)

            text_after_filter = future1.result()
            text_before_filter = future2.result()

        try:
            trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
                                                                wordlist=tmp_watch['trigger_text'],
                                                                mode='line numbers'
                                                                )
        except Exception as e:
            text_before_filter = f"Error: {str(e)}"

        try:
            text_to_ignore = tmp_watch.get('ignore_text', []) + datastore.data['settings']['application'].get('global_ignore_text', [])
            ignore_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
                                                               wordlist=text_to_ignore,
                                                               mode='line numbers'
                                                               )
        except Exception as e:
            text_before_filter = f"Error: {str(e)}"

    logger.trace(f"Parsed in {time.time() - now:.3f}s")

    return jsonify(
        {
            'after_filter': text_after_filter,
            'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
            'duration': time.time() - now,
            'trigger_line_numbers': trigger_line_numbers,
            'ignore_line_numbers': ignore_line_numbers,
        }
    )
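The preview endpoint above runs the filtered and the unfiltered pass side by side with a two-worker ProcessPoolExecutor and blocks on both results. The same pattern in isolation (the worker function and inputs here are placeholders, not the real _task()):

    from concurrent.futures import ProcessPoolExecutor

    def render(text, apply_filters):  # placeholder for the real _task()
        return text.upper() if apply_filters else text

    if __name__ == '__main__':
        with ProcessPoolExecutor(max_workers=2) as executor:
            after = executor.submit(render, "same html", True)
            before = executor.submit(render, "same html", False)
            # .result() blocks until each worker finishes
            print(after.result(), before.result())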
@@ -7,7 +7,7 @@ import re

import urllib3

from changedetectionio.processors import difference_detection_processor
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text
from changedetectionio import html_tools, content_fetchers
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
from loguru import logger

@@ -35,7 +35,8 @@ class PDFToHTMLToolNotFound(ValueError):

# (set_proxy_from_list)
class perform_site_check(difference_detection_processor):

    def run_changedetection(self, watch):
    def run_changedetection(self, watch, skip_when_checksum_same=True):

        changed_detected = False
        html_content = ""
        screenshot = False  # as bytes

@@ -58,6 +59,9 @@ class perform_site_check(difference_detection_processor):

        # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
        # Saves a lot of CPU
        update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
        if skip_when_checksum_same:
            if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'):
                raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame()

        # Fetching complete, now filters
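The skip_when_checksum_same guard hashes the raw fetched body before any filtering runs, so an unchanged page can bail out before the expensive filter pipeline. Sketched on its own (the exception class and the watch dict here are stand-ins for the real ones):

    import hashlib

    class ChecksumUnchanged(Exception):  # stand-in for checksumFromPreviousCheckWasTheSame
        pass

    def check(content: str, watch: dict, skip_when_checksum_same=True):
        md5_before_filters = hashlib.md5(content.encode('utf-8')).hexdigest()
        if skip_when_checksum_same and md5_before_filters == watch.get('previous_md5_before_filters'):
            raise ChecksumUnchanged()  # nothing changed upstream, save the CPU
        watch['previous_md5_before_filters'] = md5_before_filters
        # ...continue into the filter pipeline...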
@@ -201,14 +205,22 @@ class perform_site_check(difference_detection_processor):

        if watch.get('trim_text_whitespace'):
            stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())

        if watch.get('remove_duplicate_lines'):
            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))

        if watch.get('sort_text_alphabetically'):
            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))

        # Re #340 - return the content before the 'ignore text' was applied
        # Also used to calculate/show what was removed
        text_content_before_ignored_filter = stripped_text_from_html
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')

        # @todo whitespace coming from missing rtrim()?
        # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
        # Rewrites the processing text based on only what diff result they want to see

        if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
            # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
            from changedetectionio import diff
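dict.fromkeys does the heavy lifting in the de-duplication step: unlike a set, it keeps the first occurrence of each line in original order. A compact illustration of the three per-watch transforms (trim, de-duplicate, case-insensitive sort):

    text = "b\n\na\nb\nA"

    trimmed = '\n'.join(line.strip() for line in text.replace("\n\n", "\n").splitlines())
    deduped = '\n'.join(dict.fromkeys(trimmed.splitlines()))  # order-preserving unique -> "b\na\nA"
    ordered = '\n'.join(sorted(deduped.splitlines(), key=lambda x: x.lower()))

    print(ordered)  # -> "a\nA\nb" (stable sort keeps 'a' before 'A')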
@@ -223,12 +235,12 @@ class perform_site_check(difference_detection_processor):

                line_feed_sep="\n",
                include_change_type_prefix=False)

            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))
            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter)

            if not rendered_diff and stripped_text_from_html:
                # We had some content, but no differences were found
                # Store our new file as the MD5 so it will trigger in the future
                c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
                c = hashlib.md5(stripped_text_from_html.encode('utf-8').translate(None, b'\r\n\t ')).hexdigest()
                return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
            else:
                stripped_text_from_html = rendered_diff

@@ -249,6 +261,14 @@ class perform_site_check(difference_detection_processor):

        update_obj["last_check_status"] = self.fetcher.get_last_status_code()

        # If there's text to skip
        # @todo we could abstract out the get_text() to handle this cleaner
        text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
        if len(text_to_ignore):
            stripped_text_from_html = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
        else:
            stripped_text_from_html = stripped_text_from_html.encode('utf8')

        # 615 Extract text by regex
        extract_text = watch.get('extract_text', [])
        if len(extract_text) > 0:
@@ -257,53 +277,39 @@ class perform_site_check(difference_detection_processor):

                # in case they specified something in '/.../x'
                if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
                    regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
                    result = re.findall(regex, stripped_text_from_html)
                    result = re.findall(regex.encode('utf-8'), stripped_text_from_html)

                    for l in result:
                        if type(l) is tuple:
                            # @todo - some formatter option default (between groups)
                            regex_matched_output += list(l) + ['\n']
                            regex_matched_output += list(l) + [b'\n']
                        else:
                            # @todo - some formatter option default (between each ungrouped result)
                            regex_matched_output += [l] + ['\n']
                            regex_matched_output += [l] + [b'\n']
                else:
                    # Doesn't look like regex, just hunt for plaintext and return that which matches
                    # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
                    r = re.compile(re.escape(s_re), re.IGNORECASE)
                    r = re.compile(re.escape(s_re.encode('utf-8')), re.IGNORECASE)
                    res = r.findall(stripped_text_from_html)
                    if res:
                        for match in res:
                            regex_matched_output += [match] + ['\n']
                            regex_matched_output += [match] + [b'\n']

            ##########################################################
            stripped_text_from_html = ''

            stripped_text_from_html = b''
            text_content_before_ignored_filter = b''
            if regex_matched_output:
                # @todo some formatter for presentation?
                stripped_text_from_html = ''.join(regex_matched_output)

        if watch.get('remove_duplicate_lines'):
            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
                stripped_text_from_html = b''.join(regex_matched_output)
                text_content_before_ignored_filter = stripped_text_from_html


        if watch.get('sort_text_alphabetically'):
            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))

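One side of this hunk works on str, the other on bytes, which is why the patterns and the '\n' separators flip between '...' and b'...': Python's re module refuses to mix the two types. A small demonstration:

    import re

    text = b"42 comments"
    # the pattern must be bytes when the haystack is bytes
    print(re.findall(rb'\d+ comments', text))        # [b'42 comments']
    print(re.findall(re.escape(b'comments'), text))  # [b'comments']
    # re.findall(r'\d+', text) would raise TypeError: cannot use a string pattern on a bytes-like object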
        ### CALCULATE MD5
        # If there's text to ignore
        text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
        text_for_checksuming = stripped_text_from_html
        if text_to_ignore:
            text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)

        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
        if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
            fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
        if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
            fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
        else:
            fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()
            fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
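The two branches differ only in how whitespace is stripped before hashing: str.translate() with a deletion table on text, and bytes.translate(None, b'\r\n\t ') on raw bytes. Either way, content that differs only in whitespace hashes to the same digest. A short check (the table here is assumed to match TRANSLATE_WHITESPACE_TABLE):

    import hashlib

    TABLE = str.maketrans('', '', '\r\n\t ')  # assumed equivalent of TRANSLATE_WHITESPACE_TABLE

    digest = lambda s: hashlib.md5(s.translate(TABLE).encode('utf-8')).hexdigest()
    assert digest("price: 100") == digest("price:100\n")  # whitespace-only changes no longer count

    # bytes equivalent used on the other side of the hunk
    assert hashlib.md5(b"price: 100".translate(None, b'\r\n\t ')).hexdigest() == digest("price: 100")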
        ############ Blocking rules, after checksum #################
        blocked = False

@@ -331,14 +337,25 @@ class perform_site_check(difference_detection_processor):

            if result:
                blocked = True

        # The main thing that all this at the moment comes down to :)
        if watch.get('previous_md5') != fetched_md5:
            changed_detected = True

        # Looks like something changed, but did it match all the rules?
        if blocked:
            changed_detected = False

        logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

        if changed_detected:
            if watch.get('check_unique_lines', False):
                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
                # One or more lines? unsure?
                if not has_unique_lines:
                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
                    changed_detected = False
                else:
        # The main thing that all this at the moment comes down to :)
        if watch.get('previous_md5') != fetched_md5:
            changed_detected = True
                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")

        # Always record the new checksum
        update_obj["previous_md5"] = fetched_md5

@@ -347,24 +364,5 @@ class perform_site_check(difference_detection_processor):

        if not watch.get('previous_md5'):
            watch['previous_md5'] = fetched_md5

        logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

        if changed_detected:
            if watch.get('check_unique_lines', False):
                ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')

                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
                    lines=stripped_text_from_html.splitlines(),
                    ignore_whitespace=ignore_whitespace
                )

                # One or more lines? unsure?
                if not has_unique_lines:
                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
                    changed_detected = False
                else:
                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")


        # stripped_text_from_html - Everything after filters and NO 'ignored' content
        return changed_detected, update_obj, stripped_text_from_html

56
changedetectionio/static/js/limit.js
Normal file
@@ -0,0 +1,56 @@

/**
 * debounce
 * @param {integer} milliseconds This param indicates the number of milliseconds
 *     to wait after the last call before calling the original function.
 * @param {object} What "this" refers to in the returned function.
 * @return {function} This returns a function that when called will wait the
 *     indicated number of milliseconds after the last call before
 *     calling the original function.
 */
Function.prototype.debounce = function (milliseconds, context) {
    var baseFunction = this,
        timer = null,
        wait = milliseconds;

    return function () {
        var self = context || this,
            args = arguments;

        function complete() {
            baseFunction.apply(self, args);
            timer = null;
        }

        if (timer) {
            clearTimeout(timer);
        }

        timer = setTimeout(complete, wait);
    };
};

/**
 * throttle
 * @param {integer} milliseconds This param indicates the number of milliseconds
 *     to wait between calls before calling the original function.
 * @param {object} What "this" refers to in the returned function.
 * @return {function} This returns a function that when called will wait the
 *     indicated number of milliseconds between calls before
 *     calling the original function.
 */
Function.prototype.throttle = function (milliseconds, context) {
    var baseFunction = this,
        lastEventTimestamp = null,
        limit = milliseconds;

    return function () {
        var self = context || this,
            args = arguments,
            now = Date.now();

        if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
            lastEventTimestamp = now;
            baseFunction.apply(self, args);
        }
    };
};
@@ -28,14 +28,17 @@ $(document).ready(function() {

            url: notification_base_url,
            data : data,
            statusCode: {
                400: function(data) {
                400: function() {
                    // More than likely the CSRF token was lost when the server restarted
                    alert(data.responseText);
                    alert("There was a problem processing the request, please reload the page.");
                }
            }
        }).done(function(data){
            console.log(data);
            alert(data);
        }).fail(function(data){
            console.log(data);
            alert('There was an error communicating with the server.');
        })
    });
});
@@ -1,60 +1,18 @@

(function($) {
    /**
     * debounce
     * @param {integer} milliseconds This param indicates the number of milliseconds
     *     to wait after the last call before calling the original function.
     * @param {object} What "this" refers to in the returned function.
     * @return {function} This returns a function that when called will wait the
     *     indicated number of milliseconds after the last call before
     *     calling the original function.

    /*
    $('#code-block').highlightLines([
        {
            'color': '#dd0000',
            'lines': [10, 12]
        },
        {
            'color': '#ee0000',
            'lines': [15, 18]
        }
    ]);
    });
    */
    Function.prototype.debounce = function (milliseconds, context) {
        var baseFunction = this,
            timer = null,
            wait = milliseconds;

        return function () {
            var self = context || this,
                args = arguments;

            function complete() {
                baseFunction.apply(self, args);
                timer = null;
            }

            if (timer) {
                clearTimeout(timer);
            }

            timer = setTimeout(complete, wait);
        };
    };

    /**
     * throttle
     * @param {integer} milliseconds This param indicates the number of milliseconds
     *     to wait between calls before calling the original function.
     * @param {object} What "this" refers to in the returned function.
     * @return {function} This returns a function that when called will wait the
     *     indicated number of milliseconds between calls before
     *     calling the original function.
     */
    Function.prototype.throttle = function (milliseconds, context) {
        var baseFunction = this,
            lastEventTimestamp = null,
            limit = milliseconds;

        return function () {
            var self = context || this,
                args = arguments,
                now = Date.now();

            if (!lastEventTimestamp || now - lastEventTimestamp >= limit) {
                lastEventTimestamp = now;
                baseFunction.apply(self, args);
            }
        };
    };

    $.fn.highlightLines = function(configurations) {
        return this.each(function() {
@@ -1,14 +1,14 @@

$(function () {
    /* add container before each proxy location to show status */
    var isActive = false;

    function setup_html_widget() {
        var option_li = $('.fetch-backend-proxy li').filter(function() {
            return $("input",this)[0].value.length >0;
        });

    //var option_li = $('.fetch-backend-proxy li');
    var isActive = false;
        $(option_li).prepend('<div class="proxy-status"></div>');
        $(option_li).append('<div class="proxy-timing"></div><div class="proxy-check-details"></div>');
    }

    function set_proxy_check_status(proxy_key, state) {
        // select input by value name

@@ -59,14 +59,8 @@ $(function () {

    }

    $('#check-all-proxies').click(function (e) {

        e.preventDefault()

        if (!$('body').hasClass('proxy-check-active')) {
            setup_html_widget();
            $('body').addClass('proxy-check-active');
        }

        $('.proxy-check-details').html('');
        $('.proxy-status').html('<span class="spinner"></span>').fadeIn();
        $('.proxy-timing').html('');
@@ -26,7 +26,8 @@ function set_active_tab() {

    if (tab.length) {
        tab[0].parentElement.className = "active";
    }

    // hash could move the page down
    window.scrollTo(0, 0);
}

function focus_error_tab() {

@@ -49,9 +49,4 @@ $(document).ready(function () {

        $("#overlay").toggleClass('visible');
        heartpath.style.fill = document.getElementById("overlay").classList.contains("visible") ? '#ff0000' : 'var(--color-background)';
    });

    setInterval(function () {
        $('body').toggleClass('spinner-active', $.active > 0);
    }, 2000);

});
@@ -26,28 +26,25 @@ function request_textpreview_update() {

        data[name] = $element.is(':checkbox') ? ($element.is(':checked') ? $element.val() : false) : $element.val();
    });

    $('body').toggleClass('spinner-active', 1);

    $.abortiveSingularAjax({
        type: "POST",
        url: preview_text_edit_filters_url,
        data: data,
        namespace: 'watchEdit'
    }).done(function (data) {
        console.debug(data['duration'])
        $('#filters-and-triggers #text-preview-before-inner').text(data['before_filter']);

        $('#filters-and-triggers #text-preview-inner')
            .text(data['after_filter'])
            .highlightLines([
                {
                    'color': '#ee0000',
                    'lines': data['trigger_line_numbers']
                },
                {
                    'color': '#757575',
                    'lines': data['ignore_line_numbers']
                }
            ])
            ]);



    }).fail(function (error) {
        if (error.statusText === 'abort') {
            console.log('Request was aborted due to a new request being fired.');

@@ -76,13 +73,18 @@ $(document).ready(function () {

    $("#text-preview-inner").css('max-height', (vh-300)+"px");
    $("#text-preview-before-inner").css('max-height', (vh-300)+"px");

    // Realtime preview of 'Filters & Text' setup
    var debounced_request_textpreview_update = request_textpreview_update.debounce(100);

    $("#activate-text-preview").click(function (e) {
        $('body').toggleClass('preview-text-enabled')
        request_textpreview_update();

        const method = $('body').hasClass('preview-text-enabled') ? 'on' : 'off';
        $('#filters-and-triggers textarea')[method]('blur', request_textpreview_update.throttle(1000));
        $('#filters-and-triggers input')[method]('change', request_textpreview_update.throttle(1000));
        $("#filters-and-triggers-tab")[method]('click', request_textpreview_update.throttle(1000));
        $("#text-preview-refresh")[method]('click', debounced_request_textpreview_update);
        $('textarea:visible')[method]('keyup blur', debounced_request_textpreview_update);
        $('input:visible')[method]('keyup blur change', debounced_request_textpreview_update);
        $("#filters-and-triggers-tab")[method]('click', debounced_request_textpreview_update);
    });
    $('.minitabs-wrapper').miniTabs({
        "Content after filters": "#text-preview-inner",
@@ -153,8 +153,7 @@ html[data-darkmode="true"] {

    border: 1px solid transparent;
    vertical-align: top;
    font: 1em monospace;
    text-align: left;
    overflow: clip; }
    text-align: left; }
  #diff-ui pre {
    white-space: pre-wrap; }

@@ -173,9 +172,7 @@ ins {

  text-decoration: none; }

#result {
  white-space: pre-wrap;
  word-break: break-word;
  overflow-wrap: break-word; }
  white-space: pre-wrap; }

#settings {
  background: rgba(0, 0, 0, 0.05);

@@ -234,12 +231,3 @@ td#diff-col div {

  border-radius: 5px;
  background: var(--color-background);
  box-shadow: 1px 1px 4px var(--color-shadow-jump); }

.pure-form button.reset-margin {
  margin: 0px; }

.diff-fieldset {
  display: flex;
  align-items: center;
  gap: 4px;
  flex-wrap: wrap; }
@@ -24,7 +24,6 @@

  vertical-align: top;
  font: 1em monospace;
  text-align: left;
  overflow: clip; // clip overflowing contents to cell boundaries
}

pre {

@@ -51,8 +50,6 @@ ins {

#result {
  white-space: pre-wrap;
  word-break: break-word;
  overflow-wrap: break-word;

  .change {
    span {}

@@ -137,15 +134,3 @@ td#diff-col div {

  background: var(--color-background);
  box-shadow: 1px 1px 4px var(--color-shadow-jump);
}

// resets button margin to 0px
.pure-form button.reset-margin {
  margin: 0px;
}

.diff-fieldset {
  display: flex;
  align-items: center;
  gap: 4px;
  flex-wrap: wrap;
}
@@ -9,24 +9,9 @@ ul#requests-extra_browsers {

  }

  /* each proxy entry is a `table` */
  table {
    tr {
      display: table-row; // default display for small screens
      input[type=text] {
        width: 100%;
      }
    }
  }

  // apply inline display for larger screens
  @media only screen and (min-width: 1280px) {
    table {
      tr {
        display: inline;
        input[type=text] {
          width: 100%;
        }
      }
    }
  }
}

@@ -9,24 +9,12 @@ ul#requests-extra_proxies {

  }

  /* each proxy entry is a `table` */
  table {
    tr {
      display: table-row; // default display for small screens
      input[type=text] {
        width: 100%;
      }
    }
  }

  // apply inline display for large screens
  @media only screen and (min-width: 1024px) {
    table {
      tr {
        display: inline;
      }
    }
  }
}

#request {
  /* Auto proxy scan/checker */

@@ -37,19 +25,15 @@ ul#requests-extra_proxies {

body.proxy-check-active {
  #request {
    // Padding set by flex layout
    /*
    .proxy-status {
      width: 2em;
    }
    */

    .proxy-check-details {
      font-size: 80%;
      color: #555;
      display: block;
      padding-left: 2em;
      max-width: 500px;
      padding-left: 4em;
    }

    .proxy-timing {
@@ -7,16 +7,6 @@

  border-top: none;
}

.minitabs-content {
  width: 100%;
  display: flex;
  > div {
    flex: 1 1 auto;
    min-width: 0;
    overflow: scroll;
  }
}

.minitabs {
  display: flex;
  border-bottom: 1px solid #ccc;

@@ -42,8 +42,9 @@ body.preview-text-enabled {

    color: var(--color-text-input);
    font-family: "Courier New", Courier, monospace; /* Sets the font to a monospace type */
    font-size: 70%;
    word-break: break-word;
    overflow-x: scroll;
    white-space: pre-wrap; /* Preserves whitespace and line breaks like <pre> */
    overflow-wrap: break-word; /* Allows long words to break and wrap to the next line */
  }
}

@@ -106,34 +106,10 @@ button.toggle-button {

  padding: 5px;
  display: flex;
  justify-content: space-between;
  border-bottom: 2px solid var(--color-menu-accent);
  align-items: center;
}

#pure-menu-horizontal-spinner {
  height: 3px;
  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
  background-size: 400% 400%;
  width: 100%;
  animation: gradient 200s ease infinite;
}

body.spinner-active {
  #pure-menu-horizontal-spinner {
    animation: gradient 1s ease infinite;
  }
}

@keyframes gradient {
  0% {
    background-position: 0% 50%;
  }
  50% {
    background-position: 100% 50%;
  }
  100% {
    background-position: 0% 50%;
  }
}
.pure-menu-heading {
  color: var(--color-text-menu-heading);
}

@@ -147,14 +123,8 @@ body.spinner-active {

  }
}


.tab-pane-inner {
  // .tab-pane-inner will have the #id that the tab button jumps/anchors to
  scroll-margin-top: 200px;
}

section.content {
  padding-top: 100px;
  padding-top: 5em;
  padding-bottom: 1em;
  flex-direction: column;
  display: flex;

@@ -937,7 +907,6 @@ $form-edge-padding: 20px;

}

.tab-pane-inner {

  &:not(:target) {
    display: none;
  }
@@ -112,30 +112,22 @@ ul#requests-extra_proxies {

  ul#requests-extra_proxies li > label {
    display: none; }
  ul#requests-extra_proxies table tr {
    display: table-row; }
    ul#requests-extra_proxies table tr input[type=text] {
      width: 100%; }
  @media only screen and (min-width: 1024px) {
    ul#requests-extra_proxies table tr {
      display: inline; } }
      display: inline; }

#request {
  /* Auto proxy scan/checker */ }
  #request label[for=proxy] {
    display: inline-block; }

body.proxy-check-active #request {
  /*
  .proxy-status {
    width: 2em;
  }
  */ }
  body.proxy-check-active #request .proxy-status {
    width: 2em; }

  body.proxy-check-active #request .proxy-check-details {
    font-size: 80%;
    color: #555;
    display: block;
    padding-left: 2em;
    max-width: 500px; }
    padding-left: 4em; }

  body.proxy-check-active #request .proxy-timing {
    font-size: 80%;
    padding-left: 1rem;

@@ -165,15 +157,8 @@ ul#requests-extra_browsers {

  /* each proxy entry is a `table` */ }
  ul#requests-extra_browsers li > label {
    display: none; }
  ul#requests-extra_browsers table tr {
    display: table-row; }
    ul#requests-extra_browsers table tr input[type=text] {
      width: 100%; }
  @media only screen and (min-width: 1280px) {
    ul#requests-extra_browsers table tr {
      display: inline; }
      ul#requests-extra_browsers table tr input[type=text] {
        width: 100%; } }

#extra-browsers-setting {
  border: 1px solid var(--color-grey-800);

@@ -449,13 +434,6 @@ html[data-darkmode="true"] #toggle-light-mode .icon-dark {

  padding: 20px;
  border: 1px solid #ccc;
  border-top: none; }
  .minitabs-wrapper .minitabs-content {
    width: 100%;
    display: flex; }
    .minitabs-wrapper .minitabs-content > div {
      flex: 1 1 auto;
      min-width: 0;
      overflow: scroll; }
  .minitabs-wrapper .minitabs {
    display: flex;
    border-bottom: 1px solid #ccc; }

@@ -510,9 +488,11 @@ body.preview-text-enabled {

    font-family: "Courier New", Courier, monospace;
    /* Sets the font to a monospace type */
    font-size: 70%;
    word-break: break-word;
    overflow-x: scroll;
    white-space: pre-wrap;
    /* Preserves whitespace and line breaks like <pre> */ }
    /* Preserves whitespace and line breaks like <pre> */
    overflow-wrap: break-word;
    /* Allows long words to break and wrap to the next line */ }

#activate-text-preview {
  right: 0;

@@ -588,26 +568,9 @@ button.toggle-button {

  padding: 5px;
  display: flex;
  justify-content: space-between;
  border-bottom: 2px solid var(--color-menu-accent);
  align-items: center; }

#pure-menu-horizontal-spinner {
  height: 3px;
  background: linear-gradient(-75deg, #ff6000, #ff8f00, #ffdd00, #ed0000);
  background-size: 400% 400%;
  width: 100%;
  animation: gradient 200s ease infinite; }

body.spinner-active #pure-menu-horizontal-spinner {
  animation: gradient 1s ease infinite; }

@keyframes gradient {
  0% {
    background-position: 0% 50%; }
  50% {
    background-position: 100% 50%; }
  100% {
    background-position: 0% 50%; } }

.pure-menu-heading {
  color: var(--color-text-menu-heading); }

@@ -617,11 +580,8 @@ body.spinner-active #pure-menu-horizontal-spinner {

  background-color: var(--color-background-menu-link-hover);
  color: var(--color-text-menu-link-hover); }

.tab-pane-inner {
  scroll-margin-top: 200px; }

section.content {
  padding-top: 100px;
  padding-top: 5em;
  padding-bottom: 1em;
  flex-direction: column;
  display: flex;
@@ -4,7 +4,6 @@ from flask import (

    flash
)

from .html_tools import TRANSLATE_WHITESPACE_TABLE
from . model import App, Watch
from copy import deepcopy, copy
from os import path, unlink

@@ -751,17 +750,17 @@ class ChangeDetectionStore:

    def update_5(self):
        # If the watch notification body, title look the same as the global one, unset it, so the watch defaults back to using the main settings
        # In other words - the watch notification_title and notification_body are not needed if they are the same as the default one
        current_system_body = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
        current_system_title = self.data['settings']['application']['notification_body'].translate(TRANSLATE_WHITESPACE_TABLE)
        current_system_body = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n "))
        current_system_title = self.data['settings']['application']['notification_body'].translate(str.maketrans('', '', "\r\n "))
        for uuid, watch in self.data['watching'].items():
            try:
                watch_body = watch.get('notification_body', '')
                if watch_body and watch_body.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_body:
                if watch_body and watch_body.translate(str.maketrans('', '', "\r\n ")) == current_system_body:
                    # Looks the same as the default one, so unset it
                    watch['notification_body'] = None

                watch_title = watch.get('notification_title', '')
                if watch_title and watch_title.translate(TRANSLATE_WHITESPACE_TABLE) == current_system_title:
                if watch_title and watch_title.translate(str.maketrans('', '', "\r\n ")) == current_system_title:
                    # Looks the same as the default one, so unset it
                    watch['notification_title'] = None
            except Exception as e:
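Both sides of this hunk strip whitespace before comparing the per-watch notification text to the global default; TRANSLATE_WHITESPACE_TABLE is just a precomputed str.maketrans deletion table, so it behaves identically to the inline call it replaces while avoiding rebuilding the table on every .translate(). A quick check (the table contents here are assumed to mirror the inline one in the hunk):

    # Precomputing the table once avoids rebuilding it per call
    TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', "\r\n ")

    body_a = "New price!\r\n"
    body_b = "New  price!"
    assert body_a.translate(TRANSLATE_WHITESPACE_TABLE) == body_b.translate(str.maketrans('', '', "\r\n "))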
@@ -35,9 +35,7 @@

<body class="">
  <div class="header">
    <div class="pure-menu-fixed" style="width: 100%;">
      <div class="home-menu pure-menu pure-menu-horizontal" id="nav-menu">

    <div class="home-menu pure-menu pure-menu-horizontal pure-menu-fixed" id="nav-menu">
      {% if has_password and not current_user.is_authenticated %}
        <a class="pure-menu-heading" href="https://changedetection.io" rel="noopener">
          <strong>Change</strong>Detection.io</a>

@@ -70,7 +68,7 @@

          <a href="{{ url_for('import_page')}}" class="pure-menu-link">IMPORT</a>
        </li>
        <li class="pure-menu-item">
          <a href="{{ url_for('backups.index')}}" class="pure-menu-link">BACKUPS</a>
          <a href="{{ url_for('get_backup')}}" class="pure-menu-link">BACKUP</a>
        </li>
        {% else %}
        <li class="pure-menu-item">

@@ -131,12 +129,7 @@

        </li>
      </ul>
    </div>
    <div id="pure-menu-horizontal-spinner"></div>
  </div>

</div>


{% if hosted_sticky %}
  <div class="sticky-tab" id="hosted-sticky">
    <a href="https://changedetection.io/?ref={{guid}}">Let us host your instance!</a>
@@ -14,7 +14,7 @@

<div id="settings">
  <form class="pure-form " action="" method="GET" id="diff-form">
    <fieldset class="diff-fieldset">
    <fieldset>
      {% if versions|length >= 1 %}
        <strong>Compare</strong>
        <del class="change"><span>from</span></del>

@@ -33,7 +33,7 @@

          </option>
          {% endfor %}
        </select>
        <button type="submit" class="pure-button pure-button-primary reset-margin">Go</button>
        <button type="submit" class="pure-button pure-button-primary">Go</button>
      {% endif %}
    </fieldset>
    <fieldset>
@@ -26,6 +26,7 @@

</script>
<script src="{{url_for('static_content', group='js', filename='plugins.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='limit.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
<script src="{{url_for('static_content', group='js', filename='visual-selector.js')}}" defer></script>
{% if playwright_enabled %}

@@ -65,8 +66,8 @@

<fieldset>
  <div class="pure-control-group">
    {{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
    <div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div>
    <div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
    <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
    <span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
  </div>
  <div class="pure-control-group inline-radio">
    {{ render_field(form.processor) }}

@@ -149,24 +150,21 @@

    {{ render_field(form.method) }}
  </div>
  <div id="request-body">
    {{ render_field(form.body, rows=7, placeholder="Example
    {{ render_field(form.body, rows=5, placeholder="Example
{
  \"name\":\"John\",
  \"age\":30,
  \"car\":null,
  \"year\":{% now 'Europe/Berlin', '%Y' %}
  \"car\":null
}") }}
  </div>
  <div class="pure-form-message">Variables are supported in the request body (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
</div>
</fieldset>
<!-- hmm -->
<div class="pure-control-group advanced-options" style="display: none;">
  {{ render_field(form.headers, rows=7, placeholder="Example
  {{ render_field(form.headers, rows=5, placeholder="Example
Cookie: foobar
User-Agent: wonderbra 1.0
Math: {{ 1 + 1 }}") }}
  <div class="pure-form-message">Variables are supported in the request header values (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
User-Agent: wonderbra 1.0") }}

  <div class="pure-form-message-inline">
    {% if has_extra_headers_file %}
      <strong>Alert! Extra headers file found and will be added to this watch!</strong>

@@ -332,9 +330,9 @@ nav

  {{ render_checkbox_field(form.filter_text_added) }}
  {{ render_checkbox_field(form.filter_text_replaced) }}
  {{ render_checkbox_field(form.filter_text_removed) }}
  <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span><br>
  <span class="pure-form-message-inline"> So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
  <span class="pure-form-message-inline"> When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
  <span class="pure-form-message-inline">Note: Depending on the length and similarity of the text on each line, the algorithm may consider an <strong>addition</strong> instead of <strong>replacement</strong> for example.</span>
  <span class="pure-form-message-inline">So it's always better to select <strong>Added</strong>+<strong>Replaced</strong> when you're interested in new content.</span><br>
  <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
</fieldset>
<fieldset class="pure-control-group">
  {{ render_checkbox_field(form.check_unique_lines) }}

@@ -373,7 +371,7 @@ nav

") }}
<span class="pure-form-message-inline">
  <ul>
    <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
    <li>Matching text will be <strong>removed</strong> from the text snapshot</li>
    <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
    <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
    <li>Changing this will affect the comparison checksum which may trigger an alert</li>

@@ -400,9 +398,7 @@ Unavailable") }}

</fieldset>
<fieldset>
  <div class="pure-control-group">
    {{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/
or
keyword") }}
    {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
    <span class="pure-form-message-inline">
      <ul>
        <li>Extracts text in the final output (line by line) after other filters using regular expressions or string match;

@@ -428,8 +424,8 @@ keyword") }}

</script>
<br>
{#<div id="text-preview-controls"><span id="text-preview-refresh" class="pure-button button-xsmall">Refresh</span></div>#}

<div class="minitabs-wrapper">
  <div class="minitabs-content">
    <div id="text-preview-inner" class="monospace-preview">
      <p>Loading...</p>
    </div>

@@ -440,7 +436,6 @@ keyword") }}

      </div>
    </div>
  </div>
</div>

{% endif %}
{# rendered sub Template #}
@@ -78,10 +78,6 @@

  {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
  <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
</div>
<div class="pure-control-group">
  <p><strong>Local Time:</strong> {{ system_time }}</p>
  <p><strong>Timezone:</strong> {{ timezone_name }}</p>
</div>
{% if form.requests.proxy %}
<div class="pure-control-group inline-radio">
  {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}

@@ -176,7 +172,7 @@ nav

<span class="pure-form-message-inline">Note: This is applied globally in addition to the per-watch rules.</span><br>
<span class="pure-form-message-inline">
  <ul>
    <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it but it wont trigger a change)</li>
    <li>Matching text will be <strong>removed</strong> from the text snapshot</li>
    <li>Note: This is applied globally in addition to the per-watch rules.</li>
    <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
    <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>

@@ -280,7 +276,7 @@ nav

<div class="pure-control-group">
  {{ render_button(form.save_button) }}
  <a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
  <a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a>
  <a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-cancel">Clear Snapshot History</a>
</div>
</div>
</form>
@@ -1,6 +0,0 @@

# A list of real world examples!

Always the price should be 666.66 for our tests

see test_restock_itemprop.py::test_special_prop_examples

@@ -1,25 +0,0 @@

<div class="PriceSection PriceSection_PriceSection__Vx1_Q PriceSection_variantHuge__P9qxg PdpPriceSection"
     data-testid="price-section"
     data-optly-product-tile-price-section="true"><span
     class="PriceRange ProductPrice variant-huge" itemprop="offers"
     itemscope="" itemtype="http://schema.org/Offer"><div
     class="VisuallyHidden_VisuallyHidden__VBD83">$155.55</div><span
     aria-hidden="true" class="Price variant-huge" data-testid="price"
     itemprop="price"><sup class="sup" data-testid="price-symbol"
     itemprop="priceCurrency" content="AUD">$</sup><span
     class="dollars" data-testid="price-value" itemprop="price"
     content="155.55">155.55</span><span class="extras"><span class="sup"
     data-testid="price-sup"></span></span></span></span>
</div>

<script type="application/ld+json">{
  "@type": "Product",
  "@context": "https://schema.org",
  "name": "test",
  "description": "test",
  "offers": {
    "@type": "Offer",
    "priceCurrency": "AUD",
    "price": 155.55
  },
}</script>
@@ -16,4 +16,4 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
||||
)
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
wait_for_all_checks(client)
|
||||
time.sleep(3)
|
||||
|
||||
@@ -1,8 +1,7 @@
#!/usr/bin/env python3
import json
import os
from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks


def set_response():
@@ -19,6 +18,7 @@ def set_response():
f.write(data)
time.sleep(1)


def test_socks5(client, live_server, measure_memory_usage):
live_server_setup(live_server)
set_response()
@@ -79,24 +79,3 @@ def test_socks5(client, live_server, measure_memory_usage):

# Should see the proper string
assert "Awesome, you made it".encode('utf-8') in res.data

# PROXY CHECKER WIDGET CHECK - this needs more checking
uuid = extract_UUID_from_client(client)

res = client.get(
url_for("check_proxies.start_check", uuid=uuid),
follow_redirects=True
)
# It's probably already finished super fast :(
#assert b"RUNNING" in res.data

wait_for_all_checks(client)
res = client.get(
url_for("check_proxies.get_recheck_status", uuid=uuid),
follow_redirects=True
)
assert b"OK" in res.data

res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

@@ -77,8 +77,6 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory

# The trigger line is REMOVED, this should trigger
set_original(excluding='The golden line')

# Check in the processor here what's going on, it's triggering empty-reply and no change.
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
@@ -153,6 +151,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa

# A line that's not the trigger should not trigger anything
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)

assert b'1 watches queued for rechecking.' in res.data

wait_for_all_checks(client)
@@ -174,5 +173,6 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
assert b'-Oh yes please-' in response
assert '网站监测 内容更新了'.encode('utf-8') in response


res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

@@ -26,24 +26,8 @@ def test_backup(client, live_server, measure_memory_usage):
assert b"1 Imported" in res.data
wait_for_all_checks(client)

# Launch the thread in the background to create the backup
res = client.get(
url_for("backups.request_backup"),
follow_redirects=True
)
time.sleep(2)

res = client.get(
url_for("backups.index"),
follow_redirects=True
)
# Can see the download link to the backup
assert b'<a href="/backups/download/changedetection-backup-20' in res.data
assert b'Remove backups' in res.data

# Get the latest one
res = client.get(
url_for("backups.download_backup", filename="latest"),
url_for("get_backup"),
follow_redirects=True
)

@@ -60,11 +44,3 @@ def test_backup(client, live_server, measure_memory_usage):

# Should be two txt files in the archive (history and the snapshot)
assert len(newlist) == 2

# Get the latest one
res = client.get(
url_for("backups.remove_backups"),
follow_redirects=True
)

assert b'No backups found.' in res.data
@@ -65,8 +65,11 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
live_server_setup(live_server)
# Use a mix of case in ZzZ to prove it works case-insensitive.
ignore_text = "out of stoCk\r\nfoobar"

set_original_ignore_response()

# Give the endpoint time to spin up
time.sleep(1)

# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
@@ -124,24 +127,13 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data

# 2548
# Going back to the ORIGINAL should NOT trigger a change
set_original_ignore_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data


# Now we set a change where the text is gone AND it's different content, it should now trigger
# Now we set a change where the text is gone, it should now trigger
set_modified_response_minus_block_text()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'unviewed' in res.data


res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

@@ -5,41 +5,12 @@ import time
from flask import url_for

from ..html_tools import *
from .util import live_server_setup, wait_for_all_checks
from .util import live_server_setup


def test_setup(live_server):
live_server_setup(live_server)

def set_response_with_multiple_index():
data= """<!DOCTYPE html>
<html>
<body>

<!-- NOTE!! CHROME WILL ADD TBODY HERE IF ITS NOT THERE!! -->
<table style="width:100%">
<tr>
<th>Person 1</th>
<th>Person 2</th>
<th>Person 3</th>
</tr>
<tr>
<td>Emil</td>
<td>Tobias</td>
<td>Linus</td>
</tr>
<tr>
<td>16</td>
<td>14</td>
<td>10</td>
</tr>
</table>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data)


def set_original_response():
test_return_data = """<html>
@@ -148,10 +119,12 @@ across multiple lines


def test_element_removal_full(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
sleep_time_for_fetch_thread = 3

set_original_response()

# Give the endpoint time to spin up
time.sleep(1)

# Add our URL to the import page
test_url = url_for("test_endpoint", _external=True)
@@ -159,8 +132,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
url_for("import_page"), data={"urls": test_url}, follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)

time.sleep(1)
# Goto the edit page, add the filter data
# Not sure why \r needs to be added - absent of the #changetext this is not necessary
subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext"
@@ -176,7 +148,6 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
follow_redirects=True,
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)

# Check it saved
res = client.get(
@@ -185,10 +156,10 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
assert bytes(subtractive_selectors_data.encode("utf-8")) in res.data

# Trigger a check
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
client.get(url_for("form_watch_checknow"), follow_redirects=True)

wait_for_all_checks(client)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)

# so that we set the state to 'unviewed' after all the edits
client.get(url_for("diff_history_page", uuid="first"))
@@ -197,70 +168,11 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
set_modified_response()

# Trigger a check
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
wait_for_all_checks(client)
time.sleep(sleep_time_for_fetch_thread)

# There should not be an unviewed change, as changes should be removed
res = client.get(url_for("index"))
assert b"unviewed" not in res.data

# Re #2752
def test_element_removal_nth_offset_no_shift(client, live_server, measure_memory_usage):
#live_server_setup(live_server)

set_response_with_multiple_index()
subtractive_selectors_data = ["""
body > table > tr:nth-child(1) > th:nth-child(2)
body > table > tr:nth-child(2) > td:nth-child(2)
body > table > tr:nth-child(3) > td:nth-child(2)
body > table > tr:nth-child(1) > th:nth-child(3)
body > table > tr:nth-child(2) > td:nth-child(3)
body > table > tr:nth-child(3) > td:nth-child(3)""",
"""//body/table/tr[1]/th[2]
//body/table/tr[2]/td[2]
//body/table/tr[3]/td[2]
//body/table/tr[1]/th[3]
//body/table/tr[2]/td[3]
//body/table/tr[3]/td[3]"""]

for selector_list in subtractive_selectors_data:

res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

# Add our URL to the import page
test_url = url_for("test_endpoint", _external=True)
res = client.post(
url_for("import_page"), data={"urls": test_url}, follow_redirects=True
)
assert b"1 Imported" in res.data
wait_for_all_checks(client)

res = client.post(
url_for("edit_page", uuid="first"),
data={
"subtractive_selectors": selector_list,
"url": test_url,
"tags": "",
"fetch_backend": "html_requests",
},
follow_redirects=True,
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)

res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)

assert b"Tobias" not in res.data
assert b"Linus" not in res.data
assert b"Person 2" not in res.data
assert b"Person 3" not in res.data
# First column should exist
assert b"Emil" in res.data

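For context: the selector lists above exercise removal without index shifting, i.e. deleting column 2 must not turn column 3 into a new nth-child(2). One way to get that property is to resolve every selector against the original tree before removing anything; a rough lxml-based sketch (assumes the cssselect package for the CSS variants, and is not the project's actual subtractive-selector code):

    from lxml import html

    def subtract_elements(doc, selectors):
        tree = html.fromstring(doc)
        # Resolve all selectors against the ORIGINAL tree first, so removing
        # one match cannot shift the nth-child position of a later one.
        doomed = []
        for sel in selectors.splitlines():
            sel = sel.strip()
            if sel:
                doomed.extend(tree.xpath(sel) if sel.startswith('//') else tree.cssselect(sel))
        for el in doomed:
            el.drop_tree()
        return html.tostring(tree, encoding='unicode')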
@@ -33,17 +33,13 @@ def test_strip_regex_text_func():

stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)

assert "but 1 lines" in stripped_content
assert "igNORe-cAse text" not in stripped_content
assert "but 1234 lines" not in stripped_content
assert "really" not in stripped_content
assert "not this" not in stripped_content
assert b"but 1 lines" in stripped_content
assert b"igNORe-cAse text" not in stripped_content
assert b"but 1234 lines" not in stripped_content
assert b"really" not in stripped_content
assert b"not this" not in stripped_content

# Check line number reporting
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines, mode="line numbers")
assert stripped_content == [2, 5, 6, 7, 8, 10]

# Check that linefeeds are preserved when there are no matching ignores
content = "some text\n\nand other text\n"
stripped_content = html_tools.strip_ignore_text(content, ignore_lines)
assert content == stripped_content

@@ -22,15 +22,10 @@ def test_strip_text_func():
ignore_lines = ["sometimes"]

stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)
assert "sometimes" not in stripped_content
assert "Some content" in stripped_content

# Check that line feeds don't get chewed up when something is found
test_content = "Some initial text\n\nWhich is across multiple lines\n\nZZZZz\n\n\nSo let's see what happens."
ignore = ['something irrelevent but just to check', 'XXXXX', 'YYYYY', 'ZZZZZ']
assert b"sometimes" not in stripped_content
assert b"Some content" in stripped_content

stripped_content = html_tools.strip_ignore_text(test_content, ignore)
assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens."

def set_original_ignore_response():
test_return_data = """<html>
@@ -146,6 +141,8 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa


# Just to be sure.. set a regular modified change..
set_modified_original_ignore_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -156,17 +153,17 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa

res = client.get(url_for("preview_page", uuid="first"))

# SHOULD BE in the preview, it was added in set_modified_original_ignore_response()
# and we have "new ignore stuff" in ignore_text
# it is only ignored, it is not removed (it will be highlighted too)
assert b'new ignore stuff' in res.data
# Should no longer be in the preview
assert b'new ignore stuff' not in res.data

res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

# When adding some ignore text, it should not trigger a change, even if something else on that line changes
def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
#live_server_setup(live_server)

# Give the endpoint time to spin up
time.sleep(1)

ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ"
set_original_ignore_response()

@@ -175,7 +172,6 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
url_for("settings_page"),
data={
"requests-time_between_check-minutes": 180,
"application-ignore_whitespace": "y",
"application-global_ignore_text": ignore_text,
'application-fetch_backend': "html_requests"
},
@@ -196,7 +192,9 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem
# Give the thread time to pick it up
wait_for_all_checks(client)

# Adding some ignore text should not trigger a change

# Goto the edit page of the item, add our ignore text
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"},
@@ -212,15 +210,20 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem

# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)

# Give the thread time to pick it up
wait_for_all_checks(client)
# It should report nothing found (no new 'unviewed' class), adding random ignore text should not cause a change

# so that we are sure everything is viewed and in a known 'nothing changed' state
res = client.get(url_for("diff_history_page", uuid="first"))

# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
#####

# Make a change which includes the ignore text, it should be ignored and no 'change' triggered
# It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list

# Make a change which includes the ignore text
set_modified_ignore_response()

# Trigger a check
@@ -230,7 +233,6 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem

# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))

assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data

@@ -1,78 +0,0 @@
#!/usr/bin/env python3

from flask import url_for
from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client


def set_response():

data = f"""<html>
<body>Awesome, you made it<br>
yeah the socks request worked<br>
something to ignore<br>
something to trigger<br>
</body>
</html>
"""

with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(data)

def test_content_filter_live_preview(client, live_server, measure_memory_usage):
live_server_setup(live_server)
set_response()

test_url = url_for('test_endpoint', _external=True)

res = client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tags": ''},
follow_redirects=True
)
uuid = extract_UUID_from_client(client)
res = client.post(
url_for("edit_page", uuid=uuid),
data={
"include_filters": "",
"fetch_backend": 'html_requests',
"ignore_text": "something to ignore",
"trigger_text": "something to trigger",
"url": test_url,
},
follow_redirects=True
)
assert b"Updated watch." in res.data
wait_for_all_checks(client)

# The endpoint is a POST and accepts the form values to override the watch preview
import json

# DEFAULT OUTPUT WITHOUT ANYTHING UPDATED/CHANGED - SHOULD SEE THE WATCH DEFAULTS
res = client.post(
url_for("watch_get_preview_rendered", uuid=uuid)
)
default_return = json.loads(res.data.decode('utf-8'))
assert default_return.get('after_filter')
assert default_return.get('before_filter')
assert default_return.get('ignore_line_numbers') == [3]  # "something to ignore" line 3
assert default_return.get('trigger_line_numbers') == [4]  # "something to trigger" line 4

# SEND AN UPDATE AND WE SHOULD SEE THE OUTPUT CHANGE SO WE KNOW TO HIGHLIGHT NEW STUFF
res = client.post(
url_for("watch_get_preview_rendered", uuid=uuid),
data={
"include_filters": "",
"fetch_backend": 'html_requests',
"ignore_text": "sOckS",  # Also be sure case insensitive works
"trigger_text": "AweSOme",
"url": test_url,
},
)
reply = json.loads(res.data.decode('utf-8'))
assert reply.get('after_filter')
assert reply.get('before_filter')
assert reply.get('ignore_line_numbers') == [2]  # Ignored - "socks" on line 2
assert reply.get('trigger_line_numbers') == [1]  # Triggers "Awesome" in line 1

res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
@@ -284,7 +284,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
# CUSTOM JSON BODY CHECK for POST://
set_original_response()
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#header-manipulation
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?xxx={{ watch_url }}&+custom-header=123&+second=hello+world%20%22space%22"
test_notification_url = url_for('test_notification_endpoint', _external=True).replace('http://', 'post://')+"?xxx={{ watch_url }}&+custom-header=123"

res = client.post(
url_for("settings_page"),
@@ -326,7 +326,6 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
assert j['secret'] == 444
assert j['somebug'] == '网站监测 内容更新了'


# URL check, this will always be converted to lowercase
assert os.path.isfile("test-datastore/notification-url.txt")
with open("test-datastore/notification-url.txt", 'r') as f:
@@ -338,7 +337,6 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
with open("test-datastore/notification-headers.txt", 'r') as f:
notification_headers = f.read()
assert 'custom-header: 123' in notification_headers.lower()
assert 'second: hello world "space"' in notification_headers.lower()


# Should always be automatically detected as JSON content type even when we set it as 'Text' (default)
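For context on the post:// URL above: with Apprise's Custom JSON notifications, query parameters prefixed with '+' are sent as HTTP headers rather than as payload fields (see the linked wiki page). A minimal stock-Apprise illustration using its json:// scheme (the post:// form above appears to be provided by changedetection.io's own Apprise plugin; example.com is a placeholder):

    import apprise

    a = apprise.Apprise()
    # '+custom-header=123' becomes the HTTP header 'custom-header: 123'
    # on the outgoing POST; unprefixed args stay in the URL/payload.
    a.add('json://example.com/notify?+custom-header=123')
    a.notify(title='price drop', body='new price 1950.45')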
@@ -431,15 +429,3 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
follow_redirects=True
)

######### Test global/system settings - When everything is deleted it should give a helpful error
# See #2727
res = client.post(
url_for("ajax_callback_send_notification_test")+"?mode=global-settings",
data={"notification_urls": test_notification_url},
follow_redirects=True
)
assert res.status_code == 400
assert b"Error: You must have atleast one watch configured for 'test notification' to work" in res.data

@@ -1,72 +0,0 @@
#!/usr/bin/env python3

import time
from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks


# `subtractive_selectors` should still work in `source:` type requests
def test_fetch_pdf(client, live_server, measure_memory_usage):
import shutil
shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf")

live_server_setup(live_server)
test_url = url_for('test_pdf_endpoint', _external=True)
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)

assert b"1 Imported" in res.data

wait_for_all_checks(client)

res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)

# PDF header should not be there (it was converted to text)
assert b'PDF' not in res.data[:10]
assert b'hello world' in res.data

# So we know if the file changes in other ways
import hashlib
original_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
# We should have one
assert len(original_md5) > 0
# And it's going to be in the document
assert b'Document checksum - ' + bytes(str(original_md5).encode('utf-8')) in res.data

shutil.copy("tests/test2.pdf", "test-datastore/endpoint-test.pdf")
changed_md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper()
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data

wait_for_all_checks(client)

# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("index"))
assert b'unviewed' in res.data

# The original checksum should not be here anymore (cdio adds it to the bottom of the text)

res = client.get(
url_for("preview_page", uuid="first"),
follow_redirects=True
)

assert original_md5.encode('utf-8') not in res.data
assert changed_md5.encode('utf-8') in res.data

res = client.get(
url_for("diff_history_page", uuid="first"),
follow_redirects=True
)

assert original_md5.encode('utf-8') in res.data
assert changed_md5.encode('utf-8') in res.data

assert b'here is a change' in res.data
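For context: the deleted PDF test relies on the fetched PDF being converted to text and on an uppercase MD5 of the raw file being appended as 'Document checksum - <MD5>', so byte-level changes are caught even when the extracted text is identical. A small sketch of the checksum-append step only (the PDF-to-text conversion itself is out of scope here):

    import hashlib

    def text_with_checksum(extracted_text, pdf_path):
        # Mirror the 'Document checksum - <MD5>' marker the test asserts on.
        with open(pdf_path, 'rb') as f:
            md5 = hashlib.md5(f.read()).hexdigest().upper()
        return f"{extracted_text}\nDocument checksum - {md5}"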
@@ -45,7 +45,7 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
"url": test_url,
"tags": "",
"fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests',
"headers": "jinja2:{{ 1+1 }}\nxxx:ooo\ncool:yeah\r\ncookie:"+cookie_header},
"headers": "xxx:ooo\ncool:yeah\r\ncookie:"+cookie_header},
follow_redirects=True
)
assert b"Updated watch." in res.data
@@ -61,7 +61,6 @@ def test_headers_in_request(client, live_server, measure_memory_usage):
)

# Flask will convert the header key to uppercase
assert b"Jinja2:2" in res.data
assert b"Xxx:ooo" in res.data
assert b"Cool:yeah" in res.data

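For context: this hunk (and the request-body hunk below) shows header and body values being passed through Jinja2 before the request is made, which is why 'jinja2:{{ 1+1 }}' comes back as 'Jinja2:2'. A minimal sketch of that rendering step (not the project's exact code path):

    from jinja2 import Environment

    def render_request_value(value):
        # 'jinja2:{{ 1+1 }}' -> 'jinja2:2'; plain strings pass through unchanged.
        # For untrusted input, jinja2.sandbox.SandboxedEnvironment would be the safer choice.
        return Environment().from_string(value).render()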
@@ -118,8 +117,7 @@ def test_body_in_request(client, live_server, measure_memory_usage):
wait_for_all_checks(client)

# Now the change which should trigger a change
body_value = 'Test Body Value {{ 1+1 }}'
body_value_formatted = 'Test Body Value 2'
body_value = 'Test Body Value'
res = client.post(
url_for("edit_page", uuid="first"),
data={
@@ -142,9 +140,8 @@ def test_body_in_request(client, live_server, measure_memory_usage):

# If this gets stuck something is wrong, something should always be there
assert b"No history found" not in res.data
# We should see the formatted value of what we sent in the reply
assert str.encode(body_value) not in res.data
assert str.encode(body_value_formatted) in res.data
# We should see what we sent in the reply
assert str.encode(body_value) in res.data

####### data sanity checks
# Add the test URL twice, we will check

@@ -3,7 +3,7 @@ import os
import time

from flask import url_for
from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output, extract_UUID_from_client
from .util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, wait_for_notification_endpoint_output
from ..notification import default_notification_format

instock_props = [
@@ -367,12 +367,6 @@ def test_change_with_notification_values(client, live_server):
assert "new price 1950.45" in notification
assert "title new price 1950.45" in notification

## Now test the "SEND TEST NOTIFICATION" is working
os.unlink("test-datastore/notification.txt")
uuid = extract_UUID_from_client(client)
res = client.post(url_for("ajax_callback_send_notification_test", watch_uuid=uuid), data={}, follow_redirects=True)
time.sleep(5)
assert os.path.isfile("test-datastore/notification.txt"), "Notification received"


def test_data_sanity(client, live_server):
@@ -419,31 +413,3 @@ def test_data_sanity(client, live_server):
res = client.get(
url_for("edit_page", uuid="first"))
assert test_url2.encode('utf-8') in res.data

res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

# All examples should give a price of 666.66
def test_special_prop_examples(client, live_server):
import glob
#live_server_setup(live_server)

test_url = url_for('test_endpoint', _external=True)
check_path = os.path.join(os.path.dirname(__file__), "itemprop_test_examples", "*.txt")
files = glob.glob(check_path)
assert files
for test_example_filename in files:
with open(test_example_filename, 'r') as example_f:
with open("test-datastore/endpoint-content.txt", "w") as test_f:
test_f.write(f"<html><body>{example_f.read()}</body></html>")

# Now fetch it and check the price worked
client.post(
url_for("form_quick_watch_add"),
data={"url": test_url, "tags": 'restock tests', 'processor': 'restock_diff'},
follow_redirects=True
)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'ception' not in res.data
assert b'155.55' in res.data

@@ -61,10 +61,10 @@ def test_bad_access(client, live_server, measure_memory_usage):
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data


def test_file_slashslash_access(client, live_server, measure_memory_usage):
def test_file_access(client, live_server, measure_memory_usage):
#live_server_setup(live_server)

test_file_path = os.path.abspath(__file__)
test_file_path = "/tmp/test-file.txt"

# file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
client.post(
@@ -82,30 +82,8 @@ def test_file_slashslash_access(client, live_server, measure_memory_usage):
follow_redirects=True
)

assert b"test_file_slashslash_access" in res.data
else:
# Default should be here
assert b'file:// type access is denied for security reasons.' in res.data

def test_file_slash_access(client, live_server, measure_memory_usage):
#live_server_setup(live_server)

test_file_path = os.path.abspath(__file__)

# file:// is permitted by default, but it will be caught by ALLOW_FILE_URI
client.post(
url_for("form_quick_watch_add"),
data={"url": f"file:/{test_file_path}", "tags": ''},
follow_redirects=True
)
wait_for_all_checks(client)
res = client.get(url_for("index"))

# If it is enabled at test time
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
# So it should permit it, but it should fall back to the 'requests' library giving an error
# (but means it gets passed to playwright etc)
assert b"URLs with hostname components are not permitted" in res.data
# Should see something (this file added by run_basic_tests.sh)
assert b"Hello world" in res.data
else:
# Default should be here
assert b'file:// type access is denied for security reasons.' in res.data

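For context: the file-access tests imply two layered guards — a SAFE_PROTOCOL_REGEX allow-list on the URL scheme, plus a separate ALLOW_FILE_URI environment switch for file:/ and file:// URLs. A hedged sketch of such a gate (the messages mirror the tests; the scheme list and logic here are assumptions, not the project's actual validation code):

    import os
    import re

    SAFE_PROTOCOL_REGEX = r'^(http|https|ftp|file):'  # assumed default allow-list

    def is_safe_watch_url(url):
        url = url.strip()
        if not re.match(SAFE_PROTOCOL_REGEX, url, re.IGNORECASE):
            return False, 'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX'
        if url.lower().startswith('file:') and \
                os.getenv('ALLOW_FILE_URI', 'false').lower() not in ('1', 'true', 'yes'):
            return False, 'file:// type access is denied for security reasons.'
        return True, ''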
@@ -18,13 +18,12 @@ class TestDiffBuilder(unittest.TestCase):

watch['last_viewed'] = 110

# Contents from the browser are always returned from the browser/requests/etc as str, str is basically UTF-16 in python
watch.save_history_text(contents="hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents="hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=100, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=105, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=109, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=112, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=115, snapshot_id=str(uuid_builder.uuid4()))
watch.save_history_text(contents=b"hello world", timestamp=117, snapshot_id=str(uuid_builder.uuid4()))

p = watch.get_next_snapshot_key_to_last_viewed
assert p == "112", "Correct last-viewed timestamp was detected"

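For context: the test pins down get_next_snapshot_key_to_last_viewed — with last_viewed=110 and snapshots at 100/105/109/112/115/117 it must return "112", the first snapshot taken after the last-viewed timestamp. A behavior-compatible sketch:

    def next_snapshot_key_after(history_keys, last_viewed):
        # history_keys are snapshot timestamps kept as strings, e.g. "112"
        for key in sorted(history_keys, key=int):
            if int(key) > int(last_viewed):
                return key
        return None

    assert next_snapshot_key_after(
        ["100", "105", "109", "112", "115", "117"], 110) == "112"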
@@ -81,7 +81,6 @@ class update_worker(threading.Thread):
'watch_url': watch.get('url') if watch else None,
})

if watch:
n_object.update(watch.extra_notification_token_values())

logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s")
@@ -261,6 +260,9 @@ class update_worker(threading.Thread):
try:
# Processor is what we are using for detecting the "Change"
processor = watch.get('processor', 'text_json_diff')
# Abort processing when the content was the same as the last fetch
skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')


# Init a new 'difference_detection_processor', first look in processors
processor_module_name = f"changedetectionio.processors.{processor}.processor"
@@ -276,13 +278,16 @@ class update_worker(threading.Thread):

update_handler.call_browser()

changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
changed_detected, update_obj, contents = update_handler.run_changedetection(
watch=watch,
skip_when_checksum_same=skip_when_same_checksum,
)

# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then convert/.decode('utf-8') for the notification etc
# if not isinstance(contents, (bytes, bytearray)):
# raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
if not isinstance(contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
except PermissionError as e:
logger.critical(f"File permission error updating file, watch: {uuid}")
logger.critical(str(e))
@@ -333,8 +338,7 @@ class update_worker(threading.Thread):
elif e.status_code == 500:
err_text = "Error - 500 (Internal server error) received from the web site"
else:
extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else ''
err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}"
err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code))

if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)

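For context: the update_worker hunks above restore a 'skip_when_checksum_same' flag that rides along with each queued item; the worker pops it off and forwards it to run_changedetection(). A stripped-down sketch of that hand-off (names follow the diff, the queue item type is simplified):

    import queue
    from dataclasses import dataclass, field

    @dataclass(order=True)
    class PrioritizedItem:
        priority: int
        item: dict = field(compare=False)  # only priority takes part in ordering

    update_q = queue.PriorityQueue()
    update_q.put(PrioritizedItem(priority=1,
                                 item={'uuid': 'some-uuid',
                                       'skip_when_checksum_same': True}))

    queued_item_data = update_q.get()
    skip_when_same_checksum = queued_item_data.item.get('skip_when_checksum_same')
    # ...then, as in the hunk above:
    # update_handler.run_changedetection(watch=watch,
    #                                    skip_when_checksum_same=skip_when_same_checksum)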
@@ -61,12 +61,6 @@ services:
#
# If you want to watch local files file:///path/to/file.txt (careful! security implications!)
# - ALLOW_FILE_URI=False
#
# For complete privacy if you don't want to use the 'check version' / telemetry service
# - DISABLE_VERSION_CHECK=true
#
# A valid timezone name to run as (for scheduling watch checking), see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# - TZ=America/Los_Angeles

# Comment out ports: when using behind a reverse proxy, enable networks: etc.
ports:
@@ -80,7 +74,7 @@ services:
# If WEBDRIVER or PLAYWRIGHT are enabled, the changedetection container depends on that
# and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if the latter is used)
# depends_on:
# sockpuppetbrowser:
# playwright-chrome:
# condition: service_started

@@ -59,9 +59,7 @@ elementpath==4.1.5

selenium~=4.14.0

# https://github.com/pallets/werkzeug/issues/2985
# Maybe related to pytest?
werkzeug==3.0.6
werkzeug~=3.0

# Templating, so far just in the URLs but in the future can be for the notifications also
jinja2~=3.1
@@ -96,3 +94,4 @@ babel
# Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096
greenlet >= 3.0.3