mirror of https://github.com/dgtlmoon/changedetection.io.git
synced 2026-02-02 12:26:03 +00:00

Compare commits: 0.50.23 ... API-OpenAP (3 commits)

| Author | SHA1 | Date |
|---|---|---|
|  | a0ab1ab6be |  |
|  | f4f716fffa |  |
|  | 6e6136aaa7 |  |
.gitignore

```diff
@@ -33,6 +33,7 @@ venv/
 # Test and development files
 test-datastore/
 tests/
+docs/
 *.md
 !README.md
 
```
.github/dependabot.yml (vendored, 8 changed lines)

```diff
@@ -4,11 +4,11 @@ updates:
     directory: /
     schedule:
       interval: "weekly"
+    "caronc/apprise":
+      versioning-strategy: "increase"
+      schedule:
+        interval: "daily"
     groups:
       all:
         patterns:
           - "*"
-  - package-ecosystem: pip
-    directory: /
-    schedule:
-      interval: "weekly"
```
.github/workflows/codeql-analysis.yml (vendored, 6 changed lines)

```diff
@@ -34,7 +34,7 @@ jobs:
 
     # Initializes the CodeQL tools for scanning.
     - name: Initialize CodeQL
-      uses: github/codeql-action/init@v4
+      uses: github/codeql-action/init@v3
      with:
        languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,7 +45,7 @@ jobs:
    # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
    - name: Autobuild
-     uses: github/codeql-action/autobuild@v4
+     uses: github/codeql-action/autobuild@v3
 
    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 https://git.io/JvXDl
@@ -59,4 +59,4 @@ jobs:
    # make release
 
    - name: Perform CodeQL Analysis
-     uses: github/codeql-action/analyze@v4
+     uses: github/codeql-action/analyze@v3
```
.github/workflows/containers.yml (vendored, 6 changed lines)

```diff
@@ -41,7 +41,7 @@ jobs:
     steps:
       - uses: actions/checkout@v5
       - name: Set up Python 3.11
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v5
        with:
          python-version: 3.11
 
@@ -95,7 +95,7 @@ jobs:
          push: true
          tags: |
            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
-         platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
+         platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
          cache-from: type=gha
          cache-to: type=gha,mode=max
 
@@ -133,7 +133,7 @@ jobs:
          file: ./Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
-         platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
+         platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # Looks like this was disabled
```
.github/workflows/pypi-release.yml (vendored, 4 changed lines)

```diff
@@ -9,7 +9,7 @@ jobs:
     steps:
       - uses: actions/checkout@v5
       - name: Set up Python
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install pypa/build
@@ -39,7 +39,7 @@ jobs:
          name: python-package-distributions
          path: dist/
      - name: Set up Python 3.11
-       uses: actions/setup-python@v6
+       uses: actions/setup-python@v5
        with:
          python-version: '3.11'
      - name: Test that the basic pip built package runs without error
```
.github/workflows/test-container-build.yml (vendored, 6 changed lines)

```diff
@@ -38,6 +38,8 @@ jobs:
           dockerfile: ./Dockerfile
         - platform: linux/arm/v8
           dockerfile: ./Dockerfile
+        - platform: linux/arm64/v8
+          dockerfile: ./Dockerfile
        # Alpine Dockerfile platforms (musl via alpine check)
        - platform: linux/amd64
          dockerfile: ./.github/test/Dockerfile-alpine
@@ -46,7 +48,7 @@ jobs:
    steps:
      - uses: actions/checkout@v5
      - name: Set up Python 3.11
-       uses: actions/setup-python@v6
+       uses: actions/setup-python@v5
        with:
          python-version: 3.11
 
@@ -74,5 +76,5 @@ jobs:
          file: ${{ matrix.dockerfile }}
          platforms: ${{ matrix.platform }}
          cache-from: type=gha
-         cache-to: type=gha,mode=min
+         cache-to: type=gha,mode=max
 
```
.github/workflows/test-only.yml (vendored, 4 changed lines)

```diff
@@ -15,10 +15,6 @@ jobs:
           ruff check . --select E9,F63,F7,F82
           # Complete check with errors treated as warnings
           ruff check . --exit-zero
-      - name: Validate OpenAPI spec
-        run: |
-          pip install openapi-spec-validator
-          python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))"
 
   test-application-3-10:
     needs: lint-code
```
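Note: the lint job loses its OpenAPI spec check with this change. For reference, a standalone sketch of the same validation, expanded from the removed `python3 -c` one-liner (it assumes `openapi-spec-validator` and `PyYAML` are installed and the script runs from the repository root):

```python
# Sketch of the removed spec check, expanded from the one-liner above.
# Assumes: pip install openapi-spec-validator pyyaml, run from the repo root.
import yaml
from openapi_spec_validator import validate_spec

with open('docs/api-spec.yaml') as f:
    spec = yaml.safe_load(f)

validate_spec(spec)  # raises on a malformed spec, silent on success
print("docs/api-spec.yaml is a valid OpenAPI document")
```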
.github/workflows/test-stack-reusable-workflow.yml

```diff
@@ -24,7 +24,7 @@ jobs:
 
       # Mainly just for link/flake8
       - name: Set up Python ${{ env.PYTHON_VERSION }}
-        uses: actions/setup-python@v6
+        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}
 
@@ -253,30 +253,6 @@ jobs:
          docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
          docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
 
-      - name: Extract and display memory test report
-        if: always()
-        run: |
-          # Extract test-memory.log from the container
-          echo "Extracting test-memory.log from container..."
-          docker cp test-cdio-basic-tests:/app/changedetectionio/test-memory.log output-logs/test-memory-${{ env.PYTHON_VERSION }}.log || echo "test-memory.log not found in container"
-
-          # Display the memory log contents for immediate visibility in workflow output
-          echo "=== Top 10 Highest Peak Memory Tests ==="
-          if [ -f output-logs/test-memory-${{ env.PYTHON_VERSION }}.log ]; then
-            # Sort by peak memory value (extract number before MB and sort numerically, reverse order)
-            grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log | \
-              sed 's/.*Peak memory: //' | \
-              paste -d'|' - <(grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log) | \
-              sort -t'|' -k1 -nr | \
-              cut -d'|' -f2 | \
-              head -10
-            echo ""
-            echo "=== Full Memory Test Report ==="
-            cat output-logs/test-memory-${{ env.PYTHON_VERSION }}.log
-          else
-            echo "No memory log available"
-          fi
-
      - name: Store everything including test-datastore
        if: always()
        uses: actions/upload-artifact@v4
```
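The removed report step above boils down to a fairly dense grep/sed/paste pipeline that ranks tests by peak memory. For reference, a rough Python equivalent of that ranking; the exact log-line format is an assumption based on the `Peak memory:` marker the pipeline greps for:

```python
# Rough equivalent of the removed grep | sed | paste | sort | cut | head pipeline.
# Assumption: relevant lines look like "<test name> ... Peak memory: 123.4 MB".
import re

def top_peak_memory(log_path, n=10):
    entries = []
    with open(log_path) as f:
        for line in f:
            m = re.search(r'Peak memory:\s*([\d.]+)', line)
            if m:
                entries.append((float(m.group(1)), line.rstrip()))
    # Highest peak first, mirroring `sort -nr | head -10`
    return [line for _, line in sorted(entries, reverse=True)[:n]]

for line in top_peak_memory('output-logs/test-memory-3.11.log'):
    print(line)
```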
Dockerfile (15 changed lines)

```diff
@@ -5,6 +5,7 @@ ARG PYTHON_VERSION=3.11
 FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
 
 # See `cryptography` pin comment in requirements.txt
+ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
 
 RUN apt-get update && apt-get install -y --no-install-recommends \
     g++ \
@@ -16,7 +17,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
     libxslt-dev \
     make \
     patch \
-    pkg-config \
     zlib1g-dev
 
 RUN mkdir /install
@@ -26,14 +26,6 @@ COPY requirements.txt /requirements.txt
 
 # Use cache mounts and multiple wheel sources for faster ARM builds
 ENV PIP_CACHE_DIR=/tmp/pip-cache
-# Help Rust find OpenSSL for cryptography package compilation on ARM
-ENV PKG_CONFIG_PATH="/usr/lib/pkgconfig:/usr/lib/arm-linux-gnueabihf/pkgconfig:/usr/lib/aarch64-linux-gnu/pkgconfig"
-ENV PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1
-ENV OPENSSL_DIR="/usr"
-ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf"
-ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl"
-# Additional environment variables for cryptography Rust build
-ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
 RUN --mount=type=cache,target=/tmp/pip-cache \
     pip install \
     --extra-index-url https://www.piwheels.org/simple \
@@ -84,11 +76,6 @@ EXPOSE 5000
 
 # The actual flask app module
 COPY changedetectionio /app/changedetectionio
-
-# Also for OpenAPI validation wrapper - needs the YML
-RUN [ ! -d "/app/docs" ] && mkdir /app/docs
-COPY docs/api-spec.yaml /app/docs/api-spec.yaml
-
 # Starting wrapper
 COPY changedetection.py /app/changedetection.py
 
```
MANIFEST.in

```diff
@@ -1,8 +1,7 @@
 recursive-include changedetectionio/api *
 recursive-include changedetectionio/blueprint *
-recursive-include changedetectionio/conditions *
 recursive-include changedetectionio/content_fetchers *
-recursive-include changedetectionio/jinja2_custom *
+recursive-include changedetectionio/conditions *
 recursive-include changedetectionio/model *
 recursive-include changedetectionio/notification *
 recursive-include changedetectionio/processors *
@@ -10,7 +9,6 @@ recursive-include changedetectionio/realtime *
 recursive-include changedetectionio/static *
 recursive-include changedetectionio/templates *
 recursive-include changedetectionio/tests *
-recursive-include changedetectionio/widgets *
 prune changedetectionio/static/package-lock.json
 prune changedetectionio/static/styles/node_modules
 prune changedetectionio/static/styles/package-lock.json
```
changedetectionio/__init__.py

```diff
@@ -2,7 +2,7 @@
 
 # Read more https://github.com/dgtlmoon/changedetection.io/wiki
 
-__version__ = '0.50.23'
+__version__ = '0.50.10'
 
 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError
```
changedetectionio/api/Import.py

```diff
@@ -3,7 +3,7 @@ from changedetectionio.strtobool import strtobool
 from flask_restful import abort, Resource
 from flask import request
 import validators
-from . import auth, validate_openapi_request
+from . import auth
 
 
 class Import(Resource):
@@ -12,7 +12,6 @@ class Import(Resource):
         self.datastore = kwargs['datastore']
 
     @auth.check_token
-    @validate_openapi_request('importWatches')
     def post(self):
         """Import a list of watched URLs."""
 
```
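The `Import` endpoint itself is unchanged apart from the dropped decorator; it still accepts a plain-text body of URLs. A hedged client sketch: the `/api/v1/import` path is inferred from the `/api/v1/...` curl comments elsewhere in this diff, and the `x-api-key` header from the project's API docs, so adjust both to your deployment:

```python
# Hypothetical client call for the Import resource above; the path and auth
# header are assumptions, not taken verbatim from this diff.
import requests

resp = requests.post(
    "http://localhost:5000/api/v1/import",
    headers={"x-api-key": "YOUR_API_KEY"},
    data="https://example.com\nhttps://example.org",  # one watched URL per line
)
print(resp.status_code, resp.text)
```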
changedetectionio/api/Notifications.py

```diff
@@ -1,7 +1,9 @@
 from flask_expects_json import expects_json
-from flask_restful import Resource, abort
+from flask_restful import Resource
+from . import auth
+from flask_restful import abort, Resource
 from flask import request
-from . import auth, validate_openapi_request
+from . import auth
 from . import schema_create_notification_urls, schema_delete_notification_urls
 
 class Notifications(Resource):
@@ -10,7 +12,6 @@ class Notifications(Resource):
         self.datastore = kwargs['datastore']
 
     @auth.check_token
-    @validate_openapi_request('getNotifications')
     def get(self):
         """Return Notification URL List."""
 
@@ -21,7 +22,6 @@ class Notifications(Resource):
         }, 200
 
     @auth.check_token
-    @validate_openapi_request('addNotifications')
     @expects_json(schema_create_notification_urls)
     def post(self):
         """Create Notification URLs."""
@@ -49,7 +49,6 @@ class Notifications(Resource):
         return {'notification_urls': added_urls}, 201
 
     @auth.check_token
-    @validate_openapi_request('replaceNotifications')
     @expects_json(schema_create_notification_urls)
     def put(self):
         """Replace Notification URLs."""
@@ -72,7 +71,6 @@ class Notifications(Resource):
         return {'notification_urls': clean_urls}, 200
 
     @auth.check_token
-    @validate_openapi_request('deleteNotifications')
     @expects_json(schema_delete_notification_urls)
     def delete(self):
         """Delete Notification URLs."""
```
changedetectionio/api/Search.py

```diff
@@ -1,6 +1,6 @@
 from flask_restful import Resource, abort
 from flask import request
-from . import auth, validate_openapi_request
+from . import auth
 
 class Search(Resource):
     def __init__(self, **kwargs):
@@ -8,7 +8,6 @@ class Search(Resource):
         self.datastore = kwargs['datastore']
 
     @auth.check_token
-    @validate_openapi_request('searchWatches')
     def get(self):
         """Search for watches by URL or title text."""
         query = request.args.get('q', '').strip()
```
changedetectionio/api/SystemInfo.py

```diff
@@ -1,5 +1,5 @@
 from flask_restful import Resource
-from . import auth, validate_openapi_request
+from . import auth
 
 
 class SystemInfo(Resource):
@@ -9,7 +9,6 @@ class SystemInfo(Resource):
         self.update_q = kwargs['update_q']
 
     @auth.check_token
-    @validate_openapi_request('getSystemInfo')
     def get(self):
         """Return system info."""
         import time
```
changedetectionio/api/Tags.py

```diff
@@ -7,7 +7,7 @@ from flask import request
 from . import auth
 
 # Import schemas from __init__.py
-from . import schema_tag, schema_create_tag, schema_update_tag, validate_openapi_request
+from . import schema_tag, schema_create_tag, schema_update_tag
 
 
 class Tag(Resource):
@@ -19,7 +19,6 @@ class Tag(Resource):
     # Get information about a single tag
     # curl http://localhost:5000/api/v1/tag/<string:uuid>
     @auth.check_token
-    @validate_openapi_request('getTag')
     def get(self, uuid):
         """Get data for a single tag/group, toggle notification muting, or recheck all."""
         from copy import deepcopy
@@ -51,7 +50,6 @@ class Tag(Resource):
         return tag
 
     @auth.check_token
-    @validate_openapi_request('deleteTag')
     def delete(self, uuid):
         """Delete a tag/group and remove it from all watches."""
         if not self.datastore.data['settings']['application']['tags'].get(uuid):
@@ -68,7 +66,6 @@ class Tag(Resource):
         return 'OK', 204
 
     @auth.check_token
-    @validate_openapi_request('updateTag')
     @expects_json(schema_update_tag)
     def put(self, uuid):
         """Update tag information."""
@@ -83,7 +80,6 @@ class Tag(Resource):
 
 
     @auth.check_token
-    @validate_openapi_request('createTag')
     # Only cares for {'title': 'xxxx'}
     def post(self):
         """Create a single tag/group."""
@@ -104,7 +100,6 @@ class Tags(Resource):
         self.datastore = kwargs['datastore']
 
     @auth.check_token
-    @validate_openapi_request('listTags')
     def get(self):
         """List tags/groups."""
         result = {}
```
changedetectionio/api/Watch.py

```diff
@@ -11,40 +11,7 @@ from . import auth
 import copy
 
 # Import schemas from __init__.py
-from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
+from . import schema, schema_create_watch, schema_update_watch
 
 
-def validate_time_between_check_required(json_data):
-    """
-    Validate that at least one time interval is specified when not using default settings.
-    Returns None if valid, or error message string if invalid.
-    Defaults to using global settings if time_between_check_use_default is not provided.
-    """
-    # Default to using global settings if not specified
-    use_default = json_data.get('time_between_check_use_default', True)
-
-    # If using default settings, no validation needed
-    if use_default:
-        return None
-
-    # If not using defaults, check if time_between_check exists and has at least one non-zero value
-    time_check = json_data.get('time_between_check')
-    if not time_check:
-        # No time_between_check provided and not using defaults - this is an error
-        return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings."
-
-    # time_between_check exists, check if it has at least one non-zero value
-    if any([
-        (time_check.get('weeks') or 0) > 0,
-        (time_check.get('days') or 0) > 0,
-        (time_check.get('hours') or 0) > 0,
-        (time_check.get('minutes') or 0) > 0,
-        (time_check.get('seconds') or 0) > 0
-    ]):
-        return None
-
-    # time_between_check exists but all values are 0 or empty - this is an error
-    return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings."
-
-
 class Watch(Resource):
@@ -58,7 +25,6 @@ class Watch(Resource):
     # @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
     # ?recheck=true
     @auth.check_token
-    @validate_openapi_request('getWatch')
     def get(self, uuid):
         """Get information about a single watch, recheck, pause, or mute."""
         from copy import deepcopy
@@ -88,12 +54,9 @@ class Watch(Resource):
         # attr .last_changed will check for the last written text snapshot on change
         watch['last_changed'] = watch.last_changed
         watch['viewed'] = watch.viewed
-        watch['link'] = watch.link,
-
         return watch
 
     @auth.check_token
-    @validate_openapi_request('deleteWatch')
     def delete(self, uuid):
         """Delete a watch and related history."""
         if not self.datastore.data['watching'].get(uuid):
@@ -103,7 +66,6 @@ class Watch(Resource):
         return 'OK', 204
 
     @auth.check_token
-    @validate_openapi_request('updateWatch')
     @expects_json(schema_update_watch)
     def put(self, uuid):
         """Update watch information."""
@@ -116,11 +78,6 @@ class Watch(Resource):
         if not request.json.get('proxy') in plist:
             return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
 
-        # Validate time_between_check when not using defaults
-        validation_error = validate_time_between_check_required(request.json)
-        if validation_error:
-            return validation_error, 400
-
         watch.update(request.json)
 
         return "OK", 200
@@ -134,7 +91,6 @@ class WatchHistory(Resource):
     # Get a list of available history for a watch by UUID
     # curl http://localhost:5000/api/v1/watch/<string:uuid>/history
     @auth.check_token
-    @validate_openapi_request('getWatchHistory')
     def get(self, uuid):
         """Get a list of all historical snapshots available for a watch."""
         watch = self.datastore.data['watching'].get(uuid)
@@ -149,7 +105,6 @@ class WatchSingleHistory(Resource):
         self.datastore = kwargs['datastore']
 
     @auth.check_token
-    @validate_openapi_request('getWatchSnapshot')
     def get(self, uuid, timestamp):
         """Get single snapshot from watch."""
         watch = self.datastore.data['watching'].get(uuid)
@@ -183,7 +138,6 @@ class WatchFavicon(Resource):
         self.datastore = kwargs['datastore']
 
     @auth.check_token
-    @validate_openapi_request('getWatchFavicon')
     def get(self, uuid):
         """Get favicon for a watch."""
         watch = self.datastore.data['watching'].get(uuid)
@@ -218,7 +172,6 @@ class CreateWatch(Resource):
         self.update_q = kwargs['update_q']
 
     @auth.check_token
-    @validate_openapi_request('createWatch')
     @expects_json(schema_create_watch)
     def post(self):
         """Create a single watch."""
@@ -236,11 +189,6 @@ class CreateWatch(Resource):
         if not json_data.get('proxy') in plist:
             return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
 
-        # Validate time_between_check when not using defaults
-        validation_error = validate_time_between_check_required(json_data)
-        if validation_error:
-            return validation_error, 400
-
         extras = copy.deepcopy(json_data)
 
         # Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
@@ -259,7 +207,6 @@ class CreateWatch(Resource):
         return "Invalid or unsupported URL", 400
 
     @auth.check_token
-    @validate_openapi_request('listWatches')
     def get(self):
         """List watches."""
         list = {}
@@ -275,8 +222,6 @@ class CreateWatch(Resource):
             'last_changed': watch.last_changed,
             'last_checked': watch['last_checked'],
             'last_error': watch['last_error'],
-            'link': watch.link,
-            'page_title': watch['page_title'],
             'title': watch['title'],
             'url': watch['url'],
             'viewed': watch.viewed
```
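With `validate_time_between_check_required()` deleted, `POST`/`PUT` payloads that disable the global schedule but specify an all-zero interval are no longer rejected with a 400. A quick sketch of the guarantee being removed, assuming the function as it appears on the removed (`-`) side above is in scope:

```python
# Behaviour the deleted validator provided (function body is on the '-' side above).
ok = {'time_between_check_use_default': False,
      'time_between_check': {'hours': 6}}
all_zero = {'time_between_check_use_default': False,
            'time_between_check': {'hours': 0, 'minutes': 0}}
uses_global = {}  # time_between_check_use_default defaults to True

assert validate_time_between_check_required(ok) is None
assert validate_time_between_check_required(uses_global) is None
assert "At least one time interval" in validate_time_between_check_required(all_zero)
```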
changedetectionio/api/__init__.py

```diff
@@ -1,7 +1,4 @@
 import copy
-import functools
-from flask import request, abort
-from loguru import logger
 from . import api_schema
 from ..model import watch_base
 
@@ -11,7 +8,6 @@ schema = api_schema.build_watch_json_schema(watch_base_config)
 
 schema_create_watch = copy.deepcopy(schema)
 schema_create_watch['required'] = ['url']
-del schema_create_watch['properties']['last_viewed']
 
 schema_update_watch = copy.deepcopy(schema)
 schema_update_watch['additionalProperties'] = False
@@ -29,54 +25,9 @@ schema_create_notification_urls['required'] = ['notification_urls']
 schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
 schema_delete_notification_urls['required'] = ['notification_urls']
 
-@functools.cache
-def get_openapi_spec():
-    """Lazy load OpenAPI spec and dependencies only when validation is needed."""
-    import os
-    import yaml  # Lazy import - only loaded when API validation is actually used
-    from openapi_core import OpenAPI  # Lazy import - saves ~10.7 MB on startup
-
-    spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
-    with open(spec_path, 'r') as f:
-        spec_dict = yaml.safe_load(f)
-    _openapi_spec = OpenAPI.from_dict(spec_dict)
-    return _openapi_spec
-
-def validate_openapi_request(operation_id):
-    """Decorator to validate incoming requests against OpenAPI spec."""
-    def decorator(f):
-        @functools.wraps(f)
-        def wrapper(*args, **kwargs):
-            try:
-                # Skip OpenAPI validation for GET requests since they don't have request bodies
-                if request.method.upper() != 'GET':
-                    # Lazy import - only loaded when actually validating a request
-                    from openapi_core.contrib.flask import FlaskOpenAPIRequest
-
-                    spec = get_openapi_spec()
-                    openapi_request = FlaskOpenAPIRequest(request)
-                    result = spec.unmarshal_request(openapi_request)
-                    if result.errors:
-                        from werkzeug.exceptions import BadRequest
-                        error_details = []
-                        for error in result.errors:
-                            error_details.append(str(error))
-                        raise BadRequest(f"OpenAPI validation failed: {error_details}")
-            except BadRequest:
-                # Re-raise BadRequest exceptions (validation failures)
-                raise
-            except Exception as e:
-                # If OpenAPI spec loading fails, log but don't break existing functionality
-                logger.critical(f"OpenAPI validation warning for {operation_id}: {e}")
-                abort(500)
-            return f(*args, **kwargs)
-        return wrapper
-    return decorator
-
-
 # Import all API resources
 from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch, WatchFavicon
 from .Tags import Tags, Tag
 from .Import import Import
 from .SystemInfo import SystemInfo
 from .Notifications import Notifications
```
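The deleted `get_openapi_spec()` / `validate_openapi_request()` pair follows a lazy-load-and-cache pattern: the heavy `yaml`/`openapi_core` imports only run the first time a decorated handler fires, and `functools.cache` keeps the parsed spec around afterwards. A minimal runnable sketch of that shape (the JSON stand-in replaces the real spec loading, so none of the heavy dependencies are needed):

```python
# Minimal sketch of the lazy-load-and-cache decorator pattern removed above.
import functools

@functools.cache
def get_spec():
    import json  # stand-in for the heavy yaml/openapi_core imports
    return json.loads('{"openapi": "3.0.3"}')

def validate_request(operation_id):
    def decorator(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            spec = get_spec()  # parsed once, cached for every later call
            assert spec["openapi"].startswith("3."), operation_id
            return f(*args, **kwargs)
        return wrapper
    return decorator

@validate_request('demo')
def handler():
    return "OK"

print(handler())  # -> OK; get_spec() body ran exactly once
```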
changedetectionio/api/api_schema.py

```diff
@@ -78,13 +78,6 @@ def build_watch_json_schema(d):
               ]:
         schema['properties'][v]['anyOf'].append({'type': 'string', "maxLength": 5000})
 
-    for v in ['last_viewed']:
-        schema['properties'][v] = {
-            "type": "integer",
-            "description": "Unix timestamp in seconds of the last time the watch was viewed.",
-            "minimum": 0
-        }
-
     # None or Boolean
     schema['properties']['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'})
 
@@ -119,12 +112,6 @@ def build_watch_json_schema(d):
 
     schema['properties']['time_between_check'] = build_time_between_check_json_schema()
 
-    schema['properties']['time_between_check_use_default'] = {
-        "type": "boolean",
-        "default": True,
-        "description": "Whether to use global settings for time between checks - defaults to true if not set"
-    }
-
     schema['properties']['browser_steps'] = {
         "anyOf": [
             {
```
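The dropped `last_viewed` property is an ordinary JSON-schema fragment, so its effect is easy to demonstrate with the `jsonschema` library (which `flask_expects_json` builds on); the fragment below is copied from the removed lines:

```python
# What the removed last_viewed schema enforced, checked with jsonschema directly.
from jsonschema import validate, ValidationError

last_viewed_schema = {
    "type": "integer",
    "description": "Unix timestamp in seconds of the last time the watch was viewed.",
    "minimum": 0,
}

validate(1700000000, last_viewed_schema)   # a plausible Unix timestamp: passes
try:
    validate(-5, last_viewed_schema)       # "minimum": 0 rejects negatives
except ValidationError as e:
    print(e.message)
```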
changedetectionio/async_update_worker.py

```diff
@@ -310,6 +310,15 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
                     continue
 
                 if process_changedetection_results:
+                    # Extract title if needed
+                    if datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
+                        if not watch['title'] or not len(watch['title']):
+                            try:
+                                update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content)
+                                logger.info(f"UUID: {uuid} Extract <title> updated title to '{update_obj['title']}")
+                            except Exception as e:
+                                logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.")
+
                     try:
                         datastore.update_watch(uuid=uuid, update_obj=update_obj)
 
@@ -334,10 +343,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
                         if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
                             watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))
 
-                        # Explicitly delete large content variables to free memory IMMEDIATELY after saving
-                        # These are no longer needed after being saved to history
-                        del contents
-
                         # Send notifications on second+ check
                         if watch.history_n >= 2:
                             logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
@@ -352,14 +357,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
                 # Always record attempt count
                 count = watch.get('check_count', 0) + 1
 
-                # Always record page title (used in notifications, and can change even when the content is the same)
-                try:
-                    page_title = html_tools.extract_title(data=update_handler.fetcher.content)
-                    logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
-                    datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
-                except Exception as e:
-                    logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")
-
                 # Record server header
                 try:
                     server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
@@ -376,12 +373,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
                 datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
                                                               'check_count': count})
 
-                # NOW clear fetcher content - after all processing is complete
-                # This is the last point where we need the fetcher data
-                if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
-                    update_handler.fetcher.clear_content()
-                    logger.debug(f"Cleared fetcher content for UUID {uuid}")
-
             except Exception as e:
                 logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
                 logger.error(f"Worker {worker_id} traceback:", exc_info=True)
@@ -402,28 +393,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
                     #logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
                     watch_check_update.send(watch_uuid=watch['uuid'])
 
-                # Explicitly clean up update_handler and all its references
-                if update_handler:
-                    # Clear fetcher content using the proper method
-                    if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
-                        update_handler.fetcher.clear_content()
-
-                    # Clear processor references
-                    if hasattr(update_handler, 'content_processor'):
-                        update_handler.content_processor = None
-
-                    update_handler = None
-
-                # Clear local contents variable if it still exists
-                if 'contents' in locals():
-                    del contents
-
-                # Note: We don't set watch = None here because:
-                # 1. watch is just a local reference to datastore.data['watching'][uuid]
-                # 2. Setting it to None doesn't affect the datastore
-                # 3. GC can't collect the object anyway (still referenced by datastore)
-                # 4. It would just cause confusion
-
+                update_handler = None
                 logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
             except Exception as cleanup_error:
                 logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
```
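The first hunk re-adds the conditional `<title>`-as-title flow while the later hunks drop the unconditional `page_title` capture; both lean on the project's `html_tools` helpers. A rough standalone approximation of that title extraction using lxml (already a project dependency); the real `html_tools.extract_element`/`extract_title` implementations may differ:

```python
# Standalone approximation of the <title> extraction used by the worker above.
from lxml import html

def extract_page_title(content):
    if not content:
        return None
    try:
        titles = html.fromstring(content).xpath('//title/text()')
        return titles[0].strip() if titles else None
    except Exception:
        return None

print(extract_page_title("<html><head><title>Example Domain</title></head></html>"))
```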
changedetectionio/blueprint/check_proxies/__init__.py

```diff
@@ -6,7 +6,7 @@ from loguru import logger
 
 from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
 from changedetectionio.content_fetchers.base import manage_user_agent
-from changedetectionio.jinja2_custom import render as jinja_render
+from changedetectionio.safe_jinja import render as jinja_render
 
 
 
@@ -33,7 +33,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
     def long_task(uuid, preferred_proxy):
         import time
         from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
-        from changedetectionio.jinja2_custom import render as jinja_render
+        from changedetectionio.safe_jinja import render as jinja_render
 
         status = {'status': '', 'length': 0, 'text': ''}
 
```
changedetectionio/blueprint/rss/__init__.py

```diff
@@ -1,5 +1,5 @@
 
-from changedetectionio.jinja2_custom import render as jinja_render
+from changedetectionio.safe_jinja import render as jinja_render
 from changedetectionio.store import ChangeDetectionStore
 from feedgen.feed import FeedGenerator
 from flask import Blueprint, make_response, request, url_for, redirect
@@ -108,13 +108,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 
             fe.link(link=diff_link)
 
-            # Same logic as watch-overview.html
-            if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
-                watch_label = watch.label
-            else:
-                watch_label = watch.get('url')
+            # @todo watch should be a getter - watch.get('title') (internally if URL else..)
+            watch_title = watch.get('title') if watch.get('title') else watch.get('url')
 
-            fe.title(title=watch_label)
+            fe.title(title=watch_title)
             try:
 
                 html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
@@ -130,7 +127,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
             # @todo User could decide if <link> goes to the diff page, or to the watch link
             rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
 
-            content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link)
+            content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link)
 
             # Out of range chars could also break feedgen
             if scan_invalid_chars_in_rss(content):
```
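Both sides feed the same `rss_template` through `jinja_render`; only the title variable changes. For reference, the same render with a bare Jinja2 environment (a sketch only; the project routes this through its own `safe_jinja`/`jinja2_custom` wrapper rather than a plain `Environment`):

```python
# Plain-Jinja2 sketch of the rss_template render above.
from jinja2 import Environment

rss_template = ("<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n"
                "<p>{{html_diff}}</p>\n</body></html>\n")

content = Environment().from_string(rss_template).render(
    watch_title="Example watch",
    watch_url="https://example.com",
    html_diff="<ins>added line</ins>",
)
print(content)
```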
changedetectionio/blueprint/settings/__init__.py

```diff
@@ -119,7 +119,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         hide_remove_pass=os.getenv("SALTED_PASS", False),
         min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
         settings_application=datastore.data['settings']['application'],
-        timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'),
+        timezone_default_config=datastore.data['settings']['application'].get('timezone'),
         utc_time=utc_time,
     )
 
```
@@ -1,7 +1,7 @@
|
|||||||
{% extends 'base.html' %}
|
{% extends 'base.html' %}
|
||||||
|
|
||||||
{% block content %}
|
{% block content %}
|
||||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %}
|
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %}
|
||||||
{% from '_common_fields.html' import render_common_settings_form %}
|
{% from '_common_fields.html' import render_common_settings_form %}
|
||||||
<script>
|
<script>
|
||||||
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
|
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
|
||||||
@@ -72,23 +72,33 @@
|
|||||||
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
|
<span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
|
||||||
|
</div>
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_field(form.application.form.pager_size) }}
|
||||||
|
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
||||||
|
</div>
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_field(form.application.form.rss_content_format) }}
|
||||||
|
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
|
||||||
|
</div>
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
|
||||||
|
<span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
|
||||||
|
</div>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
||||||
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
|
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="grey-form-border">
|
{% if form.requests.proxy %}
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group inline-radio">
|
||||||
{{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
|
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||||
</div>
|
<span class="pure-form-message-inline">
|
||||||
<div class="pure-control-group">
|
Choose a default proxy for all watches
|
||||||
{{ render_field(form.application.form.rss_content_format) }}
|
</span>
|
||||||
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
|
|
||||||
</div>
|
|
||||||
<div class="pure-control-group">
|
|
||||||
{{ render_checkbox_field(form.application.form.rss_reader_mode) }}
|
|
||||||
<span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
|
{% endif %}
|
||||||
</fieldset>
|
</fieldset>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -131,10 +141,6 @@
|
|||||||
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
|
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
|
||||||
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
|
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="pure-control-group">
|
|
||||||
{{ render_field(form.requests.form.timeout) }}
|
|
||||||
<span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br>
|
|
||||||
</div>
|
|
||||||
<div class="pure-control-group inline-radio">
|
<div class="pure-control-group inline-radio">
|
||||||
{{ render_field(form.requests.form.default_ua) }}
|
{{ render_field(form.requests.form.default_ua) }}
|
||||||
<span class="pure-form-message-inline">
|
<span class="pure-form-message-inline">
|
||||||
@@ -193,17 +199,11 @@ nav
|
|||||||
</ul>
|
</ul>
|
||||||
</span>
|
</span>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
<fieldset class="pure-group">
|
|
||||||
{{ render_checkbox_field(form.application.form.strip_ignored_lines) }}
|
|
||||||
<span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br>
|
|
||||||
<i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
|
|
||||||
</span>
|
|
||||||
</fieldset>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="tab-pane-inner" id="api">
|
<div class="tab-pane-inner" id="api">
|
||||||
<h4>API Access</h4>
|
<h4>API Access</h4>
|
||||||
<p>Drive your changedetection.io via API, More about <a href="https://changedetection.io/docs/api_v1/index.html">API access and examples here</a>.</p>
|
<p>Drive your changedetection.io via API, More about <a href="https://github.com/dgtlmoon/changedetection.io/wiki/API-Reference">API access here</a></p>
|
||||||
|
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
|
                     {{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
@@ -238,7 +238,7 @@ nav
                     <p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p>
                     <p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
                     <p>
-                        {{ render_field(form.application.form.scheduler_timezone_default) }}
+                        {{ render_field(form.application.form.timezone) }}
                         <datalist id="timezones" style="display: none;">
                             {% for tz_name in available_timezones %}
                                 <option value="{{ tz_name }}">{{ tz_name }}</option>
@@ -260,13 +260,6 @@ nav
                         {{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }}
                         <span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span>
                     </div>
-                    <div class="pure-control-group">
-                        {{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }}
-                    </div>
-                    <div class="pure-control-group">
-                        {{ render_field(form.application.form.pager_size) }}
-                        <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
-                    </div>
 
                 </div>
                 <div class="tab-pane-inner" id="proxies">
@@ -316,33 +309,23 @@ nav
                     <p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.
 
                     <div class="pure-control-group" id="extra-proxies-setting">
-                        {{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
+                        {{ render_field(form.requests.form.extra_proxies) }}
                         <span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br>
                         <span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span>
-                        {% if form.requests.proxy %}
-                            <div>
-                                <br>
-                                <div class="inline-radio">
-                                    {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
-                                    <span class="pure-form-message-inline">Choose a default proxy for all watches</span>
-                                </div>
-                            </div>
-                        {% endif %}
                     </div>
                     <div class="pure-control-group" id="extra-browsers-setting">
                         <p>
                             <span class="pure-form-message-inline"><i>Extra Browsers</i> can be attached to further defeat CAPTCHA's on websites that are particularly hard to scrape.</span><br>
                             <span class="pure-form-message-inline">Simply paste the connection address into the box, <a href="https://changedetection.io/tutorial/using-bright-datas-scraping-browser-pass-captchas-and-other-protection-when-monitoring">More instructions and examples here</a> </span>
                         </p>
-                        {{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }}
+                        {{ render_field(form.requests.form.extra_browsers) }}
                     </div>
 
             </div>
             <div id="actions">
                 <div class="pure-control-group">
                     {{ render_button(form.save_button) }}
-                    <a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">Back</a>
-                    <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">Clear Snapshot History</a>
+                    <a href="{{url_for('watchlist.index')}}" class="pure-button button-small button-cancel">Back</a>
+                    <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a>
                 </div>
             </div>
         </form>
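
A note on the SOCKS5 remark in the hunk above: authenticated SOCKS5 URLs only work where the Python requests stack makes the connection itself. A minimal sketch, assuming the optional PySocks dependency (`pip install requests[socks]`) and a made-up proxy address:

    # Minimal sketch: an authenticated SOCKS5 proxy on the 'plain requests'
    # fetcher path. The proxy host and credentials below are placeholders.
    import requests

    proxies = {
        "http": "socks5://user:pass@proxy.example.com:1080",
        "https": "socks5://user:pass@proxy.example.com:1080",
    }

    r = requests.get("https://example.com", proxies=proxies, timeout=30)
    print(r.status_code)

Browser-based fetchers hand the connection to the browser instead, which has no standard way to carry SOCKS credentials, hence the IP-whitelisting advice in the template text.
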
@@ -1,6 +1,6 @@
 {% extends 'base.html' %}
 {% block content %}
-{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_ternary_field %}
+{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
 {% from '_common_fields.html' import render_common_settings_form %}
 <script>
     const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="group-settings")}}";
@@ -64,7 +64,7 @@
             <div class="tab-pane-inner" id="notifications">
                 <fieldset>
                     <div class="pure-control-group inline-radio">
-                        {{ render_ternary_field(form.notification_muted, BooleanField=True) }}
+                        {{ render_checkbox_field(form.notification_muted) }}
                     </div>
                     {% if 1 %}
                     <div class="pure-control-group inline-radio">

@@ -187,7 +187,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 
         tz_name = time_schedule_limit.get('timezone')
         if not tz_name:
-            tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
+            tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')
 
         if time_schedule_limit and time_schedule_limit.get('enabled'):
             try:
@@ -242,7 +242,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
                 'available_timezones': sorted(available_timezones()),
                 'browser_steps_config': browser_step_ui_config,
                 'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
-                'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '',
                 'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
                 'extra_processor_config': form.extra_tab_content(),
                 'extra_title': f" - Edit - {watch.label}",
@@ -257,7 +256,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
                 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
                 'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
                 'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
-                'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'),
+                'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
                 'using_global_webdriver_wait': not default['webdriver_delay'],
                 'uuid': uuid,
                 'watch': watch,
@@ -44,16 +44,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
         # Sort by last_changed and add the uuid which is usually the key..
         sorted_watches = []
         with_errors = request.args.get('with_errors') == "1"
-        unread_only = request.args.get('unread') == "1"
         errored_count = 0
         search_q = request.args.get('q').strip().lower() if request.args.get('q') else False
         for uuid, watch in datastore.data['watching'].items():
             if with_errors and not watch.get('last_error'):
                 continue
 
-            if unread_only and (watch.viewed or watch.last_changed == 0) :
-                continue
-
             if active_tag_uuid and not active_tag_uuid in watch['tags']:
                 continue
             if watch.get('last_error'):
@@ -87,6 +83,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
             form=form,
             guid=datastore.data['app_guid'],
             has_proxies=datastore.proxy_list,
+            has_unviewed=datastore.has_unviewed,
             hosted_sticky=os.getenv("SALTED_PASS", False) == False,
             now_time_server=round(time.time()),
             pagination=pagination,
@@ -96,7 +93,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
             sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
             system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
             tags=sorted_tags,
-            unread_changes_count=datastore.unread_changes_count,
             watches=sorted_watches
         )
 
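
For context, the `unread=1` handling removed in the first hunk mirrors the surviving `with_errors=1` branch: read a flag from the query string, then `continue` past watches that don't match. A standalone sketch of that loop shape, with made-up watch data (the real objects expose `viewed` as a property, not a dict key):

    # Sketch of the watchlist filter pattern above: each query-string flag
    # skips non-matching watches inside a single loop pass.
    watching = {
        "uuid-1": {"last_error": None, "viewed": True, "last_changed": 0},
        "uuid-2": {"last_error": "timeout", "viewed": False, "last_changed": 1700000000},
    }

    def filter_watches(watching, with_errors=False, unread_only=False):
        selected = []
        for uuid, watch in watching.items():
            if with_errors and not watch.get("last_error"):
                continue
            if unread_only and (watch["viewed"] or watch["last_changed"] == 0):
                continue
            selected.append(uuid)
        return selected

    print(filter_watches(watching, with_errors=True))  # ['uuid-2']
    print(filter_watches(watching, unread_only=True))  # ['uuid-2']
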

@@ -82,11 +82,8 @@ document.addEventListener('DOMContentLoaded', function() {
 {%- set cols_required = cols_required + 1 -%}
 {%- endif -%}
 {%- set ui_settings = datastore.data['settings']['application']['ui'] -%}
-{%- set wrapper_classes = [
-    'has-unread-changes' if unread_changes_count else '',
-    'has-error' if errored_count else '',
-] -%}
-<div id="watch-table-wrapper" class="{{ wrapper_classes | reject('equalto', '') | join(' ') }}">
+<div id="watch-table-wrapper">
 {%- set table_classes = [
     'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled',
 ] -%}
@@ -121,8 +118,7 @@ document.addEventListener('DOMContentLoaded', function() {
 {%- set checking_now = is_checking_now(watch) -%}
 {%- set history_n = watch.history_n -%}
 {%- set favicon = watch.get_favicon_filename() -%}
-{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
-{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
+{# Mirror in changedetectionio/static/js/realtime.js for the frontend #}
 {%- set row_classes = [
     loop.cycle('pure-table-odd', 'pure-table-even'),
     'processor-' ~ watch['processor'],
@@ -137,8 +133,7 @@ document.addEventListener('DOMContentLoaded', function() {
     'checking-now' if checking_now else '',
     'notification_muted' if watch.notification_muted else '',
     'single-history' if history_n == 1 else '',
     'multiple-history' if history_n >= 2 else '',
-    'use-html-title' if system_use_url_watchlist else 'no-html-title',
 ] -%}
 <tr id="{{ watch.uuid }}" data-watch-uuid="{{ watch.uuid }}" class="{{ row_classes | reject('equalto', '') | join(' ') }}">
 <td class="inline checkbox-uuid" ><div><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td>
@@ -160,12 +155,7 @@ document.addEventListener('DOMContentLoaded', function() {
 {% endif %}
 <div>
     <span class="watch-title">
-        {% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
-            {{ watch.label }}
-        {% else %}
-            {{ watch.get('title') or watch.link }}
-        {% endif %}
-        <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
+        {{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}} <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
     </span>
     <div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div>
 {%- if watch['processor'] == 'text_json_diff' -%}
@@ -244,10 +234,10 @@ document.addEventListener('DOMContentLoaded', function() {
             </tbody>
         </table>
         <ul id="post-list-buttons">
-            <li id="post-list-with-errors" style="display: none;" >
+            <li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" >
                 <a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a>
             </li>
-            <li id="post-list-mark-views" style="display: none;" >
+            <li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" >
                 <a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a>
             </li>
             {%- if active_tag_uuid -%}
@@ -255,9 +245,6 @@ document.addEventListener('DOMContentLoaded', function() {
                 <a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a>
             </li>
             {%- endif -%}
-            <li id="post-list-unread" style="display: none;" >
-                <a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a>
-            </li>
            <li>
                 <a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck
                 all {% if active_tag_uuid %} in '{{active_tag.title}}'{%endif%}</a>

@@ -64,19 +64,6 @@ class Fetcher():
     # Time ONTOP of the system defined env minimum time
     render_extract_delay = 0
 
-    def clear_content(self):
-        """
-        Explicitly clear all content from memory to free up heap space.
-        Call this after content has been saved to disk.
-        """
-        self.content = None
-        if hasattr(self, 'raw_content'):
-            self.raw_content = None
-        self.screenshot = None
-        self.xpath_data = None
-        # Keep headers and status_code as they're small
-        logger.trace("Fetcher content cleared from memory")
-
     @abstractmethod
     def get_error(self):
         return self.error
@@ -141,7 +128,7 @@ class Fetcher():
     async def iterate_browser_steps(self, start_url=None):
         from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
         from playwright._impl._errors import TimeoutError, Error
-        from changedetectionio.jinja2_custom import render as jinja_render
+        from changedetectionio.safe_jinja import render as jinja_render
         step_n = 0
 
         if self.browser_steps is not None and len(self.browser_steps):
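
The removed `clear_content()` was a memory-pressure measure: once a snapshot is written to disk, the fetcher drops its large attributes so idle workers hold only small metadata. A minimal sketch of the same pattern outside this class hierarchy (attribute names follow the diff; the class here is illustrative, not the real Fetcher):

    # Sketch: release large fetch artifacts after they are persisted, so the
    # worker's heap does not keep screenshots/HTML around for idle watches.
    class FetchResult:
        def __init__(self, content, screenshot=None, xpath_data=None):
            self.content = content
            self.screenshot = screenshot
            self.xpath_data = xpath_data
            self.headers = {}          # small, worth keeping
            self.status_code = 200     # small, worth keeping

        def clear_content(self):
            self.content = None
            self.screenshot = None
            self.xpath_data = None

    result = FetchResult(content="<html>...</html>", screenshot=b"...")
    # ... write result.content / result.screenshot to disk here ...
    result.clear_content()  # only headers/status_code remain resident
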

@@ -51,7 +51,6 @@ class fetcher(Fetcher):
 
         session = requests.Session()
 
-
         if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
             from requests_file import FileAdapter
             session.mount('file://', FileAdapter())
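
The `ALLOW_FILE_URI` gate visible above is the whole opt-in mechanism for `file://` fetching. A self-contained sketch, assuming the `requests-file` package is installed and using the same `strtobool` helper this codebase imports elsewhere:

    # Sketch: opt-in file:// support on a requests.Session, mirroring the
    # ALLOW_FILE_URI check above. Requires the `requests-file` package.
    import os
    from distutils.util import strtobool

    import requests

    url = "file:///etc/hostname"
    session = requests.Session()

    if strtobool(os.getenv("ALLOW_FILE_URI", "false")) and url.startswith("file://"):
        from requests_file import FileAdapter
        session.mount("file://", FileAdapter())
        print(session.get(url).text)
    else:
        print("file:// fetching is disabled")
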

@@ -47,7 +47,6 @@ async () => {
       'nicht lieferbar',
       'nicht verfügbar',
       'nicht vorrätig',
-      'nicht mehr lieferbar',
       'nicht zur verfügung',
       'nie znaleziono produktów',
       'niet beschikbaar',

@@ -795,7 +795,7 @@ def ticker_thread_check_time_launch_checks():
                 else:
                     time_schedule_limit = watch.get('time_schedule_limit')
                     logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)")
-                tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
+                tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')
 
                 if time_schedule_limit and time_schedule_limit.get('enabled'):
                     try:
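
Both sides of the change above resolve a timezone name before the schedule is evaluated; the left-hand side falls back from the application setting to `$TZ` and finally UTC. A sketch of that lookup chain using the standard-library `zoneinfo` (function name is mine):

    # Sketch: per-watch timezone first, then the application default,
    # then the $TZ environment variable, then UTC.
    import os
    from zoneinfo import ZoneInfo

    def resolve_tz_name(time_schedule_limit, application_settings):
        tz_name = (time_schedule_limit or {}).get('timezone')
        if not tz_name:
            tz_name = application_settings.get(
                'scheduler_timezone_default',
                os.getenv('TZ', 'UTC').strip()
            )
        return tz_name

    tz = ZoneInfo(resolve_tz_name({}, {'scheduler_timezone_default': 'Europe/Berlin'}))
    print(tz)  # Europe/Berlin
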

@@ -23,14 +23,11 @@ from wtforms import (
 )
 from flask_wtf.file import FileField, FileAllowed
 from wtforms.fields import FieldList
-from wtforms.utils import unset_value
-
 from wtforms.validators import ValidationError
 
 from validators.url import url as url_validator
 
-from changedetectionio.widgets import TernaryNoneBooleanField
 
 
 # default
 # each select <option data-enabled="enabled-0-0"
@@ -57,8 +54,6 @@ valid_method = {
 
 default_method = 'GET'
 allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
-REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
-REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.'
 
 class StringListField(StringField):
     widget = widgets.TextArea()
@@ -215,35 +210,6 @@ class ScheduleLimitForm(Form):
         self.sunday.form.enabled.label.text = "Sunday"
 
 
-def validate_time_between_check_has_values(form):
-    """
-    Custom validation function for TimeBetweenCheckForm.
-    Returns True if at least one time interval field has a value > 0.
-    """
-    res = any([
-        form.weeks.data and int(form.weeks.data) > 0,
-        form.days.data and int(form.days.data) > 0,
-        form.hours.data and int(form.hours.data) > 0,
-        form.minutes.data and int(form.minutes.data) > 0,
-        form.seconds.data and int(form.seconds.data) > 0
-    ])
-
-    return res
-
-
-class RequiredTimeInterval(object):
-    """
-    WTForms validator that ensures at least one time interval field has a value > 0.
-    Use this with FormField(TimeBetweenCheckForm, validators=[RequiredTimeInterval()]).
-    """
-    def __init__(self, message=None):
-        self.message = message or 'At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
-
-    def __call__(self, form, field):
-        if not validate_time_between_check_has_values(field.form):
-            raise ValidationError(self.message)
-
-
 class TimeBetweenCheckForm(Form):
     weeks = IntegerField('Weeks', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
     days = IntegerField('Days', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
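
The helper and validator deleted in the last hunk implement one rule: at least one of the interval fields must be greater than zero. A condensed, self-contained sketch of that rule as a form-level check (the `_formdata_errors` holder imitates the removed code's ad-hoc error list; field set follows the diff):

    # Condensed sketch of the removed "at least one time interval" rule.
    from wtforms import Form, IntegerField, validators

    def has_any_interval(form):
        return any(
            field.data and int(field.data) > 0
            for field in (form.weeks, form.days, form.hours, form.minutes, form.seconds)
        )

    class IntervalForm(Form):
        weeks = IntegerField('Weeks', validators=[validators.Optional()])
        days = IntegerField('Days', validators=[validators.Optional()])
        hours = IntegerField('Hours', validators=[validators.Optional()])
        minutes = IntegerField('Minutes', validators=[validators.Optional()])
        seconds = IntegerField('Seconds', validators=[validators.Optional()])

        def validate(self, **kwargs):
            if not super().validate(**kwargs):
                return False
            if not has_any_interval(self):
                self._formdata_errors = ['At least one time interval must be specified.']
                return False
            return True

    form = IntervalForm(data={'weeks': 0})
    print(form.validate())  # False - no interval is > 0
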
@@ -252,123 +218,6 @@ class TimeBetweenCheckForm(Form):
     seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
     # @todo add total seconds minimum validatior = minimum_seconds_recheck_time
 
-    def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
-        super().__init__(formdata, obj, prefix, data, meta, **kwargs)
-        self.require_at_least_one = kwargs.get('require_at_least_one', False)
-        self.require_at_least_one_message = kwargs.get('require_at_least_one_message', REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT)
-
-    def validate(self, **kwargs):
-        """Custom validation that can optionally require at least one time interval."""
-        # Run normal field validation first
-        if not super().validate(**kwargs):
-            return False
-
-        # Apply optional "at least one" validation
-        if self.require_at_least_one:
-            if not validate_time_between_check_has_values(self):
-                # Add error to the form's general errors (not field-specific)
-                if not hasattr(self, '_formdata_errors'):
-                    self._formdata_errors = []
-                self._formdata_errors.append(self.require_at_least_one_message)
-                return False
-
-        return True
-
-
-class EnhancedFormField(FormField):
-    """
-    An enhanced FormField that supports conditional validation with top-level error messages.
-    Adds a 'top_errors' property for validation errors at the FormField level.
-    """
-
-    def __init__(self, form_class, label=None, validators=None, separator="-",
-                 conditional_field=None, conditional_message=None, conditional_test_function=None, **kwargs):
-        """
-        Initialize EnhancedFormField with optional conditional validation.
-
-        :param conditional_field: Name of the field this FormField depends on (e.g. 'time_between_check_use_default')
-        :param conditional_message: Error message to show when validation fails
-        :param conditional_test_function: Custom function to test if FormField has valid values.
-                                          Should take self.form as parameter and return True if valid.
-        """
-        super().__init__(form_class, label, validators, separator, **kwargs)
-        self.top_errors = []
-        self.conditional_field = conditional_field
-        self.conditional_message = conditional_message or "At least one field must have a value when not using defaults."
-        self.conditional_test_function = conditional_test_function
-
-    def validate(self, form, extra_validators=()):
-        """
-        Custom validation that supports conditional logic and stores top-level errors.
-        """
-        self.top_errors = []
-
-        # First run the normal FormField validation
-        base_valid = super().validate(form, extra_validators)
-
-        # Apply conditional validation if configured
-        if self.conditional_field and hasattr(form, self.conditional_field):
-            conditional_field_obj = getattr(form, self.conditional_field)
-
-            # If the conditional field is False/unchecked, check if this FormField has any values
-            if not conditional_field_obj.data:
-                # Use custom test function if provided, otherwise use generic fallback
-                if self.conditional_test_function:
-                    has_any_value = self.conditional_test_function(self.form)
-                else:
-                    # Generic fallback - check if any field has truthy data
-                    has_any_value = any(field.data for field in self.form if hasattr(field, 'data') and field.data)
-
-                if not has_any_value:
-                    self.top_errors.append(self.conditional_message)
-                    base_valid = False
-
-        return base_valid
-
-
-class RequiredFormField(FormField):
-    """
-    A FormField that passes require_at_least_one=True to TimeBetweenCheckForm.
-    Use this when you want the sub-form to always require at least one value.
-    """
-
-    def __init__(self, form_class, label=None, validators=None, separator="-", **kwargs):
-        super().__init__(form_class, label, validators, separator, **kwargs)
-
-    def process(self, formdata, data=unset_value, extra_filters=None):
-        if extra_filters:
-            raise TypeError(
-                "FormField cannot take filters, as the encapsulated"
-                "data is not mutable."
-            )
-
-        if data is unset_value:
-            try:
-                data = self.default()
-            except TypeError:
-                data = self.default
-            self._obj = data
-
-        self.object_data = data
-
-        prefix = self.name + self.separator
-        # Pass require_at_least_one=True to the sub-form
-        if isinstance(data, dict):
-            self.form = self.form_class(formdata=formdata, prefix=prefix, require_at_least_one=True, **data)
-        else:
-            self.form = self.form_class(formdata=formdata, obj=data, prefix=prefix, require_at_least_one=True)
-
-    @property
-    def errors(self):
-        """Include sub-form validation errors"""
-        form_errors = self.form.errors
-        # Add any general form errors to a special 'form' key
-        if hasattr(self.form, '_formdata_errors') and self.form._formdata_errors:
-            form_errors = dict(form_errors)  # Make a copy
-            form_errors['form'] = self.form._formdata_errors
-        return form_errors
-
-
 # Separated by key:value
 class StringDictKeyValue(StringField):
     widget = widgets.TextArea()
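
Everything removed above reduces to one behaviour: when a sibling "use defaults" checkbox is unticked, the wrapped sub-form must contain at least one value, and the failure is reported at the FormField level rather than per-field. A trimmed sketch of that control flow (the class name here is mine, not from the codebase):

    # Trimmed sketch of the removed EnhancedFormField idea: a FormField whose
    # sub-form is only required when a sibling checkbox is unticked.
    from wtforms import FormField

    class ConditionallyRequiredFormField(FormField):
        def __init__(self, form_class, conditional_field=None, message=None, **kwargs):
            super().__init__(form_class, **kwargs)
            self.conditional_field = conditional_field
            self.message = message or 'At least one field must have a value when not using defaults.'
            self.top_errors = []

        def validate(self, form, extra_validators=()):
            self.top_errors = []
            ok = super().validate(form, extra_validators)
            toggle = getattr(form, self.conditional_field, None)
            if toggle is not None and not toggle.data:
                # checkbox unticked: the enclosed sub-form must carry a value
                if not any(f.data for f in self.form):
                    self.top_errors.append(self.message)
                    ok = False
            return ok

    # Usage (hypothetical): inside a Form declaration,
    #   time_between_check = ConditionallyRequiredFormField(
    #       TimeBetweenCheckForm, conditional_field='time_between_check_use_default')
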
@@ -487,8 +336,9 @@ class ValidateJinja2Template(object):
     """
     def __call__(self, form, field):
         from changedetectionio import notification
-        from changedetectionio.jinja2_custom import create_jinja_env
         from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
+        from jinja2.sandbox import ImmutableSandboxedEnvironment
         from jinja2.meta import find_undeclared_variables
         import jinja2.exceptions
 
@@ -496,11 +346,9 @@ class ValidateJinja2Template(object):
         joined_data = ' '.join(map(str, field.data)) if isinstance(field.data, list) else f"{field.data}"
 
         try:
-            # Use the shared helper to create a properly configured environment
-            jinja2_env = create_jinja_env(loader=BaseLoader)
-
-            # Add notification tokens for validation
+            jinja2_env = ImmutableSandboxedEnvironment(loader=BaseLoader)
             jinja2_env.globals.update(notification.valid_tokens)
+            # Extra validation tokens provided on the form_class(... extra_tokens={}) setup
             if hasattr(field, 'extra_notification_tokens'):
                 jinja2_env.globals.update(field.extra_notification_tokens)
 
@@ -512,7 +360,6 @@ class ValidateJinja2Template(object):
         except jinja2.exceptions.SecurityError as e:
             raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e
 
-        # Check for undeclared variables
         ast = jinja2_env.parse(joined_data)
         undefined = ", ".join(find_undeclared_variables(ast))
         if undefined:
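
The replacement code above builds the validation environment inline from `ImmutableSandboxedEnvironment`. A self-contained sketch of the same check, including the undeclared-variable scan (here filtered against the known tokens, which the trimmed hunk only hints at; the token dict is a stand-in for `notification.valid_tokens`):

    # Sketch: validate a user-supplied notification template in a Jinja2
    # sandbox and reject unknown placeholder variables.
    from jinja2 import BaseLoader
    from jinja2.sandbox import ImmutableSandboxedEnvironment
    from jinja2.meta import find_undeclared_variables

    valid_tokens = {'watch_url': '', 'diff': ''}  # stand-in token set

    def check_template(source):
        env = ImmutableSandboxedEnvironment(loader=BaseLoader)
        env.globals.update(valid_tokens)
        env.from_string(source).render()  # raises on syntax/security errors
        undefined = find_undeclared_variables(env.parse(source)) - set(env.globals)
        if undefined:
            raise ValueError(f"Undefined variables: {', '.join(undefined)}")

    check_template("{{ watch_url }} had a change.")  # passes silently
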
@@ -680,51 +527,6 @@ class ValidateCSSJSONXPATHInput(object):
             except:
                 raise ValidationError("A system-error occurred when validating your jq expression")
 
-class ValidateSimpleURL:
-    """Validate that the value can be parsed by urllib.parse.urlparse() and has a scheme/netloc."""
-    def __init__(self, message=None):
-        self.message = message or "Invalid URL."
-
-    def __call__(self, form, field):
-        data = (field.data or "").strip()
-        if not data:
-            return  # empty is OK — pair with validators.Optional()
-        from urllib.parse import urlparse
-
-        parsed = urlparse(data)
-        if not parsed.scheme or not parsed.netloc:
-            raise ValidationError(self.message)
-
-class ValidateStartsWithRegex(object):
-    def __init__(self, regex, *, flags=0, message=None, allow_empty=True, split_lines=True):
-        # compile with given flags (we’ll pass re.IGNORECASE below)
-        self.pattern = re.compile(regex, flags) if isinstance(regex, str) else regex
-        self.message = message
-        self.allow_empty = allow_empty
-        self.split_lines = split_lines
-
-    def __call__(self, form, field):
-        data = field.data
-        if not data:
-            return
-
-        # normalize into list of lines
-        if isinstance(data, str) and self.split_lines:
-            lines = data.splitlines()
-        elif isinstance(data, (list, tuple)):
-            lines = data
-        else:
-            lines = [data]
-
-        for line in lines:
-            stripped = line.strip()
-            if not stripped:
-                if self.allow_empty:
-                    continue
-                raise ValidationError(self.message or "Empty value not allowed.")
-            if not self.pattern.match(stripped):
-                raise ValidationError(self.message or "Invalid value.")
-
 class quickWatchForm(Form):
     from . import processors
 
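
The two deleted validators were stacked on URL fields: a scheme-prefix regex first, then a structural `urlparse` check. A compact sketch of the pair on a proxy-URL field, reusing the pattern and message from the removed code (class names shortened):

    # Compact sketch: scheme-prefix + urlparse validators on a proxy URL field.
    import re
    from urllib.parse import urlparse
    from wtforms import Form, StringField, validators
    from wtforms.validators import ValidationError

    class StartsWithScheme:
        def __init__(self, regex, message):
            self.pattern = re.compile(regex, re.IGNORECASE)
            self.message = message

        def __call__(self, form, field):
            data = (field.data or "").strip()
            if data and not self.pattern.match(data):
                raise ValidationError(self.message)

    class SimpleURL:
        def __call__(self, form, field):
            data = (field.data or "").strip()
            if data:
                parsed = urlparse(data)
                if not parsed.scheme or not parsed.netloc:
                    raise ValidationError("Invalid URL.")

    class ProxyForm(Form):
        proxy_url = StringField('Proxy URL', [
            validators.Optional(),
            StartsWithScheme(r'^(https?|socks5)://',
                             'Proxy URLs must start with http://, https:// or socks5://'),
            SimpleURL(),
        ])

    form = ProxyForm(data={'proxy_url': 'socks5://user:pass@proxy:1080'})
    print(form.validate())  # True
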
@@ -746,13 +548,14 @@ class commonSettingsForm(Form):
         self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
         self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
 
+    extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
     fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
     notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
     notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
     notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
     notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
     processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff")
-    scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
+    timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
     webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")])
 
 
@@ -779,16 +582,11 @@ class processor_text_json_diff_form(commonSettingsForm):
     url = fields.URLField('URL', validators=[validateURL()])
     tags = StringTagUUID('Group tag', [validators.Optional()], default='')
 
-    time_between_check = EnhancedFormField(
-        TimeBetweenCheckForm,
-        conditional_field='time_between_check_use_default',
-        conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT,
-        conditional_test_function=validate_time_between_check_has_values
-    )
+    time_between_check = FormField(TimeBetweenCheckForm)
 
     time_schedule_limit = FormField(ScheduleLimitForm)
 
-    time_between_check_use_default = BooleanField('Use global settings for time between check and scheduler.', default=False)
+    time_between_check_use_default = BooleanField('Use global settings for time between check', default=False)
 
     include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
 
@@ -806,7 +604,6 @@ class processor_text_json_diff_form(commonSettingsForm):
     check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
     remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
     sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
-    strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None)
     trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
 
     filter_text_added = BooleanField('Added lines', default=True)
@@ -819,18 +616,18 @@ class processor_text_json_diff_form(commonSettingsForm):
     text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
     webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])
 
-    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
+    save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
 
     proxy = RadioField('Proxy')
-    # filter_failure_notification_send @todo make ternary
     filter_failure_notification_send = BooleanField(
         'Send a notification when the filter can no longer be found on the page', default=False)
-    notification_muted = TernaryNoneBooleanField('Notifications', default=None, yes_text="Muted", no_text="On")
+    notification_muted = BooleanField('Notifications Muted / Off', default=False)
     notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
 
     conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL')
     conditions = FieldList(FormField(ConditionFormRow), min_entries=1)  # Add rule logic here
-    use_page_title_in_list = TernaryNoneBooleanField('Use page <title> in list', default=None)
 
     def extra_tab_content(self):
         return None
@@ -842,7 +639,7 @@ class processor_text_json_diff_form(commonSettingsForm):
         if not super().validate():
             return False
 
-        from changedetectionio.jinja2_custom import render as jinja_render
+        from changedetectionio.safe_jinja import render as jinja_render
         result = True
 
         # Fail form validation when a body is set for a GET
@@ -905,36 +702,23 @@ class processor_text_json_diff_form(commonSettingsForm):
     ):
         super().__init__(formdata, obj, prefix, data, meta, **kwargs)
         if kwargs and kwargs.get('default_system_settings'):
-            default_tz = kwargs.get('default_system_settings').get('application', {}).get('scheduler_timezone_default')
+            default_tz = kwargs.get('default_system_settings').get('application', {}).get('timezone')
             if default_tz:
                 self.time_schedule_limit.form.timezone.render_kw['placeholder'] = default_tz
 
 
 
 class SingleExtraProxy(Form):
 
     # maybe better to set some <script>var..
     proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
-    proxy_url = StringField('Proxy URL', [
-        validators.Optional(),
-        ValidateStartsWithRegex(
-            regex=r'^(https?|socks5)://', # ✅ main pattern
-            flags=re.IGNORECASE, # ✅ makes it case-insensitive
-            message='Proxy URLs must start with http://, https:// or socks5://',
-        ),
-        ValidateSimpleURL()
-    ], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
+    proxy_url = StringField('Proxy URL', [validators.Optional()], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
+    # @todo do the validation here instead
 
 class SingleExtraBrowser(Form):
     browser_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
-    browser_connection_url = StringField('Browser connection URL', [
-        validators.Optional(),
-        ValidateStartsWithRegex(
-            regex=r'^(wss?|ws)://',
-            flags=re.IGNORECASE,
-            message='Browser URLs must start with wss:// or ws://'
-        ),
-        ValidateSimpleURL()
-    ], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
+    browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
+    # @todo do the validation here instead
 
 class DefaultUAInputForm(Form):
     html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
@@ -943,9 +727,9 @@ class DefaultUAInputForm(Form):
 
 # datastore.data['settings']['requests']..
 class globalSettingsRequestForm(Form):
-    time_between_check = RequiredFormField(TimeBetweenCheckForm)
+    time_between_check = FormField(TimeBetweenCheckForm)
     time_schedule_limit = FormField(ScheduleLimitForm)
-    proxy = RadioField('Default proxy')
+    proxy = RadioField('Proxy')
     jitter_seconds = IntegerField('Random jitter seconds ± check',
                                   render_kw={"style": "width: 5em;"},
                                   validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
@@ -954,12 +738,7 @@ class globalSettingsRequestForm(Form):
                                          render_kw={"style": "width: 5em;"},
                                          validators=[validators.NumberRange(min=1, max=50,
                                                                             message="Should be between 1 and 50")])
 
-    timeout = IntegerField('Requests timeout in seconds',
-                           render_kw={"style": "width: 5em;"},
-                           validators=[validators.NumberRange(min=1, max=999,
-                                                              message="Should be between 1 and 999")])
-
     extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
     extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
 
@@ -976,7 +755,6 @@ class globalSettingsApplicationUIForm(Form):
     open_diff_in_new_tab = BooleanField("Open 'History' page in a new tab", default=True, validators=[validators.Optional()])
     socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
     favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])
-    use_page_title_in_list = BooleanField('Use page <title> in watch overview list') #BooleanField=True
 
 # datastore.data['settings']['application']..
 class globalSettingsApplicationForm(commonSettingsForm):
@@ -1001,14 +779,9 @@ class globalSettingsApplicationForm(commonSettingsForm):
 
     removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
     render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
-    shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()])
-    strip_ignored_lines = BooleanField('Strip ignored lines')
+    shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
     rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
                                           validators=[validators.Optional()])
-
-    rss_reader_mode = BooleanField('RSS reader mode ', default=False,
-                                   validators=[validators.Optional()])
-
     filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
                                                                   render_kw={"style": "width: 5em;"},
                                                                   validators=[validators.NumberRange(min=0,
@@ -1028,7 +801,7 @@ class globalSettingsForm(Form):
 
     requests = FormField(globalSettingsRequestForm)
     application = FormField(globalSettingsApplicationForm)
-    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
+    save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
 
 
 class extractDataForm(Form):

@@ -1,6 +1,6 @@
 from loguru import logger
+from lxml import etree
 from typing import List
-import html
 import json
 import re
 
@@ -9,11 +9,6 @@ TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
 TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')
 PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
 
-TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
-META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
-META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
-
-
 # 'price' , 'lowPrice', 'highPrice' are usually under here
 # All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
 LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
@@ -57,17 +52,13 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
 
     return html_block
 
-def subtractive_css_selector(css_selector, content):
+def subtractive_css_selector(css_selector, html_content):
     from bs4 import BeautifulSoup
-    soup = BeautifulSoup(content, "html.parser")
+    soup = BeautifulSoup(html_content, "html.parser")
 
     # So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM
     elements_to_remove = soup.select(css_selector)
 
-    if not elements_to_remove:
-        # Better to return the original that rebuild with BeautifulSoup
-        return content
-
     # Then, remove them in a separate loop
     for item in elements_to_remove:
         item.decompose()
@@ -75,7 +66,6 @@ def subtractive_css_selector(css_selector, content):
     return str(soup)
 
 def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
-    from lxml import etree
     # Parse the HTML content using lxml
     html_tree = etree.HTML(html_content)
 
@@ -87,10 +77,6 @@ def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
         # Collect elements for each selector
         elements_to_remove.extend(html_tree.xpath(selector))
 
-    # If no elements were found, return the original HTML content
-    if not elements_to_remove:
-        return html_content
-
     # Then, remove them in a separate loop
     for element in elements_to_remove:
         if element.getparent() is not None:  # Ensure the element has a parent before removing
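
Both subtractive helpers above share one shape: materialise the matching nodes first, then remove them in a second pass so the tree does not shift underneath the iteration. A small sketch of each half (bs4 for CSS, lxml for XPath), on throwaway markup:

    # Sketch: collect-then-remove, as in subtractive_css_selector /
    # subtractive_xpath_selector above.
    from bs4 import BeautifulSoup
    from lxml import etree

    html = "<div><p class='ad'>ad</p><p>keep</p></div>"

    # CSS variant - soup.select() returns a materialised list, safe to mutate after
    soup = BeautifulSoup(html, "html.parser")
    for el in soup.select("p.ad"):
        el.decompose()
    print(str(soup))  # <div><p>keep</p></div>

    # XPath variant - only detach nodes that are still attached to the tree
    tree = etree.HTML(html)
    for el in tree.xpath("//p[@class='ad']"):
        if el.getparent() is not None:
            el.getparent().remove(el)
    print(etree.tostring(tree, encoding="unicode"))
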
@@ -108,7 +94,7 @@ def element_removal(selectors: List[str], html_content):
|
|||||||
xpath_selectors = []
|
xpath_selectors = []
|
||||||
|
|
||||||
for selector in selectors:
|
for selector in selectors:
|
||||||
if selector.strip().startswith(('xpath:', 'xpath1:', '//')):
|
if selector.startswith(('xpath:', 'xpath1:', '//')):
|
||||||
# Handle XPath selectors separately
|
# Handle XPath selectors separately
|
||||||
xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
|
xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
|
||||||
xpath_selectors.append(xpath_selector)
|
xpath_selectors.append(xpath_selector)
|
||||||
@@ -303,92 +289,70 @@ def _get_stripped_text_from_json_match(match):
 
     return stripped_text_from_html
 
-def extract_json_blob_from_html(content, ensure_is_ldjson_info_type, json_filter):
-    from bs4 import BeautifulSoup
-    stripped_text_from_html = ''
-
-    # Foreach <script json></script> blob.. just return the first that matches json_filter
-    # As a last resort, try to parse the whole <body>
-    soup = BeautifulSoup(content, 'html.parser')
-
-    if ensure_is_ldjson_info_type:
-        bs_result = soup.find_all('script', {"type": "application/ld+json"})
-    else:
-        bs_result = soup.find_all('script')
-        bs_result += soup.find_all('body')
-
-    bs_jsons = []
-
-    for result in bs_result:
-        # result.text is how bs4 magically strips JSON from the body
-        content_start = result.text.lstrip("\ufeff").strip()[:100] if result.text else ''
-        # Skip empty tags, and things that dont even look like JSON
-        if not result.text or not (content_start[0] == '{' or content_start[0] == '['):
-            continue
-        try:
-            json_data = json.loads(result.text)
-            bs_jsons.append(json_data)
-        except json.JSONDecodeError:
-            # Skip objects which cannot be parsed
-            continue
-
-    if not bs_jsons:
-        raise JSONNotFound("No parsable JSON found in this document")
-
-    for json_data in bs_jsons:
-        stripped_text_from_html = _parse_json(json_data, json_filter)
-
-        if ensure_is_ldjson_info_type:
-            # Could sometimes be list, string or something else random
-            if isinstance(json_data, dict):
-                # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
-                # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
-                # @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
-                # LD_JSON auto-extract also requires some content PLUS the ldjson to be present
-                # 1833 - could be either str or dict, should not be anything else
-
-                t = json_data.get('@type')
-                if t and stripped_text_from_html:
-
-                    if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
-                        break
-                    # The non-standard part, some have a list
-                    elif isinstance(t, list):
-                        if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
-                            break
-
-        elif stripped_text_from_html:
-            break
-
-    return stripped_text_from_html
 
 # content - json
 # json_filter - ie json:$..price
 # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
 def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
+    from bs4 import BeautifulSoup
 
     stripped_text_from_html = False
     # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
+    try:
+        # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
+        stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter)
+    except json.JSONDecodeError as e:
+        logger.warning(str(e))
 
-    # Looks like clean JSON, dont bother extracting from HTML
+        # Foreach <script json></script> blob.. just return the first that matches json_filter
+        # As a last resort, try to parse the whole <body>
+        soup = BeautifulSoup(content, 'html.parser')
 
-    content_start = content.lstrip("\ufeff").strip()[:100]
+        if ensure_is_ldjson_info_type:
+            bs_result = soup.find_all('script', {"type": "application/ld+json"})
+        else:
+            bs_result = soup.find_all('script')
+            bs_result += soup.find_all('body')
 
-    if content_start[0] == '{' or content_start[0] == '[':
-        try:
-            # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
-            stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter)
-        except json.JSONDecodeError as e:
-            logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
-    else:
-        # Probably something else, go fish inside for it
-        try:
-            stripped_text_from_html = extract_json_blob_from_html(content=content,
-                                                                  ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
-                                                                  json_filter=json_filter )
-        except json.JSONDecodeError as e:
-            logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
+        bs_jsons = []
+        for result in bs_result:
+            # Skip empty tags, and things that dont even look like JSON
+            if not result.text or '{' not in result.text:
+                continue
+            try:
+                json_data = json.loads(result.text)
+                bs_jsons.append(json_data)
+            except json.JSONDecodeError:
+                # Skip objects which cannot be parsed
+                continue
 
+        if not bs_jsons:
+            raise JSONNotFound("No parsable JSON found in this document")
+
+        for json_data in bs_jsons:
+            stripped_text_from_html = _parse_json(json_data, json_filter)
+
+            if ensure_is_ldjson_info_type:
+                # Could sometimes be list, string or something else random
+                if isinstance(json_data, dict):
+                    # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
+                    # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
+                    # @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
+                    # LD_JSON auto-extract also requires some content PLUS the ldjson to be present
+                    # 1833 - could be either str or dict, should not be anything else
+
+                    t = json_data.get('@type')
+                    if t and stripped_text_from_html:
+
+                        if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
+                            break
+                        # The non-standard part, some have a list
+                        elif isinstance(t, list):
+                            if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
+                                break
+
+            elif stripped_text_from_html:
+                break
 
     if not stripped_text_from_html:
         # Re 265 - Just return an empty string when filter not found
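To make the control flow of this hunk concrete, a hedged sketch of how extract_json_as_string is typically driven (the documents and filter are illustrative; exact output formatting comes from _parse_json):

    clean = '{"offers": {"price": "23.99"}}'
    extract_json_as_string(content=clean, json_filter='json:$..price')
    # parses directly via json.loads()

    embedded = '<html><script type="application/ld+json">{"@type": "Product", "price": 23.99}</script></html>'
    extract_json_as_string(content=embedded, json_filter='json:$..price', ensure_is_ldjson_info_type='product')
    # falls back to scanning <script>/<body> blobs for parsable JSON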
@@ -546,43 +510,3 @@ def get_triggered_text(content, trigger_text):
         i += 1
 
     return triggered_text
-
-
-def extract_title(data: bytes | str, sniff_bytes: int = 2048, scan_chars: int = 8192) -> str | None:
-    try:
-        # Only decode/process the prefix we need for title extraction
-        match data:
-            case bytes() if data.startswith((b"\xff\xfe", b"\xfe\xff")):
-                prefix = data[:scan_chars * 2].decode("utf-16", errors="replace")
-            case bytes() if data.startswith((b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff")):
-                prefix = data[:scan_chars * 4].decode("utf-32", errors="replace")
-            case bytes():
-                try:
-                    prefix = data[:scan_chars].decode("utf-8")
-                except UnicodeDecodeError:
-                    try:
-                        head = data[:sniff_bytes].decode("ascii", errors="ignore")
-                        if m := (META_CS.search(head) or META_CT.search(head)):
-                            enc = m.group(1).lower()
-                        else:
-                            enc = "cp1252"
-                        prefix = data[:scan_chars * 2].decode(enc, errors="replace")
-                    except Exception as e:
-                        logger.error(f"Title extraction encoding detection failed: {e}")
-                        return None
-            case str():
-                prefix = data[:scan_chars] if len(data) > scan_chars else data
-            case _:
-                logger.error(f"Title extraction received unsupported data type: {type(data)}")
-                return None
-
-        # Search only in the prefix
-        if m := TITLE_RE.search(prefix):
-            title = html.unescape(" ".join(m.group(1).split())).strip()
-            # Some safe limit
-            return title[:2000]
-        return None
-
-    except Exception as e:
-        logger.error(f"Title extraction failed: {e}")
-        return None
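The removed extract_title helper decodes only a bounded prefix of the payload before regex-searching for the title tag. A usage sketch, assuming the module-level TITLE_RE, META_CS and META_CT regexes from the base branch are in scope:

    extract_title(b'<html><head><title>  Hello   World </title></head></html>')
    # -> 'Hello World' (whitespace collapsed, HTML entities unescaped, capped at 2000 chars)
    extract_title(12345)
    # -> None, after logging the unsupported data type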
@@ -1,20 +0,0 @@
-"""
-Jinja2 custom extensions and safe rendering utilities.
-"""
-from .extensions.TimeExtension import TimeExtension
-from .safe_jinja import (
-    render,
-    render_fully_escaped,
-    create_jinja_env,
-    JINJA2_MAX_RETURN_PAYLOAD_SIZE,
-    DEFAULT_JINJA2_EXTENSIONS,
-)
-
-__all__ = [
-    'TimeExtension',
-    'render',
-    'render_fully_escaped',
-    'create_jinja_env',
-    'JINJA2_MAX_RETURN_PAYLOAD_SIZE',
-    'DEFAULT_JINJA2_EXTENSIONS',
-]
@@ -1,221 +0,0 @@
-"""
-Jinja2 TimeExtension - Custom date/time handling for templates.
-
-This extension provides the {% now %} tag for Jinja2 templates, offering timezone-aware
-date/time formatting with support for time offsets.
-
-Why This Extension Exists:
-    The Arrow library has a now() function (arrow.now()), but Jinja2 templates cannot
-    directly call Python functions - they need extensions or filters to expose functionality.
-
-    This TimeExtension serves as a Jinja2-to-Arrow bridge that:
-
-    1. Makes Arrow accessible in templates - Jinja2 requires registering functions/tags
-       through extensions. You cannot use arrow.now() directly in a template.
-
-    2. Provides template-friendly syntax - Instead of complex Python code, you get clean tags:
-       {% now 'UTC' %}
-       {% now 'UTC' + 'hours=2' %}
-       {% now 'Europe/London', '%Y-%m-%d' %}
-
-    3. Adds convenience features on top of Arrow:
-       - Default timezone from environment variable (TZ) or config
-       - Default datetime format configuration
-       - Offset syntax parsing: 'hours=2,minutes=30' → shift(hours=2, minutes=30)
-       - Empty string timezone support to use configured defaults
-
-    4. Maintains security - Works within Jinja2's sandboxed environment so users
-       cannot access arbitrary Python code or objects.
-
-Essentially, this is a Jinja2 wrapper around arrow.now() and arrow.shift() that
-provides user-friendly template syntax while maintaining security.
-
-Basic Usage:
-    {% now 'UTC' %}
-    # Output: Wed, 09 Dec 2015 23:33:01
-
-Custom Format:
-    {% now 'UTC', '%Y-%m-%d %H:%M:%S' %}
-    # Output: 2015-12-09 23:33:01
-
-Timezone Support:
-    {% now 'America/New_York' %}
-    {% now 'Europe/London' %}
-    {% now '' %}  # Uses default timezone from environment.default_timezone
-
-Time Offsets (Addition):
-    {% now 'UTC' + 'hours=2' %}
-    {% now 'UTC' + 'hours=2,minutes=30' %}
-    {% now 'UTC' + 'days=1,hours=2,minutes=15,seconds=10' %}
-
-Time Offsets (Subtraction):
-    {% now 'UTC' - 'minutes=11' %}
-    {% now 'UTC' - 'days=2,minutes=33,seconds=1' %}
-
-Time Offsets with Custom Format:
-    {% now 'UTC' + 'hours=2', '%Y-%m-%d %H:%M:%S' %}
-    # Output: 2015-12-10 01:33:01
-
-Weekday Support (for finding next/previous weekday):
-    {% now 'UTC' + 'weekday=0' %}  # Next Monday (0=Monday, 6=Sunday)
-    {% now 'UTC' + 'weekday=4' %}  # Next Friday
-
-Configuration:
-    - Default timezone: Set via TZ environment variable or override environment.default_timezone
-    - Default format: '%a, %d %b %Y %H:%M:%S' (can be overridden via environment.datetime_format)
-
-Environment Customization:
-    from changedetectionio.jinja2_custom import create_jinja_env
-
-    jinja2_env = create_jinja_env()
-    jinja2_env.default_timezone = 'America/New_York'  # Override default timezone
-    jinja2_env.datetime_format = '%Y-%m-%d %H:%M'  # Override default format
-
-Supported Offset Parameters:
-    - years, months, weeks, days
-    - hours, minutes, seconds, microseconds
-    - weekday (0=Monday through 6=Sunday, must be integer)
-
-Note:
-    This extension uses the Arrow library for timezone-aware datetime handling.
-    All timezone names should be valid IANA timezone identifiers (e.g., 'America/New_York').
-"""
-import arrow
-
-from jinja2 import nodes
-from jinja2.ext import Extension
-import os
-
-class TimeExtension(Extension):
-    """
-    Jinja2 Extension providing the {% now %} tag for timezone-aware date/time rendering.
-
-    This extension adds two attributes to the Jinja2 environment:
-    - datetime_format: Default strftime format string (default: '%a, %d %b %Y %H:%M:%S')
-    - default_timezone: Default timezone for rendering (default: TZ env var or 'UTC')
-
-    Both can be overridden after environment creation by setting the attributes directly.
-    """
-
-    tags = {'now'}
-
-    def __init__(self, environment):
-        """Jinja2 Extension constructor."""
-        super().__init__(environment)
-
-        environment.extend(
-            datetime_format='%a, %d %b %Y %H:%M:%S',
-            default_timezone=os.getenv('TZ', 'UTC').strip()
-        )
-
-    def _datetime(self, timezone, operator, offset, datetime_format):
-        """
-        Get current datetime with time offset applied.
-
-        Args:
-            timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
-            operator: '+' for addition or '-' for subtraction
-            offset: Comma-separated offset parameters (e.g., 'hours=2,minutes=30')
-            datetime_format: strftime format string or None to use environment default
-
-        Returns:
-            Formatted datetime string with offset applied
-
-        Example:
-            _datetime('UTC', '+', 'hours=2,minutes=30', '%Y-%m-%d %H:%M:%S')
-            # Returns current time + 2.5 hours
-        """
-        # Use default timezone if none specified
-        if not timezone or timezone == '':
-            timezone = self.environment.default_timezone
-
-        d = arrow.now(timezone)
-
-        # parse shift params from offset and include operator
-        shift_params = {}
-        for param in offset.split(','):
-            interval, value = param.split('=')
-            shift_params[interval.strip()] = float(operator + value.strip())
-
-        # Fix weekday parameter can not be float
-        if 'weekday' in shift_params:
-            shift_params['weekday'] = int(shift_params['weekday'])
-
-        d = d.shift(**shift_params)
-
-        if datetime_format is None:
-            datetime_format = self.environment.datetime_format
-        return d.strftime(datetime_format)
-
-    def _now(self, timezone, datetime_format):
-        """
-        Get current datetime without any offset.
-
-        Args:
-            timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
-            datetime_format: strftime format string or None to use environment default
-
-        Returns:
-            Formatted datetime string for current time
-
-        Example:
-            _now('America/New_York', '%Y-%m-%d %H:%M:%S')
-            # Returns current time in New York timezone
-        """
-        # Use default timezone if none specified
-        if not timezone or timezone == '':
-            timezone = self.environment.default_timezone
-
-        if datetime_format is None:
-            datetime_format = self.environment.datetime_format
-        return arrow.now(timezone).strftime(datetime_format)
-
-    def parse(self, parser):
-        """
-        Parse the {% now %} tag and generate appropriate AST nodes.
-
-        This method is called by Jinja2 when it encounters a {% now %} tag.
-        It parses the tag syntax and determines whether to call _now() or _datetime()
-        based on whether offset operations (+ or -) are present.
-
-        Supported syntax:
-            {% now 'timezone' %}                       -> calls _now()
-            {% now 'timezone', 'format' %}             -> calls _now()
-            {% now 'timezone' + 'offset' %}            -> calls _datetime()
-            {% now 'timezone' + 'offset', 'format' %}  -> calls _datetime()
-            {% now 'timezone' - 'offset', 'format' %}  -> calls _datetime()
-
-        Args:
-            parser: Jinja2 parser instance
-
-        Returns:
-            nodes.Output: AST output node containing the formatted datetime string
-        """
-        lineno = next(parser.stream).lineno
-
-        node = parser.parse_expression()
-
-        if parser.stream.skip_if('comma'):
-            datetime_format = parser.parse_expression()
-        else:
-            datetime_format = nodes.Const(None)
-
-        if isinstance(node, nodes.Add):
-            call_method = self.call_method(
-                '_datetime',
-                [node.left, nodes.Const('+'), node.right, datetime_format],
-                lineno=lineno,
-            )
-        elif isinstance(node, nodes.Sub):
-            call_method = self.call_method(
-                '_datetime',
-                [node.left, nodes.Const('-'), node.right, datetime_format],
-                lineno=lineno,
-            )
-        else:
-            call_method = self.call_method(
-                '_now',
-                [node, datetime_format],
-                lineno=lineno,
-            )
-        return nodes.Output([call_method], lineno=lineno)
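The offset grammar handled by _datetime above is just comma-separated key=value pairs forwarded to arrow.shift(). The parsing step in isolation (values illustrative):

    import arrow

    operator, offset = '+', 'hours=2,minutes=30'
    shift_params = {}
    for param in offset.split(','):
        interval, value = param.split('=')
        shift_params[interval.strip()] = float(operator + value.strip())
    # shift_params == {'hours': 2.0, 'minutes': 30.0}
    arrow.now('UTC').shift(**shift_params)  # current UTC time plus 2.5 hours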
@@ -1,55 +0,0 @@
-"""
-Safe Jinja2 render with max payload sizes
-
-See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
-"""
-
-import jinja2.sandbox
-import typing as t
-import os
-from .extensions.TimeExtension import TimeExtension
-
-JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))
-
-# Default extensions - can be overridden in create_jinja_env()
-DEFAULT_JINJA2_EXTENSIONS = [TimeExtension]
-
-
-def create_jinja_env(extensions=None, **kwargs) -> jinja2.sandbox.ImmutableSandboxedEnvironment:
-    """
-    Create a sandboxed Jinja2 environment with our custom extensions and default timezone.
-
-    Args:
-        extensions: List of extension classes to use (defaults to DEFAULT_JINJA2_EXTENSIONS)
-        **kwargs: Additional arguments to pass to ImmutableSandboxedEnvironment
-
-    Returns:
-        Configured Jinja2 environment
-    """
-    if extensions is None:
-        extensions = DEFAULT_JINJA2_EXTENSIONS
-
-    jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(
-        extensions=extensions,
-        **kwargs
-    )
-
-    # Get default timezone from environment variable
-    default_timezone = os.getenv('TZ', 'UTC').strip()
-    jinja2_env.default_timezone = default_timezone
-
-    return jinja2_env
-
-
-# This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available.
-# (Which also limits available functions that could be called)
-def render(template_str, **args: t.Any) -> str:
-    jinja2_env = create_jinja_env()
-    output = jinja2_env.from_string(template_str).render(args)
-    return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
-
-
-def render_fully_escaped(content):
-    env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
-    template = env.from_string("{{ some_html|e }}")
-    return template.render(some_html=content)
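The two deleted modules above are what notification and header templating call into. A minimal usage sketch of render(); note the import path is changedetectionio.jinja2_custom on the base side of this diff and changedetectionio.safe_jinja on the branch side:

    from changedetectionio.safe_jinja import render

    render("Checked at {% now 'UTC', '%Y-%m-%d %H:%M' %}", watch_url='https://example.com')
    # rendered inside an ImmutableSandboxedEnvironment, output truncated to JINJA2_MAX_RETURN_PAYLOAD_SIZE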
@@ -39,12 +39,12 @@ class model(dict):
             'api_access_token_enabled': True,
             'base_url' : None,
             'empty_pages_are_a_change': False,
+            'extract_title_as_title': False,
             'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
             'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
             'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
             'global_subtractive_selectors': [],
             'ignore_whitespace': True,
-            'ignore_status_codes': False, #@todo implement, as ternary.
             'notification_body': default_notification_body,
             'notification_format': default_notification_format,
             'notification_title': default_notification_title,
@@ -55,15 +55,12 @@ class model(dict):
             'rss_access_token': None,
             'rss_content_format': RSS_FORMAT_TYPES[0][0],
             'rss_hide_muted_watches': True,
-            'rss_reader_mode': False,
-            'scheduler_timezone_default': None, # Default IANA timezone name
             'schema_version' : 0,
             'shared_diff_access': False,
-            'strip_ignored_lines': False,
-            'tags': {}, #@todo use Tag.model initialisers
             'webdriver_delay': None , # Extra delay in seconds before extracting text
+            'tags': {}, #@todo use Tag.model initialisers
+            'timezone': None, # Default IANA timezone name
             'ui': {
-                'use_page_title_in_list': True,
                 'open_diff_in_new_tab': True,
                 'socket_io_enabled': True,
                 'favicons_enabled': True
@@ -1,14 +1,14 @@
 from blinker import signal
 
 from changedetectionio.strtobool import strtobool
-from changedetectionio.jinja2_custom import render as jinja_render
+from changedetectionio.safe_jinja import render as jinja_render
 from . import watch_base
 import os
 import re
 from pathlib import Path
 from loguru import logger
 
-from .. import jinja2_custom as safe_jinja
+from .. import safe_jinja
 from ..html_tools import TRANSLATE_WHITESPACE_TABLE
 
 # Allowable protocols, protects against javascript: etc
@@ -169,8 +169,8 @@ class model(watch_base):
 
     @property
     def label(self):
-        # Used for sorting, display, etc
-        return self.get('title') or self.get('page_title') or self.link
+        # Used for sorting
+        return self.get('title') if self.get('title') else self.get('url')
 
     @property
     def last_changed(self):
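The label change in the hunk above narrows the fallback chain. Roughly, for a watch with no explicit title (values illustrative, .link is the computed link property on the model):

    w = {'title': None, 'page_title': 'Shop - Widgets', 'url': 'https://example.com'}
    # base side:   w.get('title') or w.get('page_title') or w.link         -> 'Shop - Widgets'
    # branch side: w.get('title') if w.get('title') else w.get('url')      -> 'https://example.com'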
@@ -24,6 +24,7 @@ class watch_base(dict):
             'content-type': None,
             'date_created': None,
             'extract_text': [], # Extract text by regex after filters
+            'extract_title_as_title': False,
             'fetch_backend': 'system', # plaintext, playwright etc
             'fetch_time': 0.0,
             'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
@@ -34,7 +35,6 @@ class watch_base(dict):
             'has_ldjson_price_data': None,
             'headers': {}, # Extra headers to send
             'ignore_text': [], # List of text to ignore when calculating the comparison checksum
-            'ignore_status_codes': None,
             'in_stock_only': True, # Only trigger change on going to instock from out-of-stock
             'include_filters': [],
             'last_checked': 0,
@@ -49,7 +49,6 @@ class watch_base(dict):
             'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
             'notification_title': None,
             'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
-            'page_title': None, # <title> from the page
             'paused': False,
             'previous_md5': False,
             'previous_md5_before_filters': False, # Used for skipping changedetection entirely
@@ -58,7 +57,6 @@ class watch_base(dict):
             'proxy': None, # Preferred proxy connection
             'remote_server_reply': None, # From 'server' reply header
             'sort_text_alphabetically': False,
-            'strip_ignored_lines': None,
             'subtractive_selectors': [],
             'tag': '', # Old system of text name for a tag, to be removed
             'tags': [], # list of UUIDs to App.Tags
@@ -124,13 +122,12 @@ class watch_base(dict):
                     }
                 },
             },
-            'title': None, # An arbitrary field that overrides 'page_title'
+            'title': None,
             'track_ldjson_price_data': None,
             'trim_text_whitespace': False,
             'remove_duplicate_lines': False,
             'trigger_text': [], # List of text or regex to wait for until a change is detected
             'url': '',
-            'use_page_title_in_list': None, # None = use system settings
             'uuid': str(uuid.uuid4()),
             'webdriver_delay': None,
             'webdriver_js_execute_code': None, # Run before change-detection
@@ -5,7 +5,7 @@ from loguru import logger
 from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
 
 def process_notification(n_object, datastore):
-    from changedetectionio.jinja2_custom import render as jinja_render
+    from changedetectionio.safe_jinja import render as jinja_render
     from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats
     # be sure its registered
     from .apprise_plugin.custom_handlers import apprise_http_custom_handler
@@ -149,7 +149,7 @@ def create_notification_parameters(n_object, datastore):
     uuid = n_object['uuid'] if 'uuid' in n_object else ''
 
     if uuid:
-        watch_title = datastore.data['watching'][uuid].label
+        watch_title = datastore.data['watching'][uuid].get('title', '')
         tag_list = []
         tags = datastore.get_all_tags_for_watch(uuid)
         if tags:
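The same fallback difference surfaces in notifications: the base side reads the computed .label property, while the branch reads the raw 'title' key, so a watch without an explicit title yields an empty watch_title rather than its page title or URL. Sketch:

    watch = datastore.data['watching'][uuid]
    watch.label             # base side: falls back to page_title, then the link
    watch.get('title', '')  # branch side: '' when no title was set by the user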
@@ -102,7 +102,7 @@ class difference_detection_processor():
         self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
 
         # Tweak the base config with the per-watch ones
-        from changedetectionio.jinja2_custom import render as jinja_render
+        from changedetectionio.safe_jinja import render as jinja_render
         request_headers = CaseInsensitiveDict()
 
         ua = self.datastore.data['settings']['requests'].get('default_ua')
@@ -1,133 +0,0 @@
-"""
-Content Type Detection and Stream Classification
-
-This module provides intelligent content-type detection for changedetection.io.
-It addresses the common problem where HTTP Content-Type headers are missing, incorrect,
-or too generic, which would otherwise cause the wrong processor to be used.
-
-The guess_stream_type class combines:
-1. HTTP Content-Type headers (when available and reliable)
-2. Python-magic library for MIME detection (analyzing actual file content)
-3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.)
-
-This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF,
-plain text, CSV, YAML, and XML formats - even when servers provide misleading headers.
-
-Used by: processors/text_json_diff/processor.py and other content processors
-"""
-
-# When to apply the 'cdata to real HTML' hack
-RSS_XML_CONTENT_TYPES = [
-    "application/rss+xml",
-    "application/rdf+xml",
-    "application/atom+xml",
-    "text/rss+xml",  # rare, non-standard
-    "application/x-rss+xml",  # legacy (older feed software)
-    "application/x-atom+xml",  # legacy (older Atom)
-]
-
-# JSON Content-types
-JSON_CONTENT_TYPES = [
-    "application/activity+json",
-    "application/feed+json",
-    "application/json",
-    "application/ld+json",
-    "application/vnd.api+json",
-]
-
-
-# Generic XML Content-types (non-RSS/Atom)
-XML_CONTENT_TYPES = [
-    "text/xml",
-    "application/xml",
-]
-
-HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div']
-
-from loguru import logger
-
-class guess_stream_type():
-    is_pdf = False
-    is_json = False
-    is_html = False
-    is_plaintext = False
-    is_rss = False
-    is_csv = False
-    is_xml = False  # Generic XML, not RSS/Atom
-    is_yaml = False
-
-    def __init__(self, http_content_header, content):
-        import re
-        magic_content_header = http_content_header
-        test_content = content[:200].lower().strip()
-
-        # Remove whitespace between < and tag name for robust detection (handles '< html', '<\nhtml', etc.)
-        test_content_normalized = re.sub(r'<\s+', '<', test_content)
-
-        # Use puremagic for lightweight MIME detection (saves ~14MB vs python-magic)
-        magic_result = None
-        try:
-            import puremagic
-
-            # puremagic needs bytes, so encode if we have a string
-            content_bytes = content[:200].encode('utf-8') if isinstance(content, str) else content[:200]
-
-            # puremagic returns a list of PureMagic objects with confidence scores
-            detections = puremagic.magic_string(content_bytes)
-            if detections:
-                # Get the highest confidence detection
-                mime = detections[0].mime_type
-                logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'")
-                if mime and "/" in mime:
-                    magic_result = mime
-                    # Ignore generic/fallback mime types
-                    if mime in ['application/octet-stream', 'application/x-empty', 'binary']:
-                        logger.debug(f"Ignoring generic mime type '{mime}' from puremagic library")
-                    # Trust puremagic for non-text types immediately
-                    elif mime not in ['text/html', 'text/plain']:
-                        magic_content_header = mime
-
-        except Exception as e:
-            logger.error(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection")
-
-        # Content-based detection (most reliable for text formats)
-        # Check for HTML patterns first - if found, override magic's text/plain
-        has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS)
-
-        # Always trust headers first
-        if 'text/plain' in http_content_header:
-            self.is_plaintext = True
-        if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
-            self.is_rss = True
-        elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
-            self.is_json = True
-        elif 'pdf' in magic_content_header:
-            self.is_pdf = True
-        elif has_html_patterns or http_content_header == 'text/html':
-            self.is_html = True
-        elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
-            self.is_json = True
-        # magic will call a rss document 'xml'
-        # Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss
-        # This also triggers the automatic CDATA text parser so the RSS goes back a nice content list
-        elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized:
-            self.is_rss = True
-        elif any(s in http_content_header for s in XML_CONTENT_TYPES):
-            # Only mark as generic XML if not already detected as RSS
-            if not self.is_rss:
-                self.is_xml = True
-        elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES):
-            # Generic XML that's not RSS/Atom (RSS/Atom checked above)
-            self.is_xml = True
-        elif '%pdf-1' in test_content:
-            self.is_pdf = True
-        elif http_content_header.startswith('text/'):
-            self.is_plaintext = True
-        # Only trust magic for 'text' if no other patterns matched
-        elif 'text' in magic_content_header:
-            self.is_plaintext = True
-        # If magic says text/plain and we found no HTML patterns, trust it
-        elif magic_result == 'text/plain':
-            self.is_plaintext = True
-            logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)")
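The removed guess_stream_type class is constructor-driven: instantiate it with the header and body, then read the boolean flags. A hedged sketch (header and body are made up):

    stream = guess_stream_type(
        http_content_header='text/xml',
        content='<?xml version="1.0"?><rss version="2.0"><channel>...</channel></rss>'
    )
    stream.is_rss  # True - '<rss' appears within the first 200 characters
    stream.is_xml  # False - the RSS branch wins before the generic XML checks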
@@ -13,17 +13,12 @@ from changedetectionio import html_tools, content_fetchers
 from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 from loguru import logger
 
-from changedetectionio.processors.magic import guess_stream_type
-
 urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 
 name = 'Webpage Text/HTML, JSON and PDF changes'
 description = 'Detects all text changes where possible'
 
-JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:']
+json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
 
-# Assume it's this type if the server says nothing on content-type
-DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER = 'text/html'
-
 class FilterNotFoundInResponse(ValueError):
     def __init__(self, msg, screenshot=None, xpath_data=None):
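The deleted DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER constant is what lets the base side classify responses that send no Content-Type header at all; the run_changedetection hunk further down uses it as the .get() fallback. Sketch (the fetcher object is illustrative):

    ctype_header = fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower()
    # with no header present, detection proceeds as if the server had sent 'text/html'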
@@ -37,560 +32,356 @@ class PDFToHTMLToolNotFound(ValueError):
         ValueError.__init__(self, msg)
 
 
-class FilterConfig:
-    """Consolidates all filter and rule configurations from watch, tags, and global settings."""
-
-    def __init__(self, watch, datastore):
-        self.watch = watch
-        self.datastore = datastore
-        self.watch_uuid = watch.get('uuid')
-        # Cache computed properties to avoid repeated list operations
-        self._include_filters_cache = None
-        self._subtractive_selectors_cache = None
-
-    def _get_merged_rules(self, attr, include_global=False):
-        """Merge rules from watch, tags, and optionally global settings."""
-        watch_rules = self.watch.get(attr, [])
-        tag_rules = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr=attr)
-        rules = list(dict.fromkeys(watch_rules + tag_rules))
-
-        if include_global:
-            global_rules = self.datastore.data['settings']['application'].get(f'global_{attr}', [])
-            rules = list(dict.fromkeys(rules + global_rules))
-
-        return rules
-
-    @property
-    def include_filters(self):
-        if self._include_filters_cache is None:
-            filters = self._get_merged_rules('include_filters')
-            # Inject LD+JSON price tracker rule if enabled
-            if self.watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
-                filters += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
-            self._include_filters_cache = filters
-        return self._include_filters_cache
-
-    @property
-    def subtractive_selectors(self):
-        if self._subtractive_selectors_cache is None:
-            watch_selectors = self.watch.get("subtractive_selectors", [])
-            tag_selectors = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr='subtractive_selectors')
-            global_selectors = self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
-            self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors]
-        return self._subtractive_selectors_cache
-
-    @property
-    def extract_text(self):
-        return self._get_merged_rules('extract_text')
-
-    @property
-    def ignore_text(self):
-        return self._get_merged_rules('ignore_text', include_global=True)
-
-    @property
-    def trigger_text(self):
-        return self._get_merged_rules('trigger_text')
-
-    @property
-    def text_should_not_be_present(self):
-        return self._get_merged_rules('text_should_not_be_present')
-
-    @property
-    def has_include_filters(self):
-        return bool(self.include_filters) and bool(self.include_filters[0].strip())
-
-    @property
-    def has_include_json_filters(self):
-        return any(f.strip().startswith(prefix) for f in self.include_filters for prefix in JSON_FILTER_PREFIXES)
-
-    @property
-    def has_subtractive_selectors(self):
-        return bool(self.subtractive_selectors) and bool(self.subtractive_selectors[0].strip())
-
-
-class ContentTransformer:
-    """Handles text transformations like trimming, sorting, and deduplication."""
-
-    @staticmethod
-    def trim_whitespace(text):
-        """Remove leading/trailing whitespace from each line."""
-        # Use generator expression to avoid building intermediate list
-        return '\n'.join(line.strip() for line in text.replace("\n\n", "\n").splitlines())
-
-    @staticmethod
-    def remove_duplicate_lines(text):
-        """Remove duplicate lines while preserving order."""
-        return '\n'.join(dict.fromkeys(line for line in text.replace("\n\n", "\n").splitlines()))
-
-    @staticmethod
-    def sort_alphabetically(text):
-        """Sort lines alphabetically (case-insensitive)."""
-        # Remove double line feeds before sorting
-        text = text.replace("\n\n", "\n")
-        return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower()))
-
-    @staticmethod
-    def extract_by_regex(text, regex_patterns):
-        """Extract text matching regex patterns."""
-        # Use list of strings instead of concatenating lists repeatedly (avoids O(n²) behavior)
-        regex_matched_output = []
-
-        for s_re in regex_patterns:
-            # Check if it's perl-style regex /.../
-            if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
-                regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
-                result = re.findall(regex, text)
-
-                for match in result:
-                    if type(match) is tuple:
-                        regex_matched_output.extend(match)
-                        regex_matched_output.append('\n')
-                    else:
-                        regex_matched_output.append(match)
-                        regex_matched_output.append('\n')
-            else:
-                # Plain text search (case-insensitive)
-                r = re.compile(re.escape(s_re), re.IGNORECASE)
-                res = r.findall(text)
-                if res:
-                    for match in res:
-                        regex_matched_output.append(match)
-                        regex_matched_output.append('\n')
-
-        return ''.join(regex_matched_output) if regex_matched_output else ''
-
-
-class RuleEngine:
-    """Evaluates blocking rules (triggers, conditions, text_should_not_be_present)."""
-
-    @staticmethod
-    def evaluate_trigger_text(content, trigger_patterns):
-        """
-        Check if trigger text is present. If trigger_text is configured,
-        content is blocked UNLESS the trigger is found.
-        Returns True if blocked, False if allowed.
-        """
-        if not trigger_patterns:
-            return False
-
-        # Assume blocked if trigger_text is configured
-        result = html_tools.strip_ignore_text(
-            content=str(content),
-            wordlist=trigger_patterns,
-            mode="line numbers"
-        )
-        # Unblock if trigger was found
-        return not bool(result)
-
-    @staticmethod
-    def evaluate_text_should_not_be_present(content, patterns):
-        """
-        Check if forbidden text is present. If found, block the change.
-        Returns True if blocked, False if allowed.
-        """
-        if not patterns:
-            return False
-
-        result = html_tools.strip_ignore_text(
-            content=str(content),
-            wordlist=patterns,
-            mode="line numbers"
-        )
-        # Block if forbidden text was found
-        return bool(result)
-
-    @staticmethod
-    def evaluate_conditions(watch, datastore, content):
-        """
-        Evaluate custom conditions ruleset.
-        Returns True if blocked, False if allowed.
-        """
-        if not watch.get('conditions') or not watch.get('conditions_match_logic'):
-            return False
-
-        conditions_result = execute_ruleset_against_all_plugins(
-            current_watch_uuid=watch.get('uuid'),
-            application_datastruct=datastore.data,
-            ephemeral_data={'text': content}
-        )
-
-        # Block if conditions not met
-        return not conditions_result.get('result')
-
-
-class ContentProcessor:
-    """Handles content preprocessing, filtering, and extraction."""
-
-    def __init__(self, fetcher, watch, filter_config, datastore):
-        self.fetcher = fetcher
-        self.watch = watch
-        self.filter_config = filter_config
-        self.datastore = datastore
-
-    def preprocess_rss(self, content):
-        """
-        Convert CDATA/comments in RSS to usable text.
-
-        Supports two RSS processing modes:
-        - 'default': Inline CDATA replacement (original behavior)
-        - 'formatted': Format RSS items with title, link, guid, pubDate, and description (CDATA unmarked)
-        """
-        from changedetectionio import rss_tools
-        rss_mode = self.datastore.data["settings"]["application"].get("rss_reader_mode")
-        if rss_mode:
-            # Format RSS items nicely with CDATA content unmarked and converted to text
-            return rss_tools.format_rss_items(content)
-        else:
-            # Default: Original inline CDATA replacement
-            return cdata_in_document_to_text(html_content=content)
-
-    def preprocess_pdf(self, raw_content):
-        """Convert PDF to HTML using external tool."""
-        from shutil import which
-        tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
-        if not which(tool):
-            raise PDFToHTMLToolNotFound(
-                f"Command-line `{tool}` tool was not found in system PATH, was it installed?"
-            )
-
-        import subprocess
-        proc = subprocess.Popen(
-            [tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
-            stdout=subprocess.PIPE,
-            stdin=subprocess.PIPE
-        )
-        proc.stdin.write(raw_content)
-        proc.stdin.close()
-        html_content = proc.stdout.read().decode('utf-8')
-        proc.wait(timeout=60)
-
-        # Add metadata for change detection
-        metadata = (
-            f"<p>Added by changedetection.io: Document checksum - "
-            f"{hashlib.md5(raw_content).hexdigest().upper()} "
-            f"Original file size - {len(raw_content)} bytes</p>"
-        )
-        return html_content.replace('</body>', metadata + '</body>')
-
-    def preprocess_json(self, raw_content):
-        """Format and sort JSON content."""
-        # Then we re-format it, else it does have filters (later on) which will reformat it anyway
-        content = html_tools.extract_json_as_string(content=raw_content, json_filter="json:$")
-
-        # Sort JSON to avoid false alerts from reordering
-        try:
-            content = json.dumps(json.loads(content), sort_keys=True, indent=4)
-        except Exception:
-            # Might be malformed JSON, continue anyway
-            pass
-
-        return content
-
-    def apply_include_filters(self, content, stream_content_type):
-        """Apply CSS, XPath, or JSON filters to extract specific content."""
-        filtered_content = ""
-
-        for filter_rule in self.filter_config.include_filters:
-            # XPath filters
-            if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
-                filtered_content += html_tools.xpath_filter(
-                    xpath_filter=filter_rule.replace('xpath:', ''),
-                    html_content=content,
-                    append_pretty_line_formatting=not self.watch.is_source_type_url,
-                    is_rss=stream_content_type.is_rss
-                )
-
-            # XPath1 filters (first match only)
-            elif filter_rule.startswith('xpath1:'):
-                filtered_content += html_tools.xpath1_filter(
-                    xpath_filter=filter_rule.replace('xpath1:', ''),
-                    html_content=content,
-                    append_pretty_line_formatting=not self.watch.is_source_type_url,
-                    is_rss=stream_content_type.is_rss
-                )
-
-            # JSON filters
-            elif any(filter_rule.startswith(prefix) for prefix in JSON_FILTER_PREFIXES):
-                filtered_content += html_tools.extract_json_as_string(
-                    content=content,
-                    json_filter=filter_rule
-                )
-
-            # CSS selectors, default fallback
-            else:
-                filtered_content += html_tools.include_filters(
-                    include_filters=filter_rule,
-                    html_content=content,
-                    append_pretty_line_formatting=not self.watch.is_source_type_url
-                )
-
-        # Raise error if filter returned nothing
-        if not filtered_content.strip():
-            raise FilterNotFoundInResponse(
-                msg=self.filter_config.include_filters,
-                screenshot=self.fetcher.screenshot,
-                xpath_data=self.fetcher.xpath_data
-            )
-
-        return filtered_content
-
-    def apply_subtractive_selectors(self, content):
-        """Remove elements matching subtractive selectors."""
-        return html_tools.element_removal(self.filter_config.subtractive_selectors, content)
-
-    def extract_text_from_html(self, html_content, stream_content_type):
-        """Convert HTML to plain text."""
-        do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
-        return html_tools.html_to_text(
-            html_content=html_content,
-            render_anchor_tag_content=do_anchor,
-            is_rss=stream_content_type.is_rss
-        )
-
-
-class ChecksumCalculator:
-    """Calculates checksums with various options."""
-
-    @staticmethod
-    def calculate(text, ignore_whitespace=False):
-        """Calculate MD5 checksum of text content."""
-        if ignore_whitespace:
-            text = text.translate(TRANSLATE_WHITESPACE_TABLE)
-        return hashlib.md5(text.encode('utf-8')).hexdigest()
-
-
 
 # Some common stuff here that can be moved to a base class
 # (set_proxy_from_list)
 class perform_site_check(difference_detection_processor):
 
     def run_changedetection(self, watch):
         changed_detected = False
+        html_content = ""
+        screenshot = False  # as bytes
+        stripped_text_from_html = ""
 
         if not watch:
             raise Exception("Watch no longer exists.")
 
-        # Initialize components
-        filter_config = FilterConfig(watch, self.datastore)
-        content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
-        transformer = ContentTransformer()
-        rule_engine = RuleEngine()
-
-        # Get content type and stream info
-        ctype_header = self.fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower()
-        stream_content_type = guess_stream_type(http_content_header=ctype_header, content=self.fetcher.content)
-
         # Unset any existing notification error
         update_obj = {'last_notification_error': False, 'last_error': False}
 
         url = watch.link
 
         self.screenshot = self.fetcher.screenshot
         self.xpath_data = self.fetcher.xpath_data
 
-        # Track the content type and checksum before filters
-        update_obj['content_type'] = ctype_header
+        # Track the content type
+        update_obj['content_type'] = self.fetcher.get_all_headers().get('content-type', '').lower()
 
+        # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
+        # Saves a lot of CPU
         update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
 
-        # === CONTENT PREPROCESSING ===
-        # Avoid creating unnecessary intermediate string copies by reassigning only when needed
-        content = self.fetcher.content
-
-        # RSS preprocessing
-        if stream_content_type.is_rss:
-            content = content_processor.preprocess_rss(content)
-            if self.datastore.data["settings"]["application"].get("rss_reader_mode"):
-                # Now just becomes regular HTML that can have xpath/CSS applied (first of the set etc)
-                stream_content_type.is_rss = False
-                stream_content_type.is_html = True
-                self.fetcher.content = content
+        # Fetching complete, now filters
+        # @note: I feel like the following should be in a more obvious chain system
+        # - Check filter text
+        # - Is the checksum different?
+        # - Do we convert to JSON?
+        # https://stackoverflow.com/questions/41817578/basic-method-chaining ?
+        # return content().textfilter().jsonextract().checksumcompare() ?
 
-        # PDF preprocessing
-        if watch.is_pdf or stream_content_type.is_pdf:
-            content = content_processor.preprocess_pdf(raw_content=self.fetcher.raw_content)
-            stream_content_type.is_html = True
+        is_json = 'application/json' in self.fetcher.get_all_headers().get('content-type', '').lower()
+        is_html = not is_json
+        is_rss = False
 
-        # JSON - Always reformat it nicely for consistency.
-
-        if stream_content_type.is_json:
-            if not filter_config.has_include_json_filters:
-                content = content_processor.preprocess_json(raw_content=content)
-            #else, otherwise it gets sorted/formatted in the filter stage anyway
-
-        # HTML obfuscation workarounds
-        if stream_content_type.is_html:
-            content = html_tools.workarounds_for_obfuscations(content)
-
-        # Check for LD+JSON price data (for HTML content)
-        if stream_content_type.is_html:
-            update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(content)
-
-        # === FILTER APPLICATION ===
-        # Start with content reference, avoid copy until modification
-        html_content = content
-
-        # Apply include filters (CSS, XPath, JSON)
-        # Except for plaintext (incase they tried to confuse the system, it will HTML escape
-        #if not stream_content_type.is_plaintext:
-        if filter_config.has_include_filters:
-            html_content = content_processor.apply_include_filters(content, stream_content_type)
-
-        # Apply subtractive selectors
-        if filter_config.has_subtractive_selectors:
-            html_content = content_processor.apply_subtractive_selectors(html_content)
-
-        # === TEXT EXTRACTION ===
+        ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
+        # Go into RSS preprocess for converting CDATA/comment to usable text
+        if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
+            if '<rss' in self.fetcher.content[:100].lower():
+                self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
+                is_rss = True
 
+        # source: support, basically treat it as plaintext
         if watch.is_source_type_url:
-            # For source URLs, keep raw content
-            stripped_text = html_content
-        elif stream_content_type.is_plaintext:
-            # For plaintext, keep as-is without HTML-to-text conversion
-            stripped_text = html_content
-        else:
-            # Extract text from HTML/RSS content (not generic XML)
-            if stream_content_type.is_html or stream_content_type.is_rss:
-                stripped_text = content_processor.extract_text_from_html(html_content, stream_content_type)
+            is_html = False
+            is_json = False
 
+        inline_pdf = self.fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in self.fetcher.content[:10]
+        if watch.is_pdf or 'application/pdf' in self.fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf:
+            from shutil import which
+            tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
+            if not which(tool):
+                raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool))
 
+            import subprocess
+            proc = subprocess.Popen(
+                [tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
+                stdout=subprocess.PIPE,
+                stdin=subprocess.PIPE)
+            proc.stdin.write(self.fetcher.raw_content)
+            proc.stdin.close()
+            self.fetcher.content = proc.stdout.read().decode('utf-8')
+            proc.wait(timeout=60)
+
+            # Add a little metadata so we know if the file changes (like if an image changes, but the text is the same
+            # @todo may cause problems with non-UTF8?
+            metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format(
+                hashlib.md5(self.fetcher.raw_content).hexdigest().upper(),
+                len(self.fetcher.content))
+
+            self.fetcher.content = self.fetcher.content.replace('</body>', metadata + '</body>')
+
+        # Better would be if Watch.model could access the global data also
+        # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
+        # https://realpython.com/inherit-python-dict/ instead of doing it procedurely
+        include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')
|
||||||
|
|
||||||
|
# 1845 - remove duplicated filters in both group and watch include filter
|
||||||
|
include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))
|
||||||
|
|
||||||
|
subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
|
||||||
|
*watch.get("subtractive_selectors", []),
|
||||||
|
*self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
|
||||||
|
]
|
||||||
|
|
||||||
|
# Inject a virtual LD+JSON price tracker rule
|
||||||
|
if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
|
||||||
|
include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
|
||||||
|
|
||||||
|
has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip())
|
||||||
|
has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip())
|
||||||
|
|
||||||
|
if is_json and not has_filter_rule:
|
||||||
|
include_filters_rule.append("json:$")
|
||||||
|
has_filter_rule = True
|
||||||
|
|
||||||
|
if is_json:
|
||||||
|
# Sort the JSON so we dont get false alerts when the content is just re-ordered
|
||||||
|
try:
|
||||||
|
self.fetcher.content = json.dumps(json.loads(self.fetcher.content), sort_keys=True)
|
||||||
|
except Exception as e:
|
||||||
|
# Might have just been a snippet, or otherwise bad JSON, continue
|
||||||
|
pass
|
||||||
|
|
||||||
|
if has_filter_rule:
|
||||||
|
for filter in include_filters_rule:
|
||||||
|
if any(prefix in filter for prefix in json_filter_prefixes):
|
||||||
|
stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter)
|
||||||
|
is_html = False
|
||||||
|
|
||||||
|
if is_html or watch.is_source_type_url:
|
||||||
|
|
||||||
|
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||||
|
self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
|
||||||
|
html_content = self.fetcher.content
|
||||||
|
|
||||||
|
# If not JSON, and if it's not text/plain..
|
||||||
|
if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower():
|
||||||
|
# Don't run get_text or xpath/css filters on plaintext
|
||||||
|
stripped_text_from_html = html_content
|
||||||
else:
|
else:
|
||||||
stripped_text = html_content
|
# Does it have some ld+json price data? used for easier monitoring
|
||||||
|
update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(self.fetcher.content)
|
||||||
|
|
||||||
|
# Then we assume HTML
|
||||||
|
if has_filter_rule:
|
||||||
|
html_content = ""
|
||||||
|
|
||||||
|
for filter_rule in include_filters_rule:
|
||||||
|
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
||||||
|
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
|
||||||
|
html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
|
||||||
|
html_content=self.fetcher.content,
|
||||||
|
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||||
|
is_rss=is_rss)
|
||||||
|
|
||||||
|
elif filter_rule.startswith('xpath1:'):
|
||||||
|
html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
|
||||||
|
html_content=self.fetcher.content,
|
||||||
|
append_pretty_line_formatting=not watch.is_source_type_url,
|
||||||
|
is_rss=is_rss)
|
||||||
|
else:
|
||||||
|
html_content += html_tools.include_filters(include_filters=filter_rule,
|
||||||
|
html_content=self.fetcher.content,
|
||||||
|
append_pretty_line_formatting=not watch.is_source_type_url)
|
||||||
|
|
||||||
|
if not html_content.strip():
|
||||||
|
raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
|
||||||
|
|
||||||
|
if has_subtractive_selectors:
|
||||||
|
html_content = html_tools.element_removal(subtractive_selectors, html_content)
|
||||||
|
|
||||||
|
if watch.is_source_type_url:
|
||||||
|
stripped_text_from_html = html_content
|
||||||
|
else:
|
||||||
|
# extract text
|
||||||
|
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||||
|
stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
|
||||||
|
render_anchor_tag_content=do_anchor,
|
||||||
|
is_rss=is_rss) # 1874 activate the <title workaround hack
|
||||||
|
|
||||||
# === TEXT TRANSFORMATIONS ===
|
|
||||||
if watch.get('trim_text_whitespace'):
|
if watch.get('trim_text_whitespace'):
|
||||||
stripped_text = transformer.trim_whitespace(stripped_text)
|
stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
|
||||||
|
|
||||||
# Save text before ignore filters (for diff calculation)
|
# Re #340 - return the content before the 'ignore text' was applied
|
||||||
text_content_before_ignored_filter = stripped_text
|
# Also used to calculate/show what was removed
|
||||||
|
text_content_before_ignored_filter = stripped_text_from_html
|
||||||
|
|
||||||
|
# @todo whitespace coming from missing rtrim()?
|
||||||
|
# stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
|
||||||
|
# Rewrite's the processing text based on only what diff result they want to see
|
||||||
|
|
||||||
# === DIFF FILTERING ===
|
|
||||||
# If user wants specific diff types (added/removed/replaced only)
|
|
||||||
if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
|
if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
|
||||||
stripped_text = self._apply_diff_filtering(watch, stripped_text, text_content_before_ignored_filter)
|
# Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
|
||||||
if stripped_text is None:
|
from changedetectionio import diff
|
||||||
# No differences found, but content exists
|
# needs to not include (added) etc or it may get used twice
|
||||||
c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True)
|
# Replace the processed text with the preferred result
|
||||||
return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8')
|
rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
|
||||||
|
newest_version_file_contents=stripped_text_from_html,
|
||||||
|
include_equal=False, # not the same lines
|
||||||
|
include_added=watch.get('filter_text_added', True),
|
||||||
|
include_removed=watch.get('filter_text_removed', True),
|
||||||
|
include_replaced=watch.get('filter_text_replaced', True),
|
||||||
|
line_feed_sep="\n",
|
||||||
|
include_change_type_prefix=False)
|
||||||
|
|
||||||
# === EMPTY PAGE CHECK ===
|
watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))
|
||||||
|
|
||||||
|
if not rendered_diff and stripped_text_from_html:
|
||||||
|
# We had some content, but no differences were found
|
||||||
|
# Store our new file as the MD5 so it will trigger in the future
|
||||||
|
c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
|
||||||
|
return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
|
||||||
|
else:
|
||||||
|
stripped_text_from_html = rendered_diff
|
||||||
|
|
||||||
|
# Treat pages with no renderable text content as a change? No by default
|
||||||
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||||
if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0:
|
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
|
||||||
raise content_fetchers.exceptions.ReplyWithContentButNoText(
|
raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
|
||||||
url=url,
|
status_code=self.fetcher.get_last_status_code(),
|
||||||
status_code=self.fetcher.get_last_status_code(),
|
screenshot=self.fetcher.screenshot,
|
||||||
screenshot=self.fetcher.screenshot,
|
has_filters=has_filter_rule,
|
||||||
has_filters=filter_config.has_include_filters,
|
html_content=html_content,
|
||||||
html_content=html_content,
|
xpath_data=self.fetcher.xpath_data
|
||||||
xpath_data=self.fetcher.xpath_data
|
)
|
||||||
)
|
|
||||||
|
# We rely on the actual text in the html output.. many sites have random script vars etc,
|
||||||
|
# in the future we'll implement other mechanisms.
|
||||||
|
|
||||||
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
update_obj["last_check_status"] = self.fetcher.get_last_status_code()
|
||||||
|
|
||||||
# === REGEX EXTRACTION ===
|
# 615 Extract text by regex
|
||||||
if filter_config.extract_text:
|
extract_text = watch.get('extract_text', [])
|
||||||
extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text)
|
extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text')
|
||||||
stripped_text = extracted
|
if len(extract_text) > 0:
|
||||||
|
regex_matched_output = []
|
||||||
|
for s_re in extract_text:
|
||||||
|
# incase they specified something in '/.../x'
|
||||||
|
if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
|
||||||
|
regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
|
||||||
|
result = re.findall(regex, stripped_text_from_html)
|
||||||
|
|
||||||
|
for l in result:
|
||||||
|
if type(l) is tuple:
|
||||||
|
# @todo - some formatter option default (between groups)
|
||||||
|
regex_matched_output += list(l) + ['\n']
|
||||||
|
else:
|
||||||
|
# @todo - some formatter option default (between each ungrouped result)
|
||||||
|
regex_matched_output += [l] + ['\n']
|
||||||
|
else:
|
||||||
|
# Doesnt look like regex, just hunt for plaintext and return that which matches
|
||||||
|
# `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
|
||||||
|
r = re.compile(re.escape(s_re), re.IGNORECASE)
|
||||||
|
res = r.findall(stripped_text_from_html)
|
||||||
|
if res:
|
||||||
|
for match in res:
|
||||||
|
regex_matched_output += [match] + ['\n']
|
||||||
|
|
||||||
|
##########################################################
|
||||||
|
stripped_text_from_html = ''
|
||||||
|
|
||||||
|
if regex_matched_output:
|
||||||
|
# @todo some formatter for presentation?
|
||||||
|
stripped_text_from_html = ''.join(regex_matched_output)
|
||||||
|
|
||||||
# === MORE TEXT TRANSFORMATIONS ===
|
|
||||||
if watch.get('remove_duplicate_lines'):
|
if watch.get('remove_duplicate_lines'):
|
||||||
stripped_text = transformer.remove_duplicate_lines(stripped_text)
|
stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
|
||||||
|
|
||||||
|
|
||||||
if watch.get('sort_text_alphabetically'):
|
if watch.get('sort_text_alphabetically'):
|
||||||
stripped_text = transformer.sort_alphabetically(stripped_text)
|
# Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
|
||||||
|
# we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
|
||||||
|
stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
|
||||||
|
stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
|
||||||
|
|
||||||
# === CHECKSUM CALCULATION ===
|
### CALCULATE MD5
|
||||||
text_for_checksuming = stripped_text
|
# If there's text to ignore
|
||||||
|
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
|
||||||
|
text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text')
|
||||||
|
|
||||||
# Apply ignore_text for checksum calculation
|
text_for_checksuming = stripped_text_from_html
|
||||||
if filter_config.ignore_text:
|
if text_to_ignore:
|
||||||
text_for_checksuming = html_tools.strip_ignore_text(stripped_text, filter_config.ignore_text)
|
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
|
||||||
|
|
||||||
# Optionally remove ignored lines from output
|
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
|
||||||
strip_ignored_lines = watch.get('strip_ignored_lines')
|
if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
|
||||||
if strip_ignored_lines is None:
|
fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
|
||||||
strip_ignored_lines = self.datastore.data['settings']['application'].get('strip_ignored_lines')
|
else:
|
||||||
if strip_ignored_lines:
|
fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()
|
||||||
stripped_text = text_for_checksuming
|
|
||||||
|
|
||||||
# Calculate checksum
|
############ Blocking rules, after checksum #################
|
||||||
ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace', False)
|
|
||||||
fetched_md5 = ChecksumCalculator.calculate(text_for_checksuming, ignore_whitespace=ignore_whitespace)
|
|
||||||
|
|
||||||
# === BLOCKING RULES EVALUATION ===
|
|
||||||
blocked = False
|
blocked = False
|
||||||
|
trigger_text = watch.get('trigger_text', [])
|
||||||
# Check trigger_text
|
trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text')
|
||||||
if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
|
if len(trigger_text):
|
||||||
|
# Assume blocked
|
||||||
blocked = True
|
blocked = True
|
||||||
|
# Filter and trigger works the same, so reuse it
|
||||||
|
# It should return the line numbers that match
|
||||||
|
# Unblock flow if the trigger was found (some text remained after stripped what didnt match)
|
||||||
|
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
||||||
|
wordlist=trigger_text,
|
||||||
|
mode="line numbers")
|
||||||
|
# Unblock if the trigger was found
|
||||||
|
if result:
|
||||||
|
blocked = False
|
||||||
|
|
||||||
# Check text_should_not_be_present
|
text_should_not_be_present = watch.get('text_should_not_be_present', [])
|
||||||
if rule_engine.evaluate_text_should_not_be_present(stripped_text, filter_config.text_should_not_be_present):
|
text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present')
|
||||||
blocked = True
|
if len(text_should_not_be_present):
|
||||||
|
# If anything matched, then we should block a change from happening
|
||||||
|
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
||||||
|
wordlist=text_should_not_be_present,
|
||||||
|
mode="line numbers")
|
||||||
|
if result:
|
||||||
|
blocked = True
|
||||||
|
|
||||||
# Check custom conditions
|
# And check if 'conditions' will let this pass through
|
||||||
if rule_engine.evaluate_conditions(watch, self.datastore, stripped_text):
|
if watch.get('conditions') and watch.get('conditions_match_logic'):
|
||||||
blocked = True
|
conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
|
||||||
|
application_datastruct=self.datastore.data,
|
||||||
|
ephemeral_data={
|
||||||
|
'text': stripped_text_from_html
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
# === CHANGE DETECTION ===
|
if not conditions_result.get('result'):
|
||||||
|
# Conditions say "Condition not met" so we block it.
|
||||||
|
blocked = True
|
||||||
|
|
||||||
|
# Looks like something changed, but did it match all the rules?
|
||||||
if blocked:
|
if blocked:
|
||||||
changed_detected = False
|
changed_detected = False
|
||||||
else:
|
else:
|
||||||
# Compare checksums
|
# The main thing that all this at the moment comes down to :)
|
||||||
if watch.get('previous_md5') != fetched_md5:
|
if watch.get('previous_md5') != fetched_md5:
|
||||||
changed_detected = True
|
changed_detected = True
|
||||||
|
|
||||||
# Always record the new checksum
|
# Always record the new checksum
|
||||||
update_obj["previous_md5"] = fetched_md5
|
update_obj["previous_md5"] = fetched_md5
|
||||||
|
|
||||||
# On first run, initialize previous_md5
|
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
|
||||||
if not watch.get('previous_md5'):
|
if not watch.get('previous_md5'):
|
||||||
watch['previous_md5'] = fetched_md5
|
watch['previous_md5'] = fetched_md5
|
||||||
|
|
||||||
logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
|
logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
|
||||||
|
|
||||||
# === UNIQUE LINES CHECK ===
|
if changed_detected:
|
||||||
if changed_detected and watch.get('check_unique_lines', False):
|
if watch.get('check_unique_lines', False):
|
||||||
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
|
ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')
|
||||||
lines=stripped_text.splitlines(),
|
|
||||||
ignore_whitespace=ignore_whitespace
|
|
||||||
)
|
|
||||||
|
|
||||||
if not has_unique_lines:
|
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
|
||||||
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
|
lines=stripped_text_from_html.splitlines(),
|
||||||
changed_detected = False
|
ignore_whitespace=ignore_whitespace
|
||||||
else:
|
)
|
||||||
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
|
|
||||||
|
|
||||||
# Note: Explicit cleanup is only needed here because text_json_diff handles
|
# One or more lines? unsure?
|
||||||
# large strings (100KB-300KB for RSS/HTML). The other processors work with
|
if not has_unique_lines:
|
||||||
# small strings and don't need this.
|
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
|
||||||
#
|
changed_detected = False
|
||||||
# Python would clean these up automatically, but explicit `del` frees memory
|
else:
|
||||||
# immediately rather than waiting for function return, reducing peak memory usage.
|
logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
|
||||||
del content
|
|
||||||
if 'html_content' in locals() and html_content is not stripped_text:
|
|
||||||
del html_content
|
|
||||||
if 'text_content_before_ignored_filter' in locals() and text_content_before_ignored_filter is not stripped_text:
|
|
||||||
del text_content_before_ignored_filter
|
|
||||||
if 'text_for_checksuming' in locals() and text_for_checksuming is not stripped_text:
|
|
||||||
del text_for_checksuming
|
|
||||||
|
|
||||||
return changed_detected, update_obj, stripped_text
|
|
||||||
|
|
||||||
def _apply_diff_filtering(self, watch, stripped_text, text_before_filter):
|
# stripped_text_from_html - Everything after filters and NO 'ignored' content
|
||||||
"""Apply user's diff filtering preferences (show only added/removed/replaced lines)."""
|
return changed_detected, update_obj, stripped_text_from_html
|
||||||
from changedetectionio import diff
|
|
||||||
|
|
||||||
rendered_diff = diff.render_diff(
|
|
||||||
previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
|
|
||||||
newest_version_file_contents=stripped_text,
|
|
||||||
include_equal=False,
|
|
||||||
include_added=watch.get('filter_text_added', True),
|
|
||||||
include_removed=watch.get('filter_text_removed', True),
|
|
||||||
include_replaced=watch.get('filter_text_replaced', True),
|
|
||||||
line_feed_sep="\n",
|
|
||||||
include_change_type_prefix=False
|
|
||||||
)
|
|
||||||
|
|
||||||
watch.save_last_text_fetched_before_filters(text_before_filter.encode('utf-8'))
|
|
||||||
|
|
||||||
if not rendered_diff and stripped_text:
|
|
||||||
# No differences found
|
|
||||||
return None
|
|
||||||
|
|
||||||
return rendered_diff
|
|
||||||
|
|||||||
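For orientation: both sides of the hunk above funnel the user's "show only added/removed/replaced lines" preferences into diff.render_diff(). A rough difflib-based sketch of that filtering idea follows — an illustration only, the real implementation is changedetection.io's own diff module, whose internals differ:

    import difflib

    def sketch_render_diff(previous_text, newest_text, include_added=True, include_removed=True):
        # Keep only the change types the watch asked for, comparing line by line
        prev_lines = previous_text.splitlines()
        new_lines = newest_text.splitlines()
        output = []
        sm = difflib.SequenceMatcher(None, prev_lines, new_lines)
        for tag, i1, i2, j1, j2 in sm.get_opcodes():
            if tag in ('delete', 'replace') and include_removed:
                output.extend(prev_lines[i1:i2])
            if tag in ('insert', 'replace') and include_added:
                output.extend(new_lines[j1:j2])
        return "\n".join(output)

    # An empty result is what both branches treat as "no differences found":
    # they record the new checksum and return changed_detected=False.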
@@ -243,15 +243,14 @@ def handle_watch_update(socketio, **kwargs):

     general_stats = {
         'count_errors': errored_count,
-        'unread_changes_count': datastore.unread_changes_count
+        'has_unviewed': datastore.has_unviewed
     }

     # Debug what's being emitted
     # logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}")

     # Emit to all clients (no 'broadcast' parameter needed - it's the default behavior)
-    socketio.emit("watch_update", {'watch': watch_data})
-    socketio.emit("general_stats_update", general_stats)
+    socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats})

     # Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues
     logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}")
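Worth noting in this hunk: the 0.50.23 side emits the stats as a separate "general_stats_update" event, while the older API-OpenAP side piggybacks them onto "watch_update". A minimal Flask-SocketIO sketch of the newer shape — event names mirror the diff, the app wiring around them is assumed:

    from flask import Flask
    from flask_socketio import SocketIO

    app = Flask(__name__)
    socketio = SocketIO(app)

    def push_updates(watch_data, general_stats):
        # Two events keep row-level updates and list-wide counters decoupled,
        # so the unread/error badges can refresh even if a row update is ignored
        socketio.emit("watch_update", {'watch': watch_data})
        socketio.emit("general_stats_update", general_stats)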
@@ -1,130 +0,0 @@
-"""
-RSS/Atom feed processing tools for changedetection.io
-"""
-
-from loguru import logger
-import re
-
-
-def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
-    """
-    Process CDATA sections in HTML/XML content - inline replacement.
-
-    Args:
-        html_content: The HTML/XML content to process
-        render_anchor_tag_content: Whether to render anchor tag content
-
-    Returns:
-        Processed HTML/XML content with CDATA sections replaced inline
-    """
-    from xml.sax.saxutils import escape as xml_escape
-    from .html_tools import html_to_text
-
-    pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'
-
-    def repl(m):
-        text = m.group(1)
-        return xml_escape(html_to_text(html_content=text, render_anchor_tag_content=render_anchor_tag_content)).strip()
-
-    return re.sub(pattern, repl, html_content)
-
-
-def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
-    """
-    Format RSS/Atom feed items in a readable text format using feedparser.
-
-    Converts RSS <item> or Atom <entry> elements to formatted text with:
-    - <title> → <h1>Title</h1>
-    - <link> → Link: [url]
-    - <guid> → Guid: [id]
-    - <pubDate> → PubDate: [date]
-    - <description> or <content> → Raw HTML content (CDATA and entities automatically handled)
-
-    Args:
-        rss_content: The RSS/Atom feed content
-        render_anchor_tag_content: Whether to render anchor tag content in descriptions (unused, kept for compatibility)
-
-    Returns:
-        Formatted HTML content ready for html_to_text conversion
-    """
-    try:
-        import feedparser
-        from xml.sax.saxutils import escape as xml_escape
-
-        # Parse the feed - feedparser handles all RSS/Atom variants, CDATA, entity unescaping, etc.
-        feed = feedparser.parse(rss_content)
-
-        formatted_items = []
-
-        # Determine feed type for appropriate labels when fields are missing
-        # feedparser sets feed.version to things like 'rss20', 'atom10', etc.
-        is_atom = feed.version and 'atom' in feed.version
-
-        for entry in feed.entries:
-            item_parts = []
-
-            # Title - feedparser handles CDATA and entity unescaping automatically
-            if hasattr(entry, 'title') and entry.title:
-                item_parts.append(f'<h1>{xml_escape(entry.title)}</h1>')
-
-            # Link
-            if hasattr(entry, 'link') and entry.link:
-                item_parts.append(f'Link: {xml_escape(entry.link)}<br>')
-
-            # GUID/ID
-            if hasattr(entry, 'id') and entry.id:
-                item_parts.append(f'Guid: {xml_escape(entry.id)}<br>')
-
-            # Date - feedparser normalizes all date field names to 'published'
-            if hasattr(entry, 'published') and entry.published:
-                item_parts.append(f'PubDate: {xml_escape(entry.published)}<br>')
-
-            # Description/Content - feedparser handles CDATA and entity unescaping automatically
-            # Only add "Summary:" label for Atom <summary> tags
-            content = None
-            add_label = False
-
-            if hasattr(entry, 'content') and entry.content:
-                # Atom <content> - no label, just content
-                content = entry.content[0].value if entry.content[0].value else None
-            elif hasattr(entry, 'summary'):
-                # Could be RSS <description> or Atom <summary>
-                # feedparser maps both to entry.summary
-                content = entry.summary if entry.summary else None
-                # Only add "Summary:" label for Atom feeds (which use <summary> tag)
-                if is_atom:
-                    add_label = True
-
-            # Add content with or without label
-            if content:
-                if add_label:
-                    item_parts.append(f'Summary:<br>{content}')
-                else:
-                    item_parts.append(content)
-            else:
-                # No content - just show <none>
-                item_parts.append('<none>')
-
-            # Join all parts of this item
-            if item_parts:
-                formatted_items.append('\n'.join(item_parts))
-
-        # Wrap each item in a div with classes (first, last, item-N)
-        items_html = []
-        total_items = len(formatted_items)
-        for idx, item in enumerate(formatted_items):
-            classes = ['rss-item']
-            if idx == 0:
-                classes.append('first')
-            if idx == total_items - 1:
-                classes.append('last')
-            classes.append(f'item-{idx + 1}')
-
-            class_str = ' '.join(classes)
-            items_html.append(f'<div class="{class_str}">{item}</div>')
-        return '<html><body>\n'+"\n<br><br>".join(items_html)+'\n</body></html>'
-
-    except Exception as e:
-        logger.warning(f"Error formatting RSS items: {str(e)}")
-        # Fall back to original content
-        return rss_content
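The removed helper above leans entirely on feedparser for the messy parts (CDATA, entities, RSS vs Atom field names). A tiny standalone sketch of that behaviour — the sample feed string here is invented:

    import feedparser

    sample = """<?xml version="1.0"?>
    <rss version="2.0"><channel>
    <item>
      <title>Price drop</title>
      <link>https://example.com/a</link>
      <description><![CDATA[<b>Now</b> $9.99]]></description>
    </item>
    </channel></rss>"""

    feed = feedparser.parse(sample)
    print(feed.version)  # 'rss20', which is how is_atom above is decided
    for entry in feed.entries:
        # CDATA and entities arrive already unescaped, which format_rss_items relies on
        print(entry.title, entry.link, entry.summary)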
@@ -9,7 +9,7 @@ set -x
 # SOCKS5 related - start simple Socks5 proxy server
 # SOCKSTEST=xyz should show in the logs of this service to confirm it fetched
 docker run --network changedet-network -d --hostname socks5proxy --rm --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy
-docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth -e REQUIRE_AUTH=false serjs/go-socks5-proxy
+docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth serjs/go-socks5-proxy

 echo "---------------------------------- SOCKS5 -------------------"
 # SOCKS5 related - test from proxies.json
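The test above drives those two containers via proxies.json. As a sketch of what such a file could look like — the key names and the label/url fields follow the documented proxies.json shape, but the exact fixture the test uses lives in the repo and may differ:

    import json

    proxies = {
        "socks5proxy": {
            "label": "socks5proxy",
            # Credentials match the PROXY_USER/PROXY_PASSWORD passed to docker above
            "url": "socks5://proxy_user123:proxy_pass123@socks5proxy:1080",
        },
        "socks5proxy-noauth": {
            "label": "socks5proxy-noauth",
            "url": "socks5://socks5proxy-noauth:1080",
        },
    }

    with open("proxies.json", "w") as f:
        json.dump(proxies, f, indent=2)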
changedetectionio/safe_jinja.py (new file, 24 lines)
@@ -0,0 +1,24 @@
+"""
+Safe Jinja2 render with max payload sizes
+
+See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
+"""
+
+import jinja2.sandbox
+import typing as t
+import os
+
+JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))
+
+# This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available.
+# (Which also limits available functions that could be called)
+def render(template_str, **args: t.Any) -> str:
+    jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension'])
+    output = jinja2_env.from_string(template_str).render(args)
+    return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
+
+def render_fully_escaped(content):
+    env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
+    template = env.from_string("{{ some_html|e }}")
+    return template.render(some_html=content)
+
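A short usage sketch of the new module — it assumes the jinja2-time package is installed, since render() loads its TimeExtension:

    from changedetectionio import safe_jinja

    # Sandboxed: template code cannot reach os, open(), or object internals
    print(safe_jinja.render("Watch {{ name }} changed", name="my watch"))

    # Output is clamped to JINJA2_MAX_RETURN_PAYLOAD_SIZE regardless of what the template expands to
    print(safe_jinja.render("{{ 'x' * 100 }}"))

    # For untrusted input that must never render as HTML
    print(safe_jinja.render_fully_escaped('<script>alert(1)</script>'))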
@@ -29,7 +29,7 @@ $(document).ready(function () {
         $(this).text(new Date($(this).data("utc")).toLocaleString());
     })

-    const timezoneInput = $('#application-scheduler_timezone_default');
+    const timezoneInput = $('#application-timezone');
     if(timezoneInput.length) {
         const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone;
         if (!timezoneInput.val().trim()) {
@@ -117,16 +117,15 @@ $(document).ready(function () {
         }
     })

-    socket.on('general_stats_update', function (general_stats) {
-        // Tabs at bottom of list
-        $('#watch-table-wrapper').toggleClass("has-unread-changes", general_stats.unread_changes_count !==0)
-        $('#watch-table-wrapper').toggleClass("has-error", general_stats.count_errors !== 0)
-        $('#post-list-with-errors a').text(`With errors (${ new Intl.NumberFormat(navigator.language).format(general_stats.count_errors) })`);
-        $('#unread-tab-counter').text(new Intl.NumberFormat(navigator.language).format(general_stats.unread_changes_count));
-    });
-
     socket.on('watch_update', function (data) {
         const watch = data.watch;
+        const general_stats = data.general_stats;

+        // Log the entire watch object for debugging
+        console.log('!!! WATCH UPDATE EVENT RECEIVED !!!');
+        console.log(`${watch.event_timestamp} - Watch update ${watch.uuid} - Checking now - ${watch.checking_now} - UUID in URL ${window.location.href.includes(watch.uuid)}`);
+        console.log('Watch data:', watch);
+        console.log('General stats:', general_stats);

         // Updating watch table rows
         const $watchRow = $('tr[data-watch-uuid="' + watch.uuid + '"]');
@@ -151,6 +150,12 @@ $(document).ready(function () {

         console.log('Updated UI for watch:', watch.uuid);
         }

+        // Tabs at bottom of list
+        $('#post-list-mark-views').toggleClass("has-unviewed", general_stats.has_unviewed);
+        $('#post-list-with-errors').toggleClass("has-error", general_stats.count_errors !== 0)
+        $('#post-list-with-errors a').text(`With errors (${ general_stats.count_errors })`);
+
         $('body').toggleClass('checking-now', watch.checking_now && window.location.href.includes(watch.uuid));
     });

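On the wire, either event shape can be consumed by any Socket.IO client. A hedged Python sketch using the python-socketio package — the endpoint URL and open access are assumptions, a real instance may sit behind authentication:

    import socketio

    sio = socketio.Client()

    @sio.on('general_stats_update')
    def on_general_stats(stats):
        # Same counters the page JS feeds into the bottom-of-list tabs
        print("unread:", stats.get('unread_changes_count'), "errors:", stats.get('count_errors'))

    @sio.on('watch_update')
    def on_watch_update(data):
        print("watch row update:", data['watch'].get('uuid'))

    sio.connect('http://localhost:5000')
    sio.wait()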
@@ -51,7 +51,6 @@ $(document).ready(function () {
     $('#notification_body').val('');
     $('#notification_format').val('System default');
     $('#notification_urls').val('');
-    $('#notification_muted_none').prop('checked', true); // in the case of a ternary field
     e.preventDefault();
 });
 $("#notification-token-toggle").click(function (e) {
@@ -17,6 +17,15 @@ body.checking-now {
   position: fixed;
 }

+#post-list-buttons {
+  #post-list-with-errors.has-error {
+    display: inline-block !important;
+  }
+  #post-list-mark-views.has-unviewed {
+    display: inline-block !important;
+  }
+}
+

@@ -127,44 +127,5 @@
       display: inline-block !important;
     }
   }

 }

-#watch-table-wrapper {
-  /* general styling */
-  #post-list-buttons {
-    text-align: right;
-    padding: 0px;
-    margin: 0px;
-
-    li {
-      display: inline-block;
-    }
-
-    a {
-      border-top-left-radius: initial;
-      border-top-right-radius: initial;
-      border-bottom-left-radius: 5px;
-      border-bottom-right-radius: 5px;
-    }
-  }
-
-  /* post list dynamically on/off stuff */
-
-  &.has-error {
-    #post-list-buttons {
-      #post-list-with-errors {
-        display: inline-block !important;
-      }
-    }
-  }
-
-  &.has-unread-changes {
-    #post-list-buttons {
-      #post-list-unread, #post-list-mark-views, #post-list-unread {
-        display: inline-block !important;
-      }
-    }
-  }
-}
@@ -1,114 +0,0 @@
-
-// Ternary radio button group component
-.ternary-radio-group {
-  display: flex;
-  gap: 0;
-  border: 1px solid var(--color-grey-750);
-  border-radius: 4px;
-  overflow: hidden;
-  width: fit-content;
-  background: var(--color-background);
-
-  .ternary-radio-option {
-    position: relative;
-    cursor: pointer;
-    margin: 0;
-    display: flex;
-    align-items: center;
-
-    input[type="radio"] {
-      position: absolute;
-      opacity: 0;
-      width: 0;
-      height: 0;
-    }
-
-    .ternary-radio-label {
-      padding: 8px 16px;
-      background: var(--color-grey-900);
-      border: none;
-      border-right: 1px solid var(--color-grey-750);
-      font-size: 13px;
-      font-weight: 500;
-      color: var(--color-text);
-      transition: all 0.2s ease;
-      cursor: pointer;
-      display: block;
-      text-align: center;
-    }
-
-    &:last-child .ternary-radio-label {
-      border-right: none;
-    }
-
-    input:checked + .ternary-radio-label {
-      background: var(--color-link);
-      color: var(--color-text-button);
-      font-weight: 600;
-
-      &.ternary-default {
-        background: var(--color-grey-600);
-        color: var(--color-text-button);
-      }
-
-      &:hover {
-        background: #1a7bc4;
-
-        &.ternary-default {
-          background: var(--color-grey-500);
-        }
-      }
-    }
-
-    &:hover .ternary-radio-label {
-      background: var(--color-grey-800);
-    }
-  }
-
-  @media (max-width: 480px) {
-    width: 100%;
-
-    .ternary-radio-label {
-      flex: 1;
-      min-width: auto;
-    }
-  }
-}
-
-// Standard radio button styling
-input[type="radio"].pure-radio:checked + label,
-input[type="radio"].pure-radio:checked {
-  background: var(--color-link);
-  color: var(--color-text-button);
-}
-
-html[data-darkmode="true"] {
-  .ternary-radio-group {
-    .ternary-radio-option {
-      .ternary-radio-label {
-        background: var(--color-grey-350);
-      }
-
-      &:hover .ternary-radio-label {
-        background: var(--color-grey-400);
-      }
-
-      input:checked + .ternary-radio-label {
-        background: var(--color-link);
-        color: var(--color-text-button);
-
-        &.ternary-default {
-          background: var(--color-grey-600);
-        }
-
-        &:hover {
-          background: #1a7bc4;
-
-          &.ternary-default {
-            background: var(--color-grey-500);
-          }
-        }
-      }
-    }
-  }
-}
@@ -20,7 +20,7 @@
 @use "parts/lister_extra";
 @use "parts/socket";
 @use "parts/visualselector";
-@use "parts/widgets";

 body {
   color: var(--color-text);
@@ -203,6 +203,24 @@ code {
 }


+#post-list-buttons {
+  text-align: right;
+  padding: 0px;
+  margin: 0px;
+
+  li {
+    display: inline-block;
+  }
+
+  a {
+    border-top-left-radius: initial;
+    border-top-right-radius: initial;
+    border-bottom-left-radius: 5px;
+    border-bottom-right-radius: 5px;
+  }
+}

 body:after {
   content: "";
   background: linear-gradient(130deg, var(--color-background-gradient-first), var(--color-background-gradient-second) 41.07%, var(--color-background-gradient-third) 84.05%);
@@ -344,7 +362,7 @@ label {
   }
 }

-.grey-form-border {
+#notification-customisation {
   border: 1px solid var(--color-border-notification);
   padding: 0.5rem;
   border-radius: 5px;
@@ -1112,12 +1130,11 @@ ul {
 }

 #realtime-conn-error {
-  position: fixed;
+  position: absolute;
   bottom: 0;
-  left: 0;
+  left: 30px;
   background: var(--color-warning);
   padding: 10px;
   font-size: 0.8rem;
   color: #fff;
-  opacity: 0.8;
 }

(File diff suppressed because one or more lines are too long)
@@ -202,13 +202,14 @@ class ChangeDetectionStore:
         return seconds

     @property
-    def unread_changes_count(self):
-        unread_changes_count = 0
+    def has_unviewed(self):
+        if not self.__data.get('watching'):
+            return None

         for uuid, watch in self.__data['watching'].items():
             if watch.history_n >= 2 and watch.viewed == False:
-                unread_changes_count += 1
-        return unread_changes_count
+                return True
+        return False

     @property
     def data(self):
@@ -261,6 +262,11 @@ class ChangeDetectionStore:
         extras = deepcopy(self.data['watching'][uuid])
         new_uuid = self.add_watch(url=url, extras=extras)
         watch = self.data['watching'][new_uuid]

+        if self.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
+            # Because it will be recalculated on the next fetch
+            self.data['watching'][new_uuid]['title'] = None
+
         return new_uuid

     def url_exists(self, url):
@@ -302,6 +308,7 @@ class ChangeDetectionStore:
             'browser_steps',
             'css_filter',
             'extract_text',
+            'extract_title_as_title',
             'headers',
             'ignore_text',
             'include_filters',
@@ -316,7 +323,6 @@
             'title',
             'trigger_text',
             'url',
-            'use_page_title_in_list',
             'webdriver_js_execute_code',
         ]:
             if res.get(k):
@@ -967,20 +973,6 @@ class ChangeDetectionStore:
             f_d.write(zlib.compress(f_j.read()))
             os.unlink(json_path)

-    def update_20(self):
-        for uuid, watch in self.data['watching'].items():
-            if self.data['watching'][uuid].get('extract_title_as_title'):
-                self.data['watching'][uuid]['use_page_title_in_list'] = self.data['watching'][uuid].get('extract_title_as_title')
-                del self.data['watching'][uuid]['extract_title_as_title']
-
-        if self.data['settings']['application'].get('extract_title_as_title'):
-            self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')
-
-    def update_21(self):
-        self.data['settings']['application']['scheduler_timezone_default'] = self.data['settings']['application'].get('timezone')
-        del self.data['settings']['application']['timezone']
-
     def add_notification_url(self, notification_url):

         logger.debug(f">>> Adding new notification_url - '{notification_url}'")
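The update_20/update_21 methods removed above are one-shot schema migrations (extract_title_as_title → use_page_title_in_list, timezone → scheduler_timezone_default). The datastore applies each update_N whose number is above the stored schema version; a rough sketch of that pattern, not the project's exact runner:

    class StoreSketch:
        def __init__(self):
            # Hypothetical starting state; the real store loads this from disk
            self.data = {'settings': {'application': {'schema_version': 19}}}

        def update_20(self):
            print("migrating watch title settings")

        def run_updates(self):
            # Call update_N methods in ascending order, bumping the recorded version
            while True:
                n = self.data['settings']['application']['schema_version'] + 1
                update = getattr(self, f"update_{n}", None)
                if not update:
                    break
                update()
                self.data['settings']['application']['schema_version'] = n

    StoreSketch().run_updates()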
@@ -33,7 +33,7 @@
 <div id="notification-test-log" style="display: none;"><span class="pure-form-message-inline">Processing..</span></div>
 </div>
 </div>
-<div class="pure-control-group grey-form-border">
+<div id="notification-customisation" class="pure-control-group">
 <div class="pure-control-group">
 {{ render_field(form.notification_title, class="m-d notification-title", placeholder=settings_application['notification_title']) }}
 <span class="pure-form-message-inline">Title for all notifications</span>
@@ -70,7 +70,7 @@
 </tr>
 <tr>
 <td><code>{{ '{{watch_title}}' }}</code></td>
-<td>The page title of the watch, uses <title> if not set, falls back to URL</td>
+<td>The title of the watch.</td>
 </tr>
 <tr>
 <td><code>{{ '{{watch_tag}}' }}</code></td>
@@ -1,47 +1,14 @@
 {% macro render_field(field) %}
-<div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field.label }}</div>
-<div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }}
-{% if field.top_errors %}
-    top
-    <ul class="errors top-errors">
-    {% for error in field.top_errors %}
-        <li>{{ error }}</li>
-    {% endfor %}
-    </ul>
-{% endif %}
-{% if field.errors %}
-    <ul class=errors>
-    {% if field.errors is mapping and 'form' in field.errors %}
-        {# and subfield form errors, such as used in RequiredFormField() for TimeBetweenCheckForm sub form #}
-        {% set errors = field.errors['form'] %}
-        {% for error in errors %}
-            <li>{{ error }}</li>
-        {% endfor %}
-    {% elif field.type == 'FieldList' %}
-        {# Handle FieldList of FormFields - errors is a list of dicts, one per entry #}
-        {% for idx, entry_errors in field.errors|enumerate %}
-            {% if entry_errors is mapping and entry_errors %}
-                {# Only show entries that have actual errors #}
-                <li><strong>Entry {{ idx + 1 }}:</strong>
-                    <ul>
-                    {% for field_name, messages in entry_errors.items() %}
-                        {% for message in messages %}
-                            <li>{{ field_name }}: {{ message }}</li>
-                        {% endfor %}
-                    {% endfor %}
-                    </ul>
-                </li>
-            {% endif %}
-        {% endfor %}
-    {% else %}
-        {# regular list of errors with this field #}
-        {% for error in field.errors %}
-            <li>{{ error }}</li>
-        {% endfor %}
-    {% endif %}
-    </ul>
-{% endif %}
-</div>
+<div {% if field.errors %} class="error" {% endif %}>{{ field.label }}</div>
+<div {% if field.errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }}
+{% if field.errors %}
+    <ul class=errors>
+    {% for error in field.errors %}
+        <li>{{ error }}</li>
+    {% endfor %}
+    </ul>
+{% endif %}
+</div>
 {% endmacro %}

 {% macro render_checkbox_field(field) %}
@@ -57,23 +24,6 @@
 </div>
 {% endmacro %}

-{% macro render_ternary_field(field, BooleanField=false) %}
-    {% if BooleanField %}
-        {% set _ = field.__setattr__('boolean_mode', true) %}
-    {% endif %}
-    <div class="ternary-field {% if field.errors %} error {% endif %}">
-        <div class="ternary-field-label">{{ field.label }}</div>
-        <div class="ternary-field-widget">{{ field(**kwargs)|safe }}</div>
-        {% if field.errors %}
-            <ul class=errors>
-            {% for error in field.errors %}
-                <li>{{ error }}</li>
-            {% endfor %}
-            </ul>
-        {% endif %}
-    </div>
-{% endmacro %}
-
 {% macro render_simple_field(field) %}
 <span class="label {% if field.errors %}error{% endif %}">{{ field.label }}</span>
@@ -111,39 +61,6 @@
 {{ field(**kwargs)|safe }}
 {% endmacro %}

-{% macro render_fieldlist_with_inline_errors(fieldlist) %}
-    {# Specialized macro for FieldList(FormField(...)) that renders errors inline with each field #}
-    <div {% if fieldlist.errors %} class="error" {% endif %}>{{ fieldlist.label }}</div>
-    <div {% if fieldlist.errors %} class="error" {% endif %}>
-        <ul id="{{ fieldlist.id }}">
-        {% for entry in fieldlist %}
-            <li {% if entry.errors %} class="error" {% endif %}>
-                <label for="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}>{{ fieldlist.label.text }}-{{ loop.index0 }}</label>
-                <table id="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}>
-                    <tbody>
-                    {% for subfield in entry %}
-                        <tr {% if subfield.errors %} class="error" {% endif %}>
-                            <th {% if subfield.errors %} class="error" {% endif %}><label for="{{ subfield.id }}" {% if subfield.errors %} class="error" {% endif %}>{{ subfield.label.text }}</label></th>
-                            <td {% if subfield.errors %} class="error" {% endif %}>
-                                {{ subfield(**kwargs)|safe }}
-                                {% if subfield.errors %}
-                                    <ul class="errors">
-                                    {% for error in subfield.errors %}
-                                        <li class="error">{{ error }}</li>
-                                    {% endfor %}
-                                    </ul>
-                                {% endif %}
-                            </td>
-                        </tr>
-                    {% endfor %}
-                    </tbody>
-                </table>
-            </li>
-        {% endfor %}
-        </ul>
-    </div>
-{% endmacro %}
-
 {% macro render_conditions_fieldlist_of_formfields_as_table(fieldlist, table_id="rulesTable") %}
 <div class="fieldlist_formfields" id="{{ table_id }}">
 <div class="fieldlist-header">
@@ -5,7 +5,6 @@
|
|||||||
<meta charset="utf-8" >
|
<meta charset="utf-8" >
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0" >
|
<meta name="viewport" content="width=device-width, initial-scale=1.0" >
|
||||||
<meta name="description" content="Self hosted website change detection." >
|
<meta name="description" content="Self hosted website change detection." >
|
||||||
<meta name="robots" content="noindex">
|
|
||||||
<title>Change Detection{{extra_title}}</title>
|
<title>Change Detection{{extra_title}}</title>
|
||||||
{% if app_rss_token %}
|
{% if app_rss_token %}
|
||||||
<link rel="alternate" type="application/rss+xml" title="Changedetection.io » Feed{% if active_tag_uuid %}- {{active_tag.title}}{% endif %}" href="{{ url_for('rss.feed', tag=active_tag_uuid , token=app_rss_token)}}" >
|
<link rel="alternate" type="application/rss+xml" title="Changedetection.io » Feed{% if active_tag_uuid %}- {{active_tag.title}}{% endif %}" href="{{ url_for('rss.feed', tag=active_tag_uuid , token=app_rss_token)}}" >
|
||||||
@@ -41,7 +40,7 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
</head>
|
</head>
|
||||||
|
|
||||||
<body class="{{extra_classes}}">
|
<body class="">
|
||||||
<div class="header">
|
<div class="header">
|
||||||
<div class="pure-menu-fixed" style="width: 100%;">
|
<div class="pure-menu-fixed" style="width: 100%;">
|
||||||
<div class="home-menu pure-menu pure-menu-horizontal" id="nav-menu">
|
<div class="home-menu pure-menu pure-menu-horizontal" id="nav-menu">
|
||||||
@@ -237,7 +236,7 @@
|
|||||||
<script src="{{url_for('static_content', group='js', filename='toggle-theme.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='toggle-theme.js')}}" defer></script>
|
||||||
|
|
||||||
<div id="checking-now-fixed-tab" style="display: none;"><span class="spinner"></span><span> Checking now</span></div>
|
<div id="checking-now-fixed-tab" style="display: none;"><span class="spinner"></span><span> Checking now</span></div>
|
||||||
<div id="realtime-conn-error" style="display:none">Real-time updates offline</div>
|
<div id="realtime-conn-error" style="display:none">Offline</div>
|
||||||
</body>
|
</body>
|
||||||
|
|
||||||
</html>
|
</html>
|
||||||
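The `extra_classes` value dropped from the `<body>` tag above is normally injected by the view that renders the template; a minimal sketch of that mechanism, assuming a standard Flask `render_template` call (the route and class value are illustrative):

    from flask import Flask, render_template

    app = Flask(__name__)

    @app.route('/')
    def index():
        # Illustrative only: the real application derives this from its settings
        return render_template('some-page.html', extra_classes="darkmode")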
@@ -1,6 +1,6 @@
 {% extends 'base.html' %}
 {% block content %}
-{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
+{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
 {% from '_common_fields.html' import render_common_settings_form %}
 <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
@@ -72,16 +72,15 @@
             <div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div>
             <div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
         </div>
-        <div class="pure-control-group">
-            {{ render_field(form.tags) }}
-            <span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
-        </div>
         <div class="pure-control-group inline-radio">
             {{ render_field(form.processor) }}
         </div>
         <div class="pure-control-group">
-            {{ render_field(form.title, class="m-d", placeholder=watch.label) }}
-            <span class="pure-form-message-inline">Automatically uses the page title if found, you can also use your own title/description here</span>
+            {{ render_field(form.title, class="m-d") }}
+        </div>
+        <div class="pure-control-group">
+            {{ render_field(form.tags) }}
+            <span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
         </div>
         <div class="pure-control-group time-between-check border-fieldset">

@@ -102,16 +101,15 @@
             </div>
             <br>
         </div>
+        <div class="pure-control-group">
+            {{ render_checkbox_field(form.extract_title_as_title) }}
+        </div>
         <div class="pure-control-group">
             {{ render_checkbox_field(form.filter_failure_notification_send) }}
             <span class="pure-form-message-inline">
             Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
             </span>
         </div>
-        <div class="pure-control-group">
-            {{ render_ternary_field(form.use_page_title_in_list) }}
-        </div>
     </fieldset>
 </div>

@@ -264,7 +262,7 @@ Math: {{ 1 + 1 }}") }}
     <div class="tab-pane-inner" id="notifications">
         <fieldset>
             <div class="pure-control-group inline-radio">
-                {{ render_ternary_field(form.notification_muted, BooleanField=true) }}
+                {{ render_checkbox_field(form.notification_muted) }}
             </div>
             {% if watch_needs_selenium_or_playwright %}
             <div class="pure-control-group inline-radio">
@@ -471,11 +469,11 @@ Math: {{ 1 + 1 }}") }}
         <div class="pure-control-group">
             {{ render_button(form.save_button) }}
             <a href="{{url_for('ui.form_delete', uuid=uuid)}}"
-               class="pure-button button-error ">Delete</a>
+               class="pure-button button-small button-error ">Delete</a>
             {% if watch.history_n %}<a href="{{url_for('ui.clear_watch_history', uuid=uuid)}}"
-               class="pure-button button-error">Clear History</a>{% endif %}
+               class="pure-button button-small button-error ">Clear History</a>{% endif %}
             <a href="{{url_for('ui.form_clone', uuid=uuid)}}"
-               class="pure-button">Clone & Edit</a>
+               class="pure-button button-small ">Clone & Edit</a>
         </div>
     </div>
 </form>
@@ -26,10 +26,7 @@
             <li>Changing this will affect the comparison checksum which may trigger an alert</li>
         </ul>
     </span>
-    <br><br>
-    <div class="pure-control-group">
-        {{ render_ternary_field(form.strip_ignored_lines) }}
-    </div>
 </fieldset>

 <fieldset>
@@ -4,7 +4,6 @@ import time
 from threading import Thread

 import pytest
-import arrow
 from changedetectionio import changedetection_app
 from changedetectionio import store
 import os
@@ -30,39 +29,16 @@ def reportlog(pytestconfig):
     logger.remove(handler_id)


-@pytest.fixture
-def environment(mocker):
-    """Mock arrow.now() to return a fixed datetime for testing jinja2 time extension."""
-    # Fixed datetime: Wed, 09 Dec 2015 23:33:01 UTC
-    # This is calculated to match the test expectations when offsets are applied
-    fixed_datetime = arrow.Arrow(2015, 12, 9, 23, 33, 1, tzinfo='UTC')
-    # Patch arrow.now in the TimeExtension module where it's actually used
-    mocker.patch('changedetectionio.jinja2_custom.extensions.TimeExtension.arrow.now', return_value=fixed_datetime)
-    return fixed_datetime
-
-
-def format_memory_human(bytes_value):
-    """Format memory in human-readable units (KB, MB, GB)"""
-    if bytes_value < 1024:
-        return f"{bytes_value} B"
-    elif bytes_value < 1024 ** 2:
-        return f"{bytes_value / 1024:.2f} KB"
-    elif bytes_value < 1024 ** 3:
-        return f"{bytes_value / (1024 ** 2):.2f} MB"
-    else:
-        return f"{bytes_value / (1024 ** 3):.2f} GB"

 def track_memory(memory_usage, ):
     process = psutil.Process(os.getpid())
     while not memory_usage["stop"]:
         current_rss = process.memory_info().rss
         memory_usage["peak"] = max(memory_usage["peak"], current_rss)
-        memory_usage["current"] = current_rss  # Keep updating current
         time.sleep(0.01)  # Adjust the sleep time as needed

 @pytest.fixture(scope='function')
 def measure_memory_usage(request):
-    memory_usage = {"peak": 0, "current": 0, "stop": False}
+    memory_usage = {"peak": 0, "stop": False}
     tracker_thread = Thread(target=track_memory, args=(memory_usage,))
     tracker_thread.start()

@@ -71,17 +47,16 @@ def measure_memory_usage(request):
     memory_usage["stop"] = True
     tracker_thread.join()

-    # Note: psutil returns RSS memory in bytes
-    peak_human = format_memory_human(memory_usage["peak"])
-    s = f"{time.time()} {request.node.fspath} - '{request.node.name}' - Peak memory: {peak_human}"
+    # Note: ru_maxrss is in kilobytes on Unix-based systems
+    max_memory_used = memory_usage["peak"] / 1024  # Convert to MB
+    s = f"Peak memory used by the test {request.node.fspath} - '{request.node.name}': {max_memory_used:.2f} MB"
     logger.debug(s)

     with open("test-memory.log", 'a') as f:
         f.write(f"{s}\n")

     # Assert that the memory usage is less than 200MB
-    # assert peak_memory_kb < 150 * 1024, f"Memory usage exceeded 150MB: {peak_human}"
+    # assert max_memory_used < 150, f"Memory usage exceeded 200MB: {max_memory_used:.2f} MB"


 def cleanup(datastore_path):
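The `measure_memory_usage` fixture in this conftest.py samples the process RSS on a background thread for the duration of each test and logs the peak to test-memory.log. A minimal sketch of a test consuming it (the endpoint and assertion are illustrative, not from the diff):

    def test_something_heavy(client, live_server, measure_memory_usage):
        # The fixture needs no direct interaction: it starts sampling before
        # the test body runs and records the peak RSS when the test finishes.
        res = client.get("/")
        assert res.status_code == 200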
@@ -29,8 +29,13 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
     assert b"Settings updated." in res.data

     # Add our URL to the import page
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)

     if make_test_use_extra_browser:
@@ -50,8 +55,7 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
             "tags": "",
             "headers": "",
             'fetch_backend': f"extra_browser_{custom_browser_name}",
-            'webdriver_js_execute_code': '',
-            "time_between_check_use_default": "y"
+            'webdriver_js_execute_code': ''
         },
         follow_redirects=True
     )
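The same substitution (seeding a watch straight into the datastore plus an explicit recheck, versus posting to the import page) recurs throughout the test hunks below. A sketch of a helper that would factor out the direct-datastore form, built only from the calls shown on the "-" side of these hunks (the helper name itself is hypothetical):

    from flask import url_for

    def add_watch_and_check(client, test_url):
        # Create the watch directly in the datastore, then ask the UI to recheck it
        uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        return uuid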
@@ -28,7 +28,6 @@ def test_execute_custom_js(client, live_server, measure_memory_usage):
             'fetch_backend': "html_webdriver",
             'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();',
             'headers': "testheader: yes\buser-agent: MyCustomAgent",
-            "time_between_check_use_default": "y",
         },
         follow_redirects=True
     )
@@ -27,7 +27,6 @@ def test_preferred_proxy(client, live_server, measure_memory_usage):
             "proxy": "proxy-two",
             "tags": "",
             "url": url,
-            "time_between_check_use_default": "y",
         },
         follow_redirects=True
     )
@@ -62,7 +62,6 @@ def test_noproxy_option(client, live_server, measure_memory_usage):
             "proxy": "no-proxy",
             "tags": "",
             "url": url,
-            "time_between_check_use_default": "y",
         },
         follow_redirects=True
     )
@@ -44,7 +44,6 @@ def test_proxy_noconnect_custom(client, live_server, measure_memory_usage):
         "url": test_url,
         "fetch_backend": "html_webdriver" if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else "html_requests",
         "proxy": "ui-0custom-test-proxy",
-        "time_between_check_use_default": "y",
     }

     res = client.post(
@@ -49,39 +49,3 @@ def test_select_custom(client, live_server, measure_memory_usage):
     #
     # Now we should see the request in the container logs for "squid-squid-custom" because it will be the only default

-
-def test_custom_proxy_validation(client, live_server, measure_memory_usage):
-    # live_server_setup(live_server) # Setup on conftest per function
-
-    # Goto settings, add our custom one
-    res = client.post(
-        url_for("settings.settings_page"),
-        data={
-            "requests-time_between_check-minutes": 180,
-            "application-ignore_whitespace": "y",
-            "application-fetch_backend": 'html_requests',
-            "requests-extra_proxies-0-proxy_name": "custom-test-proxy",
-            "requests-extra_proxies-0-proxy_url": "xxxxhtt/333??p://test:awesome@squid-custom:3128",
-        },
-        follow_redirects=True
-    )
-
-    assert b"Settings updated." not in res.data
-    assert b'Proxy URLs must start with' in res.data
-
-
-    res = client.post(
-        url_for("settings.settings_page"),
-        data={
-            "requests-time_between_check-minutes": 180,
-            "application-ignore_whitespace": "y",
-            "application-fetch_backend": 'html_requests',
-            "requests-extra_proxies-0-proxy_name": "custom-test-proxy",
-            "requests-extra_proxies-0-proxy_url": "https://",
-        },
-        follow_redirects=True
-    )
-
-    assert b"Settings updated." not in res.data
-    assert b"Invalid URL." in res.data
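The removed test above exercised server-side validation of extra proxy URLs: a scheme check ("Proxy URLs must start with ...") plus a general URL-validity check ("Invalid URL."). A minimal sketch of the kind of check it implies, in plain Python; the function name and the accepted scheme list are illustrative assumptions, not the project's actual validator:

    from urllib.parse import urlparse

    ALLOWED_SCHEMES = ("http", "https", "socks5", "socks5h")  # illustrative list

    def validate_proxy_url(url):
        """Return an error message, or None when the proxy URL looks usable."""
        parsed = urlparse(url)
        if parsed.scheme not in ALLOWED_SCHEMES:
            # e.g. "xxxxhtt/333??p://..." parses with no recognised scheme
            return "Proxy URLs must start with " + ", ".join(ALLOWED_SCHEMES)
        if not parsed.netloc:
            # e.g. "https://" has a scheme but no host
            return "Invalid URL."
        return None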
@@ -2,7 +2,7 @@
 import json
 import os
 from flask import url_for
-from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, delete_all_watches
+from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client


 def set_response():
@@ -66,7 +66,6 @@ def test_socks5(client, live_server, measure_memory_usage):
             "proxy": "ui-0socks5proxy",
             "tags": "",
             "url": test_url,
-            "time_between_check_use_default": "y",
         },
         follow_redirects=True
     )
@@ -98,5 +97,6 @@ def test_socks5(client, live_server, measure_memory_usage):
     )
     assert b"OK" in res.data

-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

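`delete_all_watches` replaces the inline delete-all-and-assert pair in many of these tests. A plausible definition in tests/util.py, reconstructed from the two lines it replaces; this is an assumption, the helper's real body is not shown in this diff:

    from flask import url_for

    def delete_all_watches(client):
        # Presumed body, mirroring the inline pattern on the "+" side of these hunks
        res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
        assert b'Deleted' in res.data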
@@ -53,7 +53,6 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage)
             "proxy": "socks5proxy",
             "tags": "",
             "url": test_url,
-            "time_between_check_use_default": "y",
         },
         follow_redirects=True
     )
@@ -5,7 +5,7 @@ import re
 from flask import url_for
 from changedetectionio.tests.util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup, \
     wait_for_all_checks, \
-    set_longer_modified_response, delete_all_watches
+    set_longer_modified_response
 from changedetectionio.tests.util import extract_UUID_from_client
 import logging
 import base64
@@ -85,7 +85,8 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas
     assert '(added) So let\'s see what happens.\r\n' in msg  # The plaintext part with \r\n
     assert 'Content-Type: text/html' in msg
     assert '(added) So let\'s see what happens.<br>' in msg  # the html part
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data


 def test_check_notification_email_formats_default_Text_override_HTML(client, live_server, measure_memory_usage):
@@ -156,8 +157,7 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
         data={
             "url": test_url,
             "notification_format": 'HTML',
-            'fetch_backend': "html_requests",
-            "time_between_check_use_default": "y"},
+            'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -178,4 +178,5 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv
     assert '<' not in msg
     assert 'Content-Type: text/html' in msg

-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
@@ -2,7 +2,7 @@ from .util import live_server_setup, wait_for_all_checks
 from flask import url_for
 import time

-def test_check_access_control(app, client, live_server, measure_memory_usage):
+def test_check_access_control(app, client, live_server):
     # Still doesnt work, but this is closer.
     # live_server_setup(live_server) # Setup on conftest per function

@@ -3,7 +3,7 @@
 import os.path

 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output, delete_all_watches
+from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output
 import time

 def set_original(excluding=None, add_line=None):
@@ -44,8 +44,12 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
     set_original()
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data

     # Give the thread time to pick it up
     wait_for_all_checks(client)
@@ -57,8 +61,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
         data={"trigger_text": 'The golden line',
               "url": test_url,
               'fetch_backend': "html_requests",
-              'filter_text_removed': 'y',
-              "time_between_check_use_default": "y"},
+              'filter_text_removed': 'y'},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -71,7 +74,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
     wait_for_all_checks(client)
     time.sleep(0.5)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data

     # The trigger line is REMOVED, this should trigger
     set_original(excluding='The golden line')
@@ -80,7 +83,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
     wait_for_all_checks(client)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data

     time.sleep(1)

@@ -94,21 +97,23 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory
     wait_for_all_checks(client)
     time.sleep(1)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data

     # Remove it again, and we should get a trigger
     set_original(excluding='The golden line')
     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
     wait_for_all_checks(client)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data

-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data


 def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage):

-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
     time.sleep(1)

     # Give the endpoint time to spin up
@@ -131,8 +136,12 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
     set_original()
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data

     # Give the thread time to pick it up
     wait_for_all_checks(client)
@@ -145,8 +154,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
               'processor': 'text_json_diff',
               'fetch_backend': "html_requests",
               'filter_text_removed': '',
-              'filter_text_added': 'y',
-              "time_between_check_use_default": "y"},
+              'filter_text_added': 'y'},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -159,7 +167,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa

     wait_for_all_checks(client)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data

     # The trigger line is ADDED, this should trigger
     set_original(add_line='<p>Oh yes please</p>')
@@ -167,7 +175,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
     wait_for_all_checks(client)
     res = client.get(url_for("watchlist.index"))

-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data

     # Takes a moment for apprise to fire
     wait_for_notification_endpoint_output()
@@ -177,4 +185,5 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
     assert b'-Oh yes please' in response
     assert '网站监测 内容更新了'.encode('utf-8') in response

-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, delete_all_watches
+from .util import live_server_setup, wait_for_all_checks

 import json
 import uuid
@@ -276,7 +276,8 @@ def test_access_denied(client, live_server, measure_memory_usage):
     assert res.status_code == 200

     # Cleanup everything
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

     res = client.post(
         url_for("settings.settings_page"),
@@ -310,7 +311,7 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
                     "value": "."  # contains anything
                 }
             ],
-            "conditions_match_logic": "ALL",
+            "conditions_match_logic": "ALL"
         }
         ),
         headers={'content-type': 'application/json', 'x-api-key': api_key},
@@ -327,7 +328,6 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
     )

     watch_uuid = list(res.json.keys())[0]
-    assert not res.json[watch_uuid].get('viewed'), 'A newly created watch can only be unviewed'

     # Check in the edit page just to be sure
     res = client.get(
@@ -341,12 +341,7 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
     res = client.put(
         url_for("watch", uuid=watch_uuid),
         headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        data=json.dumps({
-            "title": "new title",
-            'time_between_check': {'minutes': 552},
-            'headers': {'cookie': 'all eaten'},
-            'last_viewed': int(time.time())
-        }),
+        data=json.dumps({"title": "new title", 'time_between_check': {'minutes': 552}, 'headers': {'cookie': 'all eaten'}}),
     )
     assert res.status_code == 200, "HTTP PUT update was sent OK"

@@ -356,7 +351,6 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
         headers={'x-api-key': api_key}
     )
     assert res.json.get('title') == 'new title'
-    assert res.json.get('viewed'), 'With the timestamp greater than "changed" a watch can be updated to viewed'

     # Check in the edit page just to be sure
     res = client.get(
@@ -384,17 +378,18 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage):
     assert b'Additional properties are not allowed' in res.data

     # Cleanup everything
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data


 def test_api_import(client, live_server, measure_memory_usage):

     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

     res = client.post(
         url_for("import") + "?tag=import-test",
         data='https://website1.com\r\nhttps://website2.com',
-        headers={'x-api-key': api_key, 'content-type': 'text/plain'},
+        headers={'x-api-key': api_key},
         follow_redirects=True
     )

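For reference, the PUT update exercised in the test above maps onto a plain HTTP call against a running instance. A sketch using the requests library, assuming the default /api/v1 prefix, a local instance, and placeholder API key and watch UUID:

    import requests

    # Placeholder values; substitute a real instance URL, API key and watch UUID
    BASE = "http://localhost:5000/api/v1"
    headers = {"x-api-key": "YOUR_API_KEY", "content-type": "application/json"}

    res = requests.put(
        f"{BASE}/watch/YOUR_WATCH_UUID",
        headers=headers,
        json={"title": "new title", "time_between_check": {"minutes": 552}},
    )
    assert res.status_code == 200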
@@ -4,7 +4,7 @@ from flask import url_for
 from .util import live_server_setup
 import json

-def test_api_notifications_crud(client, live_server, measure_memory_usage):
+def test_api_notifications_crud(client, live_server):
     # live_server_setup(live_server) # Setup on conftest per function
     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

@@ -1,199 +0,0 @@
-#!/usr/bin/env python3
-"""
-OpenAPI validation tests for ChangeDetection.io API
-
-This test file specifically verifies that OpenAPI validation is working correctly
-by testing various scenarios that should trigger validation errors.
-"""
-
-import time
-import json
-from flask import url_for
-from .util import live_server_setup, wait_for_all_checks
-
-
-def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage):
-    """Test that creating a watch with invalid content-type triggers OpenAPI validation error."""
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-
-    # Try to create a watch with JSON data but without proper content-type header
-    res = client.post(
-        url_for("createwatch"),
-        data=json.dumps({"url": "https://example.com", "title": "Test Watch"}),
-        headers={'x-api-key': api_key},  # Missing 'content-type': 'application/json'
-        follow_redirects=True
-    )
-
-    # Should get 400 error due to OpenAPI validation failure
-    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
-
-
-def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage):
-    """Test that creating a watch without required URL field triggers OpenAPI validation error."""
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-
-    # Try to create a watch without the required 'url' field
-    res = client.post(
-        url_for("createwatch"),
-        data=json.dumps({"title": "Test Watch Without URL"}),  # Missing required 'url' field
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        follow_redirects=True
-    )
-
-    # Should get 400 error due to missing required field
-    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
-
-
-def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage):
-    """Test that including invalid fields triggers OpenAPI validation error."""
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-
-    # First create a valid watch
-    res = client.post(
-        url_for("createwatch"),
-        data=json.dumps({"url": "https://example.com", "title": "Test Watch"}),
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        follow_redirects=True
-    )
-    assert res.status_code == 201, "Watch creation should succeed"
-
-    # Get the watch list to find the UUID
-    res = client.get(
-        url_for("createwatch"),
-        headers={'x-api-key': api_key}
-    )
-    assert res.status_code == 200
-    watch_uuid = list(res.json.keys())[0]
-
-    # Now try to update the watch with an invalid field
-    res = client.put(
-        url_for("watch", uuid=watch_uuid),
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        data=json.dumps({
-            "title": "Updated title",
-            "invalid_field_that_doesnt_exist": "this should cause validation error"
-        }),
-    )
-
-    # Should get 400 error due to invalid field (this will be caught by internal validation)
-    # Note: This tests the flow where OpenAPI validation passes but internal validation catches it
-    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"Additional properties are not allowed" in res.data, "Should contain validation error about additional properties"
-
-
-def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage):
-    """Test that import endpoint with wrong content-type triggers OpenAPI validation error."""
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-
-    # Try to import URLs with JSON content-type instead of text/plain
-    res = client.post(
-        url_for("import") + "?tag=test-import",
-        data='https://website1.com\nhttps://website2.com',
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},  # Wrong content-type
-        follow_redirects=True
-    )
-
-    # Should get 400 error due to content-type mismatch
-    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
-
-
-def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage):
-    """Test that import endpoint with correct content-type succeeds (positive test)."""
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-
-    # Import URLs with correct text/plain content-type
-    res = client.post(
-        url_for("import") + "?tag=test-import",
-        data='https://website1.com\nhttps://website2.com',
-        headers={'x-api-key': api_key, 'content-type': 'text/plain'},  # Correct content-type
-        follow_redirects=True
-    )
-
-    # Should succeed
-    assert res.status_code == 200, f"Expected 200 but got {res.status_code}"
-    assert len(res.json) == 2, "Should import 2 URLs"
-
-
-def test_openapi_validation_get_requests_bypass_validation(client, live_server, measure_memory_usage):
-    """Test that GET requests bypass OpenAPI validation entirely."""
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-
-    # Disable API token requirement first
-    res = client.post(
-        url_for("settings.settings_page"),
-        data={
-            "requests-time_between_check-minutes": 180,
-            "application-fetch_backend": "html_requests",
-            "application-api_access_token_enabled": ""
-        },
-        follow_redirects=True
-    )
-    assert b"Settings updated." in res.data
-
-    # Make GET request to list watches - should succeed even without API key or content-type
-    res = client.get(url_for("createwatch"))  # No headers needed for GET
-    assert res.status_code == 200, f"GET requests should succeed without OpenAPI validation, got {res.status_code}"
-
-    # Should return JSON with watch list (empty in this case)
-    assert isinstance(res.json, dict), "Should return JSON dictionary for watch list"
-
-
-def test_openapi_validation_create_tag_missing_required_title(client, live_server, measure_memory_usage):
-    """Test that creating a tag without required title triggers OpenAPI validation error."""
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-
-    # Try to create a tag without the required 'title' field
-    res = client.post(
-        url_for("tag"),
-        data=json.dumps({"notification_urls": ["mailto:test@example.com"]}),  # Missing required 'title' field
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        follow_redirects=True
-    )
-
-    # Should get 400 error due to missing required field
-    assert res.status_code == 400, f"Expected 400 but got {res.status_code}"
-    assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message"
-
-
-def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage):
-    """Test that watch updates allow partial updates without requiring all fields (positive test)."""
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-
-    # First create a valid watch
-    res = client.post(
-        url_for("createwatch"),
-        data=json.dumps({"url": "https://example.com", "title": "Test Watch"}),
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        follow_redirects=True
-    )
-    assert res.status_code == 201, "Watch creation should succeed"
-
-    # Get the watch list to find the UUID
-    res = client.get(
-        url_for("createwatch"),
-        headers={'x-api-key': api_key}
-    )
-    assert res.status_code == 200
-    watch_uuid = list(res.json.keys())[0]
-
-    # Update only the title (partial update) - should succeed
-    res = client.put(
-        url_for("watch", uuid=watch_uuid),
-        headers={'x-api-key': api_key, 'content-type': 'application/json'},
-        data=json.dumps({"title": "Updated Title Only"}),  # Only updating title, not URL
-    )
-
-    # Should succeed because UpdateWatch schema allows partial updates
-    assert res.status_code == 200, f"Partial updates should succeed, got {res.status_code}"
-
-    # Verify the update worked
-    res = client.get(
-        url_for("watch", uuid=watch_uuid),
-        headers={'x-api-key': api_key}
-    )
-    assert res.status_code == 200
-    assert res.json.get('title') == 'Updated Title Only', "Title should be updated"
-    assert res.json.get('url') == 'https://example.com', "URL should remain unchanged"
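The deleted file above pins down request validation against an OpenAPI spec: mutating requests with the wrong content-type or a missing required field are rejected with 400, while GET requests bypass validation entirely. As a rough illustration of that behaviour only, here is a framework-agnostic Flask before_request sketch; the rule table is an assumption for illustration and this is not the project's actual implementation:

    from flask import Flask, request, jsonify

    app = Flask(__name__)

    # Illustrative per-endpoint rules; a real setup would derive these from the spec
    RULES = {
        ("POST", "/api/v1/watch"): {"content_type": "application/json", "required": ["url"]},
        ("POST", "/api/v1/import"): {"content_type": "text/plain", "required": []},
    }

    @app.before_request
    def validate_against_spec():
        if request.method == "GET":
            return None  # GET requests bypass validation entirely
        rule = RULES.get((request.method, request.path))
        if not rule:
            return None
        if rule["content_type"] not in (request.content_type or ""):
            return jsonify(error="OpenAPI validation failed: unexpected content-type"), 400
        if rule["content_type"] == "application/json":
            body = request.get_json(silent=True) or {}
            missing = [f for f in rule["required"] if f not in body]
            if missing:
                return jsonify(error=f"OpenAPI validation failed: missing {missing}"), 400
        return None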
@@ -6,7 +6,7 @@ import time
 from .util import live_server_setup, wait_for_all_checks


-def test_api_search(client, live_server, measure_memory_usage):
+def test_api_search(client, live_server):
     # live_server_setup(live_server) # Setup on conftest per function
     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')

@@ -12,14 +12,18 @@ def test_basic_auth(client, live_server, measure_memory_usage):
     # This page will echo back any auth info
     test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@")
     time.sleep(1)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)
     time.sleep(1)
     # Check form validation
     res = client.post(
         url_for("ui.ui_edit.edit_page", uuid="first"),
-        data={"include_filters": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
+        data={"include_filters": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -86,8 +86,12 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage

     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)

     # Should get a notice that it's available
@@ -125,8 +129,12 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage

     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)
     res = client.get(url_for("watchlist.index"))
     assert b'ldjson-price-track-offer' not in res.data
@@ -138,8 +146,12 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage
 def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data):

     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)

     for k,v in client.application.config.get('DATASTORE').data['watching'].items():
@@ -3,7 +3,7 @@
 import time
 from flask import url_for
 from .util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \
-    extract_UUID_from_client, delete_all_watches
+    extract_UUID_from_client

 sleep_time_for_fetch_thread = 3

@@ -38,9 +38,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
     # Give the thread time to pick it up
     wait_for_all_checks(client)

-    # It should report nothing found (no new 'has-unread-changes' class)
+    # It should report nothing found (no new 'unviewed' class)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
     assert b'test-endpoint' in res.data

     # Default no password set, this stuff should be always available.
@@ -74,9 +74,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
     res = client.get(url_for("ui.ui_edit.watch_get_latest_html", uuid=uuid))
     assert b'which has this one new line' in res.data

-    # Now something should be ready, indicated by having a 'has-unread-changes' class
+    # Now something should be ready, indicated by having a 'unviewed' class
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data

     # #75, and it should be in the RSS feed
     rss_token = extract_rss_token_from_UI(client)
@@ -89,8 +89,8 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
     assert b'CDATA' in res.data

     assert expected_url.encode('utf-8') in res.data
-    #
-    # Following the 'diff' link, it should no longer display as 'has-unread-changes' even after we recheck it a few times
+    # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
     res = client.get(url_for("ui.ui_views.diff_history_page", uuid=uuid))
     assert b'selected=""' in res.data, "Confirm diff history page loaded"

@@ -104,34 +104,26 @@ def test_check_basic_change_detection_functionality(client, live_server, measure

     wait_for_all_checks(client)

-    # Do this a few times.. ensures we don't accidently set the status
+    # Do this a few times.. ensures we dont accidently set the status
     for n in range(2):
-        res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

         # Give the thread time to pick it up
         wait_for_all_checks(client)

-        # It should report nothing found (no new 'has-unread-changes' class)
+        # It should report nothing found (no new 'unviewed' class)
         res = client.get(url_for("watchlist.index"))
-        assert b'has-unread-changes' not in res.data
-        assert b'class="has-unread-changes' not in res.data
-        assert b'head title' in res.data  # Should be ON by default
+        assert b'unviewed' not in res.data
+        assert b'class="has-unviewed' not in res.data
+        assert b'head title' not in res.data  # Should not be present because this is off by default
         assert b'test-endpoint' in res.data

-    # Recheck it but only with a title change, content wasnt changed
-    set_original_response(extra_title=" and more")
+    set_original_response()

-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
-    wait_for_all_checks(client)
-    res = client.get(url_for("watchlist.index"))
-    assert b'head title and more' in res.data
-
-    # disable <title> pickup
+    # Enable auto pickup of <title> in settings
     res = client.post(
         url_for("settings.settings_page"),
-        data={"application-ui-use_page_title_in_list": "", "requests-time_between_check-minutes": 180,
+        data={"application-extract_title_as_title": "1", "requests-time_between_check-minutes": 180,
              'application-fetch_backend': "html_requests"},
         follow_redirects=True
     )
@@ -140,19 +132,21 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
|
|||||||
wait_for_all_checks(client)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
res = client.get(url_for("watchlist.index"))
|
res = client.get(url_for("watchlist.index"))
|
||||||
assert b'has-unread-changes' in res.data
|
assert b'unviewed' in res.data
|
||||||
assert b'class="has-unread-changes' in res.data
|
assert b'class="has-unviewed' in res.data
|
||||||
assert b'head title' not in res.data # should now be off
|
|
||||||
|
|
||||||
|
# It should have picked up the <title>
|
||||||
|
assert b'head title' in res.data
|
||||||
|
|
||||||
# Be sure the last_viewed is going to be greater than the last snapshot
|
# Be sure the last_viewed is going to be greater than the last snapshot
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
# hit the mark all viewed link
|
# hit the mark all viewed link
|
||||||
res = client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
|
res = client.get(url_for("ui.mark_all_viewed"), follow_redirects=True)
|
||||||
|
time.sleep(0.2)
|
||||||
|
|
||||||
assert b'class="has-unread-changes' not in res.data
|
assert b'class="has-unviewed' not in res.data
|
||||||
assert b'has-unread-changes' not in res.data
|
assert b'unviewed' not in res.data
|
||||||
|
|
||||||
# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
|
# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
|
||||||
client.get(url_for("ui.clear_watch_history", uuid=uuid))
|
client.get(url_for("ui.clear_watch_history", uuid=uuid))
|
||||||
@@ -163,219 +157,5 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
 
     #
     # Cleanup everything
-    delete_all_watches(client)
-
-
-# Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that
-def test_requests_timeout(client, live_server, measure_memory_usage):
-    delay = 2
-    test_url = url_for('test_endpoint', delay=delay, _external=True)
-
-    res = client.post(
-        url_for("settings.settings_page"),
-        data={"application-ui-use_page_title_in_list": "",
-              "requests-time_between_check-minutes": 180,
-              "requests-timeout": delay - 1,
-              'application-fetch_backend': "html_requests"},
-        follow_redirects=True
-    )
-
-    # Add our URL to the import page
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
-    wait_for_all_checks(client)
-
-    # requests takes >2 sec but we timeout at 1 second
-    res = client.get(url_for("watchlist.index"))
-    assert b'Read timed out. (read timeout=1)' in res.data
-
-    ##### Now set a longer timeout
-    res = client.post(
-        url_for("settings.settings_page"),
-        data={"application-ui-use_page_title_in_list": "",
-              "requests-time_between_check-minutes": 180,
-              "requests-timeout": delay + 1,  # timeout should be a second more than the reply time
-              'application-fetch_backend': "html_requests"},
-        follow_redirects=True
-    )
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
-
-    wait_for_all_checks(client)
-
-    res = client.get(url_for("watchlist.index"))
-    assert b'Read timed out' not in res.data
-
-
-def test_non_text_mime_or_downloads(client, live_server, measure_memory_usage):
-    """
-
-    https://github.com/dgtlmoon/changedetection.io/issues/3434
-    I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8,
-    but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog),
-    changedetection somehow ignores all line breaks and treats the document file as if everything is on one line.
-
-    WHAT THIS DOES - makes the system rely on 'magic' to determine what is it
-
-    :param client:
-    :param live_server:
-    :param measure_memory_usage:
-    :return:
-    """
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write("""some random text that should be split by line
-and not parsed with html_to_text
-this way we know that it correctly parsed as plain text
-\r\n
-ok\r\n
-got it\r\n
-""")
-
-    test_url = url_for('test_endpoint', content_type="application/octet-stream", _external=True)
-
-    # Add our URL to the import page
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
-
-    wait_for_all_checks(client)
-
-    ### check the front end
-    res = client.get(
-        url_for("ui.ui_views.preview_page", uuid="first"),
-        follow_redirects=True
-    )
-    assert b"some random text that should be split by line\n" in res.data
-    ####
-
-    # Check the snapshot by API that it has linefeeds too
-    watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-    res = client.get(
-        url_for("watchhistory", uuid=watch_uuid),
-        headers={'x-api-key': api_key},
-    )
-
-    # Fetch a snapshot by timestamp, check the right one was found
-    res = client.get(
-        url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]),
-        headers={'x-api-key': api_key},
-    )
-    assert b"some random text that should be split by line\n" in res.data
-
-
-    delete_all_watches(client)
-
-
-def test_standard_text_plain(client, live_server, measure_memory_usage):
-    """
-
-    https://github.com/dgtlmoon/changedetection.io/issues/3434
-    I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8,
-    but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog),
-    changedetection somehow ignores all line breaks and treats the document file as if everything is on one line.
-
-    The real bug here can be that it will try to process plain-text as HTML, losing <etc>
-
-    :param client:
-    :param live_server:
-    :param measure_memory_usage:
-    :return:
-    """
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write("""some random text that should be split by line
-and not parsed with html_to_text
-<title>Even this title should stay because we are just plain text</title>
-this way we know that it correctly parsed as plain text
-\r\n
-ok\r\n
-got it\r\n
-""")
-
-    test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
-
-    # Add our URL to the import page
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
-
-    wait_for_all_checks(client)
-
-    ### check the front end
-    res = client.get(
-        url_for("ui.ui_views.preview_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    assert b"some random text that should be split by line\n" in res.data
-    ####
-
-    # Check the snapshot by API that it has linefeeds too
-    watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
-    api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token')
-    res = client.get(
-        url_for("watchhistory", uuid=watch_uuid),
-        headers={'x-api-key': api_key},
-    )
-
-    # Fetch a snapshot by timestamp, check the right one was found
-    res = client.get(
-        url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]),
-        headers={'x-api-key': api_key},
-    )
-    assert b"some random text that should be split by line\n" in res.data
-    assert b"<title>Even this title should stay because we are just plain text</title>" in res.data
-
-    delete_all_watches(client)
-
-
-# Server says its plaintext, we should always treat it as plaintext
-def test_plaintext_even_if_xml_content(client, live_server, measure_memory_usage):
-
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write("""<?xml version="1.0" encoding="utf-8"?>
-<resources xmlns:tools="http://schemas.android.com/tools">
-    <!--Activity and fragment titles-->
-    <string name="feed_update_receiver_name">Abonnementen bijwerken</string>
-</resources>
-""")
-
-    test_url = url_for('test_endpoint', content_type="text/plain", _external=True)
-
-    # Add our URL to the import page
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
-
-    wait_for_all_checks(client)
-
-    res = client.get(
-        url_for("ui.ui_views.preview_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    assert b'<string name="feed_update_receiver_name"' in res.data
-
-    delete_all_watches(client)
-
-
-# Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that
-def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server, measure_memory_usage):
-
-    with open("test-datastore/endpoint-content.txt", "w") as f:
-        f.write("""<?xml version="1.0" encoding="utf-8"?>
-<resources xmlns:tools="http://schemas.android.com/tools">
-    <!--Activity and fragment titles-->
-    <string name="feed_update_receiver_name">Abonnementen bijwerken</string>
-    <foobar>ok man</foobar>
-</resources>
-""")
-
-    test_url=url_for('test_endpoint', content_type="text/plain", _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"include_filters": ['//string']})
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
-    wait_for_all_checks(client)
-
-    res = client.get(
-        url_for("ui.ui_views.preview_page", uuid="first"),
-        follow_redirects=True
-    )
-
-    assert b'<string name="feed_update_receiver_name"' in res.data
-    assert b'<foobar' not in res.data
-
     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
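
The removed `test_non_text_mime_or_downloads` above documents the actual fix: when a server forces a download with `content-type: application/octet-stream`, the system should "rely on 'magic'" to decide whether the body is really text before any HTML-to-text conversion flattens the line breaks. Roughly, with the python-magic package (an assumption for illustration; the project may sniff with a different library):

    import magic  # python-magic, assumed here purely for illustration

    def looks_like_text(raw: bytes, declared_mime: str) -> bool:
        # Don't trust a download-forcing content-type header; sniff the
        # leading bytes and keep plain text as plain text.
        if declared_mime.startswith('application/octet-stream'):
            return magic.from_buffer(raw[:2048], mime=True).startswith('text/')
        return declared_mime.startswith('text/')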

@@ -58,7 +58,6 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""):
 
     has_watch_update = False
     has_unviewed_update = False
-    got_general_stats_update = False
 
     for i in range(10):
         # Get received events
@@ -66,11 +65,15 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""):
 
         if received:
             logger.info(f"Received {len(received)} events after {i+1} seconds")
 
+            # Check for watch_update events with unviewed=True
             for event in received:
                 if event['name'] == 'watch_update':
                     has_watch_update = True
-                if event['name'] == 'general_stats_update':
-                    got_general_stats_update = True
+                    if event['args'][0]['watch'].get('unviewed', False):
+                        has_unviewed_update = True
+                        logger.info("Found unviewed update event!")
+                        break
 
         if has_unviewed_update:
             break
@@ -89,7 +92,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""):
     assert has_watch_update, "No watch_update events received"
 
     # Verify we received an unviewed event
-    assert got_general_stats_update, "Got general stats update event"
+    assert has_unviewed_update, "No watch_update event with unviewed=True received"
 
     # Alternatively, check directly if the watch in the datastore is marked as unviewed
     from changedetectionio.flask_app import app
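
Both sides of the socket.io hunks share the same polling shape: drain the test client's buffered events once a second and stop early when the interesting event arrives. Condensed, using flask-socketio's test-client `get_received()` (the helper name here is illustrative, not from the repository):

    import time

    def wait_for_unviewed_update(socketio_test_client, attempts=10):
        # Poll the buffered events; the newer side stops as soon as a
        # watch_update event carries unviewed=True in its payload.
        for _ in range(attempts):
            for event in socketio_test_client.get_received():
                if event['name'] == 'watch_update' and event['args'][0]['watch'].get('unviewed', False):
                    return True
            time.sleep(1)
        return False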
@@ -2,7 +2,7 @@
 
 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, delete_all_watches
+from .util import live_server_setup, wait_for_all_checks
 from changedetectionio import html_tools
 
 def set_original_ignore_response():
@@ -70,8 +70,12 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
 
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
 
     # Give the thread time to pick it up
     wait_for_all_checks(client)
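
This same swap repeats through the rest of the compare: the 0.50.23 side seeds watches straight into the datastore and then queues a recheck, while the API-OpenAP side still drives the import form and checks its flash message. Pulled out of diff context for clarity (both fragments appear verbatim in the hunks):

    # 0.50.23 side: create the watch directly in the datastore, then queue a recheck
    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)

    # API-OpenAP side: submit the import form and assert on the flash message
    res = client.post(
        url_for("imports.import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

The direct route avoids a full form round-trip per test; the older form route additionally exercises the import view itself on every test.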
@@ -82,8 +86,7 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
         url_for("ui.ui_edit.edit_page", uuid="first"),
         data={"text_should_not_be_present": ignore_text,
               "url": test_url,
-              'fetch_backend': "html_requests",
-              "time_between_check_use_default": "y"
+              'fetch_backend': "html_requests"
               },
         follow_redirects=True
     )
@@ -103,9 +106,9 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
     # Give the thread time to pick it up
     wait_for_all_checks(client)
 
-    # It should report nothing found (no new 'has-unread-changes' class)
+    # It should report nothing found (no new 'unviewed' class)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
     assert b'/test-endpoint' in res.data
 
     # The page changed, BUT the text is still there, just the rest of it changes, we should not see a change
@@ -116,9 +119,9 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
     # Give the thread time to pick it up
     wait_for_all_checks(client)
 
-    # It should report nothing found (no new 'has-unread-changes' class)
+    # It should report nothing found (no new 'unviewed' class)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
     assert b'/test-endpoint' in res.data
 
     # 2548
@@ -127,7 +130,7 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
     wait_for_all_checks(client)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
 
 
     # Now we set a change where the text is gone AND its different content, it should now trigger
@@ -135,9 +138,10 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu
     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
     wait_for_all_checks(client)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data
 
 
 
 
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
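
The teardown swap follows the same pattern: the 0.50.23 side calls a shared `delete_all_watches(client)` helper from `.util`, which presumably wraps exactly the inline round-trip the API-OpenAP side performs. A minimal sketch of that helper, assuming only the endpoint and flash text visible in these hunks:

    from flask import url_for

    def delete_all_watches(client):
        # Sketch, not the repository's exact helper: delete every watch via
        # the same 'ui.form_delete' endpoint the older branch hits inline,
        # then confirm the 'Deleted' flash message arrived.
        res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
        assert b'Deleted' in res.data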

@@ -14,8 +14,12 @@ def test_clone_functionality(client, live_server, measure_memory_usage):
     test_url = url_for('test_endpoint', _external=True)
 
     # Add our URL to the import page
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)
 
     # So that we can be sure the same history doesnt carry over
@@ -3,7 +3,7 @@ import json
 import time
 
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, delete_all_watches
+from .util import live_server_setup, wait_for_all_checks
 from ..model import CONDITIONS_MATCH_LOGIC_DEFAULT
 
 
@@ -47,11 +47,11 @@ def set_number_out_of_range_response(number="150"):
         f.write(test_return_data)
 
 
-# def test_setup(client, live_server, measure_memory_usage):
+# def test_setup(client, live_server):
 """Test that both text and number conditions work together with AND logic."""
 # live_server_setup(live_server) # Setup on conftest per function
 
-def test_conditions_with_text_and_number(client, live_server, measure_memory_usage):
+def test_conditions_with_text_and_number(client, live_server):
     """Test that both text and number conditions work together with AND logic."""
 
     set_original_response("50")
@@ -60,8 +60,12 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
     test_url = url_for('test_endpoint', _external=True)
 
     # Add our URL to the import page
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)
 
     # Configure the watch with two conditions connected with AND:
@@ -101,7 +105,6 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
             "conditions-5-operator": "contains_regex",
             "conditions-5-field": "page_filtered_text",
             "conditions-5-value": "\d",
-            "time_between_check_use_default": "y",
         },
         follow_redirects=True
     )
@@ -121,7 +124,7 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
     time.sleep(2)
     # 75 is > 20 and < 100 and contains "5"
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data
 
 
     # Case 2: Change with one condition violated
@@ -137,20 +140,25 @@ def test_conditions_with_text_and_number(client, live_server, measure_memory_usa
 
     # Should NOT be marked as having changes since not all conditions are met
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
 
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
 
 # The 'validate' button next to each rule row
-def test_condition_validate_rule_row(client, live_server, measure_memory_usage):
+def test_condition_validate_rule_row(client, live_server):
 
     set_original_response("50")
 
     test_url = url_for('test_endpoint', _external=True)
 
     # Add our URL to the import page
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)
 
     uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
@@ -221,8 +229,12 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage):
 
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
 
     # Give the thread time to pick it up
     wait_for_all_checks(client)
@@ -276,8 +288,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
         "conditions_match_logic": CONDITIONS_MATCH_LOGIC_DEFAULT,  # ALL = AND logic
         "conditions-0-field": "levenshtein_ratio",
         "conditions-0-operator": "<",
-        "conditions-0-value": "0.8",  # needs to be more of a diff to trigger a change
-        "time_between_check_use_default": "y"
+        "conditions-0-value": "0.8"  # needs to be more of a diff to trigger a change
         },
         follow_redirects=True
     )
@@ -286,7 +297,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
 
     wait_for_all_checks(client)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
 
     # Check the content saved initially, even tho a condition was set - this is the first snapshot so shouldnt be affected by conditions
     res = client.get(
@@ -313,7 +324,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
     wait_for_all_checks(client)
 
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data  #because this will be like 0.90 not 0.8 threshold
+    assert b'unviewed' not in res.data  #because this will be like 0.90 not 0.8 threshold
 
     ############### Now change it a MORE THAN 50%
     test_return_data = """<html>
@@ -332,7 +343,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage):
     assert b'Queued 1 watch for rechecking.' in res.data
     wait_for_all_checks(client)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data
     # cleanup for the next
     client.get(
         url_for("ui.form_delete", uuid="all"),
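
For orientation, the numbered `conditions-N-*` form rows above are combined under `CONDITIONS_MATCH_LOGIC_DEFAULT`, i.e. ALL rules must pass. A toy rendering of the three rules the test exercises, only to make the AND semantics concrete (not the plugin's real evaluator):

    rules = [
        lambda n: n > 20,            # condition: number greater than 20
        lambda n: n < 100,           # ... and less than 100
        lambda n: "5" in str(n),     # ... and the page text contains a "5"
    ]

    def all_conditions_match(number):
        return all(rule(number) for rule in rules)

    assert all_conditions_match(75)       # passes every rule, a change triggers
    assert not all_conditions_match(150)  # fails "< 100", so no change is flagged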

@@ -81,8 +81,12 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m
 
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
 
     # Give the thread time to pick it up
     time.sleep(sleep_time_for_fetch_thread)
@@ -91,7 +95,7 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m
     # Add our URL to the import page
     res = client.post(
         url_for("ui.ui_edit.edit_page", uuid="first"),
-        data={"include_filters": include_filters, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
+        data={"include_filters": include_filters, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -112,10 +116,10 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m
     # Give the thread time to pick it up
     time.sleep(sleep_time_for_fetch_thread)
 
-    # It should have 'has-unread-changes' still
+    # It should have 'unviewed' still
     # Because it should be looking at only that 'sametext' id
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data
 
 
 # Tests the whole stack works with the CSS Filter
@@ -134,8 +138,12 @@ def test_check_multiple_filters(client, live_server, measure_memory_usage):
 
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)
 
     # Goto the edit page, add our ignore text
@@ -146,8 +154,7 @@ def test_check_multiple_filters(client, live_server, measure_memory_usage):
               "url": test_url,
               "tags": "",
               "headers": "",
-              'fetch_backend': "html_requests",
-              "time_between_check_use_default": "y"},
+              'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -185,8 +192,12 @@ def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usa
 
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)
 
     # Goto the edit page, add our ignore text
@@ -197,8 +208,7 @@ def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usa
               "url": test_url,
              "tags": "",
              "headers": "",
-              'fetch_backend': "html_requests",
-              "time_between_check_use_default": "y"},
+              'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
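
An `include_filters` entry restricts the comparison to whatever the selector matches, "looking at only that 'sametext' id" as the hunk comment puts it, so churn elsewhere on the page never registers. The idea, sketched with lxml (illustration only, not the project's internal filter path):

    from lxml import html  # .cssselect() also needs the cssselect package

    def apply_include_filter(html_content, css_selector):
        # Keep only the text of the matched fragment(s); everything outside
        # the filter is invisible to the change comparison.
        tree = html.fromstring(html_content)
        return "\n".join(el.text_content().strip() for el in tree.cssselect(css_selector))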
@@ -5,7 +5,7 @@ import time
 from flask import url_for
 
 from ..html_tools import *
-from .util import live_server_setup, wait_for_all_checks, delete_all_watches
+from .util import live_server_setup, wait_for_all_checks
 
 
 
@@ -171,7 +171,6 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
             "tags": "",
             "headers": "",
             "fetch_backend": "html_requests",
-            "time_between_check_use_default": "y",
         },
         follow_redirects=True,
     )
@@ -190,7 +189,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
 
     wait_for_all_checks(client)
 
-    # so that we set the state to 'has-unread-changes' after all the edits
+    # so that we set the state to 'unviewed' after all the edits
     client.get(url_for("ui.ui_views.diff_history_page", uuid="first"))
 
     # Make a change to header/footer/nav
@@ -209,32 +208,47 @@ def test_element_removal_full(client, live_server, measure_memory_usage):
 
 # Re #2752
 def test_element_removal_nth_offset_no_shift(client, live_server, measure_memory_usage):
 
 
     set_response_with_multiple_index()
-    subtractive_selectors_data = [
-        ### css style ###
-        """body > table > tr:nth-child(1) > th:nth-child(2)
+    subtractive_selectors_data = ["""
+body > table > tr:nth-child(1) > th:nth-child(2)
 body > table > tr:nth-child(2) > td:nth-child(2)
 body > table > tr:nth-child(3) > td:nth-child(2)
 body > table > tr:nth-child(1) > th:nth-child(3)
 body > table > tr:nth-child(2) > td:nth-child(3)
 body > table > tr:nth-child(3) > td:nth-child(3)""",
-        ### second type, xpath ###
         """//body/table/tr[1]/th[2]
 //body/table/tr[2]/td[2]
 //body/table/tr[3]/td[2]
 //body/table/tr[1]/th[3]
 //body/table/tr[2]/td[3]
 //body/table/tr[3]/td[3]"""]
 
-    test_url = url_for("test_endpoint", _external=True)
 
     for selector_list in subtractive_selectors_data:
 
-        delete_all_watches(client)
+        res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+        assert b'Deleted' in res.data
 
-        uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"subtractive_selectors": selector_list.splitlines()})
-        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+        # Add our URL to the import page
+        test_url = url_for("test_endpoint", _external=True)
+        res = client.post(
+            url_for("imports.import_page"), data={"urls": test_url}, follow_redirects=True
+        )
+        assert b"1 Imported" in res.data
+        wait_for_all_checks(client)
+
+        res = client.post(
+            url_for("ui.ui_edit.edit_page", uuid="first"),
+            data={
+                "subtractive_selectors": selector_list,
+                "url": test_url,
+                "tags": "",
+                "fetch_backend": "html_requests",
+            },
+            follow_redirects=True,
+        )
+        assert b"Updated watch." in res.data
         wait_for_all_checks(client)
 
         res = client.get(
@@ -242,7 +256,6 @@ body > table > tr:nth-child(3) > td:nth-child(3)""",
            follow_redirects=True
        )
 
-        # the filters above should have removed this but they never say to remove the "emil" column
        assert b"Tobias" not in res.data
        assert b"Linus" not in res.data
        assert b"Person 2" not in res.data
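
The nth-child/XPath hunk above deliberately feeds the same table through two selector dialects. Subtractive selectors work the other way around from include filters: matched nodes are dropped before the text is compared. A sketch of that removal step (an illustration, not the `html_tools` implementation):

    from lxml import etree, html

    def remove_elements(html_content, selectors):
        # Treat selectors starting with // as XPath and the rest as CSS,
        # mirroring the two selector_list variants in the test data above.
        tree = html.fromstring(html_content)
        for selector in selectors:
            nodes = tree.xpath(selector) if selector.startswith('//') else tree.cssselect(selector)
            for node in nodes:
                node.getparent().remove(node)
        return etree.tostring(tree, encoding='unicode')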
@@ -28,8 +28,11 @@ def test_check_encoding_detection(client, live_server, measure_memory_usage):
 
     # Add our URL to the import page
     test_url = url_for('test_endpoint', content_type="text/html", _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
 
     # Give the thread time to pick it up
     wait_for_all_checks(client)
@@ -56,8 +59,11 @@ def test_check_encoding_detection_missing_content_type_header(client, live_serve
 
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
 
     wait_for_all_checks(client)
 
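
Both encoding tests hinge on guessing a charset when the Content-Type header is missing or unhelpful. The rough equivalent with the requests library's built-in detection (the app's real fetcher is more involved; this is only the shape of the fallback):

    import requests

    def fetch_decoded(url):
        r = requests.get(url, timeout=10)
        # No charset in the header? Sniff the body bytes rather than
        # falling back to requests' ISO-8859-1 default.
        if 'charset' not in r.headers.get('content-type', '').lower():
            r.encoding = r.apparent_encoding
        return r.text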
@@ -3,7 +3,7 @@
 import time
 
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, delete_all_watches
+from .util import live_server_setup, wait_for_all_checks
 
 
 
@@ -19,15 +19,19 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text):
                        status_code=http_code,
                        _external=True)
 
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
 
     # Give the thread time to pick it up
     wait_for_all_checks(client)
 
     res = client.get(url_for("watchlist.index"))
     # no change
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
     assert bytes(expected_text.encode('utf-8')) in res.data
 
 
@@ -43,7 +47,8 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text):
     #assert b'Error Screenshot' in res.data
 
 
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
 
 
 def test_http_error_handler(client, live_server, measure_memory_usage):
@@ -51,7 +56,8 @@ def test_http_error_handler(client, live_server, measure_memory_usage):
     _runner_test_http_errors(client, live_server, 404, 'Page not found')
     _runner_test_http_errors(client, live_server, 500, '(Internal server error) received')
     _runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400')
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
 
 # Just to be sure error text is properly handled
 def test_DNS_errors(client, live_server, measure_memory_usage):
@@ -81,7 +87,8 @@ def test_DNS_errors(client, live_server, measure_memory_usage):
     assert found_name_resolution_error
     # Should always record that we tried
     assert bytes("just now".encode('utf-8')) in res.data
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
 
 # Re 1513
 def test_low_level_errors_clear_correctly(client, live_server, measure_memory_usage):
@@ -120,8 +127,7 @@ def test_low_level_errors_clear_correctly(client, live_server, measure_memory_us
         url_for("ui.ui_edit.edit_page", uuid="first"),
         data={
             "url": test_url,
-            "fetch_backend": "html_requests",
-            "time_between_check_use_default": "y"},
+            "fetch_backend": "html_requests"},
         follow_redirects=True
     )
 
@@ -138,4 +144,5 @@ def test_low_level_errors_clear_correctly(client, live_server, measure_memory_us
     )
     assert not found_name_resolution_error
 
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
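
Nearly every hunk in this compare builds URLs like `url_for('test_endpoint', status_code=..., content_type=..., delay=...)`. The live-server fixture behind that route is not part of this diff, but from the parameters used above it behaves roughly like this Flask view (a reconstruction, not the fixture's actual code):

    import time
    from flask import Flask, request, make_response

    app = Flask(__name__)

    @app.route('/test-endpoint')
    def test_endpoint():
        # Assumed shape: delay= for the timeout tests, status_code= for the
        # HTTP error tests, content_type= for the plaintext/encoding tests;
        # the body is whatever the test last wrote to endpoint-content.txt.
        if request.args.get('delay'):
            time.sleep(int(request.args['delay']))
        with open("test-datastore/endpoint-content.txt", "rb") as f:
            body = f.read()
        resp = make_response(body, int(request.args.get('status_code', 200)))
        resp.headers['Content-Type'] = request.args.get('content_type', 'text/html')
        return resp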
@@ -2,7 +2,7 @@
 
 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, delete_all_watches
+from .util import live_server_setup, wait_for_all_checks
 
 from ..html_tools import *
 
@@ -76,8 +76,12 @@ def test_check_filter_multiline(client, live_server, measure_memory_usage):
 
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
 
     wait_for_all_checks(client)
 
@@ -91,8 +95,7 @@ def test_check_filter_multiline(client, live_server, measure_memory_usage):
               "url": test_url,
              "tags": "",
              "headers": "",
-              'fetch_backend': "html_requests",
-              "time_between_check_use_default": "y"
+              'fetch_backend': "html_requests"
              },
         follow_redirects=True
    )
@@ -127,8 +130,12 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag
 
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
 
     # Give the thread time to pick it up
     wait_for_all_checks(client)
@@ -142,8 +149,7 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag
               "url": test_url,
              "tags": "",
              "headers": "",
-              'fetch_backend': "html_requests",
-              "time_between_check_use_default": "y"
+              'fetch_backend': "html_requests"
              },
         follow_redirects=True
    )
@@ -166,10 +172,10 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag
     # Give the thread time to pick it up
     wait_for_all_checks(client)
 
-    # It should have 'has-unread-changes' still
+    # It should have 'unviewed' still
     # Because it should be looking at only that 'sametext' id
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data
 
     # Check HTML conversion detected and workd
     res = client.get(
@@ -204,19 +210,23 @@ def test_regex_error_handling(client, live_server, measure_memory_usage):
 
     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
 
     ### test regex error handling
     res = client.post(
         url_for("ui.ui_edit.edit_page", uuid="first"),
         data={"extract_text": '/something bad\d{3/XYZ',
               "url": test_url,
-              "fetch_backend": "html_requests",
-              "time_between_check_use_default": "y"},
+              "fetch_backend": "html_requests"},
         follow_redirects=True
     )
 
     assert b'is not a valid regular expression.' in res.data
 
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
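
The `'/something bad\d{3/XYZ'` row exercises server-side validation of user-supplied `/pattern/flags` extract rules, asserted above via "is not a valid regular expression.". The check presumably boils down to splitting off the flags and attempting a compile, along these lines (a sketch of the validation, not the form's actual code):

    import re

    def is_valid_perl_style_regex(value: str) -> bool:
        # A /pattern/flags entry must end in recognised flags and compile;
        # '/something bad\d{3/XYZ' fails because its trailing "XYZ" is not
        # a valid flags block.
        if not value.startswith('/'):
            return False
        pattern, _, flags = value[1:].rpartition('/')
        if not pattern or any(f not in 'imsx' for f in flags):
            return False
        try:
            re.compile(pattern)
            return True
        except re.error:
            return False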
@@ -94,8 +94,7 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
           "title": "my title",
           "headers": "",
           "include_filters": '.ticket-available',
-          "fetch_backend": "html_requests",
-          "time_between_check_use_default": "y"})
+          "fetch_backend": "html_requests"})
 
     res = client.post(
         url_for("ui.ui_edit.edit_page", uuid="first"),
@@ -42,8 +42,13 @@ def run_filter_test(client, live_server, content_filter):
     if os.path.isfile("test-datastore/notification.txt"):
         os.unlink("test-datastore/notification.txt")
 
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)
 
     uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
@@ -67,7 +72,6 @@ def run_filter_test(client, live_server, content_filter):
           "notification_format": "Text",
           "fetch_backend": "html_requests",
           "filter_failure_notification_send": 'y',
-          "time_between_check_use_default": "y",
           "headers": "",
           "tags": "my tag",
           "title": "my title 123",
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client, delete_all_watches
|
from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client
|
||||||
import os
|
import os
|
||||||
|
|
||||||
|
|
||||||
@@ -127,7 +127,8 @@ def test_setup_group_tag(client, live_server, measure_memory_usage):
|
|||||||
assert b"should-be-excluded" not in res.data
|
assert b"should-be-excluded" not in res.data
|
||||||
assert res.status_code == 200
|
assert res.status_code == 200
|
||||||
assert b"first-imported=1" in res.data
|
assert b"first-imported=1" in res.data
|
||||||
delete_all_watches(client)
|
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
def test_tag_import_singular(client, live_server, measure_memory_usage):
|
def test_tag_import_singular(client, live_server, measure_memory_usage):
|
||||||
|
|
||||||
@@ -146,7 +147,8 @@ def test_tag_import_singular(client, live_server, measure_memory_usage):
|
|||||||
)
|
)
|
||||||
# Should be only 1 tag because they both had the same
|
# Should be only 1 tag because they both had the same
|
||||||
assert res.data.count(b'test-tag') == 1
|
assert res.data.count(b'test-tag') == 1
|
||||||
delete_all_watches(client)
|
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
def test_tag_add_in_ui(client, live_server, measure_memory_usage):
|
def test_tag_add_in_ui(client, live_server, measure_memory_usage):
|
||||||
|
|
||||||
@@ -162,7 +164,8 @@ def test_tag_add_in_ui(client, live_server, measure_memory_usage):
|
|||||||
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
|
res = client.get(url_for("tags.delete_all"), follow_redirects=True)
|
||||||
assert b'All tags deleted' in res.data
|
assert b'All tags deleted' in res.data
|
||||||
|
|
||||||
delete_all_watches(client)
|
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
def test_group_tag_notification(client, live_server, measure_memory_usage):
|
def test_group_tag_notification(client, live_server, measure_memory_usage):
|
||||||
|
|
||||||
@@ -229,7 +232,8 @@ def test_group_tag_notification(client, live_server, measure_memory_usage):
|
|||||||
|
|
||||||
#@todo Test that multiple notifications fired
|
#@todo Test that multiple notifications fired
|
||||||
#@todo Test that each of multiple notifications with different settings
|
#@todo Test that each of multiple notifications with different settings
|
||||||
delete_all_watches(client)
|
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
 def test_limit_tag_ui(client, live_server, measure_memory_usage):

@@ -260,12 +264,15 @@ def test_limit_tag_ui(client, live_server, measure_memory_usage):
     client.get(url_for('ui.mark_all_viewed', tag=tag_uuid), follow_redirects=True)
     wait_for_all_checks(client)

+    with open('/tmp/fuck.html', 'wb') as f:
+        f.write(res.data)
     # Should be only 1 unviewed
     res = client.get(url_for("watchlist.index"))
     assert res.data.count(b' unviewed ') == 1


-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
     res = client.get(url_for("tags.delete_all"), follow_redirects=True)
     assert b'All tags deleted' in res.data

@@ -292,7 +299,8 @@ def test_clone_tag_on_import(client, live_server, measure_memory_usage):
     # 2 times plus the top link to tag
     assert res.data.count(b'test-tag') == 3
     assert res.data.count(b'another-tag') == 3
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

 def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usage):

@@ -319,7 +327,8 @@ def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usage):
     # 2 times plus the top link to tag
     assert res.data.count(b'test-tag') == 3
     assert res.data.count(b'another-tag') == 3
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

     res = client.get(url_for("tags.delete_all"), follow_redirects=True)
     assert b'All tags deleted' in res.data
@@ -382,8 +391,12 @@ def test_order_of_filters_tag_filter_and_watch_filter(client, live_server, measure_memory_usage):
         f.write(d)

     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data
     wait_for_all_checks(client)

     filters = [
@@ -411,8 +424,7 @@ def test_order_of_filters_tag_filter_and_watch_filter(client, live_server, measure_memory_usage):
         "url": test_url,
         "tags": "test-tag-keep-order",
         "headers": "",
-        'fetch_backend': "html_requests",
-        "time_between_check_use_default": "y"},
+        'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -469,4 +481,5 @@ the {test} appeared before. {test in res.data[:n]=}
 """
         n += t_index + len(test)

-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
@@ -3,8 +3,9 @@
 import time
 import os
 import json
+import logging
 from flask import url_for
-from .util import wait_for_all_checks, delete_all_watches
+from .util import live_server_setup, wait_for_all_checks
 from urllib.parse import urlparse, parse_qs

 def test_consistent_history(client, live_server, measure_memory_usage):
@@ -80,15 +81,19 @@ def test_consistent_history(client, live_server, measure_memory_usage):
         assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved"


-def test_check_text_history_view(client, live_server, measure_memory_usage):
+def test_check_text_history_view(client, live_server):

     with open("test-datastore/endpoint-content.txt", "w") as f:
         f.write("<html>test-one</html>")

     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data

     # Give the thread time to pick it up
     wait_for_all_checks(client)
@@ -117,4 +122,5 @@ def test_check_text_history_view(client, live_server, measure_memory_usage):
     assert b'test-two' in res.data
     assert b'test-one' not in res.data

-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data
@@ -27,8 +27,12 @@ def test_ignore(client, live_server, measure_memory_usage):
     # live_server_setup(live_server) # Setup on conftest per function
     set_original_ignore_response()
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data

     # Give the thread time to pick it up
     wait_for_all_checks(client)
@@ -54,35 +58,3 @@ def test_ignore(client, live_server, measure_memory_usage):
     # Should be in base.html
     assert b'csrftoken' in res.data

-
-def test_strip_ignore_lines(client, live_server, measure_memory_usage):
-    # live_server_setup(live_server) # Setup on conftest per function
-    set_original_ignore_response()
-
-
-    # Goto the settings page, add our ignore text
-    res = client.post(
-        url_for("settings.settings_page"),
-        data={
-            "requests-time_between_check-minutes": 180,
-            "application-ignore_whitespace": "y",
-            "application-strip_ignored_lines": "y",
-            "application-global_ignore_text": "Which is across multiple",
-            'application-fetch_backend': "html_requests"
-        },
-        follow_redirects=True
-    )
-    assert b"Settings updated." in res.data
-
-    test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
-
-    # Give the thread time to pick it up
-    wait_for_all_checks(client)
-    uuid = next(iter(live_server.app.config['DATASTORE'].data['watching']))
-
-    # It should not be in the preview anymore
-    res = client.get(url_for("ui.ui_views.preview_page", uuid=uuid))
-    assert b'<div class="ignored">' not in res.data
-    assert b'Which is across multiple' not in res.data
@@ -2,7 +2,7 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, delete_all_watches
+from .util import live_server_setup, wait_for_all_checks
 from changedetectionio import html_tools


@@ -97,8 +97,12 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usage):

     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data

     # Give the thread time to pick it up
     wait_for_all_checks(client)
@@ -107,7 +111,7 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usage):
     # Add our URL to the import page
     res = client.post(
         url_for("ui.ui_edit.edit_page", uuid="first"),
-        data={"ignore_text": ignore_text, "url": test_url, 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
+        data={"ignore_text": ignore_text, "url": test_url, 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -124,9 +128,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usage):
     # Give the thread time to pick it up
     wait_for_all_checks(client)

-    # It should report nothing found (no new 'has-unread-changes' class)
+    # It should report nothing found (no new 'unviewed' class)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
     assert b'/test-endpoint' in res.data

     # Make a change
@@ -137,9 +141,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usage):
     # Give the thread time to pick it up
     wait_for_all_checks(client)

-    # It should report nothing found (no new 'has-unread-changes' class)
+    # It should report nothing found (no new 'unviewed' class)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
     assert b'/test-endpoint' in res.data


@@ -150,7 +154,7 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usage):
     wait_for_all_checks(client)

     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data

     res = client.get(url_for("ui.ui_views.preview_page", uuid="first"))

@@ -159,7 +163,8 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usage):
     # it is only ignored, it is not removed (it will be highlighted too)
     assert b'new ignore stuff' in res.data

-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

 # When adding some ignore text, it should not trigger a change, even if something else on that line changes
 def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
@@ -187,8 +192,12 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
         # Switch to source mode so we can test that too!
         test_url = "source:"+test_url

-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data

     # Give the thread time to pick it up
     wait_for_all_checks(client)
@@ -196,7 +205,7 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
     #Adding some ignore text should not trigger a change
     res = client.post(
         url_for("ui.ui_edit.edit_page", uuid="first"),
-        data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
+        data={"ignore_text": "something irrelevent but just to check", "url": test_url, 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -213,9 +222,9 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
     # Trigger a check
     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
     wait_for_all_checks(client)
-    # It should report nothing found (no new 'has-unread-changes' class), adding random ignore text should not cause a change
+    # It should report nothing found (no new 'unviewed' class), adding random ignore text should not cause a change
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
     assert b'/test-endpoint' in res.data
     #####

@@ -229,10 +238,10 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
     # Give the thread time to pick it up
     wait_for_all_checks(client)

-    # It should report nothing found (no new 'has-unread-changes' class)
+    # It should report nothing found (no new 'unviewed' class)
     res = client.get(url_for("watchlist.index"))

-    assert b'has-unread-changes' not in res.data
+    assert b'unviewed' not in res.data
     assert b'/test-endpoint' in res.data

     # Just to be sure.. set a regular modified change that will trigger it
@@ -240,14 +249,15 @@ def _run_test_global_ignore(client, as_source=False, extra_ignore=""):
     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
     wait_for_all_checks(client)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data

-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
+    assert b'Deleted' in res.data

-def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage):
+def test_check_global_ignore_text_functionality(client, live_server):

     _run_test_global_ignore(client, as_source=False)

-def test_check_global_ignore_text_functionality_as_source(client, live_server, measure_memory_usage):
+def test_check_global_ignore_text_functionality_as_source(client, live_server):

     _run_test_global_ignore(client, as_source=True, extra_ignore='/\?v=\d/')
@@ -3,7 +3,9 @@

 import time
 from flask import url_for
-from .util import live_server_setup, wait_for_all_checks, delete_all_watches
+from .util import live_server_setup, wait_for_all_checks
+
+


 def set_original_ignore_response():
@@ -109,11 +111,13 @@ def test_render_anchor_tag_content_true(client, live_server, measure_memory_usage):
     assert '(/modified_link)' in res.data.decode()

     # since the link has changed, and we chose to render anchor tag content,
-    # we should detect a change (new 'has-unread-changes' class)
+    # we should detect a change (new 'unviewed' class)
     res = client.get(url_for("watchlist.index"))
     assert b"unviewed" in res.data
     assert b"/test-endpoint" in res.data

     # Cleanup everything
-    delete_all_watches(client)
+    res = client.get(url_for("ui.form_delete", uuid="all"),
+                     follow_redirects=True)
+    assert b'Deleted' in res.data

@@ -60,8 +60,12 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage):

     # Add our URL to the import page
     test_url = url_for('test_endpoint', _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data

     wait_for_all_checks(client)

@@ -73,9 +77,9 @@ def test_normal_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage):
     # Give the thread time to pick it up
     wait_for_all_checks(client)

-    # It should report nothing found (no new 'has-unread-changes' class)
+    # It should report nothing found (no new 'unviewed' class)
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data
     assert b'/test-endpoint' in res.data


@@ -90,8 +94,12 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage):

     # Add our URL to the import page
     test_url = url_for('test_endpoint', status_code=403, _external=True)
-    uuid = client.application.config.get('DATASTORE').add_watch(url=test_url)
-    client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
+    res = client.post(
+        url_for("imports.import_page"),
+        data={"urls": test_url},
+        follow_redirects=True
+    )
+    assert b"1 Imported" in res.data

     # Give the thread time to pick it up
     time.sleep(sleep_time_for_fetch_thread)
@@ -100,7 +108,7 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage):
     # Add our URL to the import page
     res = client.post(
         url_for("ui.ui_edit.edit_page", uuid="first"),
-        data={"ignore_status_codes": "y", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"},
+        data={"ignore_status_codes": "y", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"},
         follow_redirects=True
     )
     assert b"Updated watch." in res.data
@@ -116,8 +124,8 @@ def test_403_page_check_works_with_ignore_status_code(client, live_server, measure_memory_usage):
     # Give the thread time to pick it up
     wait_for_all_checks(client)

-    # It should have 'has-unread-changes' still
+    # It should have 'unviewed' still
     # Because it should be looking at only that 'sametext' id
     res = client.get(url_for("watchlist.index"))
-    assert b'has-unread-changes' in res.data
+    assert b'unviewed' in res.data

Some files were not shown because too many files have changed in this diff