Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-11-01 23:28:06 +00:00)

Compare commits: more-build ... 3248-conta (1 commit)

Commit SHA1: 88752877ad
@@ -33,6 +33,7 @@ venv/
# Test and development files
test-datastore/
tests/
docs/
*.md
!README.md
51 .github/actions/extract-memory-report/action.yml (vendored)
@@ -1,51 +0,0 @@
name: 'Extract Memory Test Report'
description: 'Extracts and displays memory test report from a container'
inputs:
  container-name:
    description: 'Name of the container to extract logs from'
    required: true
  python-version:
    description: 'Python version for artifact naming'
    required: true
  output-dir:
    description: 'Directory to store output logs'
    required: false
    default: 'output-logs'

runs:
  using: "composite"
  steps:
    - name: Create output directory
      shell: bash
      run: |
        mkdir -p ${{ inputs.output-dir }}

    - name: Dump container log
      shell: bash
      run: |
        echo "Disabled for now"
        # return
        # docker logs ${{ inputs.container-name }} > ${{ inputs.output-dir }}/${{ inputs.container-name }}-stdout-${{ inputs.python-version }}.txt 2>&1 || echo "Could not get stdout"
        # docker logs ${{ inputs.container-name }} 2> ${{ inputs.output-dir }}/${{ inputs.container-name }}-stderr-${{ inputs.python-version }}.txt || echo "Could not get stderr"

    - name: Extract and display memory test report
      shell: bash
      run: |
        echo "Disabled for now"
        # echo "Extracting test-memory.log from container..."
        # docker cp ${{ inputs.container-name }}:/app/changedetectionio/test-memory.log ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log || echo "test-memory.log not found in container"
        #
        # echo "=== Top 10 Highest Peak Memory Tests ==="
        # if [ -f ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log ]; then
        #   grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | \
        #     sed 's/.*Peak memory: //' | \
        #     paste -d'|' - <(grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log) | \
        #     sort -t'|' -k1 -nr | \
        #     cut -d'|' -f2 | \
        #     head -10
        #   echo ""
        #   echo "=== Full Memory Test Report ==="
        #   cat ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log
        # else
        #   echo "No memory log available"
        # fi
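The disabled report step above ranks tests by peak memory with a fairly involved paste/sort/cut chain. A minimal standalone sketch of the same idea, assuming a local test-memory.log whose lines contain "Peak memory: <number>":

```bash
#!/usr/bin/env bash
# Sketch only: same ranking idea as the disabled step above.
# Assumes test-memory.log lines contain "Peak memory: <number>".
LOG=test-memory.log

echo "=== Top 10 Highest Peak Memory Tests ==="
# Prefix each matching line with its numeric peak value as a sort key,
# sort descending on that key, then strip the key again.
grep "Peak memory:" "$LOG" \
  | awk -F'Peak memory: ' '{print $2 "|" $0}' \
  | sort -t'|' -k1,1 -nr \
  | cut -d'|' -f2- \
  | head -10
```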
8 .github/dependabot.yml (vendored)
@@ -4,11 +4,11 @@ updates:
    directory: /
    schedule:
      interval: "weekly"
    "caronc/apprise":
      versioning-strategy: "increase"
      schedule:
        interval: "daily"
    groups:
      all:
        patterns:
          - "*"
  - package-ecosystem: pip
    directory: /
    schedule:
      interval: "weekly"
6 .github/test/Dockerfile-alpine (vendored)
@@ -2,7 +2,7 @@
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
# Some packages wont install via pypi because they dont have a wheel available under this architecture.

FROM ghcr.io/linuxserver/baseimage-alpine:3.22
FROM ghcr.io/linuxserver/baseimage-alpine:3.21
ENV PYTHONUNBUFFERED=1

COPY requirements.txt /requirements.txt
@@ -18,19 +18,17 @@ RUN \
    libxslt-dev \
    openssl-dev \
    python3-dev \
    file \
    zip \
    zlib-dev && \
  apk add --update --no-cache \
    libjpeg \
    libxslt \
    file \
    nodejs \
    poppler-utils \
    python3 && \
  echo "**** pip3 install test of changedetection.io ****" && \
  python3 -m venv /lsiopy && \
  pip install -U pip wheel setuptools && \
  pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements.txt && \
  pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \
  apk del --purge \
    build-dependencies
8 .github/workflows/codeql-analysis.yml (vendored)
@@ -30,11 +30,11 @@ jobs:

    steps:
      - name: Checkout repository
        uses: actions/checkout@v5
        uses: actions/checkout@v4

      # Initializes the CodeQL tools for scanning.
      - name: Initialize CodeQL
        uses: github/codeql-action/init@v4
        uses: github/codeql-action/init@v3
        with:
          languages: ${{ matrix.language }}
        # If you wish to specify custom queries, you can do so here or in a config file.
@@ -45,7 +45,7 @@ jobs:
    # Autobuild attempts to build any compiled languages (C/C++, C#, or Java).
    # If this step fails, then you should remove it and run the build manually (see below)
    - name: Autobuild
      uses: github/codeql-action/autobuild@v4
      uses: github/codeql-action/autobuild@v3

    # ℹ️ Command-line programs to run using the OS shell.
    # 📚 https://git.io/JvXDl
@@ -59,4 +59,4 @@ jobs:
    #   make release

    - name: Perform CodeQL Analysis
      uses: github/codeql-action/analyze@v4
      uses: github/codeql-action/analyze@v3
32 .github/workflows/containers.yml (vendored)
@@ -39,20 +39,12 @@ jobs:
    # Or if we are in a tagged release scenario.
    if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v4
      - name: Set up Python 3.11
        uses: actions/setup-python@v6
        uses: actions/setup-python@v5
        with:
          python-version: 3.11

      - name: Cache pip packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
@@ -103,13 +95,9 @@ jobs:
          push: true
          tags: |
            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
          cache-from: |
            type=gha,scope=buildkit-master
            type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:buildcache
          cache-to: |
            type=gha,mode=max,scope=buildkit-master
            type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:buildcache,mode=max
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
          cache-from: type=gha
          cache-to: type=gha,mode=max

          # Looks like this was disabled
          # provenance: false
@@ -145,13 +133,9 @@ jobs:
          file: ./Dockerfile
          push: true
          tags: ${{ steps.meta.outputs.tags }}
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
          cache-from: |
            type=gha,scope=buildkit-release
            type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:buildcache
          cache-to: |
            type=gha,mode=max,scope=buildkit-release
            type=registry,ref=${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:buildcache,mode=max
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
          cache-from: type=gha
          cache-to: type=gha,mode=max
          # Looks like this was disabled
          # provenance: false
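The cache-from/cache-to pairs above layer a scoped GitHub Actions cache over a registry-backed cache. Outside of Actions the type=gha backend is unavailable, so a local reproduction has to use the registry side only; a rough sketch, where the cache ref is a placeholder:

```bash
# Sketch: registry-backed BuildKit cache, standing in for the
# type=gha + type=registry combination above. REGISTRY_CACHE is a placeholder.
REGISTRY_CACHE=docker.io/youruser/changedetection.io:buildcache

docker buildx build \
  --platform linux/amd64,linux/arm64 \
  --cache-from type=registry,ref="$REGISTRY_CACHE" \
  --cache-to type=registry,ref="$REGISTRY_CACHE",mode=max \
  -t changedetection.io:dev \
  --push .
```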
39 .github/workflows/pypi-release.yml (vendored)
@@ -7,9 +7,9 @@ jobs:
    runs-on: ubuntu-latest

    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v4
      - name: Set up Python
        uses: actions/setup-python@v6
        uses: actions/setup-python@v5
        with:
          python-version: "3.11"
      - name: Install pypa/build
@@ -21,60 +21,39 @@ jobs:
      - name: Build a binary wheel and a source tarball
        run: python3 -m build
      - name: Store the distribution packages
        uses: actions/upload-artifact@v5
        uses: actions/upload-artifact@v4
        with:
          name: python-package-distributions
          path: dist/


  test-pypi-package:
    name: Test the built package works basically.
    name: Test the built 📦 package works basically.
    runs-on: ubuntu-latest
    needs:
      - build
    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v6
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
      - name: Set up Python 3.11
        uses: actions/setup-python@v6
        uses: actions/setup-python@v5
        with:
          python-version: '3.11'

      - name: Test that the basic pip built package runs without error
        run: |
          set -ex
          ls -alR

          # Install the first wheel found in dist/
          WHEEL=$(find dist -type f -name "*.whl" -print -quit)
          echo Installing $WHEEL
          python3 -m pip install --upgrade pip
          python3 -m pip install "$WHEEL"
          # Find and install the first .whl file
          find dist -type f -name "*.whl" -exec pip3 install {} \; -quit
          changedetection.io -d /tmp -p 10000 &

          sleep 3
          curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
          curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null

          # --- API test ---
          # This also means that the docs/api-spec.yml was shipped and could be read
          test -f /tmp/url-watches.json
          API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json)
          echo Test API KEY is $API_KEY
          curl -X POST "http://127.0.0.1:10000/api/v1/watch" \
            -H "x-api-key: ${API_KEY}" \
            -H "Content-Type: application/json" \
            --show-error --fail \
            --retry 6 --retry-delay 1 --retry-connrefused \
            -d '{
              "url": "https://example.com",
              "title": "Example Site Monitor",
              "time_between_check": { "hours": 1 }
            }'

          killall changedetection.io


@@ -93,7 +72,7 @@ jobs:

    steps:
      - name: Download all the dists
        uses: actions/download-artifact@v6
        uses: actions/download-artifact@v4
        with:
          name: python-package-distributions
          path: dist/
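The API_KEY extraction above leans on jq's recursive descent: `..` visits every value in the document, `.api_access_token?` picks that field wherever it exists without erroring on non-objects, and `// empty` drops the nulls. A small demo against an inline document — the nesting shown here is an assumption for illustration, not the real datastore layout:

```bash
# Prints: abc123
echo '{"settings": {"application": {"api_access_token": "abc123"}}}' \
  | jq -r '.. | .api_access_token? // empty'
```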
14 .github/workflows/test-container-build.yml (vendored)
@@ -38,26 +38,20 @@ jobs:
            dockerfile: ./Dockerfile
          - platform: linux/arm/v8
            dockerfile: ./Dockerfile
          - platform: linux/arm64/v8
            dockerfile: ./Dockerfile
          # Alpine Dockerfile platforms (musl via alpine check)
          - platform: linux/amd64
            dockerfile: ./.github/test/Dockerfile-alpine
          - platform: linux/arm64
            dockerfile: ./.github/test/Dockerfile-alpine
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v4
      - name: Set up Python 3.11
        uses: actions/setup-python@v6
        uses: actions/setup-python@v5
        with:
          python-version: 3.11

      - name: Cache pip packages
        uses: actions/cache@v4
        with:
          path: ~/.cache/pip
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
          restore-keys: |
            ${{ runner.os }}-pip-

      # Just test that the build works, some libraries won't compile on ARM/rPi etc
      - name: Set up QEMU
        uses: docker/setup-qemu-action@v3
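setup-qemu-action registers binfmt handlers so the runner can build non-native images under emulation. A rough local equivalent, assuming Docker with Buildx is installed — the binfmt helper image named here is the commonly used one, not taken from this workflow:

```bash
# Register QEMU binfmt handlers, then cross-build the ARM image under emulation
docker run --privileged --rm tonistiigi/binfmt --install arm64
docker buildx build --platform linux/arm64 -f ./Dockerfile -t changedetectionio:arm64-test .
```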
17 .github/workflows/test-only.yml (vendored)
@@ -7,7 +7,7 @@ jobs:
  lint-code:
    runs-on: ubuntu-latest
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v4
      - name: Lint with Ruff
        run: |
          pip install ruff
@@ -15,29 +15,21 @@ jobs:
          ruff check . --select E9,F63,F7,F82
          # Complete check with errors treated as warnings
          ruff check . --exit-zero
      - name: Validate OpenAPI spec
        run: |
          pip install openapi-spec-validator
          python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))"

  test-application-3-10:
    # Only run on push to master (including PR merges)
    if: github.event_name == 'push' && github.ref == 'refs/heads/master'
    needs: lint-code
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
      python-version: '3.10'


  test-application-3-11:
    # Always run
    needs: lint-code
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
      python-version: '3.11'

  test-application-3-12:
    # Only run on push to master (including PR merges)
    if: github.event_name == 'push' && github.ref == 'refs/heads/master'
    needs: lint-code
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
@@ -45,11 +37,8 @@ jobs:
      skip-pypuppeteer: true

  test-application-3-13:
    # Only run on push to master (including PR merges)
    if: github.event_name == 'push' && github.ref == 'refs/heads/master'
    needs: lint-code
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
    with:
      python-version: '3.13'
      skip-pypuppeteer: true

      skip-pypuppeteer: true
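The lint job runs Ruff in two passes: a hard gate on the rule classes that indicate outright broken code (E9, F63, F7, F82), then an advisory full run that never fails. The same two commands work locally:

```bash
pip install ruff
# Gate: fail only on syntax errors and undefined-name style breakage
ruff check . --select E9,F63,F7,F82
# Advisory: full rule set, errors treated as warnings (always exits 0)
ruff check . --exit-zero
```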
455 .github/workflows/test-stack-reusable-workflow.yml (vendored)
@@ -15,298 +15,138 @@ on:
        default: false

jobs:
  # Build the Docker image once and share it with all test jobs
  build:
  test-application:
    runs-on: ubuntu-latest
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v5
      - uses: actions/checkout@v4

      # Mainly just for link/flake8
      - name: Set up Python ${{ env.PYTHON_VERSION }}
        uses: actions/setup-python@v6
        uses: actions/setup-python@v5
        with:
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Set up Docker Buildx
        uses: docker/setup-buildx-action@v3

      - name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
        uses: docker/build-push-action@v6
        with:
          context: .
          build-args: |
            PYTHON_VERSION=${{ env.PYTHON_VERSION }}
            LOGGER_LEVEL=TRACE
          tags: test-changedetectionio
          load: true
          cache-from: type=gha
          cache-to: type=gha,mode=max

      - name: Verify pip packages installed
        run: |
          docker run test-changedetectionio bash -c 'pip list'
          echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
          # Build a changedetection.io container and start testing inside
          docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
          # Debug info
          docker run test-changedetectionio bash -c 'pip list'

      - name: We should be Python ${{ env.PYTHON_VERSION }} ...
        run: |
          docker run test-changedetectionio bash -c 'python3 --version'

      - name: Spin up ancillary testable services
        run: |
          docker run test-changedetectionio bash -c 'python3 --version'

          docker network create changedet-network

          # Selenium
          docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4

          # SocketPuppetBrowser + Extra for custom browser test
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest

      - name: Save Docker image
      - name: Spin up ancillary SMTP+Echo message test server
        run: |
          docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
          # Debug SMTP server/echo message back server
          docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
          docker ps

      - name: Upload Docker image artifact
        uses: actions/upload-artifact@v5
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp/test-changedetectionio.tar
          retention-days: 1
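The build job hands the image to the downstream jobs through docker save, an artifact upload, and docker load on the other side. The round trip alone can be sketched as:

```bash
# One side: serialise the built image to a tarball (what the upload step ships)
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
# Other side: restore it into the local image store after the artifact download
docker load -i /tmp/test-changedetectionio.tar
docker image ls test-changedetectionio
```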
  # Unit tests (lightweight, no ancillary services needed)
  unit-tests:
    runs-on: ubuntu-latest
    needs: build
    timeout-minutes: 10
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v5

      - name: Download Docker image artifact
        uses: actions/download-artifact@v5
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp

      - name: Load Docker image
      - name: Show docker container state and other debug info
        run: |
          docker load -i /tmp/test-changedetectionio.tar
          set -x
          echo "Running processes in docker..."
          docker ps

      - name: Run Unit Tests
        run: |
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
          # Unit tests
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
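These unit tests have no service dependencies, so they should also run directly against a checkout without the container, using the same module paths as above — a sketch, assuming a virtualenv with requirements.txt installed:

```bash
# From the repository root; unittest accepts multiple module names in one run
python3 -m unittest changedetectionio.tests.unit.test_notification_diff \
                    changedetectionio.tests.unit.test_watch_model \
                    changedetectionio.tests.unit.test_jinja2_security \
                    changedetectionio.tests.unit.test_semver
```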
  # Basic pytest tests with ancillary services
  basic-tests:
    runs-on: ubuntu-latest
    needs: build
    timeout-minutes: 25
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v5

      - name: Download Docker image artifact
        uses: actions/download-artifact@v5
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp

      - name: Load Docker image
      - name: Test built container with Pytest (generally as requests/plaintext fetching)
        run: |
          docker load -i /tmp/test-changedetectionio.tar
          # All tests
          echo "run test with pytest"
          # The default pytest logger_level is TRACE
          # To change logger_level for pytest(test/conftest.py),
          # append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG'
          docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'

      - name: Test built container with Pytest
      # PLAYWRIGHT/NODE-> CDP
      - name: Playwright and SocketPuppetBrowser - Specific tests in built container
        run: |
          docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
          docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
          # Playwright via Sockpuppetbrowser fetch
          # tests/visualselector/test_fetch_data.py will do browser steps
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'

      - name: Extract memory report and logs
        if: always()
        uses: ./.github/actions/extract-memory-report
        with:
          container-name: test-cdio-basic-tests
          python-version: ${{ env.PYTHON_VERSION }}

      - name: Store test artifacts
        if: always()
        uses: actions/upload-artifact@v5
        with:
          name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
          path: output-logs
      - name: Playwright and SocketPuppetBrowser - Headers and requests
        run: |
          # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
          docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
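As the comment above notes, the TRACE default from the test conftest can be overridden per run by passing the environment variable through docker:

```bash
# Quieter one-off run of the basic test suite (DEBUG instead of TRACE)
docker run --rm -e LOGGER_LEVEL=DEBUG --network changedet-network \
  test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
```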
  # Playwright tests
  playwright-tests:
    runs-on: ubuntu-latest
    needs: build
    timeout-minutes: 10
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v5
      - name: Playwright and SocketPuppetBrowser - Restock detection
        run: |
          # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
          docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'

      - name: Download Docker image artifact
        uses: actions/download-artifact@v5
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp

      - name: Load Docker image
      # STRAIGHT TO CDP
      - name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container
        if: ${{ inputs.skip-pypuppeteer == false }}
        run: |
          docker load -i /tmp/test-changedetectionio.tar
          # Playwright via Sockpuppetbrowser fetch
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'

      - name: Spin up ancillary services
      - name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks
        if: ${{ inputs.skip-pypuppeteer == false }}
        run: |
          docker network create changedet-network
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
          # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
          docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'

      - name: Playwright - Specific tests in built container
        run: |
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'

      - name: Playwright - Headers and requests
        run: |
          docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'

      - name: Playwright - Restock detection
        run: |
          docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
  # Pyppeteer tests
  pyppeteer-tests:
    runs-on: ubuntu-latest
    needs: build
    if: ${{ inputs.skip-pypuppeteer == false }}
    timeout-minutes: 10
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v5

      - name: Download Docker image artifact
        uses: actions/download-artifact@v5
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp

      - name: Load Docker image
        run: |
          docker load -i /tmp/test-changedetectionio.tar

      - name: Spin up ancillary services
        run: |
          docker network create changedet-network
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest

      - name: Pyppeteer - Specific tests in built container
        run: |
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'

      - name: Pyppeteer - Headers and requests checks
        run: |
          docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'

      - name: Pyppeteer - Restock detection
        run: |
          docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
  # Selenium tests
  selenium-tests:
    runs-on: ubuntu-latest
    needs: build
    timeout-minutes: 10
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v5

      - name: Download Docker image artifact
        uses: actions/download-artifact@v5
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp

      - name: Load Docker image
        run: |
          docker load -i /tmp/test-changedetectionio.tar

      - name: Spin up ancillary services
        run: |
          docker network create changedet-network
          docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
          sleep 3

      - name: Specific tests for headers and requests checks with Selenium
        run: |

          docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
      - name: Pyppeteer and SocketPuppetBrowser - Restock detection
        if: ${{ inputs.skip-pypuppeteer == false }}
        run: |
          # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
          docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'

      # SELENIUM
      - name: Specific tests in built container for Selenium
        run: |
          docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
          # Selenium fetch
          docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
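The fixed `sleep 3` above is the only wait for the Selenium container; polling the standard Selenium Grid status endpoint is a more deterministic alternative (a sketch, not taken from this workflow):

```bash
# Poll the Grid status endpoint until it reports ready
until curl -sf http://localhost:4444/wd/hub/status | jq -e '.value.ready' >/dev/null; do
  sleep 1
done
```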
  # SMTP tests
  smtp-tests:
    runs-on: ubuntu-latest
    needs: build
    timeout-minutes: 10
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v5

      - name: Download Docker image artifact
        uses: actions/download-artifact@v5
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp

      - name: Load Docker image
      - name: Specific tests in built container for headers and requests checks with Selenium
        run: |
          docker load -i /tmp/test-changedetectionio.tar

      - name: Spin up SMTP test server
        run: |
          docker network create changedet-network
          docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
          docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'

      # OTHER STUFF
      - name: Test SMTP notification mime types
        run: |
          # SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above
          # "mailserver" hostname defined above
          docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
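The throwaway mail server is just an aiosmtpd-based script from the test tree, so it can also be started outside the container when debugging notification content locally (ports per the docker run line above):

```bash
# Listens on 11025 (SMTP) and 11080 (HTTP echo-back), matching the workflow's port mappings
pip3 install aiosmtpd
python changedetectionio/tests/smtp/smtp-test-server.py
```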
  # Proxy tests
  proxy-tests:
    runs-on: ubuntu-latest
    needs: build
    timeout-minutes: 10
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v5

      - name: Download Docker image artifact
        uses: actions/download-artifact@v5
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp

      - name: Load Docker image
        run: |
          docker load -i /tmp/test-changedetectionio.tar

      - name: Spin up services
        run: |
          docker network create changedet-network
          docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest

      - name: Test proxy Squid style interaction
      # @todo Add a test via playwright/puppeteer
      # squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py
      - name: Test proxy squid style interaction
        run: |
          cd changedetectionio
          ./run_proxy_tests.sh
          docker ps
          cd ..

      - name: Test proxy SOCKS5 style interaction
@@ -315,65 +155,28 @@ jobs:
          ./run_socks_proxy_tests.sh
          cd ..
  # Custom browser URL tests
  custom-browser-tests:
    runs-on: ubuntu-latest
    needs: build
    timeout-minutes: 10
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v5

      - name: Download Docker image artifact
        uses: actions/download-artifact@v5
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp

      - name: Load Docker image
        run: |
          docker load -i /tmp/test-changedetectionio.tar

      - name: Spin up ancillary services
        run: |
          docker network create changedet-network
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest

      - name: Test custom browser URL
        run: |
          cd changedetectionio
          ./run_custom_browser_url_tests.sh
          cd ..
  # Container startup tests
  container-tests:
    runs-on: ubuntu-latest
    needs: build
    timeout-minutes: 10
    env:
      PYTHON_VERSION: ${{ inputs.python-version }}
    steps:
      - uses: actions/checkout@v5

      - name: Download Docker image artifact
        uses: actions/download-artifact@v5
        with:
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
          path: /tmp

      - name: Load Docker image
      - name: Test changedetection.io container starts+runs basically without error
        run: |
          docker load -i /tmp/test-changedetectionio.tar

      - name: Test container starts+runs basically without error
        run: |
          docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio
          docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio
          sleep 3
          curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid
          curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
          # Should return 0 (no error) when grep finds it
          curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid

          # and IPv6
          curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid

          # Check whether TRACE log is enabled.
          # Also, check whether TRACE came from STDOUT
          docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
          # Check whether DEBUG is came from STDOUT
          docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1

          docker kill test-changedetectionio

      - name: Test HTTPS SSL mode
@@ -381,66 +184,78 @@ jobs:
          openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost"
          docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio
          sleep 3
          # Should return 0 (no error) when grep finds it
          # -k because its self-signed
          curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid

          docker kill test-changedetectionio-ssl

      - name: Test IPv6 Mode
        run: |
          # IPv6 - :: bind to all interfaces inside container (like 0.0.0.0), ::1 would be localhost only
          docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio
          sleep 3
          # Should return 0 (no error) when grep finds it on localhost
          curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid
          docker kill test-changedetectionio-ipv6
|
||||
signal-tests:
|
||||
runs-on: ubuntu-latest
|
||||
needs: build
|
||||
timeout-minutes: 10
|
||||
env:
|
||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||
steps:
|
||||
- uses: actions/checkout@v5
|
||||
|
||||
- name: Download Docker image artifact
|
||||
uses: actions/download-artifact@v5
|
||||
with:
|
||||
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
|
||||
path: /tmp
|
||||
|
||||
- name: Load Docker image
|
||||
run: |
|
||||
docker load -i /tmp/test-changedetectionio.tar
|
||||
|
||||
- name: Test SIGTERM and SIGINT signal shutdown
|
||||
- name: Test changedetection.io SIGTERM and SIGINT signal shutdown
|
||||
run: |
|
||||
|
||||
echo SIGINT Shutdown request test
|
||||
docker run --name sig-test -d test-changedetectionio
|
||||
sleep 3
|
||||
echo ">>> Sending SIGINT to sig-test container"
|
||||
docker kill --signal=SIGINT sig-test
|
||||
sleep 3
|
||||
# invert the check (it should be not 0/not running)
|
||||
docker ps
|
||||
# check signal catch(STDERR) log. Because of
|
||||
# changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
|
||||
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1
|
||||
test -z "`docker ps|grep sig-test`"
|
||||
if [ $? -ne 0 ]; then
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "Looks like container was running when it shouldnt be"
|
||||
docker ps
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# @todo - scan the container log to see the right "graceful shutdown" text exists
|
||||
docker rm sig-test
|
||||
|
||||
|
||||
echo SIGTERM Shutdown request test
|
||||
docker run --name sig-test -d test-changedetectionio
|
||||
sleep 3
|
||||
echo ">>> Sending SIGTERM to sig-test container"
|
||||
docker kill --signal=SIGTERM sig-test
|
||||
sleep 3
|
||||
# invert the check (it should be not 0/not running)
|
||||
docker ps
|
||||
# check signal catch(STDERR) log. Because of
|
||||
# changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
|
||||
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1
|
||||
test -z "`docker ps|grep sig-test`"
|
||||
if [ $? -ne 0 ]; then
|
||||
if [ $? -ne 0 ]
|
||||
then
|
||||
echo "Looks like container was running when it shouldnt be"
|
||||
docker ps
|
||||
exit 1
|
||||
fi
|
||||
|
||||
# @todo - scan the container log to see the right "graceful shutdown" text exists
|
||||
docker rm sig-test
|
||||
|
||||
- name: Dump container log
|
||||
if: always()
|
||||
run: |
|
||||
mkdir output-logs
|
||||
docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
|
||||
docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
|
||||
|
||||
- name: Store everything including test-datastore
|
||||
if: always()
|
||||
uses: actions/upload-artifact@v4
|
||||
with:
|
||||
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
|
||||
path: .
|
||||
|
||||
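The shutdown check above couples `test -z` with a separate `if [ $? -ne 0 ]` that inspects its exit status. A local equivalent that reads a little more directly (a sketch, reusing the same log strings as the workflow):

```bash
#!/usr/bin/env bash
# Sketch: local version of the SIGINT shutdown check above
docker run --name sig-test -d test-changedetectionio
sleep 3
docker kill --signal=SIGINT sig-test
sleep 3
# The logger writes to STDERR (per the comment above), so merge streams before grepping
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1
# The container must have exited by now
if docker ps | grep -q sig-test; then
  echo "Container still running after SIGINT"
  exit 1
fi
docker rm sig-test
```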
55 Dockerfile
@@ -4,11 +4,8 @@ ARG PYTHON_VERSION=3.11

FROM python:${PYTHON_VERSION}-slim-bookworm AS builder

# Declare platform variables for cache mount IDs
ARG TARGETARCH
ARG TARGETVARIANT

# See `cryptography` pin comment in requirements.txt
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1

RUN apt-get update && apt-get install -y --no-install-recommends \
    g++ \
@@ -19,8 +16,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    libssl-dev \
    libxslt-dev \
    make \
    patch \
    pkg-config \
    zlib1g-dev

RUN mkdir /install
@@ -30,35 +25,23 @@ COPY requirements.txt /requirements.txt

# Use cache mounts and multiple wheel sources for faster ARM builds
ENV PIP_CACHE_DIR=/tmp/pip-cache
# Help Rust find OpenSSL for cryptography package compilation on ARM
ENV PKG_CONFIG_PATH="/usr/lib/pkgconfig:/usr/lib/arm-linux-gnueabihf/pkgconfig:/usr/lib/aarch64-linux-gnu/pkgconfig"
ENV PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1
ENV OPENSSL_DIR="/usr"
ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf"
ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl"
# Additional environment variables for cryptography Rust build
ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/tmp/pip-cache \
    pip install \
      --prefer-binary \
      --extra-index-url https://www.piwheels.org/simple \
      --extra-index-url https://pypi.anaconda.org/ARM-software/simple \
      --cache-dir=/tmp/pip-cache \
      --target=/dependencies \
      -r /requirements.txt

RUN --mount=type=cache,target=/tmp/pip-cache \
    pip install \
      --extra-index-url https://www.piwheels.org/simple \
      --extra-index-url https://pypi.anaconda.org/ARM-software/simple \
      --cache-dir=/tmp/pip-cache \
      --target=/dependencies \
      -r /requirements.txt

# Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
RUN --mount=type=cache,id=pip-$TARGETARCH$TARGETVARIANT,sharing=locked,target=/tmp/pip-cache \
    pip install \
      --prefer-binary \
      --cache-dir=/tmp/pip-cache \
      --target=/dependencies \
      playwright~=1.48.0 \
      || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."

RUN --mount=type=cache,target=/tmp/pip-cache \
    pip install \
      --cache-dir=/tmp/pip-cache \
      --target=/dependencies \
      playwright~=1.48.0 \
      || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."

# Final image stage
FROM python:${PYTHON_VERSION}-slim-bookworm
@@ -70,8 +53,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
    locales \
    # For pdftohtml
    poppler-utils \
    # favicon type detection and other uses
    file \
    zlib1g \
    && apt-get clean && rm -rf /var/lib/apt/lists/*

@@ -92,11 +73,6 @@ EXPOSE 5000

# The actual flask app module
COPY changedetectionio /app/changedetectionio

# Also for OpenAPI validation wrapper - needs the YML
RUN [ ! -d "/app/docs" ] && mkdir /app/docs
COPY docs/api-spec.yaml /app/docs/api-spec.yaml

# Starting wrapper
COPY changedetection.py /app/changedetection.py

@@ -105,9 +81,6 @@ COPY changedetection.py /app/changedetection.py
ARG LOGGER_LEVEL=''
ENV LOGGER_LEVEL="$LOGGER_LEVEL"

# Default
ENV LC_ALL=en_US.UTF-8

WORKDIR /app
CMD ["python", "./changedetection.py", "-d", "/datastore"]
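The RUN --mount=type=cache lines in the builder stage need BuildKit; keying the cache id on TARGETARCH/TARGETVARIANT keeps per-architecture pip caches from clobbering each other during multi-platform builds. A hypothetical invocation that exercises one of those cache IDs:

```bash
# buildx always uses BuildKit, which the cache mounts above require
docker buildx build \
  --build-arg PYTHON_VERSION=3.11 \
  --platform linux/arm/v7 \
  -t changedetectionio:armv7-test \
  --load .
```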
2 LICENSE
@@ -186,7 +186,7 @@
      same "printed page" as the copyright notice for easier
      identification within third-party archives.

   Copyright 2025 Web Technologies s.r.o.
   Copyright [yyyy] [name of copyright owner]

   Licensed under the Apache License, Version 2.0 (the "License");
   you may not use this file except in compliance with the License.
@@ -1,9 +1,7 @@
recursive-include changedetectionio/api *
include docs/api-spec.yaml
recursive-include changedetectionio/blueprint *
recursive-include changedetectionio/conditions *
recursive-include changedetectionio/content_fetchers *
recursive-include changedetectionio/jinja2_custom *
recursive-include changedetectionio/conditions *
recursive-include changedetectionio/model *
recursive-include changedetectionio/notification *
recursive-include changedetectionio/processors *
@@ -11,7 +9,6 @@ recursive-include changedetectionio/realtime *
recursive-include changedetectionio/static *
recursive-include changedetectionio/templates *
recursive-include changedetectionio/tests *
recursive-include changedetectionio/widgets *
prune changedetectionio/static/package-lock.json
prune changedetectionio/static/styles/node_modules
prune changedetectionio/static/styles/package-lock.json
@@ -1,21 +1,11 @@
# Monitor website changes
## Web Site Change Detection, Monitoring and Notification.

Detect WebPage Changes Automatically — Monitor Web Page Changes in Real Time

Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more.

Detect web page content changes and get instant alerts.


[Changedetection.io is the best tool to monitor web-pages for changes](https://changedetection.io) Track website content changes and receive notifications via Discord, Email, Slack, Telegram and 90+ more

Ideal for monitoring price changes, content edits, conditional changes and more.
Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more

[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes" title="Self-hosted web page change monitoring, list of websites with changes" />](https://changedetection.io)


[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)

[**Don't have time? Let us host it for you! try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)


### Target specific parts of the webpage using the Visual Selector tool.
20 README.md
@@ -1,13 +1,11 @@
# Detect Website Changes Automatically — Monitor Web Page Changes in Real Time
## Web Site Change Detection, Restock monitoring and notifications.

Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more.
**_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._**

**Detect web page content changes and get instant alerts.**

Ideal for monitoring price changes, content edits, conditional changes and more.
_Live your data-life pro-actively._


[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Web site page change monitoring" title="Web site page change monitoring" />](https://changedetection.io?src=github)
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web site page change monitoring" title="Self-hosted web site page change monitoring" />](https://changedetection.io?src=github)

[![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)

@@ -15,7 +13,6 @@ Ideal for monitoring price changes, content edits, conditional changes and more.

[**Get started with website page change monitoring straight away. Don't have time? Try our $8.99/month subscription, use our proxies and support!**](https://changedetection.io) , _half the price of other website change monitoring services!_

- Chrome browser included.
- Nothing to install, access via browser login after signup.
- Super fast, no registration needed setup.
@@ -102,7 +99,9 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
- Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
- Send a screenshot with the notification when a change is detected in the web page

We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $150 using our signup link.
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.

[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residental, ISP, Rotating and many other proxy types to suit your project.

Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/

@@ -280,10 +279,7 @@ Excel import is recommended - that way you can better organise tags/groups of we

## API Support

Full REST API for programmatic management of watches, tags, notifications and more.

- **[Interactive API Documentation](https://changedetection.io/docs/api_v1/index.html)** - Complete API reference with live testing
- **[OpenAPI Specification](docs/api-spec.yaml)** - Generate SDKs for any programming language
Supports managing the website watch list [via our API](https://changedetection.io/docs/api_v1/index.html)

## Support us
@@ -2,7 +2,7 @@

# Read more https://github.com/dgtlmoon/changedetection.io/wiki

__version__ = '0.50.35'
__version__ = '0.50.2'

from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError
@@ -35,22 +35,13 @@ def sigshutdown_handler(_signo, _stack_frame):
    app.config.exit.set()
    datastore.stop_thread = True

    # Shutdown workers and queues immediately
    # Shutdown workers immediately
    try:
        from changedetectionio import worker_handler
        worker_handler.shutdown_workers()
    except Exception as e:
        logger.error(f"Error shutting down workers: {str(e)}")

    # Close janus queues properly
    try:
        from changedetectionio.flask_app import update_q, notification_q
        update_q.close()
        notification_q.close()
        logger.debug("Janus queues closed successfully")
    except Exception as e:
        logger.critical(f"CRITICAL: Failed to close janus queues: {e}")

    # Shutdown socketio server fast
    from changedetectionio.flask_app import socketio_server
    if socketio_server and hasattr(socketio_server, 'shutdown'):
@@ -1,22 +1,9 @@
import os
from changedetectionio.strtobool import strtobool
from flask_restful import abort, Resource
from flask import request
from functools import wraps
from . import auth, validate_openapi_request
from ..validate_url import is_safe_valid_url


def default_content_type(content_type='text/plain'):
    """Decorator to set a default Content-Type header if none is provided."""
    def decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            if not request.content_type:
                # Set default content type in the request environment
                request.environ['CONTENT_TYPE'] = content_type
            return f(*args, **kwargs)
        return wrapper
    return decorator
import validators
from . import auth


class Import(Resource):
@@ -25,10 +12,17 @@ class Import(Resource):
        self.datastore = kwargs['datastore']

    @auth.check_token
    @default_content_type('text/plain') #3547 #3542
    @validate_openapi_request('importWatches')
    def post(self):
        """Import a list of watched URLs."""
        """
        @api {post} /api/v1/import Import a list of watched URLs
        @apiDescription Accepts a line-feed separated list of URLs to import, additionally with ?tag_uuids=(tag id), ?tag=(name), ?proxy={key}, ?dedupe=true (default true) one URL per line.
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/import --data-binary @list-of-sites.txt -H"x-api-key:8a111a21bc2f8f1dd9b9353bbd46049a"
        @apiName Import
        @apiGroup Watch
        @apiSuccess (200) {List} OK List of watch UUIDs added
        @apiSuccess (500) {String} ERR Some other error
        """

        extras = {}

@@ -49,13 +43,14 @@

        urls = request.get_data().decode('utf8').splitlines()
        added = []
        allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
        for url in urls:
            url = url.strip()
            if not len(url):
                continue

            # If hosts that only contain alphanumerics are allowed ("localhost" for example)
            if not is_safe_valid_url(url):
            if not validators.url(url, simple_host=allow_simplehost):
                return f"Invalid or unsupported URL - {url}", 400

            if dedupe and self.datastore.url_exists(url):

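The `default_content_type` helper shown in this hunk is a small, reusable Flask pattern: patch the WSGI environ before the view runs so `request.get_data()` behaves sensibly when a client omits the Content-Type header. A minimal sketch of it in isolation (the app and `/import` route here are illustrative, not the project's actual wiring):

```python
# Hedged sketch: the decorator body is taken from the diff above; the
# surrounding Flask app is invented purely to demonstrate the behaviour.
from functools import wraps

from flask import Flask, request

app = Flask(__name__)

def default_content_type(content_type='text/plain'):
    """Set a fallback Content-Type when the client sends none."""
    def decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            if not request.content_type:
                # WSGI reads headers from the environ, so patch it there
                request.environ['CONTENT_TYPE'] = content_type
            return f(*args, **kwargs)
        return wrapper
    return decorator

@app.route('/import', methods=['POST'])
@default_content_type('text/plain')
def import_urls():
    # request.get_data() now behaves as if the client declared text/plain
    urls = request.get_data().decode('utf8').splitlines()
    return {'count': len([u for u in urls if u.strip()])}

if __name__ == '__main__':
    with app.test_client() as client:
        # No Content-Type header supplied - the decorator fills it in
        print(client.post('/import', data='https://example.com\n').get_json())
```
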
@@ -1,7 +1,9 @@
from flask_expects_json import expects_json
from flask_restful import Resource, abort
from flask_restful import Resource
from . import auth
from flask_restful import abort, Resource
from flask import request
from . import auth, validate_openapi_request
from . import auth
from . import schema_create_notification_urls, schema_delete_notification_urls

class Notifications(Resource):
@@ -10,9 +12,19 @@ class Notifications(Resource):
        self.datastore = kwargs['datastore']

    @auth.check_token
    @validate_openapi_request('getNotifications')
    def get(self):
        """Return Notification URL List."""
        """
        @api {get} /api/v1/notifications Return Notification URL List
        @apiDescription Return the Notification URL List from the configuration
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            HTTP/1.0 200
            {
                'notification_urls': ["notification-urls-list"]
            }
        @apiName Get
        @apiGroup Notifications
        """

        notification_urls = self.datastore.data.get('settings', {}).get('application', {}).get('notification_urls', [])

@@ -21,10 +33,18 @@
        }, 200

    @auth.check_token
    @validate_openapi_request('addNotifications')
    @expects_json(schema_create_notification_urls)
    def post(self):
        """Create Notification URLs."""
        """
        @api {post} /api/v1/notifications Create Notification URLs
        @apiDescription Add one or more notification URLs from the configuration
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/notifications/batch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
        @apiName CreateBatch
        @apiGroup Notifications
        @apiSuccess (201) {Object[]} notification_urls List of added notification URLs
        @apiError (400) {String} Invalid input
        """

        json_data = request.get_json()
        notification_urls = json_data.get("notification_urls", [])
@@ -49,10 +69,18 @@
        return {'notification_urls': added_urls}, 201

    @auth.check_token
    @validate_openapi_request('replaceNotifications')
    @expects_json(schema_create_notification_urls)
    def put(self):
        """Replace Notification URLs."""
        """
        @api {put} /api/v1/notifications Replace Notification URLs
        @apiDescription Replace all notification URLs with the provided list (can be empty)
        @apiExample {curl} Example usage:
            curl -X PUT http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
        @apiName Replace
        @apiGroup Notifications
        @apiSuccess (200) {Object[]} notification_urls List of current notification URLs
        @apiError (400) {String} Invalid input
        """
        json_data = request.get_json()
        notification_urls = json_data.get("notification_urls", [])

@@ -72,10 +100,19 @@
        return {'notification_urls': clean_urls}, 200

    @auth.check_token
    @validate_openapi_request('deleteNotifications')
    @expects_json(schema_delete_notification_urls)
    def delete(self):
        """Delete Notification URLs."""
        """
        @api {delete} /api/v1/notifications Delete Notification URLs
        @apiDescription Deletes one or more notification URLs from the configuration
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/notifications -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
        @apiParam {String[]} notification_urls The notification URLs to delete.
        @apiName Delete
        @apiGroup Notifications
        @apiSuccess (204) {String} OK Deleted
        @apiError (400) {String} No matching notification URLs found.
        """

        json_data = request.get_json()
        urls_to_delete = json_data.get("notification_urls", [])

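For reference, the notification endpoints above can be driven from Python just as easily as with curl. A hedged client sketch, using the same placeholder host and API key as the docstring examples:

```python
# Sketch only: POST adds URLs (201), PUT replaces the whole list (200),
# DELETE removes entries (204). The key below is the docstring placeholder.
import requests

BASE = 'http://localhost:5000/api/v1'
HEADERS = {'x-api-key': '813031b16330fe25e3780cf0325daa45'}

payload = {'notification_urls': ['mailto://user:pass@example.com']}
r = requests.post(f'{BASE}/notifications', json=payload, headers=HEADERS)
print(r.status_code, r.json())   # expect 201 and the list of added URLs

# Replace everything (an empty list is allowed, per the @apiDescription)
requests.put(f'{BASE}/notifications', json={'notification_urls': []}, headers=HEADERS)
```
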
@@ -1,6 +1,6 @@
from flask_restful import Resource, abort
from flask import request
from . import auth, validate_openapi_request
from . import auth

class Search(Resource):
    def __init__(self, **kwargs):
@@ -8,9 +8,21 @@ class Search(Resource):
        self.datastore = kwargs['datastore']

    @auth.check_token
    @validate_openapi_request('searchWatches')
    def get(self):
        """Search for watches by URL or title text."""
        """
        @api {get} /api/v1/search Search for watches
        @apiDescription Search watches by URL or title text
        @apiExample {curl} Example usage:
            curl "http://localhost:5000/api/v1/search?q=https://example.com/page1" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            curl "http://localhost:5000/api/v1/search?q=https://example.com/page1?tag=Favourites" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            curl "http://localhost:5000/api/v1/search?q=https://example.com?partial=true" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiName Search
        @apiGroup Watch Management
        @apiQuery {String} q Search query to match against watch URLs and titles
        @apiQuery {String} [tag] Optional name of tag to limit results (name not UUID)
        @apiQuery {String} [partial] Allow partial matching of URL query
        @apiSuccess (200) {Object} JSON Object containing matched watches
        """
        query = request.args.get('q', '').strip()
        tag_limit = request.args.get('tag', '').strip()
        from changedetectionio.strtobool import strtobool

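A small client-side sketch of the search endpoint documented above; the `q`, `tag` and `partial` parameters come straight from the `@apiQuery` lines (letting `requests` build the query string also avoids the URL-in-URL quoting awkwardness visible in the curl examples):

```python
# Hedged sketch - host and API key are the placeholder values from the docstring.
import requests

r = requests.get('http://localhost:5000/api/v1/search',
                 params={'q': 'https://example.com/page1',
                         'tag': 'Favourites',      # optional: tag name, not UUID
                         'partial': 'true'},       # optional: allow partial URL match
                 headers={'x-api-key': '813031b16330fe25e3780cf0325daa45'})
print(r.json())  # JSON object containing the matched watches
```
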
@@ -1,5 +1,5 @@
from flask_restful import Resource
from . import auth, validate_openapi_request
from . import auth


class SystemInfo(Resource):
@@ -9,9 +9,23 @@ class SystemInfo(Resource):
        self.update_q = kwargs['update_q']

    @auth.check_token
    @validate_openapi_request('getSystemInfo')
    def get(self):
        """Return system info."""
        """
        @api {get} /api/v1/systeminfo Return system info
        @apiDescription Return some info about the current system state
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            HTTP/1.0 200
            {
                'queue_size': 10 ,
                'overdue_watches': ["watch-uuid-list"],
                'uptime': 38344.55,
                'watch_count': 800,
                'version': "0.40.1"
            }
        @apiName Get Info
        @apiGroup System Information
        """
        import time
        overdue_watches = []


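The `/systeminfo` payload shown above lends itself to simple health polling. A sketch (the thresholds are arbitrary examples, not project recommendations):

```python
# Hedged sketch: field names come from the example response in the docstring.
import requests

info = requests.get('http://localhost:5000/api/v1/systeminfo',
                    headers={'x-api-key': '813031b16330fe25e3780cf0325daa45'}).json()

# Flag an instance that is falling behind on its recheck schedule
if info['queue_size'] > 100 or len(info['overdue_watches']) > 50:
    print(f"changedetection.io falling behind after {info['uptime']:.0f}s uptime: {info}")
```
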
@@ -1,46 +1,39 @@
from changedetectionio import queuedWatchMetaData
from changedetectionio import worker_handler
from flask_expects_json import expects_json
from flask_restful import abort, Resource

from flask import request
from . import auth

# Import schemas from __init__.py
from . import schema_tag, schema_create_tag, schema_update_tag, validate_openapi_request
from . import schema_tag, schema_create_tag, schema_update_tag


class Tag(Resource):
    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']
        self.update_q = kwargs['update_q']

    # Get information about a single tag
    # curl http://localhost:5000/api/v1/tag/<string:uuid>
    @auth.check_token
    @validate_openapi_request('getTag')
    def get(self, uuid):
        """Get data for a single tag/group, toggle notification muting, or recheck all."""
        """
        @api {get} /api/v1/tag/:uuid Single tag - get data or toggle notification muting.
        @apiDescription Retrieve tag information and set notification_muted status
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            curl "http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=muted" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiName Tag
        @apiGroup Tag
        @apiParam {uuid} uuid Tag unique ID.
        @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state
        @apiSuccess (200) {String} OK When muted operation OR full JSON object of the tag
        @apiSuccess (200) {JSON} TagJSON JSON Full JSON object of the tag
        """
        from copy import deepcopy
        tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
        if not tag:
            abort(404, message=f'No tag exists with the UUID of {uuid}')

        if request.args.get('recheck'):
            # Recheck all, including muted
            # Get most overdue first
            i=0
            for k in sorted(self.datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
                watch_uuid = k[0]
                watch = k[1]
                if not watch['paused'] and tag['uuid'] not in watch['tags']:
                    continue
                worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
                i+=1

            return f"OK, {i} watches queued", 200

        if request.args.get('muted', '') == 'muted':
            self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
            return "OK", 200
@@ -51,9 +44,16 @@ class Tag(Resource):
        return tag

    @auth.check_token
    @validate_openapi_request('deleteTag')
    def delete(self, uuid):
        """Delete a tag/group and remove it from all watches."""
        """
        @api {delete} /api/v1/tag/:uuid Delete a tag and remove it from all watches
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiParam {uuid} uuid Tag unique ID.
        @apiName DeleteTag
        @apiGroup Tag
        @apiSuccess (200) {String} OK Was deleted
        """
        if not self.datastore.data['settings']['application']['tags'].get(uuid):
            abort(400, message='No tag exists with the UUID of {}'.format(uuid))

@@ -68,10 +68,21 @@ class Tag(Resource):
        return 'OK', 204

    @auth.check_token
    @validate_openapi_request('updateTag')
    @expects_json(schema_update_tag)
    def put(self, uuid):
        """Update tag information."""
        """
        @api {put} /api/v1/tag/:uuid Update tag information
        @apiExample {curl} Example usage:
            Update (PUT)
            curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"title": "New Tag Title"}'

        @apiDescription Updates an existing tag using JSON
        @apiParam {uuid} uuid Tag unique ID.
        @apiName UpdateTag
        @apiGroup Tag
        @apiSuccess (200) {String} OK Was updated
        @apiSuccess (500) {String} ERR Some other error
        """
        tag = self.datastore.data['settings']['application']['tags'].get(uuid)
        if not tag:
            abort(404, message='No tag exists with the UUID of {}'.format(uuid))
@@ -83,10 +94,17 @@ class Tag(Resource):


    @auth.check_token
    @validate_openapi_request('createTag')
    # Only cares for {'title': 'xxxx'}
    def post(self):
        """Create a single tag/group."""
        """
        @api {post} /api/v1/watch Create a single tag
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"name": "Work related"}'
        @apiName Create
        @apiGroup Tag
        @apiSuccess (200) {String} OK Was created
        @apiSuccess (500) {String} ERR Some other error
        """

        json_data = request.get_json()
        title = json_data.get("title",'').strip()
@@ -104,9 +122,28 @@ class Tags(Resource):
        self.datastore = kwargs['datastore']

    @auth.check_token
    @validate_openapi_request('listTags')
    def get(self):
        """List tags/groups."""
        """
        @api {get} /api/v1/tags List tags
        @apiDescription Return list of available tags
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/tags -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            {
                "cc0cfffa-f449-477b-83ea-0caafd1dc091": {
                    "title": "Tech News",
                    "notification_muted": false,
                    "date_created": 1677103794
                },
                "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
                    "title": "Shopping",
                    "notification_muted": true,
                    "date_created": 1676662819
                }
            }
        @apiName ListTags
        @apiGroup Tag Management
        @apiSuccess (200) {String} OK JSON dict
        """
        result = {}
        for uuid, tag in self.datastore.data['settings']['application']['tags'].items():
            result[uuid] = {

@@ -1,50 +1,17 @@
import os

from changedetectionio.validate_url import is_safe_valid_url
from changedetectionio.strtobool import strtobool

from flask_expects_json import expects_json
from changedetectionio import queuedWatchMetaData
from changedetectionio import worker_handler
from flask_restful import abort, Resource
from flask import request, make_response, send_from_directory
from flask import request, make_response
import validators
from . import auth
import copy

# Import schemas from __init__.py
from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request


def validate_time_between_check_required(json_data):
    """
    Validate that at least one time interval is specified when not using default settings.
    Returns None if valid, or error message string if invalid.
    Defaults to using global settings if time_between_check_use_default is not provided.
    """
    # Default to using global settings if not specified
    use_default = json_data.get('time_between_check_use_default', True)

    # If using default settings, no validation needed
    if use_default:
        return None

    # If not using defaults, check if time_between_check exists and has at least one non-zero value
    time_check = json_data.get('time_between_check')
    if not time_check:
        # No time_between_check provided and not using defaults - this is an error
        return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings."

    # time_between_check exists, check if it has at least one non-zero value
    if any([
        (time_check.get('weeks') or 0) > 0,
        (time_check.get('days') or 0) > 0,
        (time_check.get('hours') or 0) > 0,
        (time_check.get('minutes') or 0) > 0,
        (time_check.get('seconds') or 0) > 0
    ]):
        return None

    # time_between_check exists but all values are 0 or empty - this is an error
    return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings."
from . import schema, schema_create_watch, schema_update_watch


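A quick illustration of how `validate_time_between_check_required` behaves for typical payloads (the import path below is an assumption for the sketch; adjust it to wherever the module actually lives in your checkout):

```python
# Illustration only - exercising the validator exactly as defined above.
# Assumed import path:
from changedetectionio.api.Watch import validate_time_between_check_required

assert validate_time_between_check_required({}) is None          # omitted -> use global settings
assert validate_time_between_check_required({
    'time_between_check_use_default': False,
    'time_between_check': {'hours': 1},
}) is None                                                       # explicit non-zero interval is fine

error = validate_time_between_check_required({'time_between_check_use_default': False})
assert error.startswith("At least one time interval")            # no interval given -> rejected

error = validate_time_between_check_required({
    'time_between_check_use_default': False,
    'time_between_check': {'hours': 0, 'minutes': 0},
})
assert error is not None                                         # all-zero intervals are also rejected
```
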
class Watch(Resource):
@@ -58,9 +25,23 @@ class Watch(Resource):
    # @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
    # ?recheck=true
    @auth.check_token
    @validate_openapi_request('getWatch')
    def get(self, uuid):
        """Get information about a single watch, recheck, pause, or mute."""
        """
        @api {get} /api/v1/watch/:uuid Single watch - get data, recheck, pause, mute.
        @apiDescription Retrieve watch information and set muted/paused status
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            curl "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=unmuted" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            curl "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?paused=unpaused" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiName Watch
        @apiGroup Watch
        @apiParam {uuid} uuid Watch unique ID.
        @apiQuery {Boolean} [recheck] Recheck this watch `recheck=1`
        @apiQuery {String} [paused] =`paused` or =`unpaused` , Sets the PAUSED state
        @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state
        @apiSuccess (200) {String} OK When paused/muted/recheck operation OR full JSON object of the watch
        @apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch
        """
        from copy import deepcopy
        watch = deepcopy(self.datastore.data['watching'].get(uuid))
        if not watch:
@@ -88,14 +69,19 @@
        # attr .last_changed will check for the last written text snapshot on change
        watch['last_changed'] = watch.last_changed
        watch['viewed'] = watch.viewed
        watch['link'] = watch.link,

        return watch

    @auth.check_token
    @validate_openapi_request('deleteWatch')
    def delete(self, uuid):
        """Delete a watch and related history."""
        """
        @api {delete} /api/v1/watch/:uuid Delete a watch and related history
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiParam {uuid} uuid Watch unique ID.
        @apiName Delete
        @apiGroup Watch
        @apiSuccess (200) {String} OK Was deleted
        """
        if not self.datastore.data['watching'].get(uuid):
            abort(400, message='No watch exists with the UUID of {}'.format(uuid))

@@ -103,10 +89,21 @@ class Watch(Resource):
        return 'OK', 204

    @auth.check_token
    @validate_openapi_request('updateWatch')
    @expects_json(schema_update_watch)
    def put(self, uuid):
        """Update watch information."""
        """
        @api {put} /api/v1/watch/:uuid Update watch information
        @apiExample {curl} Example usage:
            Update (PUT)
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "new list"}'

        @apiDescription Updates an existing watch using JSON, accepts the same structure as returned in <a href="#api-Watch-Watch">get single watch information</a>
        @apiParam {uuid} uuid Watch unique ID.
        @apiName Update a watch
        @apiGroup Watch
        @apiSuccess (200) {String} OK Was updated
        @apiSuccess (500) {String} ERR Some other error
        """
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
@@ -116,15 +113,6 @@
            if not request.json.get('proxy') in plist:
                return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400

        # Validate time_between_check when not using defaults
        validation_error = validate_time_between_check_required(request.json)
        if validation_error:
            return validation_error, 400

        # XSS etc protection
        if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
            return "Invalid URL", 400

        watch.update(request.json)

        return "OK", 200
@@ -138,9 +126,22 @@ class WatchHistory(Resource):
    # Get a list of available history for a watch by UUID
    # curl http://localhost:5000/api/v1/watch/<string:uuid>/history
    @auth.check_token
    @validate_openapi_request('getWatchHistory')
    def get(self, uuid):
        """Get a list of all historical snapshots available for a watch."""
        """
        @api {get} /api/v1/watch/<string:uuid>/history Get a list of all historical snapshots available for a watch
        @apiDescription Requires `uuid`, returns list
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
            {
                "1676649279": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/cb7e9be8258368262246910e6a2a4c30.txt",
                "1677092785": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/e20db368d6fc633e34f559ff67bb4044.txt",
                "1677103794": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/02efdd37dacdae96554a8cc85dc9c945.txt"
            }
        @apiName Get list of available stored snapshots for watch
        @apiGroup Watch History
        @apiSuccess (200) {String} OK
        @apiSuccess (404) {String} ERR Not found
        """
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
@@ -153,9 +154,18 @@ class WatchSingleHistory(Resource):
        self.datastore = kwargs['datastore']

    @auth.check_token
    @validate_openapi_request('getWatchSnapshot')
    def get(self, uuid, timestamp):
        """Get single snapshot from watch."""
        """
        @api {get} /api/v1/watch/<string:uuid>/history/<int:timestamp> Get single snapshot from watch
        @apiDescription Requires watch `uuid` and `timestamp`. `timestamp` of "`latest`" for latest available snapshot, or <a href="#api-Watch_History-Get_list_of_available_stored_snapshots_for_watch">use the list returned here</a>
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
        @apiName Get single snapshot content
        @apiGroup Watch History
        @apiParam {String} [html] Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp)
        @apiSuccess (200) {String} OK
        @apiSuccess (404) {String} ERR Not found
        """
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message=f"No watch exists with the UUID of {uuid}")
@@ -181,39 +191,6 @@ class WatchSingleHistory(Resource):

        return response

class WatchFavicon(Resource):
    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    @validate_openapi_request('getWatchFavicon')
    def get(self, uuid):
        """Get favicon for a watch."""
        watch = self.datastore.data['watching'].get(uuid)
        if not watch:
            abort(404, message=f"No watch exists with the UUID of {uuid}")

        favicon_filename = watch.get_favicon_filename()
        if favicon_filename:
            try:
                import magic
                mime = magic.from_file(
                    os.path.join(watch.watch_data_dir, favicon_filename),
                    mime=True
                )
            except ImportError:
                # Fallback, no python-magic
                import mimetypes
                mime, encoding = mimetypes.guess_type(favicon_filename)

            response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
            response.headers['Content-type'] = mime
            response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
            return response

        abort(404, message=f'No Favicon available for {uuid}')


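The favicon handler above uses content sniffing via python-magic when the optional dependency is installed, and falls back to the stdlib's extension-based guess otherwise. The same pattern in isolation, as a self-contained sketch:

```python
# Sketch of the try-import / fallback MIME detection pattern used above.
def guess_mime(path: str) -> str:
    try:
        import magic                      # optional dependency: sniffs actual file contents
        return magic.from_file(path, mime=True)
    except ImportError:
        import mimetypes                  # stdlib fallback: extension-based only
        mime, _encoding = mimetypes.guess_type(path)
        return mime or 'application/octet-stream'

print(guess_mime('favicon.png'))          # 'image/png' either way for a well-named file
```

The trade-off: `mimetypes` never opens the file, so a mis-named favicon gets the wrong type; `magic` is accurate but adds a native dependency, which is why it stays optional here.
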
class CreateWatch(Resource):
    def __init__(self, **kwargs):
@@ -222,15 +199,25 @@ class CreateWatch(Resource):
        self.update_q = kwargs['update_q']

    @auth.check_token
    @validate_openapi_request('createWatch')
    @expects_json(schema_create_watch)
    def post(self):
        """Create a single watch."""
        """
        @api {post} /api/v1/watch Create a single watch
        @apiDescription Requires at least `url` set, can accept the same structure as <a href="#api-Watch-Watch">get single watch information</a> to create.
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "nice list"}'
        @apiName Create
        @apiGroup Watch
        @apiSuccess (200) {String} OK Was created
        @apiSuccess (500) {String} ERR Some other error
        """

        json_data = request.get_json()
        url = json_data['url'].strip()

        if not is_safe_valid_url(url):
            # If hosts that only contain alphanumerics are allowed ("localhost" for example)
            allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
            if not validators.url(url, simple_host=allow_simplehost):
                return "Invalid or unsupported URL", 400

        if json_data.get('proxy'):
@@ -238,11 +225,6 @@ class CreateWatch(Resource):
            if not json_data.get('proxy') in plist:
                return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400

        # Validate time_between_check when not using defaults
        validation_error = validate_time_between_check_required(json_data)
        if validation_error:
            return validation_error, 400

        extras = copy.deepcopy(json_data)

        # Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
@@ -261,9 +243,35 @@ class CreateWatch(Resource):
            return "Invalid or unsupported URL", 400

    @auth.check_token
    @validate_openapi_request('listWatches')
    def get(self):
        """List watches."""
        """
        @api {get} /api/v1/watch List watches
        @apiDescription Return concise list of available watches and some very basic info
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            {
                "6a4b7d5c-fee4-4616-9f43-4ac97046b595": {
                    "last_changed": 1677103794,
                    "last_checked": 1677103794,
                    "last_error": false,
                    "title": "",
                    "url": "http://www.quotationspage.com/random.php"
                },
                "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
                    "last_changed": 0,
                    "last_checked": 1676662819,
                    "last_error": false,
                    "title": "QuickLook",
                    "url": "https://github.com/QL-Win/QuickLook/tags"
                }
            }

        @apiParam {String} [recheck_all] Optional Set to =1 to force recheck of all watches
        @apiParam {String} [tag] Optional name of tag to limit results
        @apiName ListWatches
        @apiGroup Watch Management
        @apiSuccess (200) {String} OK JSON dict
        """
        list = {}

        tag_limit = request.args.get('tag', '').lower()
@@ -277,8 +285,6 @@ class CreateWatch(Resource):
            'last_changed': watch.last_changed,
            'last_checked': watch['last_checked'],
            'last_error': watch['last_error'],
            'link': watch.link,
            'page_title': watch['page_title'],
            'title': watch['title'],
            'url': watch['url'],
            'viewed': watch.viewed

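Putting the create and list endpoints together, a hedged Python client sketch (same placeholder host and key as the curl examples; note the docstring's own remark that `tag` is still accepted even though the field was renamed to `tags`):

```python
# Sketch only - the exact success payload is not pinned down by the docstring
# above ("OK Was created"), so only the status code is checked here.
import requests

BASE = 'http://localhost:5000/api/v1'
HEADERS = {'x-api-key': '813031b16330fe25e3780cf0325daa45'}

r = requests.post(f'{BASE}/watch',
                  json={'url': 'https://my-nice.com', 'tag': 'nice list'},
                  headers=HEADERS)
print(r.status_code, r.text)      # 2xx on success

# Then list everything carrying that tag
watches = requests.get(f'{BASE}/watch', params={'tag': 'nice list'}, headers=HEADERS).json()
for uuid, info in watches.items():
    print(uuid, info['url'], info['last_checked'])
```
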
@@ -1,7 +1,4 @@
import copy
import functools
from flask import request, abort
from loguru import logger
from . import api_schema
from ..model import watch_base

@@ -11,7 +8,6 @@ schema = api_schema.build_watch_json_schema(watch_base_config)

schema_create_watch = copy.deepcopy(schema)
schema_create_watch['required'] = ['url']
del schema_create_watch['properties']['last_viewed']

schema_update_watch = copy.deepcopy(schema)
schema_update_watch['additionalProperties'] = False
@@ -29,58 +25,9 @@ schema_create_notification_urls['required'] = ['notification_urls']
schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
schema_delete_notification_urls['required'] = ['notification_urls']

@functools.cache
def get_openapi_spec():
    """Lazy load OpenAPI spec and dependencies only when validation is needed."""
    import os
    import yaml  # Lazy import - only loaded when API validation is actually used
    from openapi_core import OpenAPI  # Lazy import - saves ~10.7 MB on startup

    spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
    if not os.path.exists(spec_path):
        # Possibly for pip3 packages
        spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')

    with open(spec_path, 'r') as f:
        spec_dict = yaml.safe_load(f)
    _openapi_spec = OpenAPI.from_dict(spec_dict)
    return _openapi_spec

def validate_openapi_request(operation_id):
    """Decorator to validate incoming requests against OpenAPI spec."""
    def decorator(f):
        @functools.wraps(f)
        def wrapper(*args, **kwargs):
            try:
                # Skip OpenAPI validation for GET requests since they don't have request bodies
                if request.method.upper() != 'GET':
                    # Lazy import - only loaded when actually validating a request
                    from openapi_core.contrib.flask import FlaskOpenAPIRequest

                    spec = get_openapi_spec()
                    openapi_request = FlaskOpenAPIRequest(request)
                    result = spec.unmarshal_request(openapi_request)
                    if result.errors:
                        from werkzeug.exceptions import BadRequest
                        error_details = []
                        for error in result.errors:
                            error_details.append(str(error))
                        raise BadRequest(f"OpenAPI validation failed: {error_details}")
            except BadRequest:
                # Re-raise BadRequest exceptions (validation failures)
                raise
            except Exception as e:
                # If OpenAPI spec loading fails, log but don't break existing functionality
                logger.critical(f"OpenAPI validation warning for {operation_id}: {e}")
                abort(500)
            return f(*args, **kwargs)
        return wrapper
    return decorator

# Import all API resources
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch, WatchFavicon
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch
from .Tags import Tags, Tag
from .Import import Import
from .SystemInfo import SystemInfo
from .Notifications import Notifications


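Worth noting in the block above: `@functools.cache` means the YAML spec is read and compiled at most once per process, and the ~10.7 MB openapi-core import cost is only paid on the first non-GET request that actually validates. The caching behaviour in miniature (stand-in data, not the real spec):

```python
# Tiny demonstration of why @functools.cache suits a load-once resource.
import functools

calls = 0

@functools.cache
def get_spec():
    global calls
    calls += 1
    return {'openapi': '3.0.0'}   # imagine yaml.safe_load() + OpenAPI.from_dict() here

get_spec(); get_spec(); get_spec()
assert calls == 1                  # the expensive load happened exactly once
```
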
@@ -78,13 +78,6 @@ def build_watch_json_schema(d):
    ]:
        schema['properties'][v]['anyOf'].append({'type': 'string', "maxLength": 5000})

    for v in ['last_viewed']:
        schema['properties'][v] = {
            "type": "integer",
            "description": "Unix timestamp in seconds of the last time the watch was viewed.",
            "minimum": 0
        }

    # None or Boolean
    schema['properties']['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'})

@@ -119,12 +112,6 @@ def build_watch_json_schema(d):

    schema['properties']['time_between_check'] = build_time_between_check_json_schema()

    schema['properties']['time_between_check_use_default'] = {
        "type": "boolean",
        "default": True,
        "description": "Whether to use global settings for time between checks - defaults to true if not set"
    }

    schema['properties']['browser_steps'] = {
        "anyOf": [
            {

@@ -7,7 +7,6 @@ from changedetectionio.flask_app import watch_check_update
import asyncio
import importlib
import os
import queue
import time

from loguru import logger
@@ -38,23 +37,13 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
        watch = None

        try:
            # Use native janus async interface - no threads needed!
            queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0)

            # Use asyncio wait_for to make queue.get() cancellable
            queued_item_data = await asyncio.wait_for(q.get(), timeout=1.0)
        except asyncio.TimeoutError:
            # No jobs available, continue loop
            continue
        except Exception as e:
            logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}")

            # Log queue health for debugging
            try:
                queue_size = q.qsize()
                is_empty = q.empty()
                logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}")
            except Exception as health_e:
                logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}")

            logger.error(f"Worker {worker_id} error getting queue item: {e}")
            await asyncio.sleep(0.1)
            continue

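The queue hunk above swaps a project-specific `q.async_get()` wrapper in place of the plain queue interface. Stock janus exposes one queue to both threads and coroutines via its `sync_q` / `async_q` views; a sketch of that idea, not the project's actual wiring:

```python
# Hedged sketch of the janus pattern: synchronous producers, async consumer,
# with the same timeout-and-continue polling loop used by the worker above.
import asyncio
import janus

async def consumer(q: janus.Queue):
    while True:
        try:
            # asyncio.wait_for makes the get() cancellable, as in the worker
            item = await asyncio.wait_for(q.async_q.get(), timeout=1.0)
        except asyncio.TimeoutError:
            continue                      # no jobs available, keep polling
        q.async_q.task_done()
        if item is None:                  # sentinel: shut down
            break
        print("got", item)

async def main():
    q = janus.Queue()                     # must be created inside a running loop
    q.sync_q.put("job-1")                 # producers may be plain threads
    q.sync_q.put(None)
    await consumer(q)
    q.close()
    await q.wait_closed()

asyncio.run(main())
```
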
@@ -310,6 +299,15 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
                continue

            if process_changedetection_results:
                # Extract title if needed
                if datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
                    if not watch['title'] or not len(watch['title']):
                        try:
                            update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content)
                            logger.info(f"UUID: {uuid} Extract <title> updated title to '{update_obj['title']}")
                        except Exception as e:
                            logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.")

                try:
                    datastore.update_watch(uuid=uuid, update_obj=update_obj)

@@ -334,10 +332,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
                if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
                    watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))

                # Explicitly delete large content variables to free memory IMMEDIATELY after saving
                # These are no longer needed after being saved to history
                del contents

                # Send notifications on second+ check
                if watch.history_n >= 2:
                    logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
@@ -352,14 +346,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
            # Always record attempt count
            count = watch.get('check_count', 0) + 1

            # Always record page title (used in notifications, and can change even when the content is the same)
            try:
                page_title = html_tools.extract_title(data=update_handler.fetcher.content)
                logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
                datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
            except Exception as e:
                logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")

            # Record server header
            try:
                server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
@@ -367,21 +353,9 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
            except Exception as e:
                pass

            # Store favicon if necessary
            if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
                watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
                                   favicon_base_64=update_handler.fetcher.favicon_blob.get('base64')
                                   )

            datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
                                                          'check_count': count})

            # NOW clear fetcher content - after all processing is complete
            # This is the last point where we need the fetcher data
            if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
                update_handler.fetcher.clear_content()
                logger.debug(f"Cleared fetcher content for UUID {uuid}")

        except Exception as e:
            logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
            logger.error(f"Worker {worker_id} traceback:", exc_info=True)
@@ -402,28 +376,7 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore):
                    #logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
                    watch_check_update.send(watch_uuid=watch['uuid'])

                # Explicitly clean up update_handler and all its references
                if update_handler:
                    # Clear fetcher content using the proper method
                    if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
                        update_handler.fetcher.clear_content()

                    # Clear processor references
                    if hasattr(update_handler, 'content_processor'):
                        update_handler.content_processor = None

                    update_handler = None

                # Clear local contents variable if it still exists
                if 'contents' in locals():
                    del contents

                # Note: We don't set watch = None here because:
                # 1. watch is just a local reference to datastore.data['watching'][uuid]
                # 2. Setting it to None doesn't affect the datastore
                # 3. GC can't collect the object anyway (still referenced by datastore)
                # 4. It would just cause confusion

                update_handler = None
                logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
            except Exception as cleanup_error:
                logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")

@@ -6,7 +6,7 @@ from loguru import logger

from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
from changedetectionio.content_fetchers.base import manage_user_agent
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render




@@ -33,7 +33,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
    def long_task(uuid, preferred_proxy):
        import time
        from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
        from changedetectionio.jinja2_custom import render as jinja_render
        from changedetectionio.safe_jinja import render as jinja_render

        status = {'status': '', 'length': 0, 'text': ''}


@@ -1,6 +1,5 @@

from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.notification.handler import apply_service_tweaks
from changedetectionio.safe_jinja import render as jinja_render
from changedetectionio.store import ChangeDetectionStore
from feedgen.feed import FeedGenerator
from flask import Blueprint, make_response, request, url_for, redirect
@@ -109,25 +108,18 @@ def construct_blueprint(datastore: ChangeDetectionStore):

        fe.link(link=diff_link)

        # Same logic as watch-overview.html
        if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
            watch_label = watch.label
        else:
            watch_label = watch.get('url')
        # @todo watch should be a getter - watch.get('title') (internally if URL else..)

        fe.title(title=watch_label)
        watch_title = watch.get('title') if watch.get('title') else watch.get('url')
        fe.title(title=watch_title)
        try:

            html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
                                         newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
                                         include_equal=False,
                                         line_feed_sep="<br>"
                                         line_feed_sep="<br>",
                                         html_colour=html_colour_enable
                                         )


            requested_output_format = 'htmlcolor' if html_colour_enable else 'html'
            html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format)

        except FileNotFoundError as e:
            html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."

@@ -135,7 +127,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        # @todo User could decide if <link> goes to the diff page, or to the watch link
        rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"

        content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link)
        content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link)

        # Out of range chars could also break feedgen
        if scan_invalid_chars_in_rss(content):

@@ -119,7 +119,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
        hide_remove_pass=os.getenv("SALTED_PASS", False),
        min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
        settings_application=datastore.data['settings']['application'],
        timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'),
        timezone_default_config=datastore.data['settings']['application'].get('timezone'),
        utc_time=utc_time,
    )


@@ -1,7 +1,7 @@
{% extends 'base.html' %}

{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %}
{% from '_common_fields.html' import render_common_settings_form %}
<script>
    const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
@@ -72,23 +72,33 @@
                    <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
                    </span>
                </div>
                <div class="pure-control-group">
                    {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
                </div>
                <div class="pure-control-group">
                    {{ render_field(form.application.form.pager_size) }}
                    <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
                </div>
                <div class="pure-control-group">
                    {{ render_field(form.application.form.rss_content_format) }}
                    <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
                </div>
                <div class="pure-control-group">
                    {{ render_checkbox_field(form.application.form.extract_title_as_title) }}
                    <span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
                </div>
                <div class="pure-control-group">
                    {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
                    <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
                </div>
                <div class="grey-form-border">
                    <div class="pure-control-group">
                        {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.application.form.rss_content_format) }}
                        <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
                    </div>
                    <div class="pure-control-group">
                        {{ render_checkbox_field(form.application.form.rss_reader_mode) }}
                        <span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
                    </div>
                {% if form.requests.proxy %}
                <div class="pure-control-group inline-radio">
                    {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
                    <span class="pure-form-message-inline">
                        Choose a default proxy for all watches
                    </span>
                </div>
                {% endif %}
            </fieldset>
        </div>


@@ -131,10 +141,6 @@
                <span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
                Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
            </div>
            <div class="pure-control-group">
                {{ render_field(form.requests.form.timeout) }}
                <span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br>
            </div>
            <div class="pure-control-group inline-radio">
                {{ render_field(form.requests.form.default_ua) }}
                <span class="pure-form-message-inline">
@@ -193,17 +199,11 @@ nav
                    </ul>
                </span>
            </fieldset>
            <fieldset class="pure-group">
                {{ render_checkbox_field(form.application.form.strip_ignored_lines) }}
                <span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise it's just ignored for change-detection)<br>
                <i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
                </span>
            </fieldset>
        </div>

        <div class="tab-pane-inner" id="api">
            <h4>API Access</h4>
            <p>Drive your changedetection.io via API, More about <a href="https://changedetection.io/docs/api_v1/index.html">API access and examples here</a>.</p>
            <p>Drive your changedetection.io via API, More about <a href="https://github.com/dgtlmoon/changedetection.io/wiki/API-Reference">API access here</a></p>

            <div class="pure-control-group">
                {{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
@@ -238,9 +238,11 @@ nav
            <p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p>
            <p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
            <p>
                {{ render_field(form.application.form.scheduler_timezone_default) }}
                {{ render_field(form.application.form.timezone) }}
                <datalist id="timezones" style="display: none;">
                    {%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%}
                    {% for tz_name in available_timezones %}
                    <option value="{{ tz_name }}">{{ tz_name }}</option>
                    {% endfor %}
                </datalist>
            </p>
        </div>
@@ -254,18 +256,6 @@ nav
            <div class="pure-control-group">
                {{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }}
                <span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span>
            </div>
            <div class="pure-control-group">
                {{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }}
                <span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span>
            </div>
            <div class="pure-control-group">
                {{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }}
            </div>
            <div class="pure-control-group">
                {{ render_field(form.application.form.pager_size) }}
                <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
            </div>

        </div>
        <div class="tab-pane-inner" id="proxies">
            <div id="recommended-proxy">
@@ -314,33 +304,23 @@ nav
            <p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successful than "Data Center" for blocked websites.

            <div class="pure-control-group" id="extra-proxies-setting">
                {{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
                {{ render_field(form.requests.form.extra_proxies) }}
                <span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br>
                <span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span>
                {% if form.requests.proxy %}
                <div>
                    <br>
                    <div class="inline-radio">
                        {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
                        <span class="pure-form-message-inline">Choose a default proxy for all watches</span>
                    </div>
                </div>
                {% endif %}
            </div>
            <div class="pure-control-group" id="extra-browsers-setting">
                <p>
                    <span class="pure-form-message-inline"><i>Extra Browsers</i> can be attached to further defeat CAPTCHA's on websites that are particularly hard to scrape.</span><br>
                    <span class="pure-form-message-inline">Simply paste the connection address into the box, <a href="https://changedetection.io/tutorial/using-bright-datas-scraping-browser-pass-captchas-and-other-protection-when-monitoring">More instructions and examples here</a> </span>
                </p>
                {{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }}
                {{ render_field(form.requests.form.extra_browsers) }}
            </div>

        </div>
        <div id="actions">
            <div class="pure-control-group">
                {{ render_button(form.save_button) }}
                <a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">Back</a>
                <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">Clear Snapshot History</a>
                <a href="{{url_for('watchlist.index')}}" class="pure-button button-small button-cancel">Back</a>
                <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a>
            </div>
        </div>
    </form>

@@ -1,6 +1,6 @@
{% extends 'base.html' %}
{% block content %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_ternary_field %}
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
{% from '_common_fields.html' import render_common_settings_form %}
<script>
    const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="group-settings")}}";
@@ -64,7 +64,7 @@
    <div class="tab-pane-inner" id="notifications">
        <fieldset>
            <div class="pure-control-group inline-radio">
                {{ render_ternary_field(form.notification_muted, BooleanField=True) }}
                {{ render_checkbox_field(form.notification_muted) }}
            </div>
            {% if 1 %}
            <div class="pure-control-group inline-radio">

@@ -76,14 +76,14 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat

    elif (op == 'notification-default'):
        from changedetectionio.notification import (
            USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
            default_notification_format_for_watch
        )
        for uuid in uuids:
            if datastore.data['watching'].get(uuid):
                datastore.data['watching'][uuid]['notification_title'] = None
                datastore.data['watching'][uuid]['notification_body'] = None
                datastore.data['watching'][uuid]['notification_urls'] = []
                datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
                datastore.data['watching'][uuid]['notification_format'] = default_notification_format_for_watch
        if emit_flash:
            flash(f"{len(uuids)} watches set to use default notification settings")

@@ -159,20 +159,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
    def mark_all_viewed():
        # Save the current newest history as the most recently viewed
        with_errors = request.args.get('with_errors') == "1"
        tag_limit = request.args.get('tag')
        logger.debug(f"Limiting to tag {tag_limit}")
        now = int(time.time())
        for watch_uuid, watch in datastore.data['watching'].items():
            if with_errors and not watch.get('last_error'):
                continue
            datastore.set_last_viewed(watch_uuid, int(time.time()))

            if tag_limit and ( not watch.get('tags') or tag_limit not in watch['tags'] ):
                logger.debug(f"Skipping watch {watch_uuid}")
                continue

            datastore.set_last_viewed(watch_uuid, now)

        return redirect(url_for('watchlist.index', tag=tag_limit))
        return redirect(url_for('watchlist.index'))

    @ui_blueprint.route("/delete", methods=['GET'])
    @login_optionally_required

@@ -187,7 +187,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
tz_name = time_schedule_limit.get('timezone')
if not tz_name:
tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')

if time_schedule_limit and time_schedule_limit.get('enabled'):
try:
@@ -242,7 +242,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'available_timezones': sorted(available_timezones()),
'browser_steps_config': browser_step_ui_config,
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '',
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
'extra_processor_config': form.extra_tab_content(),
'extra_title': f" - Edit - {watch.label}",
@@ -257,7 +256,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'),
'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
'using_global_webdriver_wait': not default['webdriver_delay'],
'uuid': uuid,
'watch': watch,
@@ -2,7 +2,6 @@ from flask import Blueprint, request, make_response
import random
from loguru import logger

from changedetectionio.notification_service import NotificationContextData
from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required

@@ -20,7 +19,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
import apprise
from changedetectionio.notification.handler import process_notification
from changedetectionio.notification.apprise_plugin.assets import apprise_asset
from changedetectionio.jinja2_custom import render as jinja_render

from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler
@@ -39,7 +37,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400)

watch = datastore.data['watching'].get(watch_uuid)
notification_urls = request.form.get('notification_urls','').strip().splitlines()

notification_urls = None

if request.form.get('notification_urls'):
notification_urls = request.form['notification_urls'].strip().splitlines()

if not notification_urls:
logger.debug("Test notification - Trying by group/tag in the edit form if available")
@@ -59,26 +61,20 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return 'Error: No Notification URLs set/found'

for n_url in notification_urls:
# We are ONLY validating the apprise:// part here, convert all tags to something so as not to break apprise URLs
generic_notification_context_data = NotificationContextData()
generic_notification_context_data.set_random_for_validation()
n_url = jinja_render(template_str=n_url, **generic_notification_context_data).strip()
if len(n_url.strip()):
if not apobj.add(n_url):
return f'Error: {n_url} is not a valid AppRise URL.'

try:
# use the same as when it is triggered, but then override it with the form test values
n_object = NotificationContextData({
n_object = {
'watch_url': request.form.get('window_url', "https://changedetection.io"),
'notification_urls': notification_urls
})
}

# Only use if present, if not set in n_object it should use the default system value
if 'notification_format' in request.form and request.form['notification_format'].strip():
n_object['notification_format'] = request.form.get('notification_format', '').strip()
else:
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')

if 'notification_title' in request.form and request.form['notification_title'].strip():
n_object['notification_title'] = request.form.get('notification_title', '').strip()
@@ -1,7 +1,8 @@
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
from flask_login import current_user
import os
import time
from loguru import logger
from copy import deepcopy

from changedetectionio.store import ChangeDetectionStore
from changedetectionio.auth_decorator import login_optionally_required
@@ -77,46 +78,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
return output

@views_blueprint.route("/diff/<string:uuid>", methods=['POST'])
@views_blueprint.route("/diff/<string:uuid>", methods=['GET', 'POST'])
@login_optionally_required
def diff_history_page_build_report(uuid):
from changedetectionio import forms

# More for testing, possible to return the first/only
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()

try:
watch = datastore.data['watching'][uuid]
except KeyError:
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))

# For submission of requesting an extract
extract_form = forms.extractDataForm(formdata=request.form,
data={'extract_regex': request.form.get('extract_regex', '')}
)
if not extract_form.validate():
flash("An error occurred, please see below.", "error")
return _render_diff_template(uuid, extract_form)

else:
extract_regex = request.form.get('extract_regex', '').strip()
output = watch.extract_regex_from_all_history(extract_regex)
if output:
watch_dir = os.path.join(datastore.datastore_path, uuid)
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
response.headers['Content-type'] = 'text/csv'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = "0"
return response

flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract')

def _render_diff_template(uuid, extract_form=None):
"""Helper function to render the diff template with all required data"""
def diff_history_page(uuid):
from changedetectionio import forms

# More for testing, possible to return the first/only
@@ -130,36 +94,62 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
flash("No history found for the specified link, bad link?", "error")
return redirect(url_for('watchlist.index'))

# Use provided form or create a new one
if extract_form is None:
extract_form = forms.extractDataForm(formdata=request.form,
data={'extract_regex': request.form.get('extract_regex', '')}
)
# For submission of requesting an extract
extract_form = forms.extractDataForm(request.form)
if request.method == 'POST':
if not extract_form.validate():
flash("An error occurred, please see below.", "error")

else:
extract_regex = request.form.get('extract_regex').strip()
output = watch.extract_regex_from_all_history(extract_regex)
if output:
watch_dir = os.path.join(datastore.datastore_path, uuid)
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
response.headers['Content-type'] = 'text/csv'
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
response.headers['Pragma'] = 'no-cache'
response.headers['Expires'] = 0
return response

flash('Nothing matches that RegEx', 'error')
redirect(url_for('ui_views.diff_history_page', uuid=uuid)+'#extract')

history = watch.history
dates = list(history.keys())

# If a "from_version" was requested, then find it (or the closest one)
# Also set "from version" to be the closest version to the one that was last viewed.
if len(dates) < 2:
flash("Not enough saved change detection snapshots to produce a report.", "error")
return redirect(url_for('watchlist.index'))

best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed
from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2]
from_version = request.args.get('from_version', from_version_timestamp )
# Save the current newest history as the most recently viewed
datastore.set_last_viewed(uuid, time.time())

# Use the current one if nothing was specified
to_version = request.args.get('to_version', str(dates[-1]))
# Read as binary and force decode as UTF-8
# Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
from_version = request.args.get('from_version')
from_version_index = -2 # second newest
if from_version and from_version in dates:
from_version_index = dates.index(from_version)
else:
from_version = dates[from_version_index]

try:
to_version_file_contents = watch.get_history_snapshot(timestamp=to_version)
from_version_file_contents = watch.get_history_snapshot(dates[from_version_index])
except Exception as e:
logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}")
to_version_file_contents = f"Unable to read to-version at {to_version}.\n"
from_version_file_contents = f"Unable to read to-version at index {dates[from_version_index]}.\n"

to_version = request.args.get('to_version')
to_version_index = -1
if to_version and to_version in dates:
to_version_index = dates.index(to_version)
else:
to_version = dates[to_version_index]

try:
from_version_file_contents = watch.get_history_snapshot(timestamp=from_version)
to_version_file_contents = watch.get_history_snapshot(dates[to_version_index])
except Exception as e:
logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}")
from_version_file_contents = f"Unable to read to-version {from_version}.\n"
to_version_file_contents = "Unable to read to-version at index{}.\n".format(dates[to_version_index])

screenshot_url = watch.get_screenshot()
@@ -173,9 +163,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')

datastore.set_last_viewed(uuid, time.time())

return render_template("diff.html",
output = render_template("diff.html",
current_diff_url=watch['url'],
from_version=str(from_version),
to_version=str(to_version),
@@ -198,10 +186,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
watch_a=watch
)

@views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
@login_optionally_required
def diff_history_page(uuid):
return _render_diff_template(uuid)
return output

@views_blueprint.route("/form/add/quickwatch", methods=['POST'])
@login_optionally_required
@@ -44,16 +44,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
# Sort by last_changed and add the uuid which is usually the key..
sorted_watches = []
with_errors = request.args.get('with_errors') == "1"
unread_only = request.args.get('unread') == "1"
errored_count = 0
search_q = request.args.get('q').strip().lower() if request.args.get('q') else False
for uuid, watch in datastore.data['watching'].items():
if with_errors and not watch.get('last_error'):
continue

if unread_only and (watch.viewed or watch.last_changed == 0) :
continue

if active_tag_uuid and not active_tag_uuid in watch['tags']:
continue
if watch.get('last_error'):
@@ -87,6 +83,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
form=form,
guid=datastore.data['app_guid'],
has_proxies=datastore.proxy_list,
has_unviewed=datastore.has_unviewed,
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
now_time_server=round(time.time()),
pagination=pagination,
@@ -96,7 +93,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
tags=sorted_tags,
unread_changes_count=datastore.unread_changes_count,
watches=sorted_watches
)

@@ -4,7 +4,6 @@
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
<script>let nowtimeserver={{ now_time_server }};</script>
<script>let favicon_baseURL="{{ url_for('static_content', group='favicon', filename="PLACEHOLDER")}}";</script>
<script>
// Initialize Feather icons after the page loads
document.addEventListener('DOMContentLoaded', function() {
@@ -81,26 +80,16 @@ document.addEventListener('DOMContentLoaded', function() {
{%- if any_has_restock_price_processor -%}
{%- set cols_required = cols_required + 1 -%}
{%- endif -%}
{%- set ui_settings = datastore.data['settings']['application']['ui'] -%}
{%- set wrapper_classes = [
'has-unread-changes' if unread_changes_count else '',
'has-error' if errored_count else '',
] -%}
<div id="watch-table-wrapper" class="{{ wrapper_classes | reject('equalto', '') | join(' ') }}">
{%- set table_classes = [
'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled',
] -%}
<table class="pure-table pure-table-striped watch-table {{ table_classes | reject('equalto', '') | join(' ') }}">

<div id="watch-table-wrapper">

<table class="pure-table pure-table-striped watch-table">
<thead>
<tr>
{%- set link_order = "desc" if sort_order == 'asc' else "asc" -%}
{%- set arrow_span = "" -%}
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('watchlist.index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
<th>
<a class="{{ 'active '+link_order if sort_attribute == 'paused' else 'inactive' }}" href="{{url_for('watchlist.index', sort='paused', order=link_order, tag=active_tag_uuid)}}"><i data-feather="pause" style="vertical-align: bottom; width: 14px; height: 14px; margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a>

<a class="{{ 'active '+link_order if sort_attribute == 'notification_muted' else 'inactive' }}" href="{{url_for('watchlist.index', sort='notification_muted', order=link_order, tag=active_tag_uuid)}}"><i data-feather="volume-2" style="vertical-align: bottom; width: 14px; height: 14px; margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a>
</th>
<th class="empty-cell"></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
{%- if any_has_restock_price_processor -%}
<th>Restock & Price</th>
@@ -116,13 +105,10 @@ document.addEventListener('DOMContentLoaded', function() {
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td>
</tr>
{%- endif -%}

{%- for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) -%}
{%- set checking_now = is_checking_now(watch) -%}
{%- set history_n = watch.history_n -%}
{%- set favicon = watch.get_favicon_filename() -%}
{%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list') -%}
{# Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
{# Mirror in changedetectionio/static/js/realtime.js for the frontend #}
{%- set row_classes = [
loop.cycle('pure-table-odd', 'pure-table-even'),
'processor-' ~ watch['processor'],
@@ -130,69 +116,49 @@ document.addEventListener('DOMContentLoaded', function() {
'paused' if watch.paused is defined and watch.paused != False else '',
'unviewed' if watch.has_unviewed else '',
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
'has-favicon' if favicon else '',
'in-stock' if watch.has_restock_info and watch['restock']['in_stock'] else '',
'not-in-stock' if watch.has_restock_info and not watch['restock']['in_stock'] else '',
'queued' if watch.uuid in queued_uuids else '',
'checking-now' if checking_now else '',
'notification_muted' if watch.notification_muted else '',
'single-history' if history_n == 1 else '',
'multiple-history' if history_n >= 2 else '',
'use-html-title' if system_use_url_watchlist else 'no-html-title',
'multiple-history' if history_n >= 2 else ''
] -%}
<tr id="{{ watch.uuid }}" data-watch-uuid="{{ watch.uuid }}" class="{{ row_classes | reject('equalto', '') | join(' ') }}">
<td class="inline checkbox-uuid" ><div><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td>
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
<td class="inline watch-controls">
<div>
<a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
<a class="ajax-op state-on pause-toggle" data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
<a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a>
<a class="ajax-op state-on mute-toggle" data-op="mute" style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a>
</div>
</td>
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
<td class="title-col inline">
<div class="flex-wrapper">
{% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %}
<div>{# A page might have hundreds of these images, set IMG options for lazy loading, don't set SRC if we dont have it so it doesnt fetch the placeholder' #}
<img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {% endif %} />
</div>
{% endif %}
<div>
<span class="watch-title">
{% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
{{ watch.label }}
{% else %}
{{ watch.get('title') or watch.link }}
{% endif %}
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
</span>
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div>
{%- if watch['processor'] == 'text_json_diff' -%}
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
{%- endif -%}
{%- endif -%}
{%- if watch['processor'] == 'restock_diff' -%}
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
{%- endif -%}
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
<span class="watch-tag-list">{{ watch_tag.title }}</span>
{%- endfor -%}
</div>
<div class="status-icons">
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
{%- if watch.get_fetch_backend == "html_webdriver"
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
or "extra_browser_" in watch.get_fetch_backend
-%}
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
{%- endif -%}
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
{%- if watch.get_fetch_backend == "html_webdriver"
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
or "extra_browser_" in watch.get_fetch_backend
-%}
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
{%- endif -%}

</div>
</div>
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}

<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div>

{%- if watch['processor'] == 'text_json_diff' -%}
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
{%- endif -%}
{%- endif -%}
{%- if watch['processor'] == 'restock_diff' -%}
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
{%- endif -%}
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
<span class="watch-tag-list">{{ watch_tag.title }}</span>
{%- endfor -%}
</td>
{%- if any_has_restock_price_processor -%}
<td class="restock-and-price">
@@ -229,38 +195,28 @@ document.addEventListener('DOMContentLoaded', function() {
Not yet
{%- endif -%}
</td>
<td class="buttons">
<div>
<td>
{%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%}
<a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a>
<a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a>
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a>
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a>
</div>
</td>
</tr>
{%- endfor -%}
</tbody>
</table>
<ul id="post-list-buttons">
<li id="post-list-with-errors" style="display: none;" >
<li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" >
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a>
</li>
<li id="post-list-mark-views" style="display: none;" >
<li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" >
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a>
</li>
{%- if active_tag_uuid -%}
<li id="post-list-mark-views-tag">
<a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a>
</li>
{%- endif -%}
<li id="post-list-unread" style="display: none;" >
<a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a>
</li>
<li>
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck
all {% if active_tag_uuid %} in '{{active_tag.title}}'{%endif%}</a>
all {%- if active_tag_uuid-%} in "{{active_tag.title}}"{%endif%}</a>
</li>
<li>
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
@@ -1,3 +1,5 @@
from flask import Blueprint

from json_logic.builtins import BUILTINS

from .exceptions import EmptyConditionRuleRowNotUsable
@@ -14,6 +16,7 @@ operator_choices = [
("==", "Equals"),
("!=", "Not Equals"),
("in", "Contains"),
("!in", "Does Not Contain"),
]

# Fields available in the rules
@@ -21,21 +21,17 @@ def register_operators():
def length_max(_, text, strlen):
return len(text) <= int(strlen)

# Custom function for case-insensitive regex matching
# ✅ Custom function for case-insensitive regex matching
def contains_regex(_, text, pattern):
"""Returns True if `text` contains `pattern` (case-insensitive regex match)."""
return bool(re.search(pattern, str(text), re.IGNORECASE))

# Custom function for NOT matching case-insensitive regex
# ✅ Custom function for NOT matching case-insensitive regex
def not_contains_regex(_, text, pattern):
"""Returns True if `text` does NOT contain `pattern` (case-insensitive regex match)."""
return not bool(re.search(pattern, str(text), re.IGNORECASE))

def not_contains(_, text, pattern):
return not pattern in text

return {
"!in": not_contains,
"!contains_regex": not_contains_regex,
"contains_regex": contains_regex,
"ends_with": ends_with,
@@ -47,7 +43,6 @@ def register_operators():
@hookimpl
def register_operator_choices():
return [
("!in", "Does NOT Contain"),
("starts_with", "Text Starts With"),
("ends_with", "Text Ends With"),
("length_min", "Length minimum"),
@@ -1,8 +1,6 @@
import pluggy
from loguru import logger

LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS=100000

# Support both plugin systems
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
@@ -74,17 +72,7 @@ def ui_edit_stats_extras(watch):
"""Generate the HTML for Levenshtein stats - shared by both plugin systems"""
if len(watch.history.keys()) < 2:
return "<p>Not enough history to calculate Levenshtein metrics</p>"

# Protection against the algorithm getting stuck on huge documents
k = list(watch.history.keys())
if any(
len(watch.get_history_snapshot(timestamp=k[idx])) > LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS
for idx in (-1, -2)
if len(k) >= abs(idx)
):
return "<p>Snapshot too large for edit statistics, skipping.</p>"

try:
lev_data = levenshtein_ratio_recent_history(watch)
if not lev_data or not isinstance(lev_data, dict):
@@ -28,7 +28,6 @@ from changedetectionio.content_fetchers.requests import fetcher as html_requests
import importlib.resources
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8')

def available_fetchers():

@@ -48,7 +48,6 @@ class Fetcher():
error = None
fetcher_description = "No description"
headers = {}
favicon_blob = None
instock_data = None
instock_data_js = ""
status_code = None
@@ -64,35 +63,21 @@ class Fetcher():
# Time ONTOP of the system defined env minimum time
render_extract_delay = 0

def clear_content(self):
"""
Explicitly clear all content from memory to free up heap space.
Call this after content has been saved to disk.
"""
self.content = None
if hasattr(self, 'raw_content'):
self.raw_content = None
self.screenshot = None
self.xpath_data = None
# Keep headers and status_code as they're small

@abstractmethod
def get_error(self):
return self.error

@abstractmethod
async def run(self,
fetch_favicon=True,
current_include_filters=None,
empty_pages_are_a_change=False,
ignore_status_codes=False,
is_binary=False,
request_body=None,
request_headers=None,
request_method=None,
timeout=None,
url=None,
):
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False):
# Should set self.error, self.status_code and self.content
pass
@@ -140,7 +125,7 @@ class Fetcher():
async def iterate_browser_steps(self, start_url=None):
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
from playwright._impl._errors import TimeoutError, Error
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render
step_n = 0

if self.browser_steps is not None and len(self.browser_steps):
@@ -5,7 +5,7 @@ from urllib.parse import urlparse
from loguru import logger

from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
@@ -143,17 +143,15 @@ class fetcher(Fetcher):
f.write(content)

async def run(self,
fetch_favicon=True,
current_include_filters=None,
empty_pages_are_a_change=False,
ignore_status_codes=False,
is_binary=False,
request_body=None,
request_headers=None,
request_method=None,
timeout=None,
url=None,
):
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False):

from playwright.async_api import async_playwright
import playwright._impl._errors
@@ -236,13 +234,6 @@ class fetcher(Fetcher):
await browser.close()
raise PageUnloadable(url=url, status_code=None, message=str(e))

if fetch_favicon:
try:
self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
await self.page.request_gc()
except Exception as e:
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")

if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page_async(self.page)
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
@@ -283,7 +274,6 @@ class fetcher(Fetcher):
await self.page.request_gc()
logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")

# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
# JPEG is better here because the screenshots can be very very large
@@ -8,7 +8,7 @@ from loguru import logger
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \
SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS
SCREENSHOT_MAX_TOTAL_HEIGHT
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, \
BrowserConnectError
@@ -145,16 +145,15 @@ class fetcher(Fetcher):
# f.write(content)

async def fetch_page(self,
current_include_filters,
empty_pages_are_a_change,
fetch_favicon,
ignore_status_codes,
is_binary,
request_body,
request_headers,
request_method,
timeout,
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes,
current_include_filters,
is_binary,
empty_pages_are_a_change
):
import re
self.delete_browser_steps_screenshots()
@@ -180,11 +179,10 @@ class fetcher(Fetcher):
except Exception as e:
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")

# more reliable is to just request a new page
self.page = await browser.newPage()

# Add console handler to capture console.log from favicon fetcher
#self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
# Better is to launch chrome with the URL as arg
# non-headless - newPage() will launch an extra tab/window, .browser should already contain 1 page/tab
# headless - ask a new page
self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()

if '--window-size' in self.browser_connection_url:
# Be sure the viewport is always the window-size, this is often not the same thing
@@ -294,12 +292,6 @@ class fetcher(Fetcher):
await browser.close()
raise PageUnloadable(url=url, status_code=None, message=str(e))

if fetch_favicon:
try:
self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
except Exception as e:
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")

if self.status_code != 200 and not ignore_status_codes:
screenshot = await capture_full_page(page=self.page)
@@ -351,18 +343,8 @@ class fetcher(Fetcher):
async def main(self, **kwargs):
await self.fetch_page(**kwargs)

async def run(self,
fetch_favicon=True,
current_include_filters=None,
empty_pages_are_a_change=False,
ignore_status_codes=False,
is_binary=False,
request_body=None,
request_headers=None,
request_method=None,
timeout=None,
url=None,
):
async def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):

#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180))
@@ -370,17 +352,16 @@ class fetcher(Fetcher):
# Now we run this properly in async context since we're called from async worker
try:
await asyncio.wait_for(self.main(
current_include_filters=current_include_filters,
empty_pages_are_a_change=empty_pages_are_a_change,
fetch_favicon=fetch_favicon,
ignore_status_codes=ignore_status_codes,
is_binary=is_binary,
request_body=request_body,
request_headers=request_headers,
request_method=request_method,
timeout=timeout,
url=url,
), timeout=max_time
)
timeout=timeout,
request_headers=request_headers,
request_body=request_body,
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=current_include_filters,
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
), timeout=max_time)
except asyncio.TimeoutError:
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
@@ -51,7 +51,6 @@ class fetcher(Fetcher):
session = requests.Session()

if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
from requests_file import FileAdapter
session.mount('file://', FileAdapter())
@@ -105,17 +104,15 @@ class fetcher(Fetcher):
self.raw_content = r.content

async def run(self,
fetch_favicon=True,
current_include_filters=None,
empty_pages_are_a_change=False,
ignore_status_codes=False,
is_binary=False,
request_body=None,
request_headers=None,
request_method=None,
timeout=None,
url=None,
):
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False):
"""Async wrapper that runs the synchronous requests code in a thread pool"""

loop = asyncio.get_event_loop()
@@ -1,101 +0,0 @@
(async () => {
// Define the function inside the IIFE for console testing
window.getFaviconAsBlob = async function() {
const links = Array.from(document.querySelectorAll(
'link[rel~="apple-touch-icon"], link[rel~="icon"]'
));

const icons = links.map(link => {
const sizesStr = link.getAttribute('sizes');
let size = 0;
if (sizesStr) {
const [w] = sizesStr.split('x').map(Number);
if (!isNaN(w)) size = w;
} else {
size = 16;
}
return {
size,
rel: link.getAttribute('rel'),
href: link.href,
hasSizes: !!sizesStr
};
});

// If no icons found, add fallback favicon.ico
if (icons.length === 0) {
icons.push({
size: 16,
rel: 'icon',
href: '/favicon.ico',
hasSizes: false
});
}
// sort preference: highest resolution first, then apple-touch-icon, then regular icons
icons.sort((a, b) => {
// First priority: actual size (highest first)
if (a.size !== b.size) {
return b.size - a.size;
}

// Second priority: apple-touch-icon over regular icon
const isAppleA = /apple-touch-icon/.test(a.rel);
const isAppleB = /apple-touch-icon/.test(b.rel);
if (isAppleA && !isAppleB) return -1;
if (!isAppleA && isAppleB) return 1;

// Third priority: icons with no size attribute (fallback icons) last
const hasNoSizeA = !a.hasSizes;
const hasNoSizeB = !b.hasSizes;
if (hasNoSizeA && !hasNoSizeB) return 1;
if (!hasNoSizeA && hasNoSizeB) return -1;

return 0;
});

const timeoutMs = 2000;

for (const icon of icons) {
try {
const controller = new AbortController();
const timeout = setTimeout(() => controller.abort(), timeoutMs);

const resp = await fetch(icon.href, {
signal: controller.signal,
redirect: 'follow'
});

clearTimeout(timeout);

if (!resp.ok) {
continue;
}

const blob = await resp.blob();

// Convert blob to base64
const reader = new FileReader();
return await new Promise(resolve => {
reader.onloadend = () => {
resolve({
url: icon.href,
base64: reader.result.split(",")[1]
});
};
reader.readAsDataURL(blob);
});

} catch (e) {
continue;
}
}

// nothing found
return null;
};

// Auto-execute and return result for page.evaluate()
return await window.getFaviconAsBlob();
})();
@@ -17,7 +17,6 @@ async () => {
'back in stock soon',
'back-order or out of stock',
'backordered',
'backorder',
'benachrichtigt mich', // notify me
'binnenkort leverbaar', // coming soon
'brak na stanie',
@@ -40,14 +39,12 @@ async () => {
'mail me when available',
'message if back in stock',
'mevcut değil',
'more on order',
'nachricht bei',
'nicht auf lager',
'nicht lagernd',
'nicht lieferbar',
'nicht verfügbar',
'nicht vorrätig',
'nicht mehr lieferbar',
'nicht zur verfügung',
'nie znaleziono produktów',
'niet beschikbaar',
@@ -4,10 +4,9 @@ import time
from loguru import logger
from changedetectionio.content_fetchers.base import Fetcher


class fetcher(Fetcher):
if os.getenv("WEBDRIVER_URL"):
fetcher_description = f"WebDriver Chrome/Javascript via \"{os.getenv('WEBDRIVER_URL', '')}\""
fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
else:
fetcher_description = "WebDriver Chrome/Javascript"
@@ -26,6 +25,7 @@ class fetcher(Fetcher):
self.browser_connection_is_custom = True
self.browser_connection_url = custom_browser_connection_url


##### PROXY SETUP #####

proxy_sources = [
@@ -38,7 +38,7 @@ class fetcher(Fetcher):
os.getenv('webdriver_proxyHttps'),
os.getenv('webdriver_httpsProxy'),
os.getenv('webdriver_sslProxy'),
proxy_override, # last one should override
proxy_override, # last one should override
]
# The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server=
for k in filter(None, proxy_sources):
@@ -46,21 +46,20 @@ class fetcher(Fetcher):
continue
self.proxy_url = k.strip()


async def run(self,
fetch_favicon=True,
current_include_filters=None,
empty_pages_are_a_change=False,
ignore_status_codes=False,
is_binary=False,
request_body=None,
request_headers=None,
request_method=None,
timeout=None,
url=None,
):
url,
timeout,
request_headers,
request_body,
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False,
empty_pages_are_a_change=False):

import asyncio

# Wrap the entire selenium operation in a thread executor
def _run_sync():
from selenium.webdriver.chrome.options import Options as ChromeOptions
@@ -141,3 +140,4 @@ class fetcher(Fetcher):
# Run the selenium operations in a thread pool to avoid blocking the event loop
loop = asyncio.get_event_loop()
await loop.run_in_executor(None, _run_sync)
@@ -1,32 +1,8 @@
import difflib
from typing import List, Iterator, Union

# https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
#HTML_ADDED_STYLE = "background-color: #d2f7c2; color: #255d00;"
#HTML_CHANGED_INTO_STYLE = "background-color: #dafbe1; color: #116329;"
#HTML_CHANGED_STYLE = "background-color: #ffd6cc; color: #7a2000;"
#HTML_REMOVED_STYLE = "background-color: #ffebe9; color: #82071e;"

# @todo - In the future we can make this configurable
HTML_ADDED_STYLE = "background-color: #eaf2c2; color: #406619"
HTML_REMOVED_STYLE = "background-color: #fadad7; color: #b30000"
HTML_CHANGED_STYLE = HTML_REMOVED_STYLE
HTML_CHANGED_INTO_STYLE = HTML_ADDED_STYLE


# These get set to html or telegram type or discord compatible or whatever in handler.py
# Something that cant get escaped to HTML by accident
REMOVED_PLACEMARKER_OPEN = '@removed_PLACEMARKER_OPEN'
REMOVED_PLACEMARKER_CLOSED = '@removed_PLACEMARKER_CLOSED'

ADDED_PLACEMARKER_OPEN = '@added_PLACEMARKER_OPEN'
ADDED_PLACEMARKER_CLOSED = '@added_PLACEMARKER_CLOSED'

CHANGED_PLACEMARKER_OPEN = '@changed_PLACEMARKER_OPEN'
CHANGED_PLACEMARKER_CLOSED = '@changed_PLACEMARKER_CLOSED'

CHANGED_INTO_PLACEMARKER_OPEN = '@changed_into_PLACEMARKER_OPEN'
CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"

def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
"""Return a slice of the list, or a single element if start == end."""
@@ -39,7 +15,8 @@ def customSequenceMatcher(
include_removed: bool = True,
include_added: bool = True,
include_replaced: bool = True,
include_change_type_prefix: bool = True
include_change_type_prefix: bool = True,
html_colour: bool = False
) -> Iterator[List[str]]:
"""
Compare two sequences and yield differences based on specified parameters.
@@ -52,6 +29,8 @@ def customSequenceMatcher(
include_added (bool): Include added parts
include_replaced (bool): Include replaced parts
include_change_type_prefix (bool): Add prefixes to indicate change types
html_colour (bool): Use HTML background colors for differences

Yields:
List[str]: Differences between sequences
"""
@@ -63,22 +42,22 @@ def customSequenceMatcher(
if include_equal and tag == 'equal':
yield before[alo:ahi]
elif include_removed and tag == 'delete':
if include_change_type_prefix:
yield [f'{REMOVED_PLACEMARKER_OPEN}{line}{REMOVED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)]
if html_colour:
yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)]
else:
yield same_slicer(before, alo, ahi)
yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi)
elif include_replaced and tag == 'replace':
if include_change_type_prefix:
yield [f'{CHANGED_PLACEMARKER_OPEN}{line}{CHANGED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)] + \
[f'{CHANGED_INTO_PLACEMARKER_OPEN}{line}{CHANGED_INTO_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)]
if html_colour:
yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] + \
[f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
else:
yield same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi)
yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \
[f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi)
elif include_added and tag == 'insert':
if include_change_type_prefix:
yield [f'{ADDED_PLACEMARKER_OPEN}{line}{ADDED_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)]
if html_colour:
yield [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
else:
yield same_slicer(after, blo, bhi)

yield [f"(added) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi)

def render_diff(
previous_version_file_contents: str,
@@ -89,7 +68,8 @@ def render_diff(
include_replaced: bool = True,
line_feed_sep: str = "\n",
include_change_type_prefix: bool = True,
patch_format: bool = False
patch_format: bool = False,
html_colour: bool = False
) -> str:
"""
Render the difference between two file contents.
@@ -104,6 +84,8 @@ def render_diff(
line_feed_sep (str): Separator for lines in output
include_change_type_prefix (bool): Add prefixes to indicate change types
patch_format (bool): Use patch format for output
html_colour (bool): Use HTML background colors for differences

Returns:
str: Rendered difference
"""
@@ -121,7 +103,8 @@ def render_diff(
include_removed=include_removed,
include_added=include_added,
include_replaced=include_replaced,
include_change_type_prefix=include_change_type_prefix
include_change_type_prefix=include_change_type_prefix,
html_colour=html_colour
)

def flatten(lst: List[Union[str, List[str]]]) -> str:
@@ -12,17 +12,19 @@ from blinker import signal
from changedetectionio.strtobool import strtobool
from threading import Event
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
from changedetectionio.custom_queue import SignalPriorityQueue, AsyncSignalPriorityQueue, NotificationQueue
from changedetectionio import worker_handler

from flask import (
Flask,
abort,
flash,
make_response,
redirect,
render_template,
request,
send_from_directory,
session,
url_for,
)
from flask_compress import Compress as FlaskCompress
@@ -38,7 +40,7 @@ from loguru import logger
from changedetectionio import __version__
from changedetectionio import queuedWatchMetaData
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications
from changedetectionio.api.Search import Search
from .time_handler import is_within_schedule
@@ -48,8 +50,8 @@ datastore = None
|
||||
ticker_thread = None
|
||||
extra_stylesheets = []
|
||||
|
||||
# Use bulletproof janus-based queues for sync/async reliability
|
||||
update_q = RecheckPriorityQueue()
|
||||
# Use async queue by default, keep sync for backward compatibility
|
||||
update_q = AsyncSignalPriorityQueue() if worker_handler.USE_ASYNC_WORKERS else SignalPriorityQueue()
|
||||
notification_q = NotificationQueue()
|
||||
MAX_QUEUE_SIZE = 2000
|
||||
|
||||
@@ -98,7 +100,7 @@ watch_api = Api(app, decorators=[csrf.exempt])
|
||||
def init_app_secret(datastore_path):
|
||||
secret = ""
|
||||
|
||||
path = os.path.join(datastore_path, "secret.txt")
|
||||
path = "{}/secret.txt".format(datastore_path)
|
||||
|
||||
try:
|
||||
with open(path, "r") as f:
|
||||
@@ -133,11 +135,6 @@ def get_socketio_path():
|
||||
# Socket.IO will be available at {prefix}/socket.io/
|
||||
return prefix
|
||||
|
||||
@app.template_global('is_safe_valid_url')
|
||||
def _is_safe_valid_url(test_url):
|
||||
from .validate_url import is_safe_valid_url
|
||||
return is_safe_valid_url(test_url)
|
||||
|
||||
|
||||
@app.template_filter('format_number_locale')
|
||||
def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||
@@ -310,9 +307,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
watch_api.add_resource(WatchSingleHistory,
|
||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
watch_api.add_resource(WatchFavicon,
|
||||
'/api/v1/watch/<string:uuid>/favicon',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(WatchHistory,
|
||||
'/api/v1/watch/<string:uuid>/history',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
@@ -334,7 +329,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>',
|
||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
|
||||
watch_api.add_resource(Search, '/api/v1/search',
|
||||
resource_class_kwargs={'datastore': datastore})
|
||||
@@ -387,7 +382,7 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
# We would sometimes get login loop errors on sites hosted in sub-paths
|
||||
|
||||
# note for the future:
|
||||
# if not is_safe_valid_url(next):
|
||||
# if not is_safe_url(next):
|
||||
# return flask.abort(400)
|
||||
return redirect(url_for('watchlist.index'))
|
||||
|
||||
@@ -432,32 +427,6 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
except FileNotFoundError:
|
||||
abort(404)
|
||||
|
||||
if group == 'favicon':
|
||||
# Could be sensitive, follow password requirements
|
||||
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
|
||||
abort(403)
|
||||
# Get the watch object
|
||||
watch = datastore.data['watching'].get(filename)
|
||||
if not watch:
|
||||
abort(404)
|
||||
|
||||
favicon_filename = watch.get_favicon_filename()
|
||||
if favicon_filename:
|
||||
try:
|
||||
import magic
|
||||
mime = magic.from_file(
|
||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
||||
mime=True
|
||||
)
|
||||
except ImportError:
|
||||
# Fallback, no python-magic
|
||||
import mimetypes
|
||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
||||
|
||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
||||
response.headers['Content-type'] = mime
|
||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
||||
return response
|
||||
|
||||
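The favicon branch above prefers content-based MIME detection via python-magic and degrades to extension-based guessing when that package is absent. The same fallback, as a standalone sketch (file name illustrative):

    # Content-based MIME detection with an extension-based fallback.
    def guess_mime(path: str) -> str | None:
        try:
            import magic  # python-magic, optional dependency
            return magic.from_file(path, mime=True)
        except ImportError:
            import mimetypes
            mime, _encoding = mimetypes.guess_type(path)
            return mime

    print(guess_mime("favicon.ico"))  # e.g. 'image/vnd.microsoft.icon'
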
if group == 'visual_selector_data':
# Could be sensitive, follow password requirements
@@ -800,7 +769,7 @@ def ticker_thread_check_time_launch_checks():
else:
time_schedule_limit = watch.get('time_schedule_limit')
logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)")
tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')

if time_schedule_limit and time_schedule_limit.get('enabled'):
try:
@@ -849,22 +818,16 @@ def ticker_thread_check_time_launch_checks():

# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
priority = int(time.time())
logger.debug(
f"> Queued watch UUID {uuid} "
f"last checked at {watch['last_checked']} "
f"queued at {now:0.2f} priority {priority} "
f"jitter {watch.jitter_seconds:0.2f}s, "
f"{now - watch['last_checked']:0.2f}s since last checked")

# Into the queue with you
queued_successfully = worker_handler.queue_item_async_safe(update_q,
queuedWatchMetaData.PrioritizedItem(priority=priority,
item={'uuid': uuid})
)
if queued_successfully:
logger.debug(
f"> Queued watch UUID {uuid} "
f"last checked at {watch['last_checked']} "
f"queued at {now:0.2f} priority {priority} "
f"jitter {watch.jitter_seconds:0.2f}s, "
f"{now - watch['last_checked']:0.2f}s since last checked")
else:
logger.critical(f"CRITICAL: Failed to queue watch UUID {uuid} in ticker thread!")

worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))

# Reset for next time
watch.jitter_seconds = 0

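The epoch-as-priority comment above is the whole trick: routine rechecks enter the queue with second-resolution timestamps as their priority, so any small constant (a manually triggered recheck) always sorts first. Illustrated with the standard-library queue:

    import queue
    import time

    q = queue.PriorityQueue()
    q.put((int(time.time()), 'routine recheck'))  # epoch-sized priority, ~1.7e9
    q.put((1, 'recheck now'))                     # tiny priority always wins
    print(q.get()[1])  # -> 'recheck now'
    print(q.get()[1])  # -> 'routine recheck'
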
@@ -5,7 +5,6 @@ from wtforms.widgets.core import TimeInput

from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
from changedetectionio.conditions.form import ConditionFormRow
from changedetectionio.notification_service import NotificationContextData
from changedetectionio.strtobool import strtobool

from wtforms import (
@@ -24,11 +23,11 @@ from wtforms import (
)
from flask_wtf.file import FileField, FileAllowed
from wtforms.fields import FieldList
from wtforms.utils import unset_value

from wtforms.validators import ValidationError

from changedetectionio.widgets import TernaryNoneBooleanField
from validators.url import url as url_validator


# default
# each select <option data-enabled="enabled-0-0"
@@ -55,8 +54,6 @@ valid_method = {

default_method = 'GET'
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.'

class StringListField(StringField):
widget = widgets.TextArea()
@@ -213,35 +210,6 @@ class ScheduleLimitForm(Form):
self.sunday.form.enabled.label.text = "Sunday"


def validate_time_between_check_has_values(form):
"""
Custom validation function for TimeBetweenCheckForm.
Returns True if at least one time interval field has a value > 0.
"""
res = any([
form.weeks.data and int(form.weeks.data) > 0,
form.days.data and int(form.days.data) > 0,
form.hours.data and int(form.hours.data) > 0,
form.minutes.data and int(form.minutes.data) > 0,
form.seconds.data and int(form.seconds.data) > 0
])

return res


class RequiredTimeInterval(object):
"""
WTForms validator that ensures at least one time interval field has a value > 0.
Use this with FormField(TimeBetweenCheckForm, validators=[RequiredTimeInterval()]).
"""
def __init__(self, message=None):
self.message = message or 'At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'

def __call__(self, form, field):
if not validate_time_between_check_has_values(field.form):
raise ValidationError(self.message)

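Since the helper returns a plain boolean, its contract is easy to pin down with stand-in objects. The fake field and form classes below are illustrative, not part of this patch:

    # Minimal stand-ins that mimic WTForms field .data access
    class _Field:
        def __init__(self, data):
            self.data = data

    class _FakeForm:
        weeks = _Field(None)
        days = _Field(0)
        hours = _Field(None)
        minutes = _Field(5)   # one non-zero interval is enough
        seconds = _Field(None)

    print(validate_time_between_check_has_values(_FakeForm))  # True
    _FakeForm.minutes = _Field(0)
    print(validate_time_between_check_has_values(_FakeForm))  # False
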
class TimeBetweenCheckForm(Form):
weeks = IntegerField('Weeks', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
days = IntegerField('Days', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
@@ -250,123 +218,6 @@ class TimeBetweenCheckForm(Form):
seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
# @todo add total seconds minimum validator = minimum_seconds_recheck_time

def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
super().__init__(formdata, obj, prefix, data, meta, **kwargs)
self.require_at_least_one = kwargs.get('require_at_least_one', False)
self.require_at_least_one_message = kwargs.get('require_at_least_one_message', REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT)

def validate(self, **kwargs):
"""Custom validation that can optionally require at least one time interval."""
# Run normal field validation first
if not super().validate(**kwargs):
return False

# Apply optional "at least one" validation
if self.require_at_least_one:
if not validate_time_between_check_has_values(self):
# Add error to the form's general errors (not field-specific)
if not hasattr(self, '_formdata_errors'):
self._formdata_errors = []
self._formdata_errors.append(self.require_at_least_one_message)
return False

return True


class EnhancedFormField(FormField):
"""
An enhanced FormField that supports conditional validation with top-level error messages.
Adds a 'top_errors' property for validation errors at the FormField level.
"""

def __init__(self, form_class, label=None, validators=None, separator="-",
conditional_field=None, conditional_message=None, conditional_test_function=None, **kwargs):
"""
Initialize EnhancedFormField with optional conditional validation.

:param conditional_field: Name of the field this FormField depends on (e.g. 'time_between_check_use_default')
:param conditional_message: Error message to show when validation fails
:param conditional_test_function: Custom function to test if FormField has valid values.
Should take self.form as parameter and return True if valid.
"""
super().__init__(form_class, label, validators, separator, **kwargs)
self.top_errors = []
self.conditional_field = conditional_field
self.conditional_message = conditional_message or "At least one field must have a value when not using defaults."
self.conditional_test_function = conditional_test_function

def validate(self, form, extra_validators=()):
"""
Custom validation that supports conditional logic and stores top-level errors.
"""
self.top_errors = []

# First run the normal FormField validation
base_valid = super().validate(form, extra_validators)

# Apply conditional validation if configured
if self.conditional_field and hasattr(form, self.conditional_field):
conditional_field_obj = getattr(form, self.conditional_field)

# If the conditional field is False/unchecked, check if this FormField has any values
if not conditional_field_obj.data:
# Use custom test function if provided, otherwise use generic fallback
if self.conditional_test_function:
has_any_value = self.conditional_test_function(self.form)
else:
# Generic fallback - check if any field has truthy data
has_any_value = any(field.data for field in self.form if hasattr(field, 'data') and field.data)

if not has_any_value:
self.top_errors.append(self.conditional_message)
base_valid = False

return base_valid

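EnhancedFormField is consumed later in this same diff; the watch-edit form wires it up as follows (copied from the processor_text_json_diff_form hunk below, so these field names are the real ones):

    time_between_check = EnhancedFormField(
        TimeBetweenCheckForm,
        conditional_field='time_between_check_use_default',
        conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT,
        conditional_test_function=validate_time_between_check_has_values
    )

When time_between_check_use_default is unchecked, the sub-form must carry at least one non-zero interval, and the failure surfaces through top_errors rather than on any single field.
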
class RequiredFormField(FormField):
"""
A FormField that passes require_at_least_one=True to TimeBetweenCheckForm.
Use this when you want the sub-form to always require at least one value.
"""

def __init__(self, form_class, label=None, validators=None, separator="-", **kwargs):
super().__init__(form_class, label, validators, separator, **kwargs)

def process(self, formdata, data=unset_value, extra_filters=None):
if extra_filters:
raise TypeError(
"FormField cannot take filters, as the encapsulated"
"data is not mutable."
)

if data is unset_value:
try:
data = self.default()
except TypeError:
data = self.default
self._obj = data

self.object_data = data

prefix = self.name + self.separator
# Pass require_at_least_one=True to the sub-form
if isinstance(data, dict):
self.form = self.form_class(formdata=formdata, prefix=prefix, require_at_least_one=True, **data)
else:
self.form = self.form_class(formdata=formdata, obj=data, prefix=prefix, require_at_least_one=True)

@property
def errors(self):
"""Include sub-form validation errors"""
form_errors = self.form.errors
# Add any general form errors to a special 'form' key
if hasattr(self.form, '_formdata_errors') and self.form._formdata_errors:
form_errors = dict(form_errors)  # Make a copy
form_errors['form'] = self.form._formdata_errors
return form_errors


# Separated by key:value
class StringDictKeyValue(StringField):
widget = widgets.TextArea()
@@ -467,16 +318,11 @@ class ValidateAppRiseServers(object):
import apprise
from .notification.apprise_plugin.assets import apprise_asset
from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler # noqa: F401
from changedetectionio.jinja2_custom import render as jinja_render

apobj = apprise.Apprise(asset=apprise_asset)

for server_url in field.data:
generic_notification_context_data = NotificationContextData()
# Make sure something is at least present in all those regular token fields
generic_notification_context_data.set_random_for_validation()

url = jinja_render(template_str=server_url.strip(), **generic_notification_context_data).strip()
url = server_url.strip()
if url.startswith("#"):
continue

@@ -490,8 +336,9 @@ class ValidateJinja2Template(object):
"""
def __call__(self, form, field):
from changedetectionio import notification
from changedetectionio.jinja2_custom import create_jinja_env

from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
from jinja2.sandbox import ImmutableSandboxedEnvironment
from jinja2.meta import find_undeclared_variables
import jinja2.exceptions

@@ -499,11 +346,9 @@ class ValidateJinja2Template(object):
joined_data = ' '.join(map(str, field.data)) if isinstance(field.data, list) else f"{field.data}"

try:
# Use the shared helper to create a properly configured environment
jinja2_env = create_jinja_env(loader=BaseLoader)

# Add notification tokens for validation
jinja2_env.globals.update(NotificationContextData())
jinja2_env = ImmutableSandboxedEnvironment(loader=BaseLoader)
jinja2_env.globals.update(notification.valid_tokens)
# Extra validation tokens provided on the form_class(... extra_tokens={}) setup
if hasattr(field, 'extra_notification_tokens'):
jinja2_env.globals.update(field.extra_notification_tokens)

@@ -515,7 +360,6 @@ class ValidateJinja2Template(object):
except jinja2.exceptions.SecurityError as e:
raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e

# Check for undeclared variables
ast = jinja2_env.parse(joined_data)
undefined = ", ".join(find_undeclared_variables(ast))
if undefined:
@@ -538,23 +382,19 @@ class validateURL(object):


def validate_url(test_url):
from changedetectionio.validate_url import is_safe_valid_url
if not is_safe_valid_url(test_url):
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
try:
url_validator(test_url, simple_host=allow_simplehost)
except validators.ValidationError:
#@todo check for xss
message = f"'{test_url}' is not a valid URL."
# This should be wtforms.validators.
raise ValidationError('Watch protocol is not permitted or invalid URL format')


class ValidateSinglePythonRegexString(object):
def __init__(self, message=None):
self.message = message

def __call__(self, form, field):
try:
re.compile(field.data)
except re.error:
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
raise ValidationError(message % (field.data))
raise ValidationError(message)

from .model.Watch import is_safe_url
if not is_safe_url(test_url):
# This should be wtforms.validators.
raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format')

class ValidateListRegex(object):
"""
@@ -574,7 +414,6 @@ class ValidateListRegex(object):
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
raise ValidationError(message % (line))


class ValidateCSSJSONXPATHInput(object):
"""
Filter validation
@@ -674,51 +513,6 @@ class ValidateCSSJSONXPATHInput(object):
except:
raise ValidationError("A system-error occurred when validating your jq expression")

class ValidateSimpleURL:
"""Validate that the value can be parsed by urllib.parse.urlparse() and has a scheme/netloc."""
def __init__(self, message=None):
self.message = message or "Invalid URL."

def __call__(self, form, field):
data = (field.data or "").strip()
if not data:
return  # empty is OK — pair with validators.Optional()
from urllib.parse import urlparse

parsed = urlparse(data)
if not parsed.scheme or not parsed.netloc:
raise ValidationError(self.message)

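ValidateSimpleURL delegates entirely to urlparse(), so the boundary cases are worth seeing once (standalone, no project imports needed):

    from urllib.parse import urlparse

    for candidate in ("socks5://user:pass@proxy.example:1080", "example.com", "http://"):
        p = urlparse(candidate)
        print(candidate, "->", bool(p.scheme and p.netloc))
    # socks5://user:pass@proxy.example:1080 -> True
    # example.com -> False  (no scheme, the whole value lands in .path)
    # http:// -> False      (scheme but empty netloc)
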
class ValidateStartsWithRegex(object):
def __init__(self, regex, *, flags=0, message=None, allow_empty=True, split_lines=True):
# compile with given flags (we’ll pass re.IGNORECASE below)
self.pattern = re.compile(regex, flags) if isinstance(regex, str) else regex
self.message = message
self.allow_empty = allow_empty
self.split_lines = split_lines

def __call__(self, form, field):
data = field.data
if not data:
return

# normalize into list of lines
if isinstance(data, str) and self.split_lines:
lines = data.splitlines()
elif isinstance(data, (list, tuple)):
lines = data
else:
lines = [data]

for line in lines:
stripped = line.strip()
if not stripped:
if self.allow_empty:
continue
raise ValidationError(self.message or "Empty value not allowed.")
if not self.pattern.match(stripped):
raise ValidationError(self.message or "Invalid value.")

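The prefix rule this validator enforces for proxies further down can be exercised directly; the pattern below is copied from the SingleExtraProxy hunk later in the diff:

    import re

    pattern = re.compile(r'^(https?|socks5)://', re.IGNORECASE)
    print(bool(pattern.match('SOCKS5://proxy.example:1080')))  # True, case-insensitive
    print(bool(pattern.match('ftp://proxy.example:21')))       # False, scheme not allowed
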
class quickWatchForm(Form):
from . import processors

@@ -729,6 +523,7 @@ class quickWatchForm(Form):
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})



# Common to a single watch and the global settings
class commonSettingsForm(Form):
from . import processors
@@ -739,23 +534,16 @@ class commonSettingsForm(Form):
self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})

extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
notification_format = SelectField('Notification format', choices=list(valid_notification_formats.items()))
notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff")
scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")])

# Not true anymore but keep the validate_ hook for future use, we convert color tags
# def validate_notification_urls(self, field):
# """Validate that HTML Color format is not used with Telegram"""
# if self.notification_format.data == 'HTML Color' and field.data:
# for url in field.data:
# if url and ('tgram://' in url or 'discord://' in url or 'discord.com/api/webhooks' in url):
# raise ValidationError('HTML Color format is not supported by Telegram and Discord. Please choose another Notification Format (Plain Text, HTML, or Markdown to HTML).')


class importForm(Form):
from . import processors
@@ -780,16 +568,11 @@ class processor_text_json_diff_form(commonSettingsForm):
url = fields.URLField('URL', validators=[validateURL()])
tags = StringTagUUID('Group tag', [validators.Optional()], default='')

time_between_check = EnhancedFormField(
TimeBetweenCheckForm,
conditional_field='time_between_check_use_default',
conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT,
conditional_test_function=validate_time_between_check_has_values
)
time_between_check = FormField(TimeBetweenCheckForm)

time_schedule_limit = FormField(ScheduleLimitForm)

time_between_check_use_default = BooleanField('Use global settings for time between check and scheduler.', default=False)
time_between_check_use_default = BooleanField('Use global settings for time between check', default=False)

include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')

@@ -807,7 +590,6 @@ class processor_text_json_diff_form(commonSettingsForm):
check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
sort_text_alphabetically = BooleanField('Sort text alphabetically', default=False)
strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None)
trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)

filter_text_added = BooleanField('Added lines', default=True)
@@ -820,18 +602,18 @@ class processor_text_json_diff_form(commonSettingsForm):
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])

save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})

proxy = RadioField('Proxy')
# filter_failure_notification_send @todo make ternary
filter_failure_notification_send = BooleanField(
'Send a notification when the filter can no longer be found on the page', default=False)
notification_muted = TernaryNoneBooleanField('Notifications', default=None, yes_text="Muted", no_text="On")

notification_muted = BooleanField('Notifications Muted / Off', default=False)
notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)

conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL')
conditions = FieldList(FormField(ConditionFormRow), min_entries=1)  # Add rule logic here
use_page_title_in_list = TernaryNoneBooleanField('Use page <title> in list', default=None)


def extra_tab_content(self):
return None
@@ -843,7 +625,7 @@ class processor_text_json_diff_form(commonSettingsForm):
if not super().validate():
return False

from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render
result = True

# Fail form validation when a body is set for a GET
@@ -906,36 +688,23 @@ class processor_text_json_diff_form(commonSettingsForm):
):
super().__init__(formdata, obj, prefix, data, meta, **kwargs)
if kwargs and kwargs.get('default_system_settings'):
default_tz = kwargs.get('default_system_settings').get('application', {}).get('scheduler_timezone_default')
default_tz = kwargs.get('default_system_settings').get('application', {}).get('timezone')
if default_tz:
self.time_schedule_limit.form.timezone.render_kw['placeholder'] = default_tz



class SingleExtraProxy(Form):

# maybe better to set some <script>var..
proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
proxy_url = StringField('Proxy URL', [
validators.Optional(),
ValidateStartsWithRegex(
regex=r'^(https?|socks5)://', # ✅ main pattern
flags=re.IGNORECASE, # ✅ makes it case-insensitive
message='Proxy URLs must start with http://, https:// or socks5://',
),
ValidateSimpleURL()
], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
proxy_url = StringField('Proxy URL', [validators.Optional()], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
# @todo do the validation here instead

class SingleExtraBrowser(Form):
browser_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
browser_connection_url = StringField('Browser connection URL', [
validators.Optional(),
ValidateStartsWithRegex(
regex=r'^(wss?|ws)://',
flags=re.IGNORECASE,
message='Browser URLs must start with wss:// or ws://'
),
ValidateSimpleURL()
], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
# @todo do the validation here instead

class DefaultUAInputForm(Form):
html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
@@ -944,9 +713,9 @@ class DefaultUAInputForm(Form):

# datastore.data['settings']['requests']..
class globalSettingsRequestForm(Form):
time_between_check = RequiredFormField(TimeBetweenCheckForm)
time_between_check = FormField(TimeBetweenCheckForm)
time_schedule_limit = FormField(ScheduleLimitForm)
proxy = RadioField('Default proxy')
proxy = RadioField('Proxy')
jitter_seconds = IntegerField('Random jitter seconds ± check',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
@@ -955,12 +724,7 @@ class globalSettingsRequestForm(Form):
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=1, max=50,
message="Should be between 1 and 50")])

timeout = IntegerField('Requests timeout in seconds',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=1, max=999,
message="Should be between 1 and 999")])


extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)

@@ -974,10 +738,8 @@ class globalSettingsRequestForm(Form):
return False

class globalSettingsApplicationUIForm(Form):
open_diff_in_new_tab = BooleanField("Open 'History' page in a new tab", default=True, validators=[validators.Optional()])
open_diff_in_new_tab = BooleanField('Open diff page in a new tab', default=True, validators=[validators.Optional()])
socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])
use_page_title_in_list = BooleanField('Use page <title> in watch overview list') #BooleanField=True

# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):
@@ -1002,14 +764,9 @@ class globalSettingsApplicationForm(commonSettingsForm):

removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()])
strip_ignored_lines = BooleanField('Strip ignored lines')
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
validators=[validators.Optional()])

rss_reader_mode = BooleanField('RSS reader mode ', default=False,
validators=[validators.Optional()])

filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,
@@ -1029,9 +786,9 @@ class globalSettingsForm(Form):

requests = FormField(globalSettingsRequestForm)
application = FormField(globalSettingsApplicationForm)
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})


class extractDataForm(Form):
extract_regex = StringField('RegEx to extract', validators=[validators.DataRequired(), ValidateSinglePythonRegexString()])
extract_regex = StringField('RegEx to extract', validators=[validators.Length(min=1, message="Needs a RegEx")])
extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"})

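The extractDataForm change above replaces a bare length check with genuine pattern compilation via ValidateSinglePythonRegexString; the underlying test is simply whether re.compile() accepts the input:

    import re

    for candidate in (r"\d+\.\d\d", r"(?P<price>\d+"):
        try:
            re.compile(candidate)
            print(f"{candidate!r} is a valid regular expression")
        except re.error as e:
            print(f"{candidate!r} rejected: {e}")
    # the second pattern is rejected: missing ), unterminated subpattern
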
@@ -1,8 +1,6 @@
from functools import lru_cache

from loguru import logger
from lxml import etree
from typing import List
import html
import json
import re

@@ -11,10 +9,6 @@ TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')
PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'

TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)

# 'price' , 'lowPrice', 'highPrice' are usually under here
# All of those may or may not appear on different websites - I didn't find a way to do case-insensitive searching here
LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
@@ -23,9 +17,9 @@ class JSONNotFound(ValueError):
def __init__(self, msg):
ValueError.__init__(self, msg)


# Doesn't look like python supports forward slash auto enclosure in re.findall
# So convert it to inline flag "(?i)foobar" type configuration
@lru_cache(maxsize=100)
def perl_style_slash_enclosed_regex_to_options(regex):

res = re.search(PERL_STYLE_REGEX, regex, re.IGNORECASE)
@@ -58,17 +52,13 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting

return html_block

def subtractive_css_selector(css_selector, content):
def subtractive_css_selector(css_selector, html_content):
from bs4 import BeautifulSoup
soup = BeautifulSoup(content, "html.parser")
soup = BeautifulSoup(html_content, "html.parser")

# So that the elements don't shift their index, build a list of elements here which will be pointers to their place in the DOM
elements_to_remove = soup.select(css_selector)

if not elements_to_remove:
# Better to return the original than rebuild it with BeautifulSoup
return content

# Then, remove them in a separate loop
for item in elements_to_remove:
item.decompose()
@@ -76,7 +66,6 @@ def subtractive_css_selector(css_selector, content):
return str(soup)

def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
from lxml import etree
# Parse the HTML content using lxml
html_tree = etree.HTML(html_content)

@@ -88,10 +77,6 @@ def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
# Collect elements for each selector
elements_to_remove.extend(html_tree.xpath(selector))

# If no elements were found, return the original HTML content
if not elements_to_remove:
return html_content

# Then, remove them in a separate loop
for element in elements_to_remove:
if element.getparent() is not None:  # Ensure the element has a parent before removing
@@ -109,7 +94,7 @@ def element_removal(selectors: List[str], html_content):
xpath_selectors = []

for selector in selectors:
if selector.strip().startswith(('xpath:', 'xpath1:', '//')):
if selector.startswith(('xpath:', 'xpath1:', '//')):
# Handle XPath selectors separately
xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
xpath_selectors.append(xpath_selector)
@@ -186,21 +171,8 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
html_block = ""

# Build namespace map for XPath queries
namespaces = {'re': 'http://exslt.org/regular-expressions'}

# Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML)
# XPath spec: unprefixed element names have no namespace, not the default namespace
# Solution: Register the default namespace with empty string prefix in elementpath
# This is primarily for RSS/Atom feeds but works for any XML with default namespace
if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap:
# Register the default namespace with empty string prefix for elementpath
# This allows //title to match elements in the default namespace
namespaces[''] = tree.nsmap[None]

r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
#@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
#@note: //title/text() won't work where <title>CDATA.. (use cdata_in_document_to_text first)
r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
#@note: //title/text() won't work where <title>CDATA..

if type(r) != list:
r = [r]
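The two XPath code paths handle default namespaces differently, as the comments above describe. A standalone contrast (the Atom feed namespace is chosen for illustration, and elementpath.select is used here without the XPath3Parser wiring from the module):

    from lxml import etree
    import elementpath

    xml = b'<feed xmlns="http://www.w3.org/2005/Atom"><title>hello</title></feed>'
    tree = etree.fromstring(xml)

    # lxml's native xpath(): unprefixed names never match the default namespace
    print(tree.xpath('//title/text()'))                    # []
    print(tree.xpath("//*[local-name()='title']/text()"))  # ['hello']

    # elementpath: registering the default namespace under the '' prefix makes //title match
    print(elementpath.select(tree, '//title/text()',
                             namespaces={'': 'http://www.w3.org/2005/Atom'}))  # ['hello']
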
@@ -235,19 +207,8 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
html_block = ""

# Build namespace map for XPath queries
namespaces = {'re': 'http://exslt.org/regular-expressions'}

# NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace
# For documents with default namespace (RSS/Atom feeds), users must use:
# - local-name(): //*[local-name()='title']/text()
# - Or use xpath_filter (not xpath1_filter) which supports default namespaces
# XPath spec: unprefixed element names have no namespace, not the default namespace

r = tree.xpath(xpath_filter.strip(), namespaces=namespaces)
#@note: xpath1 (lxml) does NOT automatically handle default namespaces
#@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support
#@note: //title/text() won't work where <title>CDATA.. (use cdata_in_document_to_text first)
r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
#@note: //title/text() won't work where <title>CDATA..

for element in r:
# When there's more than 1 match, then add the suffix to separate each line
@@ -328,92 +289,70 @@ def _get_stripped_text_from_json_match(match):

return stripped_text_from_html

def extract_json_blob_from_html(content, ensure_is_ldjson_info_type, json_filter):
from bs4 import BeautifulSoup
stripped_text_from_html = ''

# For each <script json></script> blob.. just return the first that matches json_filter
# As a last resort, try to parse the whole <body>
soup = BeautifulSoup(content, 'html.parser')

if ensure_is_ldjson_info_type:
bs_result = soup.find_all('script', {"type": "application/ld+json"})
else:
bs_result = soup.find_all('script')
bs_result += soup.find_all('body')

bs_jsons = []

for result in bs_result:
# result.text is how bs4 magically strips JSON from the body
content_start = result.text.lstrip("\ufeff").strip()[:100] if result.text else ''
# Skip empty tags, and things that don't even look like JSON
if not result.text or not (content_start[0] == '{' or content_start[0] == '['):
continue
try:
json_data = json.loads(result.text)
bs_jsons.append(json_data)
except json.JSONDecodeError:
# Skip objects which cannot be parsed
continue

if not bs_jsons:
raise JSONNotFound("No parsable JSON found in this document")

for json_data in bs_jsons:
stripped_text_from_html = _parse_json(json_data, json_filter)

if ensure_is_ldjson_info_type:
# Could sometimes be list, string or something else random
if isinstance(json_data, dict):
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
# @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
# LD_JSON auto-extract also requires some content PLUS the ldjson to be present
# 1833 - could be either str or dict, should not be anything else

t = json_data.get('@type')
if t and stripped_text_from_html:

if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
break
# The non-standard part, some have a list
elif isinstance(t, list):
if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
break

elif stripped_text_from_html:
break

return stripped_text_from_html

# content - json
# json_filter - ie json:$..price
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I don't know how to do that as a json selector)
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
from bs4 import BeautifulSoup

stripped_text_from_html = False
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
try:
# .lstrip("\ufeff") strips the Byte Order Mark from UTF-8 and still lets the UTF work
stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter)
except json.JSONDecodeError as e:
logger.warning(str(e))

# Looks like clean JSON, don't bother extracting from HTML
# For each <script json></script> blob.. just return the first that matches json_filter
# As a last resort, try to parse the whole <body>
soup = BeautifulSoup(content, 'html.parser')

content_start = content.lstrip("\ufeff").strip()[:100]
if ensure_is_ldjson_info_type:
bs_result = soup.find_all('script', {"type": "application/ld+json"})
else:
bs_result = soup.find_all('script')
bs_result += soup.find_all('body')

if content_start[0] == '{' or content_start[0] == '[':
try:
# .lstrip("\ufeff") strips the Byte Order Mark from UTF-8 and still lets the UTF work
stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter)
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
else:
# Probably something else, go fish inside for it
try:
stripped_text_from_html = extract_json_blob_from_html(content=content,
ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
json_filter=json_filter )
except json.JSONDecodeError as e:
logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
bs_jsons = []
for result in bs_result:
# Skip empty tags, and things that don't even look like JSON
if not result.text or '{' not in result.text:
continue
try:
json_data = json.loads(result.text)
bs_jsons.append(json_data)
except json.JSONDecodeError:
# Skip objects which cannot be parsed
continue

if not bs_jsons:
raise JSONNotFound("No parsable JSON found in this document")

for json_data in bs_jsons:
stripped_text_from_html = _parse_json(json_data, json_filter)

if ensure_is_ldjson_info_type:
# Could sometimes be list, string or something else random
if isinstance(json_data, dict):
# If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
# (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
# @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
# LD_JSON auto-extract also requires some content PLUS the ldjson to be present
# 1833 - could be either str or dict, should not be anything else

t = json_data.get('@type')
if t and stripped_text_from_html:

if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
break
# The non-standard part, some have a list
elif isinstance(t, list):
if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
break

elif stripped_text_from_html:
break

if not stripped_text_from_html:
# Re 265 - Just return an empty string when filter not found
@@ -433,9 +372,6 @@ def strip_ignore_text(content, wordlist, mode="content"):
ignored_lines = []

for k in wordlist:
# Skip empty strings to avoid matching everything
if not k or not k.strip():
continue
# Is it a regex?
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
if res:
@@ -574,43 +510,3 @@ def get_triggered_text(content, trigger_text):
i += 1

return triggered_text


def extract_title(data: bytes | str, sniff_bytes: int = 2048, scan_chars: int = 8192) -> str | None:
try:
# Only decode/process the prefix we need for title extraction
match data:
case bytes() if data.startswith((b"\xff\xfe", b"\xfe\xff")):
prefix = data[:scan_chars * 2].decode("utf-16", errors="replace")
case bytes() if data.startswith((b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff")):
prefix = data[:scan_chars * 4].decode("utf-32", errors="replace")
case bytes():
try:
prefix = data[:scan_chars].decode("utf-8")
except UnicodeDecodeError:
try:
head = data[:sniff_bytes].decode("ascii", errors="ignore")
if m := (META_CS.search(head) or META_CT.search(head)):
enc = m.group(1).lower()
else:
enc = "cp1252"
prefix = data[:scan_chars * 2].decode(enc, errors="replace")
except Exception as e:
logger.error(f"Title extraction encoding detection failed: {e}")
return None
case str():
prefix = data[:scan_chars] if len(data) > scan_chars else data
case _:
logger.error(f"Title extraction received unsupported data type: {type(data)}")
return None

# Search only in the prefix
if m := TITLE_RE.search(prefix):
title = html.unescape(" ".join(m.group(1).split())).strip()
# Some safe limit
return title[:2000]
return None

except Exception as e:
logger.error(f"Title extraction failed: {e}")
return None
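A quick round trip through extract_title, assuming the function and its module-level regexes are importable as defined above:

    html_doc = b"<html><head><title>Widget &amp; Co\n  Home</title></head><body></body></html>"
    print(extract_title(html_doc))  # 'Widget & Co Home' (entities unescaped, whitespace collapsed)
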
@@ -1,22 +0,0 @@
|
||||
"""
|
||||
Jinja2 custom extensions and safe rendering utilities.
|
||||
"""
|
||||
from .extensions.TimeExtension import TimeExtension
|
||||
from .safe_jinja import (
|
||||
render,
|
||||
render_fully_escaped,
|
||||
create_jinja_env,
|
||||
JINJA2_MAX_RETURN_PAYLOAD_SIZE,
|
||||
DEFAULT_JINJA2_EXTENSIONS,
|
||||
)
|
||||
from .plugins.regex import regex_replace
|
||||
|
||||
__all__ = [
|
||||
'TimeExtension',
|
||||
'render',
|
||||
'render_fully_escaped',
|
||||
'create_jinja_env',
|
||||
'JINJA2_MAX_RETURN_PAYLOAD_SIZE',
|
||||
'DEFAULT_JINJA2_EXTENSIONS',
|
||||
'regex_replace',
|
||||
]
|
||||
@@ -1,221 +0,0 @@
|
||||
"""
|
||||
Jinja2 TimeExtension - Custom date/time handling for templates.
|
||||
|
||||
This extension provides the {% now %} tag for Jinja2 templates, offering timezone-aware
|
||||
date/time formatting with support for time offsets.
|
||||
|
||||
Why This Extension Exists:
|
||||
The Arrow library has a now() function (arrow.now()), but Jinja2 templates cannot
|
||||
directly call Python functions - they need extensions or filters to expose functionality.
|
||||
|
||||
This TimeExtension serves as a Jinja2-to-Arrow bridge that:
|
||||
|
||||
1. Makes Arrow accessible in templates - Jinja2 requires registering functions/tags
|
||||
through extensions. You cannot use arrow.now() directly in a template.
|
||||
|
||||
2. Provides template-friendly syntax - Instead of complex Python code, you get clean tags:
|
||||
{% now 'UTC' %}
|
||||
{% now 'UTC' + 'hours=2' %}
|
||||
{% now 'Europe/London', '%Y-%m-%d' %}
|
||||
|
||||
3. Adds convenience features on top of Arrow:
|
||||
- Default timezone from environment variable (TZ) or config
|
||||
- Default datetime format configuration
|
||||
- Offset syntax parsing: 'hours=2,minutes=30' → shift(hours=2, minutes=30)
|
||||
- Empty string timezone support to use configured defaults
|
||||
|
||||
4. Maintains security - Works within Jinja2's sandboxed environment so users
|
||||
cannot access arbitrary Python code or objects.
|
||||
|
||||
Essentially, this is a Jinja2 wrapper around arrow.now() and arrow.shift() that
|
||||
provides user-friendly template syntax while maintaining security.
|
||||
|
||||
Basic Usage:
|
||||
{% now 'UTC' %}
|
||||
# Output: Wed, 09 Dec 2015 23:33:01
|
||||
|
||||
Custom Format:
|
||||
{% now 'UTC', '%Y-%m-%d %H:%M:%S' %}
|
||||
# Output: 2015-12-09 23:33:01
|
||||
|
||||
Timezone Support:
|
||||
{% now 'America/New_York' %}
|
||||
{% now 'Europe/London' %}
|
||||
{% now '' %} # Uses default timezone from environment.default_timezone
|
||||
|
||||
Time Offsets (Addition):
|
||||
{% now 'UTC' + 'hours=2' %}
|
||||
{% now 'UTC' + 'hours=2,minutes=30' %}
|
||||
{% now 'UTC' + 'days=1,hours=2,minutes=15,seconds=10' %}
|
||||
|
||||
Time Offsets (Subtraction):
|
||||
{% now 'UTC' - 'minutes=11' %}
|
||||
{% now 'UTC' - 'days=2,minutes=33,seconds=1' %}
|
||||
|
||||
Time Offsets with Custom Format:
|
||||
{% now 'UTC' + 'hours=2', '%Y-%m-%d %H:%M:%S' %}
|
||||
# Output: 2015-12-10 01:33:01
|
||||
|
||||
Weekday Support (for finding next/previous weekday):
|
||||
{% now 'UTC' + 'weekday=0' %} # Next Monday (0=Monday, 6=Sunday)
|
||||
{% now 'UTC' + 'weekday=4' %} # Next Friday
|
||||
|
||||
Configuration:
|
||||
- Default timezone: Set via TZ environment variable or override environment.default_timezone
|
||||
- Default format: '%a, %d %b %Y %H:%M:%S' (can be overridden via environment.datetime_format)
|
||||
|
||||
Environment Customization:
|
||||
from changedetectionio.jinja2_custom import create_jinja_env
|
||||
|
||||
jinja2_env = create_jinja_env()
|
||||
jinja2_env.default_timezone = 'America/New_York' # Override default timezone
|
||||
jinja2_env.datetime_format = '%Y-%m-%d %H:%M' # Override default format
|
||||
|
||||
Supported Offset Parameters:
|
||||
- years, months, weeks, days
|
||||
- hours, minutes, seconds, microseconds
|
||||
- weekday (0=Monday through 6=Sunday, must be integer)
|
||||
|
||||
Note:
|
||||
This extension uses the Arrow library for timezone-aware datetime handling.
|
||||
All timezone names should be valid IANA timezone identifiers (e.g., 'America/New_York').
|
||||
"""
|
||||
import arrow
|
||||
|
||||
from jinja2 import nodes
|
||||
from jinja2.ext import Extension
|
||||
import os
|
||||
|
||||
class TimeExtension(Extension):
|
||||
"""
|
||||
    Jinja2 Extension providing the {% now %} tag for timezone-aware date/time rendering.

    This extension adds two attributes to the Jinja2 environment:

    - datetime_format: Default strftime format string (default: '%a, %d %b %Y %H:%M:%S')
    - default_timezone: Default timezone for rendering (default: TZ env var or 'UTC')

    Both can be overridden after environment creation by setting the attributes directly.
    """

    tags = {'now'}

    def __init__(self, environment):
        """Jinja2 Extension constructor."""
        super().__init__(environment)

        environment.extend(
            datetime_format='%a, %d %b %Y %H:%M:%S',
            default_timezone=os.getenv('TZ', 'UTC').strip()
        )

    def _datetime(self, timezone, operator, offset, datetime_format):
        """
        Get current datetime with time offset applied.

        Args:
            timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
            operator: '+' for addition or '-' for subtraction
            offset: Comma-separated offset parameters (e.g., 'hours=2,minutes=30')
            datetime_format: strftime format string or None to use environment default

        Returns:
            Formatted datetime string with offset applied

        Example:
            _datetime('UTC', '+', 'hours=2,minutes=30', '%Y-%m-%d %H:%M:%S')
            # Returns current time + 2.5 hours
        """
        # Use default timezone if none specified
        if not timezone or timezone == '':
            timezone = self.environment.default_timezone

        d = arrow.now(timezone)

        # Parse the shift params from the offset string, applying the operator's sign
        shift_params = {}
        for param in offset.split(','):
            interval, value = param.split('=')
            shift_params[interval.strip()] = float(operator + value.strip())

        # Fix: the weekday parameter cannot be a float
        if 'weekday' in shift_params:
            shift_params['weekday'] = int(shift_params['weekday'])

        d = d.shift(**shift_params)

        if datetime_format is None:
            datetime_format = self.environment.datetime_format
        return d.strftime(datetime_format)

    def _now(self, timezone, datetime_format):
        """
        Get current datetime without any offset.

        Args:
            timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
            datetime_format: strftime format string or None to use environment default

        Returns:
            Formatted datetime string for current time

        Example:
            _now('America/New_York', '%Y-%m-%d %H:%M:%S')
            # Returns current time in New York timezone
        """
        # Use default timezone if none specified
        if not timezone or timezone == '':
            timezone = self.environment.default_timezone

        if datetime_format is None:
            datetime_format = self.environment.datetime_format
        return arrow.now(timezone).strftime(datetime_format)

    def parse(self, parser):
        """
        Parse the {% now %} tag and generate appropriate AST nodes.

        This method is called by Jinja2 when it encounters a {% now %} tag.
        It parses the tag syntax and determines whether to call _now() or _datetime()
        based on whether offset operations (+ or -) are present.

        Supported syntax:
            {% now 'timezone' %}                       -> calls _now()
            {% now 'timezone', 'format' %}             -> calls _now()
            {% now 'timezone' + 'offset' %}            -> calls _datetime()
            {% now 'timezone' + 'offset', 'format' %}  -> calls _datetime()
            {% now 'timezone' - 'offset', 'format' %}  -> calls _datetime()

        Args:
            parser: Jinja2 parser instance

        Returns:
            nodes.Output: AST output node containing the formatted datetime string
        """
        lineno = next(parser.stream).lineno

        node = parser.parse_expression()

        if parser.stream.skip_if('comma'):
            datetime_format = parser.parse_expression()
        else:
            datetime_format = nodes.Const(None)

        if isinstance(node, nodes.Add):
            call_method = self.call_method(
                '_datetime',
                [node.left, nodes.Const('+'), node.right, datetime_format],
                lineno=lineno,
            )
        elif isinstance(node, nodes.Sub):
            call_method = self.call_method(
                '_datetime',
                [node.left, nodes.Const('-'), node.right, datetime_format],
                lineno=lineno,
            )
        else:
            call_method = self.call_method(
                '_now',
                [node, datetime_format],
                lineno=lineno,
            )
        return nodes.Output([call_method], lineno=lineno)
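
# Usage sketch for the tag above (illustrative; assumes the sandboxed environment
# from this changeset, which loads TimeExtension by default):
#
#   from changedetectionio.jinja2_custom import render
#
#   render("Checked at {% now 'UTC' %}")
#   render("Next check {% now 'Europe/Berlin' + 'hours=2,minutes=30', '%Y-%m-%d %H:%M' %}")
#
# The second call takes the nodes.Add branch in parse() above, so _datetime()
# shifts the current time forward by 2.5 hours before formatting.
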
@@ -1,6 +0,0 @@
"""
Jinja2 custom filter plugins for changedetection.io
"""
from .regex import regex_replace

__all__ = ['regex_replace']
@@ -1,98 +0,0 @@
"""
Regex filter plugin for Jinja2 templates.

Provides regex_replace filter for pattern-based string replacements in templates.
"""
import re
import signal
from loguru import logger


def regex_replace(value: str, pattern: str, replacement: str = '', count: int = 0) -> str:
    """
    Replace occurrences of a regex pattern in a string.

    Security: Protected against ReDoS (Regular Expression Denial of Service) attacks:
    - Limits input value size to prevent excessive processing
    - Uses a timeout mechanism to prevent runaway regex operations
    - Validates pattern complexity to prevent catastrophic backtracking

    Args:
        value: The input string to perform replacements on
        pattern: The regex pattern to search for
        replacement: The replacement string (default: '')
        count: Maximum number of replacements (0 = replace all, default: 0)

    Returns:
        String with replacements applied, or original value on error

    Example:
        {{ "hello world" | regex_replace("world", "universe") }}
        {{ diff | regex_replace("<td>([^<]+)</td><td>([^<]+)</td>", "Label1: \\1\\nLabel2: \\2") }}

    Security limits:
        - Maximum input size: 10MB
        - Maximum pattern length: 500 characters
        - Operation timeout: 10 seconds
        - Dangerous nested quantifier patterns are rejected
    """
    # Security limits
    MAX_INPUT_SIZE = 1024 * 1024 * 10  # 10MB max input size
    MAX_PATTERN_LENGTH = 500  # Maximum regex pattern length
    REGEX_TIMEOUT_SECONDS = 10  # Maximum time for regex operation

    # Validate input sizes
    value_str = str(value)
    if len(value_str) > MAX_INPUT_SIZE:
        logger.warning(f"regex_replace: Input too large ({len(value_str)} bytes), truncating")
        value_str = value_str[:MAX_INPUT_SIZE]

    if len(pattern) > MAX_PATTERN_LENGTH:
        logger.warning(f"regex_replace: Pattern too long ({len(pattern)} chars), rejecting")
        return value_str

    # Check for potentially dangerous patterns (basic checks)
    # Nested quantifiers like (a+)+ can cause catastrophic backtracking
    dangerous_patterns = [
        r'\([^)]*\+[^)]*\)\+',  # (x+)+
        r'\([^)]*\*[^)]*\)\+',  # (x*)+
        r'\([^)]*\+[^)]*\)\*',  # (x+)*
        r'\([^)]*\*[^)]*\)\*',  # (x*)*
    ]

    for dangerous in dangerous_patterns:
        if re.search(dangerous, pattern):
            logger.warning(f"regex_replace: Potentially dangerous pattern detected: {pattern}")
            return value_str

    def timeout_handler(signum, frame):
        raise TimeoutError("Regex operation timed out")

    try:
        # Set up timeout for regex operation (Unix-like systems only)
        # This prevents ReDoS attacks
        old_handler = None
        if hasattr(signal, 'SIGALRM'):
            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(REGEX_TIMEOUT_SECONDS)

        try:
            result = re.sub(pattern, replacement, value_str, count=count)
        finally:
            # Cancel the alarm
            if hasattr(signal, 'SIGALRM'):
                signal.alarm(0)
                if old_handler is not None:
                    signal.signal(signal.SIGALRM, old_handler)

        return result

    except TimeoutError:
        logger.error(f"regex_replace: Regex operation timed out - possible ReDoS attack. Pattern: {pattern}")
        return value_str
    except re.error as e:
        logger.warning(f"regex_replace: Invalid regex pattern: {e}")
        return value_str
    except Exception as e:
        logger.error(f"regex_replace: Unexpected error: {e}")
        return value_str
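
# Behaviour sketch for the guards above (values illustrative):
#
#   regex_replace("hello world", "world", "universe")   # -> "hello universe"
#   regex_replace("aaaa!", "(a+)+$", "X")               # nested quantifier, rejected -> "aaaa!" unchanged
#
# Rejection is deliberately silent apart from the log warning, so a bad filter
# in a notification template degrades to the original text rather than an error.
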
@@ -1,58 +0,0 @@
"""
Safe Jinja2 render with max payload sizes

See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
"""

import jinja2.sandbox
import typing as t
import os
from .extensions.TimeExtension import TimeExtension
from .plugins import regex_replace

JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))

# Default extensions - can be overridden in create_jinja_env()
DEFAULT_JINJA2_EXTENSIONS = [TimeExtension]

def create_jinja_env(extensions=None, **kwargs) -> jinja2.sandbox.ImmutableSandboxedEnvironment:
    """
    Create a sandboxed Jinja2 environment with our custom extensions and default timezone.

    Args:
        extensions: List of extension classes to use (defaults to DEFAULT_JINJA2_EXTENSIONS)
        **kwargs: Additional arguments to pass to ImmutableSandboxedEnvironment

    Returns:
        Configured Jinja2 environment
    """
    if extensions is None:
        extensions = DEFAULT_JINJA2_EXTENSIONS

    jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(
        extensions=extensions,
        **kwargs
    )

    # Get default timezone from environment variable
    default_timezone = os.getenv('TZ', 'UTC').strip()
    jinja2_env.default_timezone = default_timezone

    # Register custom filters
    jinja2_env.filters['regex_replace'] = regex_replace

    return jinja2_env


# This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available.
# (Which also limits available functions that could be called)
def render(template_str, **args: t.Any) -> str:
    jinja2_env = create_jinja_env()
    output = jinja2_env.from_string(template_str).render(args)
    return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]

def render_fully_escaped(content):
    env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
    template = env.from_string("{{ some_html|e }}")
    return template.render(some_html=content)
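
# A minimal sketch tying the pieces together (assumes this module is importable
# as changedetectionio.jinja2_custom; the 'price' variable is illustrative):
#
#   from changedetectionio.jinja2_custom import render
#
#   render("{{ price | regex_replace('[^0-9.]', '') }} at {% now 'UTC', '%H:%M' %}", price="$ 1,299.00")
#   # -> something like "1299.00 at 14:05", capped at JINJA2_MAX_RETURN_PAYLOAD_SIZE
#
# Because the environment is an ImmutableSandboxedEnvironment, templates can use
# the registered filters and tags but cannot mutate objects or reach unsafe attributes.
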
@@ -1,5 +1,4 @@
from os import getenv
from copy import deepcopy

from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES

@@ -40,12 +39,12 @@ class model(dict):
            'api_access_token_enabled': True,
            'base_url' : None,
            'empty_pages_are_a_change': False,
            'extract_title_as_title': False,
            'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
            'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
            'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
            'global_subtractive_selectors': [],
            'ignore_whitespace': True,
            'ignore_status_codes': False, #@todo implement, as ternary.
            'notification_body': default_notification_body,
            'notification_format': default_notification_format,
            'notification_title': default_notification_title,
@@ -56,18 +55,14 @@ class model(dict):
            'rss_access_token': None,
            'rss_content_format': RSS_FORMAT_TYPES[0][0],
            'rss_hide_muted_watches': True,
            'rss_reader_mode': False,
            'scheduler_timezone_default': None, # Default IANA timezone name
            'schema_version' : 0,
            'shared_diff_access': False,
            'strip_ignored_lines': False,
            'tags': {}, #@todo use Tag.model initialisers
            'webdriver_delay': None , # Extra delay in seconds before extracting text
            'tags': {}, #@todo use Tag.model initialisers
            'timezone': None, # Default IANA timezone name
            'ui': {
                'use_page_title_in_list': True,
                'open_diff_in_new_tab': True,
                'socket_io_enabled': True,
                'favicons_enabled': True
                'socket_io_enabled': True
            },
        }
    }
@@ -75,8 +70,7 @@ class model(dict):

    def __init__(self, *arg, **kw):
        super(model, self).__init__(*arg, **kw)
        # CRITICAL: deepcopy to avoid sharing mutable objects between instances
        self.update(deepcopy(self.base_config))
        self.update(self.base_config)
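
# Why the deepcopy matters (sketch; the key path shown is illustrative):
#
#   a = model()
#   b = model()
#   a['settings']['application']['tags']['uuid-x'] = {'title': 'news'}
#
# With a plain self.update(self.base_config), both instances would share the same
# nested dicts, so the tag written through `a` would silently appear in `b` too.
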

def parse_headers_from_text_file(filepath):
@@ -1,24 +1,39 @@
from blinker import signal
from changedetectionio.validate_url import is_safe_valid_url

from changedetectionio.strtobool import strtobool
from changedetectionio.jinja2_custom import render as jinja_render
from changedetectionio.safe_jinja import render as jinja_render
from . import watch_base
import os
import re
from pathlib import Path
from loguru import logger

from .. import jinja2_custom as safe_jinja
from ..diff import ADDED_PLACEMARKER_OPEN
from ..html_tools import TRANSLATE_WHITESPACE_TABLE

FAVICON_RESAVE_THRESHOLD_SECONDS=86400

# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'

minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}


def is_safe_url(test_url):
    # See https://github.com/dgtlmoon/changedetection.io/issues/1358

    # Remove 'source:' prefix so we dont get 'source:javascript:' etc
    # 'source:' is a valid way to tell us to return the source

    r = re.compile(re.escape('source:'), re.IGNORECASE)
    test_url = r.sub('', test_url)

    pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE)
    if not pattern.match(test_url.strip()):
        return False

    return True

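# Behaviour sketch for is_safe_url() (illustrative):
#
#   is_safe_url('https://example.com')           # True
#   is_safe_url('source:https://example.com')    # True - 'source:' prefix is stripped first
#   is_safe_url('javascript:alert(1)')           # False - not in SAFE_PROTOCOL_REGEX
#   is_safe_url('source:javascript:alert(1)')    # False - stripping happens before the check
#
# Note SAFE_PROTOCOL_REGEX can be overridden via the environment variable of the same name.
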
class model(watch_base):
    __newest_history_key = None
    __history_n = 0
@@ -61,7 +76,7 @@ class model(watch_base):
    def link(self):

        url = self.get('url', '')
        if not is_safe_valid_url(url):
        if not is_safe_url(url):
            return 'DISABLED'

        ready_url = url
@@ -71,8 +86,9 @@ class model(watch_base):
            ready_url = jinja_render(template_str=url)
        except Exception as e:
            logger.critical(f"Invalid URL template for: '{url}' - {str(e)}")
            from flask import flash, url_for
            from markupsafe import Markup
            from flask import (
                flash, Markup, url_for
            )
            message = Markup('<a href="{}#general">The URL {} is invalid and cannot be used, click to edit</a>'.format(
                url_for('ui.ui_edit.edit_page', uuid=self.get('uuid')), self.get('url', '')))
            flash(message, 'error')
@@ -82,17 +98,10 @@ class model(watch_base):
            ready_url=ready_url.replace('source:', '')

        # Also double check it after any Jinja2 formatting just incase
        if not is_safe_valid_url(ready_url):
        if not is_safe_url(ready_url):
            return 'DISABLED'
        return ready_url

    @property
    def domain_only_from_link(self):
        from urllib.parse import urlparse
        parsed = urlparse(self.link)
        domain = parsed.hostname
        return domain

    def clear_watch(self):
        import pathlib

@@ -150,8 +159,8 @@ class model(watch_base):

    @property
    def label(self):
        # Used for sorting, display, etc
        return self.get('title') or self.get('page_title') or self.link
        # Used for sorting
        return self.get('title') if self.get('title') else self.get('url')

    @property
    def last_changed(self):
@@ -403,154 +412,6 @@ class model(watch_base):
        # False is not an option for AppRise, must be type None
        return None

    def favicon_is_expired(self):
        favicon_fname = self.get_favicon_filename()
        import glob
        import time

        if not favicon_fname:
            return True
        try:
            fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None)
            logger.trace(f"Favicon file maybe found at {fname}")
            if os.path.isfile(fname):
                file_age = int(time.time() - os.path.getmtime(fname))
                logger.trace(f"Favicon file age is {file_age}s")
                if file_age < FAVICON_RESAVE_THRESHOLD_SECONDS:
                    return False
        except Exception as e:
            logger.critical(f"Exception checking Favicon age {str(e)}")
            return True

        # Also in the case that the file didnt exist
        return True

    def bump_favicon(self, url, favicon_base_64: str) -> None:
        from urllib.parse import urlparse
        import base64
        import binascii
        decoded = None

        if url:
            try:
                parsed = urlparse(url)
                filename = os.path.basename(parsed.path)
                (base, extension) = filename.lower().strip().rsplit('.', 1)
            except ValueError:
                logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'")
                return None
        else:
            # Assume favicon.ico
            base = "favicon"
            extension = "ico"

        fname = os.path.join(self.watch_data_dir, f"favicon.{extension}")

        try:
            # validate=True makes sure the string only contains valid base64 chars
            decoded = base64.b64decode(favicon_base_64, validate=True)
        except (binascii.Error, ValueError) as e:
            logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}")
        else:
            if decoded:
                try:
                    with open(fname, 'wb') as f:
                        f.write(decoded)
                    # A signal that could trigger the socket server to update the browser also
                    watch_check_update = signal('watch_favicon_bump')
                    if watch_check_update:
                        watch_check_update.send(watch_uuid=self.get('uuid'))

                except Exception as e:
                    logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")

        # @todo - Store some checksum and only write when its different
        logger.debug(f"UUID: {self.get('uuid')} updated favicon at {fname}")

    def get_favicon_filename(self) -> str | None:
        """
        Find any favicon.* file in the watch data directory
        and return the filename of the newest one.

        Returns:
            str: Filename of the newest favicon file, or None if not found.
        """
        import glob

        # Search for all favicon.* files
        files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))

        if not files:
            return None

        # Find the newest by modification time
        newest_file = max(files, key=os.path.getmtime)
        return os.path.basename(newest_file)

    def get_screenshot_as_thumbnail(self, max_age=3200):
        """Return path to a square thumbnail of the most recent screenshot.

        Creates a 350x350 pixel thumbnail from the top portion of the screenshot.

        Args:
            max_age: Maximum age in seconds before recreating thumbnail

        Returns:
            Path to thumbnail or None if no screenshot exists
        """
        import os
        import time

        thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
        top_trim = 500 # Pixels from top of screenshot to use

        screenshot_path = self.get_screenshot()
        if not screenshot_path:
            return None

        # Reuse thumbnail if it's fresh and screenshot hasn't changed
        if os.path.isfile(thumbnail_path):
            thumbnail_mtime = os.path.getmtime(thumbnail_path)
            screenshot_mtime = os.path.getmtime(screenshot_path)

            if screenshot_mtime <= thumbnail_mtime and time.time() - thumbnail_mtime < max_age:
                return thumbnail_path

        try:
            from PIL import Image

            with Image.open(screenshot_path) as img:
                # Crop top portion first (full width, top_trim height)
                top_crop_height = min(top_trim, img.height)
                img = img.crop((0, 0, img.width, top_crop_height))

                # Create a smaller intermediate image (to reduce memory usage)
                aspect = img.width / img.height
                interim_width = min(top_trim, img.width)
                interim_height = int(interim_width / aspect) if aspect > 0 else top_trim
                img = img.resize((interim_width, interim_height), Image.NEAREST)

                # Convert to RGB if needed
                if img.mode != 'RGB':
                    img = img.convert('RGB')

                # Crop to square from top center
                square_size = min(img.width, img.height)
                left = (img.width - square_size) // 2
                img = img.crop((left, 0, left + square_size, square_size))

                # Final resize to exact thumbnail size with better filter
                img = img.resize((350, 350), Image.BILINEAR)

                # Save with optimized settings
                img.save(thumbnail_path, "JPEG", quality=75, optimize=True)

            return thumbnail_path

        except Exception as e:
            logger.error(f"Error creating thumbnail for {self.get('uuid')}: {str(e)}")
            return None

    def __get_file_ctime(self, filename):
        fname = os.path.join(self.watch_data_dir, filename)
        if os.path.isfile(fname):
@@ -644,7 +505,7 @@ class model(watch_base):
            if res:
                if not csv_writer:
                    # A file on the disk can be transferred much faster via flask than a string reply
                    csv_output_filename = f"report-{self.get('uuid')}.csv"
                    csv_output_filename = 'report.csv'
                    f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
                    # @todo some headers in the future
                    #fieldnames = ['Epoch seconds', 'Date']
@@ -830,11 +691,11 @@ class model(watch_base):
                output.append(str(Markup(f"<div class=\"notification-error\"><a href=\"{url_for('settings.notification_logs')}\">{ self.get('last_notification_error') }</a></div>")))

            else:
                # Lo_Fi version - no app context, cant rely on Jinja2 Markup
                # Lo_Fi version
                if last_error:
                    output.append(safe_jinja.render_fully_escaped(last_error))
                    output.append(str(Markup(last_error)))
                if self.get('last_notification_error'):
                    output.append(safe_jinja.render_fully_escaped(self.get('last_notification_error')))
                    output.append(str(Markup(self.get('last_notification_error'))))

            res = "\n".join(output)
            return res

@@ -2,8 +2,7 @@ import os
import uuid

from changedetectionio import strtobool
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
default_notification_format_for_watch = 'System default'

class watch_base(dict):

@@ -16,14 +15,13 @@ class watch_base(dict):
        'body': None,
        'browser_steps': [],
        'browser_steps_last_error_step': None,
        'conditions' : {},
        'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT,
        'check_count': 0,
        'check_unique_lines': False, # On change-detected, compare against all history if its something new
        'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
        'content-type': None,
        'date_created': None,
        'extract_text': [], # Extract text by regex after filters
        'extract_title_as_title': False,
        'fetch_backend': 'system', # plaintext, playwright etc
        'fetch_time': 0.0,
        'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
@@ -34,7 +32,6 @@ class watch_base(dict):
        'has_ldjson_price_data': None,
        'headers': {}, # Extra headers to send
        'ignore_text': [], # List of text to ignore when calculating the comparison checksum
        'ignore_status_codes': None,
        'in_stock_only': True, # Only trigger change on going to instock from out-of-stock
        'include_filters': [],
        'last_checked': 0,
@@ -44,12 +41,11 @@ class watch_base(dict):
        'method': 'GET',
        'notification_alert_count': 0,
        'notification_body': None,
        'notification_format': USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
        'notification_format': default_notification_format_for_watch,
        'notification_muted': False,
        'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
        'notification_title': None,
        'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
        'page_title': None, # <title> from the page
        'paused': False,
        'previous_md5': False,
        'previous_md5_before_filters': False, # Used for skipping changedetection entirely
@@ -58,7 +54,6 @@ class watch_base(dict):
        'proxy': None, # Preferred proxy connection
        'remote_server_reply': None, # From 'server' reply header
        'sort_text_alphabetically': False,
        'strip_ignored_lines': None,
        'subtractive_selectors': [],
        'tag': '', # Old system of text name for a tag, to be removed
        'tags': [], # list of UUIDs to App.Tags
@@ -124,13 +119,12 @@ class watch_base(dict):
                }
            },
        },
        'title': None, # An arbitrary field that overrides 'page_title'
        'title': None,
        'track_ldjson_price_data': None,
        'trim_text_whitespace': False,
        'remove_duplicate_lines': False,
        'trigger_text': [], # List of text or regex to wait for until a change is detected
        'url': '',
        'use_page_title_in_list': None, # None = use system settings
        'uuid': str(uuid.uuid4()),
        'webdriver_delay': None,
        'webdriver_js_execute_code': None, # Run before change-detection

@@ -1,16 +1,35 @@
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from changedetectionio.model import default_notification_format_for_watch

default_notification_format = 'htmlcolor'
default_notification_format_for_watch = 'System default'
default_notification_format = 'HTML Color'
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'

# The values (markdown etc) are from apprise NotifyFormat,
# But to avoid importing the whole heavy module just use the same strings here.
valid_notification_formats = {
    'text': 'Plain Text',
    'html': 'HTML',
    'htmlcolor': 'HTML Color',
    'markdown': 'Markdown to HTML',
    'Text': 'text',
    'Markdown': 'markdown',
    'HTML': 'html',
    'HTML Color': 'htmlcolor',
    # Used only for editing a watch (not for global)
    USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
    default_notification_format_for_watch: default_notification_format_for_watch
}


valid_tokens = {
    'base_url': '',
    'current_snapshot': '',
    'diff': '',
    'diff_added': '',
    'diff_full': '',
    'diff_patch': '',
    'diff_removed': '',
    'diff_url': '',
    'preview_url': '',
    'triggered_text': '',
    'watch_tag': '',
    'watch_title': '',
    'watch_url': '',
    'watch_uuid': '',
}

@@ -1,61 +1,10 @@
"""
Custom Apprise HTTP Handlers with format= Parameter Support

IMPORTANT: This module works around a limitation in Apprise's @notify decorator.

THE PROBLEM:
-------------
When using Apprise's @notify decorator to create custom notification handlers, the
decorator creates a CustomNotifyPlugin that uses parse_url(..., simple=True) to parse
URLs. This simple parsing mode does NOT extract the format= query parameter from the URL
and set it as a top-level parameter that NotifyBase.__init__ can use to set notify_format.

As a result:
1. URL: post://example.com/webhook?format=html
2. Apprise parses this and sees format=html in qsd (query string dictionary)
3. But it does NOT extract it and pass it to NotifyBase.__init__
4. NotifyBase defaults to notify_format=TEXT
5. When you call apobj.notify(body="<html>...", body_format="html"):
   - Apprise sees: input format = html, output format (notify_format) = text
   - Apprise calls convert_between("html", "text", body)
   - This strips all HTML tags, leaving only plain text
6. Your custom handler receives stripped plain text instead of HTML

THE SOLUTION:
-------------
Instead of using the @notify decorator directly, we:
1. Manually register custom plugins using plugins.N_MGR.add()
2. Create a CustomHTTPHandler class that extends CustomNotifyPlugin
3. Override __init__ to extract format= from qsd and set it as kwargs['format']
4. Call NotifyBase.__init__ which properly sets notify_format from kwargs['format']
5. Set up _default_args like CustomNotifyPlugin does for compatibility

This ensures that when format=html is in the URL:
- notify_format is set to HTML
- Apprise sees: input format = html, output format = html
- No conversion happens (convert_between returns content unchanged)
- Your custom handler receives the original HTML intact

TESTING:
--------
To verify this works:
>>> apobj = apprise.Apprise()
>>> apobj.add('post://localhost:5005/test?format=html')
>>> for server in apobj:
...     print(server.notify_format)  # Should print: html (not text)
>>> apobj.notify(body='<span>Test</span>', body_format='html')
# Your handler should receive '<span>Test</span>' not 'Test'
"""

import json
import re
from urllib.parse import unquote_plus

import requests
from apprise import plugins
from apprise.decorators.base import CustomNotifyPlugin
from apprise.utils.parse import parse_url as apprise_parse_url, url_assembly
from apprise.utils.logic import dict_full_update
from apprise.decorators import notify
from apprise.utils.parse import parse_url as apprise_parse_url
from loguru import logger
from requests.structures import CaseInsensitiveDict

@@ -63,66 +12,13 @@ SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"}


def notify_supported_methods(func):
    """Register custom HTTP method handlers that properly support format= parameter."""
    for method in SUPPORTED_HTTP_METHODS:
        _register_http_handler(method, func)
        _register_http_handler(f"{method}s", func)
        func = notify(on=method)(func)
        # Add support for https, for each supported http method
        func = notify(on=f"{method}s")(func)
    return func


def _register_http_handler(schema, send_func):
    """Register a custom HTTP handler that extracts format= from URL query parameters."""

    # Parse base URL
    base_url = f"{schema}://"
    base_args = apprise_parse_url(base_url, default_schema=schema, verify_host=False, simple=True)

    class CustomHTTPHandler(CustomNotifyPlugin):
        secure_protocol = schema
        service_name = f"Custom HTTP - {schema.upper()}"
        _base_args = base_args

        def __init__(self, **kwargs):
            # Extract format from qsd and set it as a top-level kwarg
            # This allows NotifyBase.__init__ to properly set notify_format
            if 'qsd' in kwargs and 'format' in kwargs['qsd']:
                kwargs['format'] = kwargs['qsd']['format']

            # Call NotifyBase.__init__ (skip CustomNotifyPlugin.__init__)
            super(CustomNotifyPlugin, self).__init__(**kwargs)

            # Set up _default_args like CustomNotifyPlugin does
            self._default_args = {}
            kwargs.pop("secure", None)
            dict_full_update(self._default_args, self._base_args)
            dict_full_update(self._default_args, kwargs)
            self._default_args["url"] = url_assembly(**self._default_args)

        __send = staticmethod(send_func)

        def send(self, body, title="", notify_type="info", *args, **kwargs):
            """Call the custom send function."""
            try:
                result = self.__send(
                    body, title, notify_type,
                    *args,
                    meta=self._default_args,
                    **kwargs
                )
                return True if result is None else bool(result)
            except Exception as e:
                self.logger.warning(f"Exception in custom HTTP handler: {e}")
                return False

    # Register the plugin
    plugins.N_MGR.add(
        plugin=CustomHTTPHandler,
        schemas=schema,
        send_func=send_func,
        url=base_url,
    )


def _get_auth(parsed_url: dict) -> str | tuple[str, str]:
    user: str | None = parsed_url.get("user")
    password: str | None = parsed_url.get("password")
@@ -174,12 +70,9 @@ def apprise_http_custom_handler(
    title: str,
    notify_type: str,
    meta: dict,
    body_format: str = None,
    *args,
    **kwargs,
) -> bool:


    url: str = meta.get("url")
    schema: str = meta.get("schema")
    method: str = re.sub(r"s$", "", schema).upper()
@@ -195,16 +88,25 @@ def apprise_http_custom_handler(

    url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))

    response = requests.request(
        method=method,
        url=url,
        auth=auth,
        headers=headers,
        params=params,
        data=body.encode("utf-8") if isinstance(body, str) else body,
    )
    try:
        response = requests.request(
            method=method,
            url=url,
            auth=auth,
            headers=headers,
            params=params,
            data=body.encode("utf-8") if isinstance(body, str) else body,
        )

    response.raise_for_status()
        response.raise_for_status()

    logger.info(f"Successfully sent custom notification to {url}")
    return True
        logger.info(f"Successfully sent custom notification to {url}")
        return True

    except requests.RequestException as e:
        logger.error(f"Remote host error while sending custom notification to {url}: {e}")
        return False

    except Exception as e:
        logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}")
        return False

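# End-to-end sketch (assumes this module has been imported so the handlers are registered):
#
#   import apprise
#   apobj = apprise.Apprise()
#   apobj.add('posts://example.com/webhook?format=html')
#   apobj.notify(body='<b>Price dropped</b>', title='Watch', body_format='html')
#
# 'posts://' maps to an HTTPS POST; because CustomHTTPHandler promoted format=html
# into notify_format, the <b> markup reaches apprise_http_custom_handler() intact.
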
@@ -1,286 +0,0 @@
"""
Custom Discord plugin for changedetection.io
Extends Apprise's Discord plugin to support custom colored embeds for removed/added content
"""
from apprise.plugins.discord import NotifyDiscord
from apprise.decorators import notify
from apprise.common import NotifyFormat
from loguru import logger

# Import placeholders from changedetection's diff module
from ...diff import (
    REMOVED_PLACEMARKER_OPEN,
    REMOVED_PLACEMARKER_CLOSED,
    ADDED_PLACEMARKER_OPEN,
    ADDED_PLACEMARKER_CLOSED,
    CHANGED_PLACEMARKER_OPEN,
    CHANGED_PLACEMARKER_CLOSED,
    CHANGED_INTO_PLACEMARKER_OPEN,
    CHANGED_INTO_PLACEMARKER_CLOSED,
)

# Discord embed sidebar colors for different change types
DISCORD_COLOR_UNCHANGED = 8421504 # Gray (#808080)
DISCORD_COLOR_REMOVED = 16711680 # Red (#FF0000)
DISCORD_COLOR_ADDED = 65280 # Green (#00FF00)
DISCORD_COLOR_CHANGED = 16753920 # Orange (#FFA500)
DISCORD_COLOR_CHANGED_INTO = 3447003 # Blue (#3498DB)
DISCORD_COLOR_WARNING = 16776960 # Yellow (#FFFF00)


class NotifyDiscordCustom(NotifyDiscord):
    """
    Custom Discord notification handler that supports multiple colored embeds
    for showing removed (red) and added (green) content separately.
    """

    def send(self, body, title="", notify_type=None, attach=None, **kwargs):
        """
        Override send method to create custom embeds with red/green colors
        for removed/added content when placeholders are present.
        """

        # Check if body contains our diff placeholders
        has_removed = REMOVED_PLACEMARKER_OPEN in body
        has_added = ADDED_PLACEMARKER_OPEN in body
        has_changed = CHANGED_PLACEMARKER_OPEN in body
        has_changed_into = CHANGED_INTO_PLACEMARKER_OPEN in body

        # If we have diff placeholders and we're in markdown/html format, create custom embeds
        if (has_removed or has_added or has_changed or has_changed_into) and self.notify_format in (NotifyFormat.MARKDOWN, NotifyFormat.HTML):
            return self._send_with_colored_embeds(body, title, notify_type, attach, **kwargs)

        # Otherwise, use the parent class's default behavior
        return super().send(body, title, notify_type, attach, **kwargs)

    def _send_with_colored_embeds(self, body, title, notify_type, attach, **kwargs):
        """
        Send Discord message with embeds in the original diff order.
        Preserves the sequence: unchanged -> removed -> added -> unchanged, etc.
        """
        from datetime import datetime, timezone

        payload = {
            "tts": self.tts,
            "wait": self.tts is False,
        }

        if self.flags:
            payload["flags"] = self.flags

        # Acquire image_url
        image_url = self.image_url(notify_type)

        if self.avatar and (image_url or self.avatar_url):
            payload["avatar_url"] = self.avatar_url if self.avatar_url else image_url

        if self.user:
            payload["username"] = self.user

        # Associate our thread_id with our message
        params = {"thread_id": self.thread_id} if self.thread_id else None

        # Build embeds array preserving order
        embeds = []

        # Add title as plain bold text in message content (not an embed)
        if title:
            payload["content"] = f"**{title}**"

        # Parse the body into ordered chunks
        chunks = self._parse_body_into_chunks(body)

        # Discord limits:
        # - Max 10 embeds per message
        # - Max 6000 characters total across all embeds
        # - Max 4096 characters per embed description
        max_embeds = 10
        max_total_chars = 6000
        max_embed_description = 4096

        # All 10 embed slots are available for content
        max_content_embeds = max_embeds

        # Start character count
        total_chars = 0

        # Create embeds from chunks in order (no titles, just color coding)
        for chunk_type, content in chunks:
            if not content.strip():
                continue

            # Truncate individual embed description if needed
            if len(content) > max_embed_description:
                content = content[:max_embed_description - 3] + "..."

            # Check if we're approaching the embed count limit
            # We need room for the warning embed, so stop at max_content_embeds - 1
            current_content_embeds = len(embeds)
            if current_content_embeds >= max_content_embeds - 1:
                # Add a truncation notice (this will be the 10th embed)
                embeds.append({
                    "description": "⚠️ Content truncated (Discord 10 embed limit reached) - Tip: Select 'Plain Text' or 'HTML' format for longer diffs",
                    "color": DISCORD_COLOR_WARNING,
                })
                break

            # Check if adding this embed would exceed total character limit
            if total_chars + len(content) > max_total_chars:
                # Add a truncation notice
                remaining_chars = max_total_chars - total_chars
                if remaining_chars > 100:
                    # Add partial content if we have room
                    truncated_content = content[:remaining_chars - 100] + "..."
                    embeds.append({
                        "description": truncated_content,
                        "color": (DISCORD_COLOR_UNCHANGED if chunk_type == "unchanged"
                                  else DISCORD_COLOR_REMOVED if chunk_type == "removed"
                                  else DISCORD_COLOR_ADDED),
                    })
                embeds.append({
                    "description": "⚠️ Content truncated (Discord 6000 char limit reached)\nTip: Select 'Plain Text' or 'HTML' format for longer diffs",
                    "color": DISCORD_COLOR_WARNING,
                })
                break

            if chunk_type == "unchanged":
                embeds.append({
                    "description": content,
                    "color": DISCORD_COLOR_UNCHANGED,
                })
            elif chunk_type == "removed":
                embeds.append({
                    "description": content,
                    "color": DISCORD_COLOR_REMOVED,
                })
            elif chunk_type == "added":
                embeds.append({
                    "description": content,
                    "color": DISCORD_COLOR_ADDED,
                })
            elif chunk_type == "changed":
                # Changed (old value) - use orange to distinguish from pure removal
                embeds.append({
                    "description": content,
                    "color": DISCORD_COLOR_CHANGED,
                })
            elif chunk_type == "changed_into":
                # Changed into (new value) - use blue to distinguish from pure addition
                embeds.append({
                    "description": content,
                    "color": DISCORD_COLOR_CHANGED_INTO,
                })

            total_chars += len(content)

        if embeds:
            payload["embeds"] = embeds

        # Send the payload using parent's _send method
        if not self._send(payload, params=params):
            return False

        # Handle attachments if present
        if attach and self.attachment_support:
            payload.update({
                "tts": False,
                "wait": True,
            })
            payload.pop("embeds", None)
            payload.pop("content", None)
            payload.pop("allow_mentions", None)

            for attachment in attach:
                self.logger.info(f"Posting Discord Attachment {attachment.name}")
                if not self._send(payload, params=params, attach=attachment):
                    return False

        return True

    def _parse_body_into_chunks(self, body):
        """
        Parse the body into ordered chunks of (type, content) tuples.
        Types: "unchanged", "removed", "added", "changed", "changed_into"
        Preserves the original order of the diff.
        """
        chunks = []
        position = 0

        while position < len(body):
            # Find the next marker
            next_removed = body.find(REMOVED_PLACEMARKER_OPEN, position)
            next_added = body.find(ADDED_PLACEMARKER_OPEN, position)
            next_changed = body.find(CHANGED_PLACEMARKER_OPEN, position)
            next_changed_into = body.find(CHANGED_INTO_PLACEMARKER_OPEN, position)

            # Determine which marker comes first
            if next_removed == -1 and next_added == -1 and next_changed == -1 and next_changed_into == -1:
                # No more markers, rest is unchanged
                if position < len(body):
                    chunks.append(("unchanged", body[position:]))
                break

            # Find the earliest marker
            next_marker_pos = None
            next_marker_type = None

            # Compare all marker positions to find the earliest
            markers = []
            if next_removed != -1:
                markers.append((next_removed, "removed"))
            if next_added != -1:
                markers.append((next_added, "added"))
            if next_changed != -1:
                markers.append((next_changed, "changed"))
            if next_changed_into != -1:
                markers.append((next_changed_into, "changed_into"))

            if markers:
                next_marker_pos, next_marker_type = min(markers, key=lambda x: x[0])

            # Add unchanged content before the marker
            if next_marker_pos > position:
                chunks.append(("unchanged", body[position:next_marker_pos]))

            # Find the closing marker
            if next_marker_type == "removed":
                open_marker = REMOVED_PLACEMARKER_OPEN
                close_marker = REMOVED_PLACEMARKER_CLOSED
            elif next_marker_type == "added":
                open_marker = ADDED_PLACEMARKER_OPEN
                close_marker = ADDED_PLACEMARKER_CLOSED
            elif next_marker_type == "changed":
                open_marker = CHANGED_PLACEMARKER_OPEN
                close_marker = CHANGED_PLACEMARKER_CLOSED
            else: # changed_into
                open_marker = CHANGED_INTO_PLACEMARKER_OPEN
                close_marker = CHANGED_INTO_PLACEMARKER_CLOSED

            close_pos = body.find(close_marker, next_marker_pos)

            if close_pos == -1:
                # No closing marker, take rest as this type
                content = body[next_marker_pos + len(open_marker):]
                chunks.append((next_marker_type, content))
                break
            else:
                # Extract content between markers
                content = body[next_marker_pos + len(open_marker):close_pos]
                chunks.append((next_marker_type, content))
                position = close_pos + len(close_marker)

        return chunks

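# Worked sketch of the chunker above (illustrative; the placemarker strings
# themselves are opaque constants from changedetectionio.diff). Given a
# configured NotifyDiscordCustom instance `handler`:
#
#   body = (f"intro {REMOVED_PLACEMARKER_OPEN}old line{REMOVED_PLACEMARKER_CLOSED}"
#           f"{ADDED_PLACEMARKER_OPEN}new line{ADDED_PLACEMARKER_CLOSED} outro")
#   handler._parse_body_into_chunks(body)
#   # -> [("unchanged", "intro "), ("removed", "old line"),
#   #     ("added", "new line"), ("unchanged", " outro")]
#
# Each tuple then becomes one colored embed in _send_with_colored_embeds() above.
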
# Register the custom Discord handler with Apprise
# This will override the built-in discord:// handler
@notify(on="discord")
def discord_custom_wrapper(body, title, notify_type, meta, body_format=None, *args, **kwargs):
    """
    Wrapper function to make the custom Discord handler work with Apprise's decorator system.
    Note: This decorator approach may not work for overriding built-in plugins.
    The class-based approach above is the proper way to extend NotifyDiscord.
    """
    logger.info("Custom Discord handler called")
    # This is here for potential future use with decorator-based registration
    return True
@@ -1,42 +0,0 @@
def as_monospaced_html_email(content: str, title: str) -> str:
    """
    Wraps `content` in a minimal, email-safe HTML template
    that forces monospace rendering across Gmail, Hotmail, Apple Mail, etc.

    Args:
        content: The body text (plain text or HTML-like).
        title: The title plaintext

    Returns:
        A complete HTML document string suitable for sending as an email body.
    """

    # All line feed types should be removed and then this function should only be fed <br>'s
    # Then it works with our <pre> styling without double linefeeds
    content = content.translate(str.maketrans('', '', '\r\n'))

    if title:
        import html
        title = html.escape(title)
    else:
        title = ''

    # Full email-safe HTML
    html_email = f"""<!DOCTYPE html>
<html lang="en">
<head>
<meta charset="UTF-8">
<meta name="x-apple-disable-message-reformatting">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<!--[if mso]>
<style>
body, div, pre, td {{ font-family: "Courier New", Courier, monospace !important; }}
</style>
<![endif]-->
<title>{title}</title>
</head>
<body style="-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;">
<pre role="article" aria-roledescription="email" lang="en"
     style="font-family: monospace, 'Courier New', Courier; font-size: 0.9rem;
            white-space: pre-wrap; word-break: break-word;">{content}</pre>
</body>
</html>"""
    return html_email
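
# Usage sketch (illustrative): the caller is expected to have already converted
# newlines to <br> so the <pre> block does not double-space:
#
#   body = "Line one<br>Line two<br><b>changed</b>"
#   html_doc = as_monospaced_html_email(content=body, title="changedetection.io report")
#   # html_doc is a full <!DOCTYPE html> document ready to hand to an email notifier
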
@@ -1,274 +1,30 @@

import time
import apprise
from apprise import NotifyFormat
from loguru import logger
from urllib.parse import urlparse
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
from .apprise_plugin.custom_handlers import SUPPORTED_HTTP_METHODS
from .email_helpers import as_monospaced_html_email
from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, ADDED_PLACEMARKER_OPEN, HTML_ADDED_STYLE, \
    ADDED_PLACEMARKER_CLOSED, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, CHANGED_PLACEMARKER_OPEN, \
    CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE
from ..notification_service import NotificationContextData, CUSTOM_LINEBREAK_PLACEHOLDER


def markup_text_links_to_html(body):
    """
    Convert plaintext to HTML with clickable links.
    Uses Jinja2's escape and Markup for XSS safety.
    """
    from linkify_it import LinkifyIt
    from markupsafe import Markup, escape

    linkify = LinkifyIt()

    # Match URLs in the ORIGINAL text (before escaping)
    matches = linkify.match(body)

    if not matches:
        # No URLs, just escape everything
        return Markup(escape(body))

    result = []
    last_index = 0

    # Process each URL match
    for match in matches:
        # Add escaped text before the URL
        if match.index > last_index:
            text_part = body[last_index:match.index]
            result.append(escape(text_part))

        # Add the link with escaped URL (both in href and display)
        url = match.url
        result.append(Markup(f'<a href="{escape(url)}">{escape(url)}</a>'))

        last_index = match.last_index

    # Add remaining escaped text
    if last_index < len(body):
        result.append(escape(body[last_index:]))

    # Join all parts
    return str(Markup(''.join(str(part) for part in result)))

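# Behaviour sketch (illustrative):
#
#   markup_text_links_to_html("See https://example.com & <b>this</b>")
#   # -> 'See <a href="https://example.com">https://example.com</a> &amp; &lt;b&gt;this&lt;/b&gt;'
#
# Only the detected URLs become markup; everything else is escaped, so text that
# merely looks like HTML cannot be injected into the notification body.
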
def notification_format_align_with_apprise(n_format : str):
    """
    Correctly align changedetection's formats with apprise's formats
    Probably these are the same - but good to be sure.
    These set the expected OUTPUT format type
    :param n_format:
    :return:
    """

    if n_format.startswith('html'):
        # Apprise only knows 'html' not 'htmlcolor' etc, which shouldnt matter here
        n_format = NotifyFormat.HTML.value
    elif n_format.startswith('markdown'):
        # probably the same but just to be safe
        n_format = NotifyFormat.MARKDOWN.value
    elif n_format.startswith('text'):
        # probably the same but just to be safe
        n_format = NotifyFormat.TEXT.value
    else:
        n_format = NotifyFormat.TEXT.value

    return n_format

def apply_discord_markdown_to_body(n_body):
    """
    Discord does not support <del> but it supports non-standard ~~strikethrough~~
    :param n_body:
    :return:
    """
    import re
    # Define the mapping between your placeholders and markdown markers
    replacements = [
        (REMOVED_PLACEMARKER_OPEN, '~~', REMOVED_PLACEMARKER_CLOSED, '~~'),
        (ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
        (CHANGED_PLACEMARKER_OPEN, '~~', CHANGED_PLACEMARKER_CLOSED, '~~'),
        (CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
    ]
    # So that the markdown gets added without any whitespace following it which would break it
    for open_tag, open_md, close_tag, close_md in replacements:
        # Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag
        pattern = re.compile(
            re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag),
            flags=re.DOTALL
        )
        n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
    return n_body


def apply_standard_markdown_to_body(n_body):
    """
    Apprise does not support ~~strikethrough~~ but it will convert <del> to HTML strikethrough.
    :param n_body:
    :return:
    """
    import re
    # Define the mapping between your placeholders and markdown markers
    replacements = [
        (REMOVED_PLACEMARKER_OPEN, '<del>', REMOVED_PLACEMARKER_CLOSED, '</del>'),
        (ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
        (CHANGED_PLACEMARKER_OPEN, '<del>', CHANGED_PLACEMARKER_CLOSED, '</del>'),
        (CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
    ]

    # So that the markdown gets added without any whitespace following it which would break it
    for open_tag, open_md, close_tag, close_md in replacements:
        # Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag
        pattern = re.compile(
            re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag),
            flags=re.DOTALL
        )
        n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
    return n_body

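# Conversion sketch for the two helpers above (the placemarker values are opaque
# constants from changedetectionio.diff):
#
#   s = f"{REMOVED_PLACEMARKER_OPEN} old price {REMOVED_PLACEMARKER_CLOSED}"
#   apply_discord_markdown_to_body(s)    # -> " ~~old price~~ "
#   apply_standard_markdown_to_body(s)   # -> " <del>old price</del> "
#
# The (\s*) groups are captured and re-emitted outside the markers so that
# "~~ old price ~~" (which Discord would not render) can never be produced.
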
def apply_service_tweaks(url, n_body, n_title, requested_output_format):

    # Re 323 - Limit discord length to their 2000 char limit total or it wont send.
    # Because different notifications may require different pre-processing, run each sequentially :(
    # 2000 bytes minus -
    #     200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers
    #     Length of URL - Incase they specify a longer custom avatar_url

    if not n_body or not n_body.strip():
        return url, n_body, n_title

    # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
    parsed = urlparse(url)
    k = '?' if not parsed.query else '&'
    if url and not 'avatar_url' in url \
            and not url.startswith('mail') \
            and not url.startswith('post') \
            and not url.startswith('get') \
            and not url.startswith('delete') \
            and not url.startswith('put'):
        url += k + f"avatar_url={APPRISE_AVATAR_URL}"

    if url.startswith('tgram://'):
        # Telegram only supports a limited subset of HTML, remove the '<br>' we place in.
        # re https://github.com/dgtlmoon/changedetection.io/issues/555
        # @todo re-use an existing library we have already imported to strip all non-allowed tags
        n_body = n_body.replace('<br>', '\n')
        n_body = n_body.replace('</br>', '\n')
        n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n')

        # Use strikethrough for removed content, bold for added content
        n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '<s>')
        n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '</s>')
        n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '<b>')
        n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '</b>')
        # Handle changed/replaced lines (old → new)
        n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, '<s>')
        n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, '</s>')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>')

        # real limit is 4096, but minus some for extra metadata
        payload_max_size = 3600
        body_limit = max(0, payload_max_size - len(n_title))
        n_title = n_title[0:payload_max_size]
        n_body = n_body[0:body_limit]

    elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
          or url.startswith('https://discord.com/api')) \
            and 'html' in requested_output_format:
        # Discord doesn't support HTML, replace <br> with newlines
        n_body = n_body.strip().replace('<br>', '\n')
        n_body = n_body.replace('</br>', '\n')
        n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n')

        # Don't replace placeholders or truncate here - let the custom Discord plugin handle it
        # The plugin will use embeds (6000 char limit across all embeds) if placeholders are present,
        # or plain content (2000 char limit) otherwise

        # Only do placeholder replacement if NOT using htmlcolor (which triggers embeds in the custom plugin)
        if requested_output_format == 'html':
            # No diff placeholders, use Discord markdown for any other formatting
            # Use Discord markdown: strikethrough for removed, bold for added
            n_body = apply_discord_markdown_to_body(n_body=n_body)

            # Apply 2000 char limit for plain content
            payload_max_size = 1700
            body_limit = max(0, payload_max_size - len(n_title))
            n_title = n_title[0:payload_max_size]
            n_body = n_body[0:body_limit]
        # else: our custom Discord plugin will convert any placeholders left over into embeds with color bars

    # Is not discord/tgram and they want htmlcolor
    elif requested_output_format == 'htmlcolor':
        # https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
        n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">')
        n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
        n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">')
        n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
        # Handle changed/replaced lines (old → new)
        n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">')
        n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
        n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n')
    elif requested_output_format == 'html':
        n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
        n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '')
        n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
        n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '')
        n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
        n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
        n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n')
    elif requested_output_format == 'markdown':
        # Markdown to HTML - Apprise will convert this to HTML
        n_body = apply_standard_markdown_to_body(n_body=n_body)

    else: # plaintext etc default
        n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
        n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '')
        n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
        n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '')
        n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
        n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')

    return url, n_body, n_title


def process_notification(n_object: NotificationContextData, datastore):
|
||||
from changedetectionio.jinja2_custom import render as jinja_render
|
||||
from . import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, default_notification_format, valid_notification_formats
|
||||
def process_notification(n_object, datastore):
|
||||
from changedetectionio.safe_jinja import render as jinja_render
|
||||
from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats
|
||||
# be sure its registered
|
||||
from .apprise_plugin.custom_handlers import apprise_http_custom_handler
|
||||
# Register custom Discord plugin
|
||||
from .apprise_plugin.discord import NotifyDiscordCustom
|
||||
|
||||
if not isinstance(n_object, NotificationContextData):
|
||||
raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
|
||||
|
||||
now = time.time()
|
||||
if n_object.get('notification_timestamp'):
|
||||
logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s")
|
||||
|
||||
# Insert variables into the notification content
|
||||
notification_parameters = create_notification_parameters(n_object, datastore)
|
||||
|
||||
requested_output_format = n_object.get('notification_format', default_notification_format)
|
||||
logger.debug(f"Requested notification output format: '{requested_output_format}'")
|
||||
n_format = valid_notification_formats.get(
|
||||
n_object.get('notification_format', default_notification_format),
|
||||
valid_notification_formats[default_notification_format],
|
||||
)
|
||||
|
||||
# If we arrived with 'System default' then look it up
|
||||
if requested_output_format == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
|
||||
if n_format == default_notification_format_for_watch and datastore.data['settings']['application'].get('notification_format') != default_notification_format_for_watch:
|
||||
# Initially text or whatever
|
||||
requested_output_format = datastore.data['settings']['application'].get('notification_format', default_notification_format)
|
||||
|
||||
requested_output_format_original = requested_output_format
|
||||
|
||||
# Now clean it up so it fits perfectly with apprise
|
||||
requested_output_format = notification_format_align_with_apprise(n_format=requested_output_format)
|
||||
n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format])
|
||||
|
||||
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.2f}s")
|
||||
|
||||
@@ -283,23 +39,16 @@ def process_notification(n_object: NotificationContextData, datastore):
|
||||
|
||||
apobj = apprise.Apprise(debug=True, asset=apprise_asset)
|
||||
|
||||
# Override Apprise's built-in Discord plugin with our custom one
|
||||
# This allows us to use colored embeds for diff content
|
||||
# First remove the built-in discord plugin, then add our custom one
|
||||
apprise.plugins.N_MGR.remove('discord')
|
||||
apprise.plugins.N_MGR.add(NotifyDiscordCustom, schemas='discord')
|
||||
|
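The two N_MGR calls above are the standard Apprise pattern for shadowing a bundled plugin: deregister the schema, then register your own class under it. A sketch of the same two calls in isolation; the import path for the project's plugin is inferred from the relative import above and may differ:

# Sketch: swapping a schema's handler in Apprise's notification manager
from apprise.plugins import N_MGR
from changedetectionio.notification.apprise_plugin.discord import NotifyDiscordCustom  # path assumed

N_MGR.remove('discord')                            # forget the built-in handler for discord://
N_MGR.add(NotifyDiscordCustom, schemas='discord')  # register the custom class for the same schema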
    if not n_object.get('notification_urls'):
        return None

    with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs):
    with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
        for url in n_object['notification_urls']:

            # Get the notification body from datastore
            n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters)

            if n_object.get('markup_text_links_to_html_links'):
                n_body = markup_text_links_to_html(body=n_body)
            if n_object.get('notification_format', '').startswith('HTML'):
                n_body = n_body.replace("\n", '<br>')

            n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters)

@@ -315,88 +64,74 @@ def process_notification(n_object: NotificationContextData, datastore):
            logger.info(f">> Process Notification: AppRise notifying {url}")
            url = jinja_render(template_str=url, **notification_parameters)

            # If it's a plaintext document and they want HTML type email/alerts, it needs to be escaped
            watch_mime_type = n_object.get('watch_mime_type')
            if watch_mime_type and 'text/' in watch_mime_type.lower() and not 'html' in watch_mime_type.lower():
                if 'html' in requested_output_format:
                    from markupsafe import escape
                    n_body = str(escape(n_body))
            # Re 323 - Limit discord length to their 2000 char limit total or it won't send.
            # Because different notifications may require different pre-processing, run each sequentially :(
            # 2000 bytes minus -
            #   200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers
            #   Length of URL - In case they specify a longer custom avatar_url

            if 'html' in requested_output_format:
                # Since the n_body is always some kind of text from the 'diff' engine, attempt to preserve whitespaces that get sent to the HTML output
                # But only where it's more than 1 consecutive whitespace, otherwise "and this" becomes "and&nbsp;this" etc which is too much.
                n_body = n_body.replace('  ', '&nbsp;&nbsp;')
            # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
            k = '?' if not '?' in url else '&'
            if not 'avatar_url' in url \
                    and not url.startswith('mail') \
                    and not url.startswith('post') \
                    and not url.startswith('get') \
                    and not url.startswith('delete') \
                    and not url.startswith('put'):
                url += k + f"avatar_url={APPRISE_AVATAR_URL}"
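The avatar_url step matters for the length budget: the query string is part of the payload Apprise builds, so a default avatar is appended up front to keep the arithmetic honest. The append itself is just "pick ? or & depending on whether a query string already exists":

# Sketch of the same ?/& selection (APPRISE_AVATAR_URL stands in for the real constant)
APPRISE_AVATAR_URL = 'https://example.com/avatar.png'  # hypothetical value

def with_avatar(url: str) -> str:
    sep = '?' if '?' not in url else '&'
    return url + sep + f"avatar_url={APPRISE_AVATAR_URL}"

print(with_avatar('discord://id/token'))          # ...token?avatar_url=...
print(with_avatar('discord://id/token?tts=yes'))  # ...tts=yes&avatar_url=...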

            (url, n_body, n_title) = apply_service_tweaks(url=url, n_body=n_body, n_title=n_title, requested_output_format=requested_output_format_original)
            if url.startswith('tgram://'):
                # Telegram only supports a limited subset of HTML, remove the '<br>' we place in.
                # re https://github.com/dgtlmoon/changedetection.io/issues/555
                # @todo re-use an existing library we have already imported to strip all non-allowed tags
                n_body = n_body.replace('<br>', '\n')
                n_body = n_body.replace('</br>', '\n')
                # real limit is 4096, but minus some for extra metadata
                payload_max_size = 3600
                body_limit = max(0, payload_max_size - len(n_title))
                n_title = n_title[0:payload_max_size]
                n_body = n_body[0:body_limit]

            apprise_input_format = "NO-THANKS-WE-WILL-MANAGE-ALL-OF-THIS"
            elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith(
                    'https://discord.com/api'):
                # real limit is 2000, but minus some for extra metadata
                payload_max_size = 1700
                body_limit = max(0, payload_max_size - len(n_title))
                n_title = n_title[0:payload_max_size]
                n_body = n_body[0:body_limit]

            if not 'format=' in url:
                parsed_url = urlparse(url)
                prefix_add_to_url = '?' if not parsed_url.query else '&'
            elif url.startswith('mailto'):
                # Apprise will default to HTML, so we need to override it
                # So that what's generated in n_body is in line with what is going to be sent.
                # https://github.com/caronc/apprise/issues/633#issuecomment-1191449321
                if not 'format=' in url and (n_format == 'Text' or n_format == 'Markdown'):
                    prefix = '?' if not '?' in url else '&'
                    # Apprise format is lowercase text https://github.com/caronc/apprise/issues/633
                    n_format = n_format.lower()
                    url = f"{url}{prefix}format={n_format}"
                # If n_format == HTML, then apprise email should default to text/html and we should be sending HTML only

                # THIS IS THE TRICK HOW TO DISABLE APPRISE DOING WEIRD AUTO-CONVERSION WITH BREAKING BR TAGS ETC
                if 'html' in requested_output_format:
                    url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}"
                    apprise_input_format = NotifyFormat.HTML.value
                elif 'text' in requested_output_format:
                    url = f"{url}{prefix_add_to_url}format={NotifyFormat.TEXT.value}"
                    apprise_input_format = NotifyFormat.TEXT.value

                elif requested_output_format == NotifyFormat.MARKDOWN.value:
                    # Convert markdown to HTML ourselves since not all plugins do this
                    from apprise.conversion import markdown_to_html
                    # Make sure there are paragraph breaks around horizontal rules
                    n_body = n_body.replace('---', '\n\n---\n\n')
                    n_body = markdown_to_html(n_body)
                    url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}"
                    requested_output_format = NotifyFormat.HTML.value
                    apprise_input_format = NotifyFormat.HTML.value  # Changed from MARKDOWN to HTML

                # Could have arrived at any stage, so we don't end up running .escape on it
                if 'html' in requested_output_format:
                    n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n')
                else:
                    # texty types
                    n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n')
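Two different knobs are being driven through this whole block, and they are easy to conflate: `format=` appended to the Apprise URL selects the OUTPUT the plugin should emit, while the `body_format` argument passed later to notify() declares the INPUT so Apprise skips its own conversion. A minimal sketch (the mailto target is an example):

# Sketch: the two Apprise knobs used above (values follow apprise.NotifyFormat)
import apprise
from apprise import NotifyFormat

apobj = apprise.Apprise()
# OUTPUT format: the ?format= URL parameter tells the plugin what to send
apobj.add('mailto://user:pass@example.com?format=html')  # hypothetical target
# INPUT format: body_format declares what the body already is,
# which stops Apprise from auto-converting it again
apobj.notify(title='Change detected', body='<b>diff</b> here',
             body_format=NotifyFormat.HTML)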

            else:
                # ?format was IN the apprise URL, they are kind of on their own here, we will try our best
                if 'format=html' in url:
                    n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n')
                    # This will also prevent apprise from doing conversion
                    apprise_input_format = NotifyFormat.HTML.value
                    requested_output_format = NotifyFormat.HTML.value
                elif 'format=text' in url:
                    n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n')
                    apprise_input_format = NotifyFormat.TEXT.value
                    requested_output_format = NotifyFormat.TEXT.value
            apobj.add(url)

            sent_objs.append({'title': n_title,
                              'body': n_body,
                              'url': url})
            apobj.add(url)

            # Since the output is always based on the plaintext of the 'diff' engine, wrap it nicely.
            # It should always be similar to the 'history' part of the UI.
            if url.startswith('mail') and 'html' in requested_output_format:
                if not '<pre' in n_body and not '<body' in n_body:  # No custom HTML-ish body was set up already
                    n_body = as_monospaced_html_email(content=n_body, title=n_title)
                              'url': url,
                              'body_format': n_format})

            # Blast off the notifications that are set in .add()
            apobj.notify(
                title=n_title,
                body=n_body,
                # `body_format` tells apprise what format the INPUT is in; specify a wrong/bad type and it will force-skip conversion in apprise
                # &format= in the URL tells apprise what format the OUTPUT should be in (it can convert between them)
                body_format=apprise_input_format,
                body_format=n_format,
                # False is not an option for AppRise, must be type None
                attach=n_object.get('screenshot', None)
            )

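All of this runs inside the LogCapture context opened earlier, so delivery problems surface as captured log text rather than silent failures. The capture-then-inspect pattern in isolation (the json:// endpoint is a placeholder):

# Sketch: capturing Apprise's own logging to detect delivery problems
import apprise

apobj = apprise.Apprise()
apobj.add('json://localhost/webhook')  # hypothetical endpoint

with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
    apobj.notify(title='t', body='b')
    log_value = logs.getvalue()  # empty string if nothing was logged

if 'WARNING' in log_value or 'ERROR' in log_value:
    raise Exception(log_value)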
        # Returns empty string if nothing found, multi-line string otherwise
        log_value = logs.getvalue()

        if log_value and ('WARNING' in log_value or 'ERROR' in log_value):
        if log_value and 'WARNING' in log_value or 'ERROR' in log_value:
            logger.critical(log_value)
            raise Exception(log_value)

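The parenthesized variant is the defensive one: `and` binds tighter than `or`, so in the unparenthesized form the right-hand membership test is evaluated even when log_value is falsy. getvalue() does return a string, but the difference is easy to demonstrate with a falsy non-string stand-in:

# Why the parentheses matter: 'and' binds tighter than 'or'
log_value = None  # hypothetical falsy-but-not-string value
try:
    # Parses as: (log_value and 'WARNING' in log_value) or ('ERROR' in log_value)
    bad = log_value and 'WARNING' in log_value or 'ERROR' in log_value
except TypeError as e:
    print(e)  # argument of type 'NoneType' is not iterable
# The parenthesized form short-circuits safely:
ok = log_value and ('WARNING' in log_value or 'ERROR' in log_value)
print(ok)  # None (falsy), no exception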
@@ -406,15 +141,17 @@ def process_notification(n_object: NotificationContextData, datastore):

# Notification title + body content parameters get created here.
# ( Where we prepare the tokens in the notification to be replaced with actual values )
def create_notification_parameters(n_object: NotificationContextData, datastore):
    if not isinstance(n_object, NotificationContextData):
        raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
def create_notification_parameters(n_object, datastore):
    from copy import deepcopy
    from . import valid_tokens

    watch = datastore.data['watching'].get(n_object['uuid'])
    if watch:
        watch_title = datastore.data['watching'][n_object['uuid']].label
    # in the case we send a test notification from the main settings, there is no UUID.
    uuid = n_object['uuid'] if 'uuid' in n_object else ''

    if uuid:
        watch_title = datastore.data['watching'][uuid].get('title', '')
    tag_list = []
    tags = datastore.get_all_tags_for_watch(n_object['uuid'])
    tags = datastore.get_all_tags_for_watch(uuid)
    if tags:
        for tag_uuid, tag in tags.items():
            tag_list.append(tag.get('title'))

@@ -429,10 +166,14 @@ def create_notification_parameters(n_object: NotificationContextData, datastore):

    watch_url = n_object['watch_url']

    diff_url = "{}/diff/{}".format(base_url, n_object['uuid'])
    preview_url = "{}/preview/{}".format(base_url, n_object['uuid'])
    diff_url = "{}/diff/{}".format(base_url, uuid)
    preview_url = "{}/preview/{}".format(base_url, uuid)

    n_object.update(
    # Not sure deepcopy is needed here, but why not
    tokens = deepcopy(valid_tokens)

    # valid_tokens is also used as a field validator
    tokens.update(
        {
            'base_url': base_url,
            'diff_url': diff_url,
@@ -440,10 +181,13 @@ def create_notification_parameters(n_object: NotificationContextData, datastore):
            'watch_tag': watch_tag if watch_tag is not None else '',
            'watch_title': watch_title if watch_title is not None else '',
            'watch_url': watch_url,
            'watch_uuid': n_object['uuid'],
            'watch_uuid': uuid,
        })

    if watch:
        n_object.update(datastore.data['watching'].get(n_object['uuid']).extra_notification_token_values())
    # n_object will contain diff, diff_added etc etc
    tokens.update(n_object)

    return n_object
    if uuid:
        tokens.update(datastore.data['watching'].get(uuid).extra_notification_token_values())

    return tokens

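Either way, the returned mapping is what later feeds the Jinja2 render of the notification title and body. A standalone sketch of that hand-off; plain Jinja2 is shown here for illustration, while the project routes this through its own sandboxed render helper, and the token values are examples:

# Sketch: tokens -> rendered notification text (token values are examples)
from jinja2 import Environment

tokens = {
    'watch_title': 'My product page',
    'watch_url': 'https://example.com/item',
    'diff_url': 'http://localhost:5000/diff/1234',
}
template = "{{ watch_title }} changed: {{ watch_url }} (see {{ diff_url }})"
print(Environment().from_string(template).render(**tokens))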
@@ -6,70 +6,9 @@ Extracted from update_worker.py to provide standalone notification functionality
for both sync and async workers
"""

from loguru import logger
import time
from loguru import logger

from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from changedetectionio.notification import default_notification_format, valid_notification_formats

# This gets modified on notification time (handler.py) depending on the required notification output
CUSTOM_LINEBREAK_PLACEHOLDER = '@BR@'


# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
class NotificationContextData(dict):
    def __init__(self, initial_data=None, **kwargs):
        super().__init__({
            'base_url': None,
            'current_snapshot': None,
            'diff': None,
            'diff_added': None,
            'diff_full': None,
            'diff_patch': None,
            'diff_removed': None,
            'diff_url': None,
            'markup_text_links_to_html_links': False,  # If automatic conversion of plaintext to HTML should happen
            'notification_timestamp': time.time(),
            'preview_url': None,
            'screenshot': None,
            'triggered_text': None,
            'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX',  # Converted to 'watch_uuid' in create_notification_parameters
            'watch_mime_type': None,
            'watch_tag': None,
            'watch_title': None,
            'watch_url': 'https://WATCH-PLACE-HOLDER/',
        })

        # Apply any initial data passed in
        self.update({'watch_uuid': self.get('uuid')})
        if initial_data:
            self.update(initial_data)

        # Apply any keyword arguments
        if kwargs:
            self.update(kwargs)

        n_format = self.get('notification_format')
        if n_format and not valid_notification_formats.get(n_format):
            raise ValueError(f'Invalid notification format: "{n_format}"')

    def set_random_for_validation(self):
        """Randomly fill all dict keys with random strings (for validation/testing),
        so we can test the output in the notification body.
        """
        import random, string
        for key in self.keys():
            if key in ['uuid', 'time', 'watch_uuid']:
                continue
            rand_str = 'RANDOM-PLACEHOLDER-' + ''.join(random.choices(string.ascii_letters + string.digits, k=12))
            self[key] = rand_str

    def __setitem__(self, key, value):
        if key == 'notification_format' and isinstance(value, str) and not value.startswith('RANDOM-PLACEHOLDER-'):
            if not valid_notification_formats.get(value):
                raise ValueError(f'Invalid notification format: "{value}"')

        super().__setitem__(key, value)

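Because the class subclasses dict, its __init__ defaults double as the canonical token list, and __setitem__ validates formats eagerly. A short usage sketch, assuming the class as defined above; 'no-such-format' is deliberately invalid:

# Sketch: constructing the context and tripping the validator
ctx = NotificationContextData({'watch_url': 'https://example.com'},
                              watch_title='My watch')
print(ctx['watch_url'], ctx['watch_title'])

try:
    ctx['notification_format'] = 'no-such-format'  # rejected by __setitem__
except ValueError as e:
    print(e)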
class NotificationService:
    """
@@ -81,15 +20,12 @@ class NotificationService:
        self.datastore = datastore
        self.notification_q = notification_q

    def queue_notification_for_watch(self, n_object: NotificationContextData, watch):
    def queue_notification_for_watch(self, n_object, watch):
        """
        Queue a notification for a watch with full diff rendering and template variables
        """
        from changedetectionio import diff
        from changedetectionio.notification import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH

        if not isinstance(n_object, NotificationContextData):
            raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
        from changedetectionio.notification import default_notification_format_for_watch

        dates = []
        trigger_text = ''
@@ -108,16 +44,29 @@ class NotificationService:
            snapshot_contents = "No snapshot/history available, the watch should fetch at least once."

        # If we ended up here with "System default"
        if n_object.get('notification_format') == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
        if n_object.get('notification_format') == default_notification_format_for_watch:
            n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')

        html_colour_enable = False
        # HTML needs a linebreak tag, but Markdown and Text can use a plain linefeed
        if n_object.get('notification_format') == 'HTML':
            line_feed_sep = "<br>"
            # Snapshot will be plaintext on the disk, convert to some kind of HTML
            snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
        elif n_object.get('notification_format') == 'HTML Color':
            line_feed_sep = "<br>"
            # Snapshot will be plaintext on the disk, convert to some kind of HTML
            snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
            html_colour_enable = True
        else:
            line_feed_sep = "\n"

        triggered_text = ''
        if len(trigger_text):
            from . import html_tools
            triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
            if triggered_text:
                triggered_text = CUSTOM_LINEBREAK_PLACEHOLDER.join(triggered_text)
                triggered_text = line_feed_sep.join(triggered_text)

        # Could be called as a 'test notification' with only 1 snapshot available
        prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
@@ -129,17 +78,16 @@ class NotificationService:

        n_object.update({
            'current_snapshot': snapshot_contents,
            'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER),
            'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER),
            'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER),
            'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER, patch_format=True),
            'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER),
            'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
            'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep),
            'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
            'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True),
            'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep),
            'notification_timestamp': now,
            'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
            'triggered_text': triggered_text,
            'uuid': watch.get('uuid') if watch else None,
            'watch_url': watch.get('url') if watch else None,
            'watch_uuid': watch.get('uuid') if watch else None,
            'watch_mime_type': watch.get('content-type')
        })
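All five diff tokens come from the same render_diff helper, varied only by the include/format switches. A hedged sketch of those switches, with the keyword arguments exactly as in the calls above; output shape depends on the diff engine, so none is shown:

# Sketch: the render_diff switches used above (only runnable inside the project)
from changedetectionio import diff

prev = "price 10\nsame line\n"
curr = "price 12\nsame line\n"

full_diff = diff.render_diff(prev, curr, include_equal=True, line_feed_sep="\n")
added_only = diff.render_diff(prev, curr, include_removed=False, line_feed_sep="\n")
patch_like = diff.render_diff(prev, curr, line_feed_sep="\n", patch_format=True)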

        if watch:
@@ -155,7 +103,7 @@ class NotificationService:
        Individual watch settings > Tag settings > Global settings
        """
        from changedetectionio.notification import (
            USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
            default_notification_format_for_watch,
            default_notification_body,
            default_notification_title
        )
@@ -163,7 +111,7 @@ class NotificationService:
        # Would be better if this was some kind of Object where Watch can reference the parent datastore etc
        v = watch.get(var_name)
        if v and not watch.get('notification_muted'):
            if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
            if var_name == 'notification_format' and v == default_notification_format_for_watch:
                return self.datastore.data['settings']['application'].get('notification_format')

            return v
@@ -180,7 +128,7 @@ class NotificationService:

        # Otherwise could be defaults
        if var_name == 'notification_format':
            return USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
            return default_notification_format_for_watch
        if var_name == 'notification_body':
            return default_notification_body
        if var_name == 'notification_title':
@@ -192,7 +140,7 @@ class NotificationService:
        """
        Send notification when content changes are detected
        """
        n_object = NotificationContextData()
        n_object = {}
        watch = self.datastore.data['watching'].get(watch_uuid)
        if not watch:
            return
@@ -235,25 +183,11 @@ class NotificationService:
        if not watch:
            return

        filter_list = ", ".join(watch['include_filters'])
        # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed
        body = f"""Hello,

Your configured CSS/xPath filters of '{filter_list}' for {{{{watch_url}}}} did not appear on the page after {threshold} attempts.

It's possible the page changed layout and the filter needs updating ( Try the 'Visual Selector' tab )

Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}

Thanks - Your omniscient changedetection.io installation.
"""

        n_object = NotificationContextData({
            'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
            'notification_body': body,
            'notification_format': self._check_cascading_vars('notification_format', watch),
        })
        n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
        n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
                    'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
                        ", ".join(watch['include_filters']),
                        threshold),
                    'notification_format': 'text'}

        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']
@@ -281,28 +215,12 @@ Thanks - Your omniscient changedetection.io installation.
        if not watch:
            return
        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')

        step = step_n + 1
        # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed

        # {{{{ }}}} because this will be Jinja2 {{ }} tokens
        body = f"""Hello,

Your configured browser step at position {step} for the web page watch {{{{watch_url}}}} did not appear on the page after {threshold} attempts, did the page change layout?

The element may have moved and needs editing, or does it need a delay added?

Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}

Thanks - Your omniscient changedetection.io installation.
"""

        n_object = NotificationContextData({
            'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
            'notification_body': body,
            'notification_format': self._check_cascading_vars('notification_format', watch),
        })
        n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')
        n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1),
                    'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} "
                                         "did not appear on the page after {} attempts, did the page change layout? "
                                         "Does it need a delay added?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\n"
                                         "Thanks - Your omniscient changedetection.io installation :)\n".format(step_n+1, threshold),
                    'notification_format': 'text'}

        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']

@@ -91,8 +91,6 @@ class difference_detection_processor():
        else:
            logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")

        logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")

        # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
        # When browser_connection_url is None, the method should default to working out the best defaults (os env vars etc)
        self.fetcher = fetcher_obj(proxy_override=proxy_url,
@@ -104,7 +102,7 @@ class difference_detection_processor():
        self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))

        # Tweak the base config with the per-watch ones
        from changedetectionio.jinja2_custom import render as jinja_render
        from changedetectionio.safe_jinja import render as jinja_render
        request_headers = CaseInsensitiveDict()

        ua = self.datastore.data['settings']['requests'].get('default_ua')
@@ -148,19 +146,18 @@ class difference_detection_processor():

        # And here we go! call the right browser with browser-specific settings
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)

        # All fetchers are now async
        await self.fetcher.run(
            current_include_filters=self.watch.get('include_filters'),
            empty_pages_are_a_change=empty_pages_are_a_change,
            fetch_favicon=self.watch.favicon_is_expired(),
            ignore_status_codes=ignore_status_codes,
            is_binary=is_binary,
            request_body=request_body,
            request_headers=request_headers,
            request_method=request_method,
            timeout=timeout,
            url=url,
        )
        await self.fetcher.run(url=url,
                               timeout=timeout,
                               request_headers=request_headers,
                               request_body=request_body,
                               request_method=request_method,
                               ignore_status_codes=ignore_status_codes,
                               current_include_filters=self.watch.get('include_filters'),
                               is_binary=is_binary,
                               empty_pages_are_a_change=empty_pages_are_a_change
                               )

        # @todo .quit here could go on close object, so we can run JS if change-detected
        self.fetcher.quit(watch=self.watch)

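Both call sites pass the same keyword set; the newer one simply sorts the arguments alphabetically and adds fetch_favicon. A runnable sketch of driving an async fetcher-style coroutine, with a stub class standing in for the project's real fetchers:

# Sketch: awaiting a fetcher-style coroutine with keyword-only configuration
import asyncio

class StubFetcher:
    """Hypothetical stand-in for the project's fetcher objects."""
    async def run(self, *, url, timeout=20, request_headers=None,
                  request_method='GET', empty_pages_are_a_change=False, **kwargs):
        print(f"fetching {url} via {request_method}, timeout={timeout}s")

asyncio.run(StubFetcher().run(url='https://example.com', timeout=10,
                              request_headers={'User-Agent': 'sketch'}))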
@@ -1,133 +0,0 @@
"""
Content Type Detection and Stream Classification

This module provides intelligent content-type detection for changedetection.io.
It addresses the common problem where HTTP Content-Type headers are missing, incorrect,
or too generic, which would otherwise cause the wrong processor to be used.

The guess_stream_type class combines:
1. HTTP Content-Type headers (when available and reliable)
2. The puremagic library for MIME detection (analyzing actual file content)
3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.)

This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF,
plain text, CSV, YAML, and XML formats - even when servers provide misleading headers.

Used by: processors/text_json_diff/processor.py and other content processors
"""

# When to apply the 'cdata to real HTML' hack
RSS_XML_CONTENT_TYPES = [
    "application/rss+xml",
    "application/rdf+xml",
    "application/atom+xml",
    "text/rss+xml",  # rare, non-standard
    "application/x-rss+xml",  # legacy (older feed software)
    "application/x-atom+xml",  # legacy (older Atom)
]

# JSON Content-types
JSON_CONTENT_TYPES = [
    "application/activity+json",
    "application/feed+json",
    "application/json",
    "application/ld+json",
    "application/vnd.api+json",
]


# Generic XML Content-types (non-RSS/Atom)
XML_CONTENT_TYPES = [
    "text/xml",
    "application/xml",
]

HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div']

from loguru import logger

class guess_stream_type():
    is_pdf = False
    is_json = False
    is_html = False
    is_plaintext = False
    is_rss = False
    is_csv = False
    is_xml = False  # Generic XML, not RSS/Atom
    is_yaml = False

    def __init__(self, http_content_header, content):
        import re
        magic_content_header = http_content_header
        test_content = content[:200].lower().strip()

        # Remove whitespace between < and tag name for robust detection (handles '< html', '<\nhtml', etc.)
        test_content_normalized = re.sub(r'<\s+', '<', test_content)

        # Use puremagic for lightweight MIME detection (saves ~14MB vs python-magic)
        magic_result = None
        try:
            import puremagic

            # puremagic needs bytes, so encode if we have a string
            content_bytes = content[:200].encode('utf-8') if isinstance(content, str) else content[:200]

            # puremagic returns a list of PureMagic objects with confidence scores
            detections = puremagic.magic_string(content_bytes)
            if detections:
                # Get the highest-confidence detection
                mime = detections[0].mime_type
                logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'")
                if mime and "/" in mime:
                    magic_result = mime
                    # Ignore generic/fallback mime types
                    if mime in ['application/octet-stream', 'application/x-empty', 'binary']:
                        logger.debug(f"Ignoring generic mime type '{mime}' from puremagic library")
                    # Trust puremagic for non-text types immediately
                    elif mime not in ['text/html', 'text/plain']:
                        magic_content_header = mime

        except Exception as e:
            logger.warning(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection")

        # Content-based detection (most reliable for text formats)
        # Check for HTML patterns first - if found, override magic's text/plain
        has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS)

        # Always trust headers first
        if 'text/plain' in http_content_header:
            self.is_plaintext = True
        if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
            self.is_rss = True
        elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
            self.is_json = True
        elif 'pdf' in magic_content_header:
            self.is_pdf = True
        elif has_html_patterns or http_content_header == 'text/html':
            self.is_html = True
        elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
            self.is_json = True
        # magic will call an RSS document 'xml'
        # Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss
        # This also triggers the automatic CDATA text parser so the RSS goes back to a nice content list
        elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized:
            self.is_rss = True
        elif any(s in http_content_header for s in XML_CONTENT_TYPES):
            # Only mark as generic XML if not already detected as RSS
            if not self.is_rss:
                self.is_xml = True
        elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES):
            # Generic XML that's not RSS/Atom (RSS/Atom checked above)
            self.is_xml = True
        elif '%pdf-1' in test_content:
            self.is_pdf = True
        elif http_content_header.startswith('text/'):
            self.is_plaintext = True
        # Only trust magic for 'text' if no other patterns matched
        elif 'text' in magic_content_header:
            self.is_plaintext = True
        # If magic says text/plain and we found no HTML patterns, trust it
        elif magic_result == 'text/plain':
            self.is_plaintext = True
            logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)")

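A usage sketch for the class above, assuming the RSS sample below is representative; puremagic's verdict can vary, but the '<rss' content check should set is_rss either way:

# Sketch: classifying a fetched body (sample values invented for illustration)
body = '<?xml version="1.0"?><rss version="2.0"><channel></channel></rss>'
stream = guess_stream_type(http_content_header='text/xml', content=body)
print(stream.is_rss, stream.is_xml, stream.is_html)  # expected: True False False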
@@ -32,7 +32,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
    '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
    from changedetectionio import forms, html_tools
    from changedetectionio.model.Watch import model as watch_model
    from concurrent.futures import ThreadPoolExecutor
    from concurrent.futures import ProcessPoolExecutor
    from copy import deepcopy
    from flask import request
    import brotli
@@ -76,16 +76,13 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
    update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')

    # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
    # Do this as parallel threads (not processes) to avoid pickle issues with Lock objects
    try:
        with ThreadPoolExecutor(max_workers=2) as executor:
            future1 = executor.submit(_task, tmp_watch, update_handler)
            future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
    # Do this as a parallel process because it could take some time
    with ProcessPoolExecutor(max_workers=2) as executor:
        future1 = executor.submit(_task, tmp_watch, update_handler)
        future2 = executor.submit(_task, blank_watch_no_filters, update_handler)

            text_after_filter = future1.result()
            text_before_filter = future2.result()
    except Exception as e:
        x = 1
        text_after_filter = future1.result()
        text_before_filter = future2.result()

    try:
        trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,

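The move from ProcessPoolExecutor to ThreadPoolExecutor is about argument passing: a process pool must pickle the submitted arguments, and the watch/handler objects carry unpicklable Lock objects, while threads share memory directly. The thread-pool pattern in isolation:

# Sketch: two parallel tasks in threads, no pickling involved
from concurrent.futures import ThreadPoolExecutor

def _task(label, handler=None):
    return f"processed {label}"  # placeholder for the real filter run

with ThreadPoolExecutor(max_workers=2) as executor:
    future1 = executor.submit(_task, 'watch-with-filters')
    future2 = executor.submit(_task, 'watch-without-filters')
    text_after_filter = future1.result()
    text_before_filter = future2.result()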
@@ -7,24 +7,18 @@ import re
|
||||
import urllib3
|
||||
|
||||
from changedetectionio.conditions import execute_ruleset_against_all_plugins
|
||||
from changedetectionio.diff import ADDED_PLACEMARKER_OPEN
|
||||
from changedetectionio.processors import difference_detection_processor
|
||||
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
|
||||
from changedetectionio import html_tools, content_fetchers
|
||||
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
|
||||
from loguru import logger
|
||||
|
||||
from changedetectionio.processors.magic import guess_stream_type
|
||||
|
||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
|
||||
name = 'Webpage Text/HTML, JSON and PDF changes'
|
||||
description = 'Detects all text changes where possible'
|
||||
|
||||
JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:']
|
||||
|
||||
# Assume it's this type if the server says nothing on content-type
|
||||
DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER = 'text/html'
|
||||
json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
|
||||
|
||||
class FilterNotFoundInResponse(ValueError):
|
||||
def __init__(self, msg, screenshot=None, xpath_data=None):
|
||||
@@ -38,560 +32,356 @@ class PDFToHTMLToolNotFound(ValueError):
|
||||
ValueError.__init__(self, msg)
|
||||
|
||||
|
||||
class FilterConfig:
|
||||
"""Consolidates all filter and rule configurations from watch, tags, and global settings."""
|
||||
|
||||
def __init__(self, watch, datastore):
|
||||
self.watch = watch
|
||||
self.datastore = datastore
|
||||
self.watch_uuid = watch.get('uuid')
|
||||
# Cache computed properties to avoid repeated list operations
|
||||
self._include_filters_cache = None
|
||||
self._subtractive_selectors_cache = None
|
||||
|
||||
def _get_merged_rules(self, attr, include_global=False):
|
||||
"""Merge rules from watch, tags, and optionally global settings."""
|
||||
watch_rules = self.watch.get(attr, [])
|
||||
tag_rules = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr=attr)
|
||||
rules = list(dict.fromkeys(watch_rules + tag_rules))
|
||||
|
||||
if include_global:
|
||||
global_rules = self.datastore.data['settings']['application'].get(f'global_{attr}', [])
|
||||
rules = list(dict.fromkeys(rules + global_rules))
|
||||
|
||||
return rules
|
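dict.fromkeys is doing double duty in that merge: it deduplicates while preserving first-seen order, which a plain set() would not. A quick demonstration with invented selector lists:

# Sketch: order-preserving dedup merge, as used for watch + tag + global rules
watch_rules = ['div.price', 'span.title']
tag_rules = ['span.title', 'div.stock']
global_rules = ['div.price']

rules = list(dict.fromkeys(watch_rules + tag_rules))
rules = list(dict.fromkeys(rules + global_rules))
print(rules)  # ['div.price', 'span.title', 'div.stock']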

    @property
    def include_filters(self):
        if self._include_filters_cache is None:
            filters = self._get_merged_rules('include_filters')
            # Inject LD+JSON price tracker rule if enabled
            if self.watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
                filters += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
            self._include_filters_cache = filters
        return self._include_filters_cache

    @property
    def subtractive_selectors(self):
        if self._subtractive_selectors_cache is None:
            watch_selectors = self.watch.get("subtractive_selectors", [])
            tag_selectors = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr='subtractive_selectors')
            global_selectors = self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
            self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors]
        return self._subtractive_selectors_cache

    @property
    def extract_text(self):
        return self._get_merged_rules('extract_text')

    @property
    def ignore_text(self):
        return self._get_merged_rules('ignore_text', include_global=True)

    @property
    def trigger_text(self):
        return self._get_merged_rules('trigger_text')

    @property
    def text_should_not_be_present(self):
        return self._get_merged_rules('text_should_not_be_present')

    @property
    def has_include_filters(self):
        return bool(self.include_filters) and bool(self.include_filters[0].strip())

    @property
    def has_include_json_filters(self):
        return any(f.strip().startswith(prefix) for f in self.include_filters for prefix in JSON_FILTER_PREFIXES)

    @property
    def has_subtractive_selectors(self):
        return bool(self.subtractive_selectors) and bool(self.subtractive_selectors[0].strip())


class ContentTransformer:
    """Handles text transformations like trimming, sorting, and deduplication."""

    @staticmethod
    def trim_whitespace(text):
        """Remove leading/trailing whitespace from each line."""
        # Use a generator expression to avoid building an intermediate list
        return '\n'.join(line.strip() for line in text.replace("\n\n", "\n").splitlines())

    @staticmethod
    def remove_duplicate_lines(text):
        """Remove duplicate lines while preserving order."""
        return '\n'.join(dict.fromkeys(line for line in text.replace("\n\n", "\n").splitlines()))

    @staticmethod
    def sort_alphabetically(text):
        """Sort lines alphabetically (case-insensitive)."""
        # Remove double line feeds before sorting
        text = text.replace("\n\n", "\n")
        return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower()))

    @staticmethod
    def extract_by_regex(text, regex_patterns):
        """Extract text matching regex patterns."""
        # Use a list of strings instead of concatenating lists repeatedly (avoids O(n²) behavior)
        regex_matched_output = []

        for s_re in regex_patterns:
            # Check if it's a perl-style regex /.../
            if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
                regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
                result = re.findall(regex, text)

                for match in result:
                    if type(match) is tuple:
                        regex_matched_output.extend(match)
                        regex_matched_output.append('\n')
                    else:
                        regex_matched_output.append(match)
                        regex_matched_output.append('\n')
            else:
                # Plain text search (case-insensitive)
                r = re.compile(re.escape(s_re), re.IGNORECASE)
                res = r.findall(text)
                if res:
                    for match in res:
                        regex_matched_output.append(match)
                        regex_matched_output.append('\n')

        return ''.join(regex_matched_output) if regex_matched_output else ''


class RuleEngine:
    """Evaluates blocking rules (triggers, conditions, text_should_not_be_present)."""

    @staticmethod
    def evaluate_trigger_text(content, trigger_patterns):
        """
        Check if trigger text is present. If trigger_text is configured,
        content is blocked UNLESS the trigger is found.
        Returns True if blocked, False if allowed.
        """
        if not trigger_patterns:
            return False

        # Assume blocked if trigger_text is configured
        result = html_tools.strip_ignore_text(
            content=str(content),
            wordlist=trigger_patterns,
            mode="line numbers"
        )
        # Unblock if the trigger was found
        return not bool(result)

    @staticmethod
    def evaluate_text_should_not_be_present(content, patterns):
        """
        Check if forbidden text is present. If found, block the change.
        Returns True if blocked, False if allowed.
        """
        if not patterns:
            return False

        result = html_tools.strip_ignore_text(
            content=str(content),
            wordlist=patterns,
            mode="line numbers"
        )
        # Block if forbidden text was found
        return bool(result)

    @staticmethod
    def evaluate_conditions(watch, datastore, content):
        """
        Evaluate custom conditions ruleset.
        Returns True if blocked, False if allowed.
        """
        if not watch.get('conditions') or not watch.get('conditions_match_logic'):
            return False

        conditions_result = execute_ruleset_against_all_plugins(
            current_watch_uuid=watch.get('uuid'),
            application_datastruct=datastore.data,
            ephemeral_data={'text': content}
        )

        # Block if conditions are not met
        return not conditions_result.get('result')


class ContentProcessor:
    """Handles content preprocessing, filtering, and extraction."""

    def __init__(self, fetcher, watch, filter_config, datastore):
        self.fetcher = fetcher
        self.watch = watch
        self.filter_config = filter_config
        self.datastore = datastore

    def preprocess_rss(self, content):
        """
        Convert CDATA/comments in RSS to usable text.

        Supports two RSS processing modes:
        - 'default': Inline CDATA replacement (original behavior)
        - 'formatted': Format RSS items with title, link, guid, pubDate, and description (CDATA unmarked)
        """
        from changedetectionio import rss_tools
        rss_mode = self.datastore.data["settings"]["application"].get("rss_reader_mode")
        if rss_mode:
            # Format RSS items nicely with CDATA content unmarked and converted to text
            return rss_tools.format_rss_items(content)
        else:
            # Default: Original inline CDATA replacement
            return cdata_in_document_to_text(html_content=content)

    def preprocess_pdf(self, raw_content):
        """Convert PDF to HTML using an external tool."""
        from shutil import which
        tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
        if not which(tool):
            raise PDFToHTMLToolNotFound(
                f"Command-line `{tool}` tool was not found in system PATH, was it installed?"
            )

        import subprocess
        proc = subprocess.Popen(
            [tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE
        )
        proc.stdin.write(raw_content)
        proc.stdin.close()
        html_content = proc.stdout.read().decode('utf-8')
        proc.wait(timeout=60)

        # Add metadata for change detection
        metadata = (
            f"<p>Added by changedetection.io: Document checksum - "
            f"{hashlib.md5(raw_content).hexdigest().upper()} "
            f"Original file size - {len(raw_content)} bytes</p>"
        )
        return html_content.replace('</body>', metadata + '</body>')

    def preprocess_json(self, raw_content):
        """Format and sort JSON content."""
        # Then we re-format it, else it does have filters (later on) which will reformat it anyway
        content = html_tools.extract_json_as_string(content=raw_content, json_filter="json:$")

        # Sort JSON to avoid false alerts from reordering
        try:
            content = json.dumps(json.loads(content), sort_keys=True, indent=4)
        except Exception:
            # Might be malformed JSON, continue anyway
            pass

        return content

    def apply_include_filters(self, content, stream_content_type):
        """Apply CSS, XPath, or JSON filters to extract specific content."""
        filtered_content = ""

        for filter_rule in self.filter_config.include_filters:
            # XPath filters
            if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
                filtered_content += html_tools.xpath_filter(
                    xpath_filter=filter_rule.replace('xpath:', ''),
                    html_content=content,
                    append_pretty_line_formatting=not self.watch.is_source_type_url,
                    is_rss=stream_content_type.is_rss
                )

            # XPath1 filters (first match only)
            elif filter_rule.startswith('xpath1:'):
                filtered_content += html_tools.xpath1_filter(
                    xpath_filter=filter_rule.replace('xpath1:', ''),
                    html_content=content,
                    append_pretty_line_formatting=not self.watch.is_source_type_url,
                    is_rss=stream_content_type.is_rss
                )

            # JSON filters
            elif any(filter_rule.startswith(prefix) for prefix in JSON_FILTER_PREFIXES):
                filtered_content += html_tools.extract_json_as_string(
                    content=content,
                    json_filter=filter_rule
                )

            # CSS selectors, default fallback
            else:
                filtered_content += html_tools.include_filters(
                    include_filters=filter_rule,
                    html_content=content,
                    append_pretty_line_formatting=not self.watch.is_source_type_url
                )

        # Raise an error if the filter returned nothing
        if not filtered_content.strip():
            raise FilterNotFoundInResponse(
                msg=self.filter_config.include_filters,
                screenshot=self.fetcher.screenshot,
                xpath_data=self.fetcher.xpath_data
            )

        return filtered_content

    def apply_subtractive_selectors(self, content):
        """Remove elements matching subtractive selectors."""
        return html_tools.element_removal(self.filter_config.subtractive_selectors, content)

    def extract_text_from_html(self, html_content, stream_content_type):
        """Convert HTML to plain text."""
        do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
        return html_tools.html_to_text(
            html_content=html_content,
            render_anchor_tag_content=do_anchor,
            is_rss=stream_content_type.is_rss
        )


class ChecksumCalculator:
    """Calculates checksums with various options."""

    @staticmethod
    def calculate(text, ignore_whitespace=False):
        """Calculate the MD5 checksum of text content."""
        if ignore_whitespace:
            text = text.translate(TRANSLATE_WHITESPACE_TABLE)
        return hashlib.md5(text.encode('utf-8')).hexdigest()

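This checksum is the change-detection primitive: identical text yields an identical digest, and the ignore_whitespace switch collapses whitespace first so that reflowed pages do not register as changes. A standalone sketch; the translation table is a simplified stand-in for TRANSLATE_WHITESPACE_TABLE:

# Sketch: whitespace-insensitive checksums (mirrors ChecksumCalculator.calculate)
import hashlib

TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', ' \t\r\n')  # simplified stand-in

def calculate(text, ignore_whitespace=False):
    if ignore_whitespace:
        text = text.translate(TRANSLATE_WHITESPACE_TABLE)
    return hashlib.md5(text.encode('utf-8')).hexdigest()

assert calculate('a b\nc', ignore_whitespace=True) == calculate('ab c', ignore_whitespace=True)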

# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check(difference_detection_processor):

    def run_changedetection(self, watch):
        changed_detected = False
        html_content = ""
        screenshot = False  # as bytes
        stripped_text_from_html = ""

        if not watch:
            raise Exception("Watch no longer exists.")

        # Initialize components
        filter_config = FilterConfig(watch, self.datastore)
        content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
        transformer = ContentTransformer()
        rule_engine = RuleEngine()

        # Get content type and stream info
        ctype_header = self.fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower()
        stream_content_type = guess_stream_type(http_content_header=ctype_header, content=self.fetcher.content)

        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False}

        url = watch.link

        self.screenshot = self.fetcher.screenshot
        self.xpath_data = self.fetcher.xpath_data

        # Track the content type and checksum before filters
        update_obj['content_type'] = ctype_header
        # Track the content type
        update_obj['content_type'] = self.fetcher.get_all_headers().get('content-type', '').lower()

        # Watches added automatically in the queue manager will skip if it's the same checksum as the previous run
        # Saves a lot of CPU
        update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()

        # === CONTENT PREPROCESSING ===
        # Avoid creating unnecessary intermediate string copies by reassigning only when needed
        content = self.fetcher.content
        # Fetching complete, now filters

        # RSS preprocessing
        if stream_content_type.is_rss:
            content = content_processor.preprocess_rss(content)
            if self.datastore.data["settings"]["application"].get("rss_reader_mode"):
                # Now it just becomes regular HTML that can have xpath/CSS applied (first of the set etc)
                stream_content_type.is_rss = False
                stream_content_type.is_html = True
                self.fetcher.content = content
        # @note: I feel like the following should be in a more obvious chain system
        #  - Check filter text
        #  - Is the checksum different?
        #  - Do we convert to JSON?
        # https://stackoverflow.com/questions/41817578/basic-method-chaining ?
        # return content().textfilter().jsonextract().checksumcompare() ?

        # PDF preprocessing
        if watch.is_pdf or stream_content_type.is_pdf:
            content = content_processor.preprocess_pdf(raw_content=self.fetcher.raw_content)
            stream_content_type.is_html = True
        is_json = 'application/json' in self.fetcher.get_all_headers().get('content-type', '').lower()
        is_html = not is_json
        is_rss = False

        # JSON - Always reformat it nicely for consistency.
        ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
        # Go into RSS preprocess for converting CDATA/comment to usable text
        if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
            if '<rss' in self.fetcher.content[:100].lower():
                self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
                is_rss = True

        if stream_content_type.is_json:
            if not filter_config.has_include_json_filters:
                content = content_processor.preprocess_json(raw_content=content)
            # else: it gets sorted/formatted in the filter stage anyway

        # HTML obfuscation workarounds
        if stream_content_type.is_html:
            content = html_tools.workarounds_for_obfuscations(content)

        # Check for LD+JSON price data (for HTML content)
        if stream_content_type.is_html:
            update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(content)

||||
# === FILTER APPLICATION ===
|
||||
# Start with content reference, avoid copy until modification
|
||||
html_content = content
|
||||
|
||||
# Apply include filters (CSS, XPath, JSON)
|
||||
# Except for plaintext (incase they tried to confuse the system, it will HTML escape
|
||||
#if not stream_content_type.is_plaintext:
|
||||
if filter_config.has_include_filters:
|
||||
html_content = content_processor.apply_include_filters(content, stream_content_type)
|
||||
|
||||
# Apply subtractive selectors
|
||||
if filter_config.has_subtractive_selectors:
|
||||
html_content = content_processor.apply_subtractive_selectors(html_content)
|
||||
|
||||
# === TEXT EXTRACTION ===
|
||||
# source: support, basically treat it as plaintext
|
||||
if watch.is_source_type_url:
|
||||
# For source URLs, keep raw content
|
||||
stripped_text = html_content
|
||||
elif stream_content_type.is_plaintext:
|
||||
# For plaintext, keep as-is without HTML-to-text conversion
|
||||
stripped_text = html_content
|
||||
else:
|
||||
# Extract text from HTML/RSS content (not generic XML)
|
||||
if stream_content_type.is_html or stream_content_type.is_rss:
|
||||
stripped_text = content_processor.extract_text_from_html(html_content, stream_content_type)
|
||||
is_html = False
|
||||
is_json = False
|
||||
|
||||
inline_pdf = self.fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in self.fetcher.content[:10]
|
||||
if watch.is_pdf or 'application/pdf' in self.fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf:
|
||||
from shutil import which
|
||||
tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
|
||||
if not which(tool):
|
||||
raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool))
|
||||
|
||||
import subprocess
|
||||
proc = subprocess.Popen(
|
||||
[tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
|
||||
stdout=subprocess.PIPE,
|
||||
stdin=subprocess.PIPE)
|
||||
proc.stdin.write(self.fetcher.raw_content)
|
||||
proc.stdin.close()
|
||||
self.fetcher.content = proc.stdout.read().decode('utf-8')
|
||||
proc.wait(timeout=60)
|
||||
|
||||
# Add a little metadata so we know if the file changes (like if an image changes, but the text is the same
|
||||
# @todo may cause problems with non-UTF8?
|
||||
metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format(
|
||||
hashlib.md5(self.fetcher.raw_content).hexdigest().upper(),
|
||||
len(self.fetcher.content))
|
||||
|
||||
self.fetcher.content = self.fetcher.content.replace('</body>', metadata + '</body>')
|
||||
|
||||
# Better would be if Watch.model could access the global data also
|
||||
# and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
|
||||
# https://realpython.com/inherit-python-dict/ instead of doing it procedurely
|
||||
include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')
|
||||
|
||||
# 1845 - remove duplicated filters in both group and watch include filter
|
||||
include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))
|
||||
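# A minimal illustration (not project code) of the order-preserving dedupe
# above: dict.fromkeys() keeps the first occurrence of each filter and
# preserves insertion order, unlike set(), which would scramble it.
#
#   >>> list(dict.fromkeys(['div.price', '//span[@id="total"]', 'div.price']))
#   ['div.price', '//span[@id="total"]']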
subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
                         *watch.get("subtractive_selectors", []),
                         *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
                         ]

# Inject a virtual LD+JSON price tracker rule
if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
    include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS

has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip())
has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip())

if is_json and not has_filter_rule:
    include_filters_rule.append("json:$")
    has_filter_rule = True

if is_json:
    # Sort the JSON so we don't get false alerts when the content is just re-ordered
    try:
        self.fetcher.content = json.dumps(json.loads(self.fetcher.content), sort_keys=True)
    except Exception as e:
        # Might have just been a snippet, or otherwise bad JSON, continue
        pass
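# Why the re-serialisation above suppresses false alerts: with sort_keys=True,
# key-reordered payloads serialise identically (example values are made up).
#
#   >>> import json
#   >>> a = '{"price": 9.99, "stock": 3}'
#   >>> b = '{"stock": 3, "price": 9.99}'   # same data, keys re-ordered by the server
#   >>> json.dumps(json.loads(a), sort_keys=True) == json.dumps(json.loads(b), sort_keys=True)
#   True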
if has_filter_rule:
    for filter in include_filters_rule:
        if any(prefix in filter for prefix in json_filter_prefixes):
            stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter)
            is_html = False

if is_html or watch.is_source_type_url:

    # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
    self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
    html_content = self.fetcher.content

    # If not JSON, and if it's not text/plain...
    if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower():
        # Don't run get_text or xpath/css filters on plaintext
        stripped_text_from_html = html_content
    else:
        stripped_text = html_content
        # Does it have some ld+json price data? Used for easier monitoring
        update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(self.fetcher.content)

        # Then we assume HTML
        if has_filter_rule:
            html_content = ""

            for filter_rule in include_filters_rule:
                # For HTML/XML we offer XPath as an option - just start with a regular XPath like "/.."
                if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
                    html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
                                                            html_content=self.fetcher.content,
                                                            append_pretty_line_formatting=not watch.is_source_type_url,
                                                            is_rss=is_rss)

                elif filter_rule.startswith('xpath1:'):
                    html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
                                                             html_content=self.fetcher.content,
                                                             append_pretty_line_formatting=not watch.is_source_type_url,
                                                             is_rss=is_rss)
                else:
                    html_content += html_tools.include_filters(include_filters=filter_rule,
                                                               html_content=self.fetcher.content,
                                                               append_pretty_line_formatting=not watch.is_source_type_url)

            if not html_content.strip():
                raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)

        if has_subtractive_selectors:
            html_content = html_tools.element_removal(subtractive_selectors, html_content)

        if watch.is_source_type_url:
            stripped_text_from_html = html_content
        else:
            # extract text
            do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
            stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
                                                              render_anchor_tag_content=do_anchor,
                                                              is_rss=is_rss)  # 1874 - activate the <title> workaround hack

# === TEXT TRANSFORMATIONS ===
if watch.get('trim_text_whitespace'):
    stripped_text = transformer.trim_whitespace(stripped_text)
    stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())

# Save text before ignore filters (for diff calculation)
text_content_before_ignored_filter = stripped_text
# Re #340 - return the content before the 'ignore text' was applied
# Also used to calculate/show what was removed
text_content_before_ignored_filter = stripped_text_from_html

# @todo whitespace coming from missing rtrim()?
# stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
# Rewrites the processed text based on only what diff result they want to see

# === DIFF FILTERING ===
# If the user wants specific diff types (added/removed/replaced only)
if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
    stripped_text = self._apply_diff_filtering(watch, stripped_text, text_content_before_ignored_filter)
    if stripped_text is None:
        # No differences found, but content exists
        c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True)
        return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8')
    # Now the content comes from the diff-parser and not the returned HTTP traffic, so there could be some differences
    from changedetectionio import diff
    # Needs to not include '(added)' etc., or it may get used twice
    # Replace the processed text with the preferred result
    rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
                                     newest_version_file_contents=stripped_text_from_html,
                                     include_equal=False,  # not the same lines
                                     include_added=watch.get('filter_text_added', True),
                                     include_removed=watch.get('filter_text_removed', True),
                                     include_replaced=watch.get('filter_text_replaced', True),
                                     line_feed_sep="\n",
                                     include_change_type_prefix=False)

    # === EMPTY PAGE CHECK ===
    watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))

    if not rendered_diff and stripped_text_from_html:
        # We had some content, but no differences were found
        # Store our new file as the MD5 so it will trigger in the future
        c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
        return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
    else:
        stripped_text_from_html = rendered_diff

# Treat pages with no renderable text content as a change? No by default
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0:
    raise content_fetchers.exceptions.ReplyWithContentButNoText(
        url=url,
        status_code=self.fetcher.get_last_status_code(),
        screenshot=self.fetcher.screenshot,
        has_filters=filter_config.has_include_filters,
        html_content=html_content,
        xpath_data=self.fetcher.xpath_data
    )
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
    raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
                                                                status_code=self.fetcher.get_last_status_code(),
                                                                screenshot=self.fetcher.screenshot,
                                                                has_filters=has_filter_rule,
                                                                html_content=html_content,
                                                                xpath_data=self.fetcher.xpath_data
                                                                )

# We rely on the actual text in the HTML output... many sites have random script vars etc.,
# in the future we'll implement other mechanisms.

update_obj["last_check_status"] = self.fetcher.get_last_status_code()

# === REGEX EXTRACTION ===
if filter_config.extract_text:
    extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text)
    stripped_text = extracted
# 615 - Extract text by regex
extract_text = watch.get('extract_text', [])
extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text')
if len(extract_text) > 0:
    regex_matched_output = []
    for s_re in extract_text:
        # In case they specified something in '/.../x' form
        if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
            regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
            result = re.findall(regex, stripped_text_from_html)

            for l in result:
                if type(l) is tuple:
                    # @todo - some formatter option default (between groups)
                    regex_matched_output += list(l) + ['\n']
                else:
                    # @todo - some formatter option default (between each ungrouped result)
                    regex_matched_output += [l] + ['\n']
        else:
            # Doesn't look like a regex, just hunt for plaintext and return whatever matches
            # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
            r = re.compile(re.escape(s_re), re.IGNORECASE)
            res = r.findall(stripped_text_from_html)
            if res:
                for match in res:
                    regex_matched_output += [match] + ['\n']

    ##########################################################
    stripped_text_from_html = ''

    if regex_matched_output:
        # @todo some formatter for presentation?
        stripped_text_from_html = ''.join(regex_matched_output)
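# The '/pattern/flags' handling above is easier to see in isolation. A rough
# sketch of what a helper like perl_style_slash_enclosed_regex_to_options must
# do (hypothetical input, plain `re` only - not the project implementation):
#
#   >>> import re
#   >>> s_re = r'/price: \d+/i'                  # as a user might type it
#   >>> m = re.match(r'^/(.*)/([a-z]*)$', s_re, re.DOTALL)
#   >>> flags = re.IGNORECASE if 'i' in m.group(2) else 0
#   >>> re.findall(m.group(1), 'Price: 42\nprice: 43', flags)
#   ['Price: 42', 'price: 43']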
# === MORE TEXT TRANSFORMATIONS ===
if watch.get('remove_duplicate_lines'):
    stripped_text = transformer.remove_duplicate_lines(stripped_text)
    stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))

if watch.get('sort_text_alphabetically'):
    stripped_text = transformer.sort_alphabetically(stripped_text)
    # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
    # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
    stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
    stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))

# === CHECKSUM CALCULATION ===
text_for_checksuming = stripped_text
### CALCULATE MD5
# If there's text to ignore
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text')

# Apply ignore_text for checksum calculation
if filter_config.ignore_text:
    text_for_checksuming = html_tools.strip_ignore_text(stripped_text, filter_config.ignore_text)
text_for_checksuming = stripped_text_from_html
if text_to_ignore:
    text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)

# Optionally remove ignored lines from output
strip_ignored_lines = watch.get('strip_ignored_lines')
if strip_ignored_lines is None:
    strip_ignored_lines = self.datastore.data['settings']['application'].get('strip_ignored_lines')
if strip_ignored_lines:
    stripped_text = text_for_checksuming
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
    fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
else:
    fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()

# Calculate checksum
ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace', False)
fetched_md5 = ChecksumCalculator.calculate(text_for_checksuming, ignore_whitespace=ignore_whitespace)
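# A minimal sketch of the whitespace-insensitive checksum idea above, assuming
# TRANSLATE_WHITESPACE_TABLE behaves like a str.translate() table that deletes
# whitespace (the table is rebuilt locally here; values are made up):
#
#   >>> import hashlib, string
#   >>> table = str.maketrans('', '', string.whitespace)
#   >>> digest = lambda s: hashlib.md5(s.translate(table).encode('utf-8')).hexdigest()
#   >>> digest("Price: 9.99\nIn stock") == digest("Price:   9.99\n\nIn stock\n")
#   True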
# === BLOCKING RULES EVALUATION ===
############ Blocking rules, after checksum #################
blocked = False

# Check trigger_text
if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
    trigger_text = watch.get('trigger_text', [])
    trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text')
    if len(trigger_text):
        # Assume blocked
        blocked = True
        # Filter and trigger work the same, so reuse it
        # It should return the line numbers that match
        # Unblock the flow if the trigger was found (some text remained after stripping what didn't match)
        result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
                                              wordlist=trigger_text,
                                              mode="line numbers")
        # Unblock if the trigger was found
        if result:
            blocked = False
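# The block/unblock dance above, in isolation (hypothetical helper, not the
# real strip_ignore_text): find the 1-based line numbers containing a trigger
# word, and clear `blocked` only when at least one line matched.
#
#   >>> def matching_lines(content, wordlist):
#   ...     return [n for n, line in enumerate(content.splitlines(), 1)
#   ...             if any(w.lower() in line.lower() for w in wordlist)]
#   >>> matching_lines("Item: Widget\nBack in stock today", ['back in stock'])
#   [2]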
# Check text_should_not_be_present
if rule_engine.evaluate_text_should_not_be_present(stripped_text, filter_config.text_should_not_be_present):
    blocked = True
text_should_not_be_present = watch.get('text_should_not_be_present', [])
text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present')
if len(text_should_not_be_present):
    # If anything matched, then we should block a change from happening
    result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
                                          wordlist=text_should_not_be_present,
                                          mode="line numbers")
    if result:
        blocked = True

# Check custom conditions
if rule_engine.evaluate_conditions(watch, self.datastore, stripped_text):
    blocked = True
# And check if 'conditions' will let this pass through
if watch.get('conditions') and watch.get('conditions_match_logic'):
    conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
                                                            application_datastruct=self.datastore.data,
                                                            ephemeral_data={
                                                                'text': stripped_text_from_html
                                                            }
                                                            )

    # === CHANGE DETECTION ===
    if not conditions_result.get('result'):
        # Conditions say "Condition not met" so we block it.
        blocked = True

# Looks like something changed, but did it match all the rules?
if blocked:
    changed_detected = False
else:
    # Compare checksums
    # The main thing that all of this comes down to at the moment :)
    if watch.get('previous_md5') != fetched_md5:
        changed_detected = True

    # Always record the new checksum
    update_obj["previous_md5"] = fetched_md5

    # On first run, initialize previous_md5
    # On the first run of a site, watch['previous_md5'] will be None - set it to the current one.
    if not watch.get('previous_md5'):
        watch['previous_md5'] = fetched_md5

logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")

# === UNIQUE LINES CHECK ===
if changed_detected and watch.get('check_unique_lines', False):
    has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
        lines=stripped_text.splitlines(),
        ignore_whitespace=ignore_whitespace
    )
if changed_detected:
    if watch.get('check_unique_lines', False):
        ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')

        if not has_unique_lines:
            logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
            changed_detected = False
        else:
            logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
        has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
            lines=stripped_text_from_html.splitlines(),
            ignore_whitespace=ignore_whitespace
        )

# Note: Explicit cleanup is only needed here because text_json_diff handles
# large strings (100KB-300KB for RSS/HTML). The other processors work with
# small strings and don't need this.
#
# Python would clean these up automatically, but explicit `del` frees memory
# immediately rather than waiting for function return, reducing peak memory usage.
del content
if 'html_content' in locals() and html_content is not stripped_text:
    del html_content
if 'text_content_before_ignored_filter' in locals() and text_content_before_ignored_filter is not stripped_text:
    del text_content_before_ignored_filter
if 'text_for_checksuming' in locals() and text_for_checksuming is not stripped_text:
    del text_for_checksuming
# One or more lines? unsure?
if not has_unique_lines:
    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
    changed_detected = False
else:
    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")

return changed_detected, update_obj, stripped_text

def _apply_diff_filtering(self, watch, stripped_text, text_before_filter):
    """Apply the user's diff filtering preferences (show only added/removed/replaced lines)."""
    from changedetectionio import diff

    rendered_diff = diff.render_diff(
        previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
        newest_version_file_contents=stripped_text,
        include_equal=False,
        include_added=watch.get('filter_text_added', True),
        include_removed=watch.get('filter_text_removed', True),
        include_replaced=watch.get('filter_text_replaced', True),
        line_feed_sep="\n",
        include_change_type_prefix=False
    )

    watch.save_last_text_fetched_before_filters(text_before_filter.encode('utf-8'))

    if not rendered_diff and stripped_text:
        # No differences found
        return None

    return rendered_diff
# stripped_text_from_html - Everything after filters and NO 'ignored' content
return changed_detected, update_obj, stripped_text_from_html
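The diff.render_diff() used above is internal to changedetection.io; as a rough analogy only, the "added lines only" behaviour can be sketched with the standard library's difflib (an illustration of the concept, not the actual implementation - all values below are invented):

import difflib

def added_lines_only(previous: str, current: str) -> str:
    # Keep only the lines that are new in `current` compared to `previous`
    return '\n'.join(line[2:]
                     for line in difflib.ndiff(previous.splitlines(), current.splitlines())
                     if line.startswith('+ '))

before = "Price: 9.99\nIn stock"
after = "Price: 9.99\nIn stock\nFree shipping today"
print(added_lines_only(before, after))  # -> Free shipping today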
@@ -1,5 +1,5 @@
[pytest]
addopts = --no-start-live-server --live-server-port=0
addopts = --no-start-live-server --live-server-port=5005
#testpaths = tests pytest_invenio
#live_server_scope = function
@@ -1,435 +0,0 @@
from blinker import signal
from loguru import logger
from typing import Dict, List, Any, Optional
import heapq
import queue
import threading

try:
    import janus
except ImportError:
    logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus")
    raise


class RecheckPriorityQueue:
    """
    Ultra-reliable priority queue using janus for async/sync bridging.

    CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
    - sync_q: Used by Flask routes, ticker threads, and other synchronous code
    - async_q: Used by async workers (the actual fetchers/processors) and coroutines

    DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
    - Synchronous code (Flask, threads) cannot use async methods without blocking
    - Async code cannot use sync methods without blocking the event loop
    - janus provides the only safe bridge between these two worlds

    Attempting to unify to async-only would require:
    - Converting all Flask routes to async (major breaking change)
    - Using asyncio.run() in sync contexts (causes deadlocks)
    - Thread-pool wrapping (adds complexity and overhead)

    Minimal implementation focused on reliability:
    - Pure janus for sync/async bridge
    - Thread-safe priority ordering
    - Bulletproof error handling with critical logging
    """

    def __init__(self, maxsize: int = 0):
        try:
            self._janus_queue = janus.Queue(maxsize=maxsize)
            # BOTH interfaces required - see class docstring for why
            self.sync_q = self._janus_queue.sync_q    # Flask routes, ticker thread
            self.async_q = self._janus_queue.async_q  # Async workers

            # Priority storage - thread-safe
            self._priority_items = []
            self._lock = threading.RLock()

            # Signals for UI updates
            self.queue_length_signal = signal('queue_length')

            logger.debug("RecheckPriorityQueue initialized successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
            raise

    # SYNC INTERFACE (for ticker thread)
    def put(self, item, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync put with priority ordering"""
        try:
            # Add to priority storage
            with self._lock:
                heapq.heappush(self._priority_items, item)

            # Notify via janus sync queue
            self.sync_q.put(True, block=block, timeout=timeout)

            # Emit signals
            self._emit_put_signals(item)

            logger.debug(f"Successfully queued item: {self._get_item_uuid(item)}")
            return True

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}")
            # Remove from priority storage if janus put failed
            try:
                with self._lock:
                    if item in self._priority_items:
                        self._priority_items.remove(item)
                        heapq.heapify(self._priority_items)
            except Exception as cleanup_e:
                logger.critical(f"CRITICAL: Failed to cleanup after put failure: {str(cleanup_e)}")
            return False

    def get(self, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync get with priority ordering"""
        try:
            # Wait for notification
            self.sync_q.get(block=block, timeout=timeout)

            # Get highest priority item
            with self._lock:
                if not self._priority_items:
                    logger.critical(f"CRITICAL: Queue notification received but no priority items available")
                    raise Exception("Priority queue inconsistency")
                item = heapq.heappop(self._priority_items)

            # Emit signals
            self._emit_get_signals()

            logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
            return item

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get item from queue: {str(e)}")
            raise

    # ASYNC INTERFACE (for workers)
    async def async_put(self, item):
        """Pure async put with priority ordering"""
        try:
            # Add to priority storage
            with self._lock:
                heapq.heappush(self._priority_items, item)

            # Notify via janus async queue
            await self.async_q.put(True)

            # Emit signals
            self._emit_put_signals(item)

            logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
            return True

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
            # Remove from priority storage if janus put failed
            try:
                with self._lock:
                    if item in self._priority_items:
                        self._priority_items.remove(item)
                        heapq.heapify(self._priority_items)
            except Exception as cleanup_e:
                logger.critical(f"CRITICAL: Failed to cleanup after async put failure: {str(cleanup_e)}")
            return False

    async def async_get(self):
        """Pure async get with priority ordering"""
        try:
            # Wait for notification
            await self.async_q.get()

            # Get highest priority item
            with self._lock:
                if not self._priority_items:
                    logger.critical(f"CRITICAL: Async queue notification received but no priority items available")
                    raise Exception("Priority queue inconsistency")
                item = heapq.heappop(self._priority_items)

            # Emit signals
            self._emit_get_signals()

            logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
            return item

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}")
            raise

    # UTILITY METHODS
    def qsize(self) -> int:
        """Get current queue size"""
        try:
            with self._lock:
                return len(self._priority_items)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue size: {str(e)}")
            return 0

    def empty(self) -> bool:
        """Check if queue is empty"""
        return self.qsize() == 0

    def close(self):
        """Close the janus queue"""
        try:
            self._janus_queue.close()
            logger.debug("RecheckPriorityQueue closed successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")

    # COMPATIBILITY METHODS (from original implementation)
    @property
    def queue(self):
        """Provide compatibility with original queue access"""
        try:
            with self._lock:
                return list(self._priority_items)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue list: {str(e)}")
            return []

    def get_uuid_position(self, target_uuid: str) -> Dict[str, Any]:
        """Find position of UUID in queue"""
        try:
            with self._lock:
                queue_list = list(self._priority_items)
                total_items = len(queue_list)

                if total_items == 0:
                    return {'position': None, 'total_items': 0, 'priority': None, 'found': False}

                # Find target item
                for item in queue_list:
                    if (hasattr(item, 'item') and isinstance(item.item, dict) and
                            item.item.get('uuid') == target_uuid):

                        # Count items with higher priority
                        position = sum(1 for other in queue_list if other.priority < item.priority)
                        return {
                            'position': position,
                            'total_items': total_items,
                            'priority': item.priority,
                            'found': True
                        }

                return {'position': None, 'total_items': total_items, 'priority': None, 'found': False}

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get UUID position for {target_uuid}: {str(e)}")
            return {'position': None, 'total_items': 0, 'priority': None, 'found': False}

    def get_all_queued_uuids(self, limit: Optional[int] = None, offset: int = 0) -> Dict[str, Any]:
        """Get all queued UUIDs with pagination"""
        try:
            with self._lock:
                queue_list = sorted(self._priority_items)  # Sort by priority
                total_items = len(queue_list)

                if total_items == 0:
                    return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}

                # Apply pagination
                end_idx = min(offset + limit, total_items) if limit else total_items
                items_to_process = queue_list[offset:end_idx]

                result = []
                for position, item in enumerate(items_to_process, start=offset):
                    if (hasattr(item, 'item') and isinstance(item.item, dict) and
                            'uuid' in item.item):
                        result.append({
                            'uuid': item.item['uuid'],
                            'position': position,
                            'priority': item.priority
                        })

                return {
                    'items': result,
                    'total_items': total_items,
                    'returned_items': len(result),
                    'has_more': (offset + len(result)) < total_items
                }

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get all queued UUIDs: {str(e)}")
            return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}

    def get_queue_summary(self) -> Dict[str, Any]:
        """Get queue summary statistics"""
        try:
            with self._lock:
                queue_list = list(self._priority_items)
                total_items = len(queue_list)

                if total_items == 0:
                    return {
                        'total_items': 0, 'priority_breakdown': {},
                        'immediate_items': 0, 'clone_items': 0, 'scheduled_items': 0
                    }

                immediate_items = clone_items = scheduled_items = 0
                priority_counts = {}

                for item in queue_list:
                    priority = item.priority
                    priority_counts[priority] = priority_counts.get(priority, 0) + 1

                    if priority == 1:
                        immediate_items += 1
                    elif priority == 5:
                        clone_items += 1
                    elif priority > 100:
                        scheduled_items += 1

                return {
                    'total_items': total_items,
                    'priority_breakdown': priority_counts,
                    'immediate_items': immediate_items,
                    'clone_items': clone_items,
                    'scheduled_items': scheduled_items,
                    'min_priority': min(priority_counts.keys()) if priority_counts else None,
                    'max_priority': max(priority_counts.keys()) if priority_counts else None
                }

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue summary: {str(e)}")
            return {'total_items': 0, 'priority_breakdown': {}, 'immediate_items': 0,
                    'clone_items': 0, 'scheduled_items': 0}

    # PRIVATE METHODS
    def _get_item_uuid(self, item) -> str:
        """Safely extract UUID from item for logging"""
        try:
            if hasattr(item, 'item') and isinstance(item.item, dict):
                return item.item.get('uuid', 'unknown')
        except Exception:
            pass
        return 'unknown'

    def _emit_put_signals(self, item):
        """Emit signals when item is added"""
        try:
            # Watch update signal
            if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
                watch_check_update = signal('watch_check_update')
                if watch_check_update:
                    watch_check_update.send(watch_uuid=item.item['uuid'])

            # Queue length signal
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}")

    def _emit_get_signals(self):
        """Emit signals when item is removed"""
        try:
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit get signals: {str(e)}")
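# Usage sketch for the two interfaces documented in the docstring above
# (illustrative only; assumes queued items are heapq-comparable and expose a
# `.priority` attribute, like the project's queue items):
#
#   q = RecheckPriorityQueue()
#
#   # Synchronous side (Flask route / ticker thread):
#   q.put(item)
#   next_item = q.get(timeout=5)
#
#   # Asynchronous side (async worker, inside the event loop):
#   await q.async_put(item)
#   next_item = await q.async_get()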
class NotificationQueue:
    """
    Ultra-reliable notification queue using pure janus.

    CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
    - sync_q: Used by Flask routes, ticker threads, and other synchronous code
    - async_q: Used by async workers and coroutines

    DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts.
    See RecheckPriorityQueue docstring above for detailed explanation.

    Simple wrapper around janus with bulletproof error handling.
    """

    def __init__(self, maxsize: int = 0):
        try:
            self._janus_queue = janus.Queue(maxsize=maxsize)
            # BOTH interfaces required - see class docstring for why
            self.sync_q = self._janus_queue.sync_q    # Flask routes, threads
            self.async_q = self._janus_queue.async_q  # Async workers
            self.notification_event_signal = signal('notification_event')
            logger.debug("NotificationQueue initialized successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}")
            raise

    def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync put with signal emission"""
        try:
            self.sync_q.put(item, block=block, timeout=timeout)
            self._emit_notification_signal(item)
            logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
            return True
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}")
            return False

    async def async_put(self, item: Dict[str, Any]):
        """Pure async put with signal emission"""
        try:
            await self.async_q.put(item)
            self._emit_notification_signal(item)
            logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
            return True
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}")
            return False

    def get(self, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync get"""
        try:
            return self.sync_q.get(block=block, timeout=timeout)
        except queue.Empty as e:
            raise e
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get notification: {str(e)}")
            raise e

    async def async_get(self):
        """Pure async get"""
        try:
            return await self.async_q.get()
        except queue.Empty as e:
            raise e
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async get notification: {str(e)}")
            raise e

    def qsize(self) -> int:
        """Get current queue size"""
        try:
            return self.sync_q.qsize()
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}")
            return 0

    def empty(self) -> bool:
        """Check if queue is empty"""
        return self.qsize() == 0

    def close(self):
        """Close the janus queue"""
        try:
            self._janus_queue.close()
            logger.debug("NotificationQueue closed successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}")

    def _emit_notification_signal(self, item: Dict[str, Any]):
        """Emit notification signal"""
        try:
            if self.notification_event_signal and isinstance(item, dict):
                watch_uuid = item.get('uuid')
                if watch_uuid:
                    self.notification_event_signal.send(watch_uuid=watch_uuid)
                else:
                    self.notification_event_signal.send()
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit notification signal: {str(e)}")
@@ -29,9 +29,6 @@ class SignalHandler:
        watch_delete_signal = signal('watch_deleted')
        watch_delete_signal.connect(self.handle_deleted_signal, weak=False)

        watch_favicon_bumped_signal = signal('watch_favicon_bump')
        watch_favicon_bumped_signal.connect(self.handle_watch_bumped_favicon_signal, weak=False)

        # Connect to the notification_event signal
        notification_event_signal = signal('notification_event')
        notification_event_signal.connect(self.handle_notification_event, weak=False)
@@ -40,7 +37,7 @@ class SignalHandler:
        # Create and start the queue update thread using standard threading
        import threading
        self.polling_emitter_thread = threading.Thread(
            target=self.polling_emit_running_or_queued_watches_threaded,
            daemon=True
        )
        self.polling_emitter_thread.start()
@@ -72,16 +69,6 @@ class SignalHandler:
        else:
            logger.warning(f"Watch UUID {watch_uuid} not found in datastore")

    def handle_watch_bumped_favicon_signal(self, *args, **kwargs):
        watch_uuid = kwargs.get('watch_uuid')
        if watch_uuid:
            # Notify all connected clients that the favicon was updated
            self.socketio_instance.emit("watch_bumped_favicon", {
                "uuid": watch_uuid,
                "event_timestamp": time.time()
            })
        logger.debug(f"Watch UUID {watch_uuid} got its favicon updated")

    def handle_deleted_signal(self, *args, **kwargs):
        watch_uuid = kwargs.get('watch_uuid')
        if watch_uuid:
@@ -118,38 +105,39 @@ class SignalHandler:
                "watch_uuid": watch_uuid,
                "event_timestamp": time.time()
            })

            logger.trace(f"Socket.IO: Emitted notification_event for watch UUID {watch_uuid}")

        except Exception as e:
            logger.error(f"Socket.IO error in handle_notification_event: {str(e)}")

    def polling_emit_running_or_queued_watches_threaded(self):
        """Threading version of polling for Windows compatibility"""
        import time
        import threading
        logger.info("Queue update thread started (threading mode)")

        # Import here to avoid circular imports
        from changedetectionio.flask_app import app
        from changedetectionio import worker_handler
        watch_check_update = signal('watch_check_update')

        # Track previous state to avoid unnecessary emissions
        previous_running_uuids = set()

        # Run until app shutdown - check the exit flag frequently for fast shutdown
        exit_event = getattr(app.config, 'exit', threading.Event())

        while not exit_event.is_set():
            try:
                # Get current running UUIDs from async workers
                running_uuids = set(worker_handler.get_running_uuids())

                # Only send updates for UUIDs that changed state
                newly_running = running_uuids - previous_running_uuids
                no_longer_running = previous_running_uuids - running_uuids

                # Send updates for newly running UUIDs (but exit fast if shutdown requested)
                for uuid in newly_running:
                    if exit_event.is_set():
@@ -158,7 +146,7 @@ class SignalHandler:
                    with app.app_context():
                        watch_check_update.send(app_context=app, watch_uuid=uuid)
                    time.sleep(0.01)  # Small yield

                # Send updates for UUIDs that finished processing (but exit fast if shutdown requested)
                if not exit_event.is_set():
                    for uuid in no_longer_running:
@@ -168,16 +156,16 @@ class SignalHandler:
                        with app.app_context():
                            watch_check_update.send(app_context=app, watch_uuid=uuid)
                        time.sleep(0.01)  # Small yield

                # Update tracking for next iteration
                previous_running_uuids = running_uuids

                # Sleep between polling cycles, but check the exit flag every 0.5 seconds for fast shutdown
                for _ in range(20):  # 20 * 0.5 = 10 seconds total
                    if exit_event.is_set():
                        break
                    time.sleep(0.5)

            except Exception as e:
                logger.error(f"Error in threading polling: {str(e)}")
                # Even during error recovery, check for exit quickly
@@ -185,11 +173,11 @@ class SignalHandler:
                    if exit_event.is_set():
                        break
                    time.sleep(0.5)

        # Check if we're in a pytest environment - if so, be more gentle with logging
        import sys
        in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ

        if not in_pytest:
            logger.info("Queue update thread stopped (threading mode)")

@@ -220,20 +208,20 @@ def handle_watch_update(socketio, **kwargs):

    watch_data = {
        'checking_now': True if watch.get('uuid') in running_uuids else False,
        'error_text': error_texts,
        'event_timestamp': time.time(),
        'fetch_time': watch.get('fetch_time'),
        'has_error': True if error_texts else False,
        'has_favicon': True if watch.get_favicon_filename() else False,
        'history_n': watch.history_n,
        'last_changed': watch.get('last_changed'),
        'last_changed_text': timeago.format(int(watch.last_changed), time.time()) if watch.history_n >= 2 and int(watch.last_changed) > 0 else 'Not yet',
        'last_checked': watch.get('last_checked'),
        'last_checked_text': _jinja2_filter_datetime(watch),
        'notification_muted': True if watch.get('notification_muted') else False,
        'paused': True if watch.get('paused') else False,
        'queued': True if watch.get('uuid') in queue_list else False,
        'unviewed': watch.has_unviewed,
        'uuid': watch.get('uuid'),
    }

    errored_count = 0
@@ -243,15 +231,14 @@ def handle_watch_update(socketio, **kwargs):

    general_stats = {
        'count_errors': errored_count,
        'unread_changes_count': datastore.unread_changes_count
    }

    # Debug what's being emitted
    # logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}")

    # Emit to all clients (no 'broadcast' parameter needed - it's the default behavior)
    socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats})

    # Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues
    logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}")
@@ -264,15 +251,15 @@ def init_socketio(app, datastore):
    """Initialize SocketIO with the main Flask app"""
    import platform
    import sys

    # Platform-specific async_mode selection for better stability
    system = platform.system().lower()
    python_version = sys.version_info

    # Check for SocketIO mode configuration via environment variable
    # Default is 'threading' for best cross-platform compatibility
    socketio_mode = os.getenv('SOCKETIO_MODE', 'threading').lower()

    if socketio_mode == 'gevent':
        # Use gevent mode (higher concurrency but platform limitations)
        try:
@@ -290,7 +277,7 @@ def init_socketio(app, datastore):
        # Invalid mode specified, use default
        async_mode = 'threading'
        logger.warning(f"Invalid SOCKETIO_MODE='{socketio_mode}', using default {async_mode} mode for Socket.IO")

    # Log platform info for debugging
    logger.info(f"Platform: {system}, Python: {python_version.major}.{python_version.minor}, Socket.IO mode: {async_mode}")

@@ -328,6 +315,7 @@ def init_socketio(app, datastore):
        emit_flash=False
    )

    @socketio.on('connect')
    def handle_connect():
        """Handle client connection"""
@@ -405,4 +393,4 @@ def init_socketio(app, datastore):

    logger.info("Socket.IO initialized and attached to main Flask app")
    logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
    return socketio
@@ -1,130 +0,0 @@
"""
RSS/Atom feed processing tools for changedetection.io
"""

from loguru import logger
import re


def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
    """
    Process CDATA sections in HTML/XML content - inline replacement.

    Args:
        html_content: The HTML/XML content to process
        render_anchor_tag_content: Whether to render anchor tag content

    Returns:
        Processed HTML/XML content with CDATA sections replaced inline
    """
    from xml.sax.saxutils import escape as xml_escape
    from .html_tools import html_to_text

    pattern = r'<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'

    def repl(m):
        text = m.group(1)
        return xml_escape(html_to_text(html_content=text, render_anchor_tag_content=render_anchor_tag_content)).strip()

    return re.sub(pattern, repl, html_content)
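# Example of the inline CDATA replacement above (the output is approximate -
# html_to_text drives the exact text that ends up inside the tag):
#
#   cdata_in_document_to_text('<item><![CDATA[<b>50% off</b> today]]></item>')
#   -> '<item>50% off today</item>'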

def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
    """
    Format RSS/Atom feed items in a readable text format using feedparser.

    Converts RSS <item> or Atom <entry> elements to formatted text with:
    - <title> → <h1>Title</h1>
    - <link> → Link: [url]
    - <guid> → Guid: [id]
    - <pubDate> → PubDate: [date]
    - <description> or <content> → Raw HTML content (CDATA and entities automatically handled)

    Args:
        rss_content: The RSS/Atom feed content
        render_anchor_tag_content: Whether to render anchor tag content in descriptions (unused, kept for compatibility)

    Returns:
        Formatted HTML content ready for html_to_text conversion
    """
    try:
        import feedparser
        from xml.sax.saxutils import escape as xml_escape

        # Parse the feed - feedparser handles all RSS/Atom variants, CDATA, entity unescaping, etc.
        feed = feedparser.parse(rss_content)

        formatted_items = []

        # Determine feed type for appropriate labels when fields are missing
        # feedparser sets feed.version to things like 'rss20', 'atom10', etc.
        is_atom = feed.version and 'atom' in feed.version

        for entry in feed.entries:
            item_parts = []

            # Title - feedparser handles CDATA and entity unescaping automatically
            if hasattr(entry, 'title') and entry.title:
                item_parts.append(f'<h1>{xml_escape(entry.title)}</h1>')

            # Link
            if hasattr(entry, 'link') and entry.link:
                item_parts.append(f'Link: {xml_escape(entry.link)}<br>')

            # GUID/ID
            if hasattr(entry, 'id') and entry.id:
                item_parts.append(f'Guid: {xml_escape(entry.id)}<br>')

            # Date - feedparser normalizes all date field names to 'published'
            if hasattr(entry, 'published') and entry.published:
                item_parts.append(f'PubDate: {xml_escape(entry.published)}<br>')

            # Description/Content - feedparser handles CDATA and entity unescaping automatically
            # Only add "Summary:" label for Atom <summary> tags
            content = None
            add_label = False

            if hasattr(entry, 'content') and entry.content:
                # Atom <content> - no label, just content
                content = entry.content[0].value if entry.content[0].value else None
            elif hasattr(entry, 'summary'):
                # Could be RSS <description> or Atom <summary>
                # feedparser maps both to entry.summary
                content = entry.summary if entry.summary else None
                # Only add "Summary:" label for Atom feeds (which use <summary> tag)
                if is_atom:
                    add_label = True

            # Add content with or without label
            if content:
                if add_label:
                    item_parts.append(f'Summary:<br>{content}')
                else:
                    item_parts.append(content)
            else:
                # No content - just show <none>
                item_parts.append('<none>')

            # Join all parts of this item
            if item_parts:
                formatted_items.append('\n'.join(item_parts))

        # Wrap each item in a div with classes (first, last, item-N)
        items_html = []
        total_items = len(formatted_items)
        for idx, item in enumerate(formatted_items):
            classes = ['rss-item']
            if idx == 0:
                classes.append('first')
            if idx == total_items - 1:
                classes.append('last')
            classes.append(f'item-{idx + 1}')

            class_str = ' '.join(classes)
            items_html.append(f'<div class="{class_str}">{item}</div>')
        return '<html><body>\n' + "\n<br><br>".join(items_html) + '\n</body></html>'

    except Exception as e:
        logger.warning(f"Error formatting RSS items: {str(e)}")
        # Fall back to original content
        return rss_content
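A quick way to exercise format_rss_items() by hand; the feed below is made up, and the import path is an assumption about where this (now removed) module lived:

from changedetectionio.rss_tools import format_rss_items  # assumed path

feed = """<?xml version="1.0"?>
<rss version="2.0"><channel>
  <item>
    <title>Widget restocked</title>
    <link>https://example.com/widget</link>
    <description><![CDATA[<b>Back in stock</b> - order now]]></description>
  </item>
</channel></rss>"""

html = format_rss_items(feed)
# Expect something like:
# <html><body>
# <div class="rss-item first last item-1"><h1>Widget restocked</h1> ... </div>
# </body></html>
print(html)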
@@ -11,29 +11,32 @@ set -e
|
||||
|
||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||
|
||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -n 30 --dist load tests/test_*.py
|
||||
find tests/test_*py -type f|while read test_name
|
||||
do
|
||||
echo "TEST RUNNING $test_name"
|
||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name
|
||||
done
|
||||
|
||||
#time pytest -n auto --dist loadfile -vv --tb=long tests/test_*.py
|
||||
echo "RUNNING WITH BASE_URL SET"
|
||||
|
||||
# Now re-run some tests with BASE_URL enabled
|
||||
# Re #65 - Ability to include a link back to the installation, in the notification.
|
||||
export BASE_URL="https://really-unique-domain.io"
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv --maxfail=1 tests/test_notification.py
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
|
||||
|
||||
|
||||
# Re-run with HIDE_REFERER set - could affect login
|
||||
export HIDE_REFERER=True
|
||||
pytest -vv -s --maxfail=1 tests/test_access_control.py
|
||||
pytest tests/test_access_control.py
|
||||
|
||||
# Re-run a few tests that will trigger brotli based storage
|
||||
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
|
||||
pytest -vv -s --maxfail=1 tests/test_access_control.py
|
||||
pytest tests/test_access_control.py
|
||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
|
||||
pytest -vv -s --maxfail=1 tests/test_backend.py
|
||||
pytest -vv -s --maxfail=1 tests/test_rss.py
|
||||
pytest -vv -s --maxfail=1 tests/test_unique_lines.py
|
||||
pytest tests/test_backend.py
|
||||
pytest tests/test_rss.py
|
||||
pytest tests/test_unique_lines.py
|
||||
|
||||
# Try high concurrency
|
||||
FETCH_WORKERS=130 pytest tests/test_history_consistency.py -v -l
|
||||
|
||||
@@ -6,8 +6,6 @@
|
||||
|
||||
# enable debug
|
||||
set -x
|
||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
|
||||
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
|
||||
|
||||
# A extra browser is configured, but we never chose to use it, so it should NOT show in the logs
|
||||
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url'
|
||||
|
||||
@@ -19,13 +19,12 @@ docker run --network changedet-network -d \
|
||||
-v `pwd`/tests/proxy_list/squid-passwords.txt:/etc/squid3/passwords \
|
||||
ubuntu/squid:4.13-21.10_edge
|
||||
|
||||
sleep 5
|
||||
|
||||
## 2nd test actually choose the preferred proxy from proxies.json
|
||||
# This will force a request via "proxy-two"
|
||||
docker run --network changedet-network \
|
||||
-v `pwd`/tests/proxy_list/proxies.json-example:/tmp/proxies.json \
|
||||
-v `pwd`/tests/proxy_list/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
|
||||
test-changedetectionio \
|
||||
bash -c 'cd changedetectionio && pytest -s tests/proxy_list/test_multiple_proxy.py --datastore-path /tmp'
|
||||
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_multiple_proxy.py'
|
||||
|
||||
set +e
|
||||
echo "- Looking for chosen.changedetection.io request in squid-one - it should NOT be here"
|
||||
@@ -49,10 +48,8 @@ fi
|
||||
# Test the UI configurable proxies
docker run --network changedet-network \
  test-changedetectionio \
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py --datastore-path /tmp'
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py'

# Give squid proxies a moment to flush their logs
sleep 2

# Should see a request for one.changedetection.io in there
echo "- Looking for .changedetection.io request in squid-custom"
@@ -66,10 +63,7 @@ fi
# Test "no-proxy" option
docker run --network changedet-network \
  test-changedetectionio \
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py --datastore-path /tmp'

# Give squid proxies a moment to flush their logs
sleep 2
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py'

# We need to handle grep returning 1
set +e
@@ -86,8 +80,6 @@ for c in $(echo "squid-one squid-two squid-custom"); do
fi
done

echo "docker ps output"
docker ps

docker kill squid-one squid-two squid-custom

@@ -96,19 +88,19 @@ docker kill squid-one squid-two squid-custom
# Requests
docker run --network changedet-network \
  test-changedetectionio \
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py'

# Playwright
docker run --network changedet-network \
  test-changedetectionio \
  bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
  bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'

# Puppeteer fast
docker run --network changedet-network \
  test-changedetectionio \
  bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
  bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'

# Selenium
docker run --network changedet-network \
  test-changedetectionio \
  bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
  bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py'

@@ -5,23 +5,22 @@ set -e
# enable debug
set -x

docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network

# SOCKS5 related - start simple Socks5 proxy server
# SOCKSTEST=xyz should show in the logs of this service to confirm it fetched
docker run --network changedet-network -d --hostname socks5proxy --rm --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy
docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth -e REQUIRE_AUTH=false serjs/go-socks5-proxy
docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth serjs/go-socks5-proxy

echo "---------------------------------- SOCKS5 -------------------"
# SOCKS5 related - test from proxies.json
docker run --network changedet-network \
  -v `pwd`/tests/proxy_socks5/proxies.json-example:/tmp/proxies.json \
  -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
  --rm \
  -e "FLASK_SERVER_NAME=cdio" \
  --hostname cdio \
  -e "SOCKSTEST=proxiesjson" \
  test-changedetectionio \
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py --datastore-path /tmp'
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'

# SOCKS5 related - by manually entering in UI
docker run --network changedet-network \
@@ -30,18 +29,18 @@ docker run --network changedet-network \
  --hostname cdio \
  -e "SOCKSTEST=manual" \
  test-changedetectionio \
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py --datastore-path /tmp'
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy.py'

# SOCKS5 related - test from proxies.json via playwright - NOTE: Playwright doesn't support an authenticating proxy
docker run --network changedet-network \
  -e "SOCKSTEST=manual-playwright" \
  --hostname cdio \
  -e "FLASK_SERVER_NAME=cdio" \
  -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/tmp/proxies.json \
  -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
  -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
  --rm \
  test-changedetectionio \
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py --datastore-path /tmp'
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004 -s tests/proxy_socks5/test_socks5_proxy_sources.py'

echo "socks5 server logs"
docker logs socks5proxy

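For orientation, a rough sketch of the proxies.json shape these tests mount. The key/label/url layout here is an assumption based on how the UI names proxies, not a copy of tests/proxy_socks5/proxies.json-example; the credentials, hostname and port do match the socks5proxy container started above:

    import json

    # Assumed structure: a mapping of proxy key -> {label, url}.
    # The actual keys in proxies.json-example may differ.
    proxies = {
        "socks5proxy": {
            "label": "socks5proxy",
            "url": "socks5://proxy_user123:proxy_pass123@socks5proxy:1080",
        }
    }

    # The test mounts this file at /tmp/proxies.json (the --datastore-path used above)
    with open("/tmp/proxies.json", "w") as f:
        json.dump(proxies, f, indent=2)
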
18 changedetectionio/safe_jinja.py Normal file
@@ -0,0 +1,18 @@
"""
Safe Jinja2 render with max payload sizes

See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
"""

import jinja2.sandbox
import typing as t
import os

JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))


def render(template_str, **args: t.Any) -> str:
    jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension'])
    output = jinja2_env.from_string(template_str).render(args)
    return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
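A minimal usage sketch of the new helper, assuming the module layout above (changedetectionio/safe_jinja.py). The ImmutableSandboxedEnvironment blocks access to unsafe attributes, and output is capped at JINJA2_MAX_RETURN_PAYLOAD_SIZE characters (10240 KB by default, overridable via the JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB environment variable):

    from changedetectionio import safe_jinja

    # Normal rendering works as with plain Jinja2
    print(safe_jinja.render("Hello {{ name }}", name="world"))  # -> Hello world

    # Sandbox escape attempts raise jinja2.exceptions.SecurityError, e.g.
    # safe_jinja.render("{{ ''.__class__.__mro__ }}")

    # Oversized output is silently truncated to the configured limit
    big = safe_jinja.render("{% for i in range(1000000) %}x{% endfor %}")
    assert len(big) <= safe_jinja.JINJA2_MAX_RETURN_PAYLOAD_SIZE
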
@@ -29,7 +29,7 @@ $(document).ready(function () {
    $(this).text(new Date($(this).data("utc")).toLocaleString());
})

const timezoneInput = $('#application-scheduler_timezone_default');
const timezoneInput = $('#application-timezone');
if(timezoneInput.length) {
    const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone;
    if (!timezoneInput.val().trim()) {

@@ -14,10 +14,10 @@ $(document).ready(function () {
    e.preventDefault();

    data = {
        notification_urls: $('textarea.notification-urls').val(),
        notification_title: $('input.notification-title').val(),
        notification_body: $('textarea.notification-body').val(),
        notification_format: $('select.notification-format').val(),
        notification_body: $('#notification_body').val(),
        notification_format: $('#notification_format').val(),
        notification_title: $('#notification_title').val(),
        notification_urls: $('.notification-urls').val(),
        tags: $('#tags').val(),
        window_url: window.location.href,
    }

@@ -159,7 +159,6 @@
    // Return the current request in case it's needed
    return requests[namespace];
};

})(jQuery);



@@ -2,13 +2,6 @@

$(document).ready(function () {

    function reapplyTableStripes() {
        $('.watch-table tbody tr').each(function(index) {
            $(this).removeClass('pure-table-odd pure-table-even');
            $(this).addClass(index % 2 === 0 ? 'pure-table-odd' : 'pure-table-even');
        });
    }

    function bindSocketHandlerButtonsEvents(socket) {
        $('.ajax-op').on('click.socketHandlerNamespace', function (e) {
            e.preventDefault();
@@ -55,9 +48,9 @@ $(document).ready(function () {
    // Connect to Socket.IO on the same host/port, with path from template
    const socket = io({
        path: socketio_url, // This will be the path prefix like "/app/socket.io" from the template
        transports: ['websocket', 'polling'],
        reconnectionDelay: 3000,
        reconnectionAttempts: 25
        transports: ['polling', 'websocket'], // Try WebSocket but fall back to polling
        reconnectionDelay: 1000,
        reconnectionAttempts: 15
    });

    // Connection status logging
@@ -105,60 +98,49 @@ $(document).ready(function () {
        console.log(`Stub handler for notification_event ${data.watch_uuid}`)
    });

    socket.on('watch_deleted', function (data) {
        $('tr[data-watch-uuid="' + data.uuid + '"] td').fadeOut(500, function () {
            $(this).closest('tr').remove();
            reapplyTableStripes();
        });
    });

    // So that the favicon is only updated when the server has written the scraped favicon to disk.
    socket.on('watch_bumped_favicon', function (watch) {
        const $watchRow = $(`tr[data-watch-uuid="${watch.uuid}"]`);
        if ($watchRow.length) {
            $watchRow.addClass('has-favicon');
            // Because the event could be emitted from a process that is outside the app context, url_for() might not work.
            // Let's use url_for at template generation time to give us a PLACEHOLDER instead
            let favicon_url = favicon_baseURL.replace('/PLACEHOLDER', `/${watch.uuid}?cache=${watch.event_timestamp}`);
            console.log(`Setting favicon for UUID - ${watch.uuid} - ${favicon_url}`);
            $('img.favicon', $watchRow).attr('src', favicon_url);
        }
    })

    socket.on('general_stats_update', function (general_stats) {
        // Tabs at bottom of list
        $('#watch-table-wrapper').toggleClass("has-unread-changes", general_stats.unread_changes_count !==0)
        $('#watch-table-wrapper').toggleClass("has-error", general_stats.count_errors !== 0)
        $('#post-list-with-errors a').text(`With errors (${ new Intl.NumberFormat(navigator.language).format(general_stats.count_errors) })`);
        $('#unread-tab-counter').text(new Intl.NumberFormat(navigator.language).format(general_stats.unread_changes_count));
    });
    // Listen for periodically emitted watch data
    console.log('Adding watch_update event listener');

    socket.on('watch_update', function (data) {
        const watch = data.watch;
        const general_stats = data.general_stats;

        // Log the entire watch object for debugging
        console.log('!!! WATCH UPDATE EVENT RECEIVED !!!');
        console.log(`${watch.event_timestamp} - Watch update ${watch.uuid} - Checking now - ${watch.checking_now} - UUID in URL ${window.location.href.includes(watch.uuid)}`);
        console.log('Watch data:', watch);
        console.log('General stats:', general_stats);

        // Updating watch table rows
        const $watchRow = $('tr[data-watch-uuid="' + watch.uuid + '"]');
        console.log('Found watch row elements:', $watchRow.length);


        if ($watchRow.length) {
            $($watchRow).toggleClass('checking-now', watch.checking_now);
            $($watchRow).toggleClass('queued', watch.queued);
            $($watchRow).toggleClass('unviewed', watch.unviewed);
            $($watchRow).toggleClass('has-error', watch.has_error);
            $($watchRow).toggleClass('has-favicon', watch.has_favicon);
            $($watchRow).toggleClass('notification_muted', watch.notification_muted);
            $($watchRow).toggleClass('paused', watch.paused);
            $($watchRow).toggleClass('single-history', watch.history_n === 1);
            $($watchRow).toggleClass('multiple-history', watch.history_n >= 2);

            $('td.title-col .error-text', $watchRow).html(watch.error_text)

            $('td.last-changed', $watchRow).text(watch.last_changed_text)

            $('td.last-checked .innertext', $watchRow).text(watch.last_checked_text)
            $('td.last-checked', $watchRow).data('timestamp', watch.last_checked).data('fetchduration', watch.fetch_time);
            $('td.last-checked', $watchRow).data('eta_complete', watch.last_checked + watch.fetch_time);


            console.log('Updated UI for watch:', watch.uuid);
        }

        // Tabs at bottom of list
        $('#post-list-mark-views').toggleClass("has-unviewed", general_stats.has_unviewed);
        $('#post-list-with-errors').toggleClass("has-error", general_stats.count_errors !== 0)
        $('#post-list-with-errors a').text(`With errors (${ general_stats.count_errors })`);

        $('body').toggleClass('checking-now', watch.checking_now && window.location.href.includes(watch.uuid));
    });


@@ -16,12 +16,6 @@ $(function () {
        $('#op_extradata').val(prompt("Enter a tag name"));
    });


    $('.history-link').click(function (e) {
        // In case they click 'back' in the browser, it should be removed.
        $(this).closest('tr').removeClass('unviewed');
    });

    $('.with-share-link > *').click(function () {
        $("#copied-clipboard").remove();


@@ -51,7 +51,6 @@ $(document).ready(function () {
        $('#notification_body').val('');
        $('#notification_format').val('System default');
        $('#notification_urls').val('');
        $('#notification_muted_none').prop('checked', true); // in the case of a ternary field
        e.preventDefault();
    });
    $("#notification-token-toggle").click(function (e) {

File diff suppressed because one or more lines are too long
@@ -3,16 +3,15 @@
"version": "0.0.3",
"description": "",
"main": "index.js",
"engines": {
    "node": ">=18.0.0"
},
"scripts": {
    "watch": "sass --watch scss:. --style=compressed --no-source-map",
    "build": "sass scss:. --style=compressed --no-source-map"
    "watch": "node-sass -w scss -o .",
    "build": "node-sass scss -o ."
},
"author": "Leigh Morresi / Web Technologies s.r.o.",
"license": "Apache",
"author": "",
"license": "ISC",
"dependencies": {
    "sass": "^1.77.8"
    "node-sass": "^7.0.0",
    "tar": "^6.1.9",
    "trim-newlines": "^3.0.1"
}
}

@@ -1,4 +1,4 @@
@use "parts/variables";
@import "parts/_variables.scss";

#diff-ui {


@@ -64,17 +64,17 @@ body.proxy-check-active {
#recommended-proxy {
    display: grid;
    gap: 2rem;
    padding-bottom: 1em;

    @media (min-width: 991px) {
        grid-template-columns: repeat(2, 1fr);
    }
    @media (min-width: 991px) {
        grid-template-columns: repeat(2, 1fr);
    }

    > div {
        border: 1px #aaa solid;
        border-radius: 4px;
        padding: 1em;
    }

    padding-bottom: 1em;
}

#extra-proxies-setting {

@@ -1,92 +0,0 @@
.watch-table {
    &.favicon-not-enabled {
        tr {
            .favicon {
                display: none;
            }
        }
    }

    tr {
        /* make the icons and the text inline-ish */
        td.inline.title-col {
            .flex-wrapper {
                display: flex;
                align-items: center;
                gap: 4px;
            }
        }
    }


    td,
    th {
        vertical-align: middle;
    }

    tr.has-favicon {
        &.unviewed {
            img.favicon {
                opacity: 1.0 !important;
            }
        }
    }

    .status-icons {
        white-space: nowrap;
        display: flex;
        align-items: center; /* Vertical centering */
        gap: 4px; /* Space between image and text */
        > * {
            vertical-align: middle;
        }
    }
}

.title-col {
    /* Optional, for spacing */
    padding: 10px;
}

.title-wrapper {
    display: flex;
    align-items: center; /* Vertical centering */
    gap: 10px; /* Space between image and text */
}

/* Make sure .title-col-inner doesn't collapse or misalign */
.title-col-inner {
    display: inline-block;
    vertical-align: middle;
}

/* favicon styling */
.watch-table {
    img.favicon {
        vertical-align: middle;
        max-width: 25px;
        max-height: 25px;
        height: 25px;
        padding-right: 4px;
    }

    // Reserved for future use
    /* &.thumbnail-type-screenshot {
        tr.has-favicon {
            td.inline.title-col {
                img.thumbnail {
                    background-color: #fff; !* fallback bg for SVGs without bg *!
                    border-radius: 4px; !* subtle rounded corners *!
                    border: 1px solid #ddd; !* light border for contrast *!
                    box-shadow: 0 2px 6px rgba(0, 0, 0, 0.15); !* soft shadow *!
                    filter: contrast(1.05) saturate(1.1) drop-shadow(0 0 0.5px rgba(0, 0, 0, 0.2));
                    object-fit: cover; !* crop/fill if needed *!
                    opacity: 0.8;
                    max-width: 30px;
                    max-height: 30px;
                    height: 30px;
                }
            }
        }
    }*/
}
@@ -1,4 +1,4 @@
@use "minitabs";
@import "minitabs";

body.preview-text-enabled {


@@ -17,6 +17,15 @@ body.checking-now {
    position: fixed;
}

#post-list-buttons {
    #post-list-with-errors.has-error {
        display: inline-block !important;
    }
    #post-list-mark-views.has-unviewed {
        display: inline-block !important;
    }
}




@@ -1,178 +0,0 @@
$grid-col-checkbox: 20px;
$grid-col-watch: 100px;
$grid-gap: 0.5rem;


@media (max-width: 767px) {

    /*
    Max width before this PARTICULAR table gets nasty
    This query will take effect for any screen smaller than 760px
    and also iPads specifically.
    */
    .watch-table {
        /* make headings work on mobile */
        thead {
            display: block;

            tr {
                th {
                    display: inline-block;
                    // Hide the "Last" text for smaller screens
                    @media (max-width: 768px) {
                        .hide-on-mobile {
                            display: none;
                        }
                    }
                }
            }

            .empty-cell {
                display: none;
            }
        }


        .last-checked {
            margin-left: calc($grid-col-checkbox + $grid-gap);

            > span {
                vertical-align: middle;
            }
        }

        .last-changed {
            margin-left: calc($grid-col-checkbox + $grid-gap);
        }

        .last-checked::before {
            color: var(--color-text);
            content: "Last Checked ";
        }

        .last-changed::before {
            color: var(--color-text);
            content: "Last Changed ";
        }

        /* Force table to not be like tables anymore */
        td.inline {
            display: inline-block;
        }

        .pure-table td,
        .pure-table th {
            border: none;
        }

        td {
            /* Behave like a "row" */
            border: none;
            border-bottom: 1px solid var(--color-border-watch-table-cell);
            vertical-align: middle;

            &:before {
                /* Top/left values mimic padding */
                top: 6px;
                left: 6px;
                width: 45%;
                padding-right: 10px;
                white-space: nowrap;
            }
        }

        &.pure-table-striped {
            tr {
                background-color: var(--color-table-background);
            }

            tr:nth-child(2n-1) {
                background-color: var(--color-table-stripe);
            }

            tr:nth-child(2n-1) td {
                background-color: inherit;
            }
        }
    }
}

@media (max-width: 767px) {
    .watch-table {
        tbody {
            tr {
                padding-bottom: 10px;
                padding-top: 10px;
                display: grid;
                grid-template-columns: $grid-col-checkbox 1fr $grid-col-watch;
                grid-template-rows: auto auto auto auto;
                gap: $grid-gap;

                .counter-i {
                    display: none;
                }

                td.checkbox-uuid {
                    display: grid;
                    place-items: center;
                }

                td.inline {
                    /* display: block !important;;*/
                }

                > td {
                    border-bottom: none;
                }

                > td.title-col {
                    grid-column: 1 / -1;
                    grid-row: 1;
                    .watch-title {
                        font-size: 0.92rem;
                    }
                    .link-spread {
                        display: none;
                    }
                }

                > td.last-checked {
                    grid-column: 1 / -1;
                    grid-row: 2;
                }

                > td.last-changed {
                    grid-column: 1 / -1;
                    grid-row: 3;
                }

                > td.checkbox-uuid {
                    grid-column: 1;
                    grid-row: 4;
                }

                > td.buttons {
                    grid-column: 2;
                    grid-row: 4;
                    display: flex;
                    align-items: center;
                    justify-content: flex-start;
                }

                > td.watch-controls {
                    grid-column: 3;
                    grid-row: 4;
                    display: grid;
                    place-items: center;

                    a img {
                        padding: 10px;
                    }
                }
            }
        }
    }
    .pure-table td {
        padding: 3px !important;
    }
}
@@ -7,7 +7,6 @@
&.unviewed {
    font-weight: bold;
}

color: var(--color-watch-table-row-text);
}

@@ -49,17 +48,17 @@
/* Row with 'checking-now' */
tr.checking-now {
    td:first-child {
        position: relative;
        position: relative;
    }

    td:first-child::before {
        content: "";
        position: absolute;
        top: 0;
        bottom: 0;
        left: 0;
        width: 3px;
        background-color: #293eff;
        content: "";
        position: absolute;
        top: 0;
        bottom: 0;
        left: 0;
        width: 3px;
        background-color: #293eff;
    }

    td.last-checked {
@@ -110,7 +109,6 @@

tr.has-error {
    color: var(--color-watch-table-error);

    .error-text {
        display: block !important;
    }
@@ -121,50 +119,12 @@
        display: inline-block !important;
    }
}

tr.multiple-history {
    a.history-link {
        display: inline-block !important;
    }
}


}

#watch-table-wrapper {
    /* general styling */
    #post-list-buttons {
        text-align: right;
        padding: 0px;
        margin: 0px;

        li {
            display: inline-block;
        }

        a {
            border-top-left-radius: initial;
            border-top-right-radius: initial;
            border-bottom-left-radius: 5px;
            border-bottom-right-radius: 5px;
        }
    }

    /* post list dynamically on/off stuff */

    &.has-error {
        #post-list-buttons {
            #post-list-with-errors {
                display: inline-block !important;
            }
        }
    }

    &.has-unread-changes {
        #post-list-buttons {
            #post-list-unread, #post-list-mark-views, #post-list-unread {
                display: inline-block !important;
            }
        }
    }
}

@@ -1,114 +0,0 @@

// Ternary radio button group component
.ternary-radio-group {
    display: flex;
    gap: 0;
    border: 1px solid var(--color-grey-750);
    border-radius: 4px;
    overflow: hidden;
    width: fit-content;
    background: var(--color-background);

    .ternary-radio-option {
        position: relative;
        cursor: pointer;
        margin: 0;
        display: flex;
        align-items: center;

        input[type="radio"] {
            position: absolute;
            opacity: 0;
            width: 0;
            height: 0;
        }

        .ternary-radio-label {
            padding: 8px 16px;
            background: var(--color-grey-900);
            border: none;
            border-right: 1px solid var(--color-grey-750);
            font-size: 13px;
            font-weight: 500;
            color: var(--color-text);
            transition: all 0.2s ease;
            cursor: pointer;
            display: block;
            text-align: center;
        }

        &:last-child .ternary-radio-label {
            border-right: none;
        }

        input:checked + .ternary-radio-label {
            background: var(--color-link);
            color: var(--color-text-button);
            font-weight: 600;

            &.ternary-default {
                background: var(--color-grey-600);
                color: var(--color-text-button);
            }

            &:hover {
                background: #1a7bc4;

                &.ternary-default {
                    background: var(--color-grey-500);
                }
            }
        }

        &:hover .ternary-radio-label {
            background: var(--color-grey-800);
        }
    }

    @media (max-width: 480px) {
        width: 100%;

        .ternary-radio-label {
            flex: 1;
            min-width: auto;
        }
    }
}

// Standard radio button styling
input[type="radio"].pure-radio:checked + label,
input[type="radio"].pure-radio:checked {
    background: var(--color-link);
    color: var(--color-text-button);
}

html[data-darkmode="true"] {
    .ternary-radio-group {
        .ternary-radio-option {
            .ternary-radio-label {
                background: var(--color-grey-350);
            }

            &:hover .ternary-radio-label {
                background: var(--color-grey-400);
            }

            input:checked + .ternary-radio-label {
                background: var(--color-link);
                color: var(--color-text-button);

                &.ternary-default {
                    background: var(--color-grey-600);
                }

                &:hover {
                    background: #1a7bc4;

                    &.ternary-default {
                        background: var(--color-grey-500);
                    }
                }
            }
        }
    }
}
@@ -2,25 +2,21 @@
 * -- BASE STYLES --
 */

@use "parts/variables";
@use "parts/arrows";
@use "parts/browser-steps";
@use "parts/extra_proxies";
@use "parts/extra_browsers";
@use "parts/pagination";
@use "parts/spinners";
@use "parts/darkmode";
@use "parts/menu";
@use "parts/love";
@use "parts/preview_text_filter";
@use "parts/watch_table";
@use "parts/watch_table-mobile";
@use "parts/edit";
@use "parts/conditions_table";
@use "parts/lister_extra";
@use "parts/socket";
@use "parts/visualselector";
@use "parts/widgets";
@import "parts/_arrows";
@import "parts/_browser-steps";
@import "parts/_extra_proxies";
@import "parts/_extra_browsers";
@import "parts/_pagination";
@import "parts/_spinners";
@import "parts/_variables";
@import "parts/_darkmode";
@import "parts/_menu";
@import "parts/_love";
@import "parts/preview_text_filter";
@import "parts/_watch_table";
@import "parts/_edit";
@import "parts/_conditions_table";
@import "parts/_socket";

body {
    color: var(--color-text);
@@ -188,21 +184,33 @@ code {
    @extend .inline-tag;
}

@media (min-width: 768px) {
    .box {
        margin: 0 1em !important;
    }
}

.box {
    max-width: 100%;
    margin: 0 0.3em;
    margin: 0 1em;
    flex-direction: column;
    display: flex;
    justify-content: center;
}


#post-list-buttons {
    text-align: right;
    padding: 0px;
    margin: 0px;

    li {
        display: inline-block;
    }

    a {
        border-top-left-radius: initial;
        border-top-right-radius: initial;
        border-bottom-left-radius: 5px;
        border-bottom-right-radius: 5px;
    }
}


body:after {
    content: "";
    background: linear-gradient(130deg, var(--color-background-gradient-first), var(--color-background-gradient-second) 41.07%, var(--color-background-gradient-third) 84.05%);
@@ -344,7 +352,7 @@ label {
    }
}

.grey-form-border {
#notification-customisation {
    border: 1px solid var(--color-border-notification);
    padding: 0.5rem;
    border-radius: 5px;
@@ -686,6 +694,114 @@ footer {
    width: 100%;
}

/*
Max width before this PARTICULAR table gets nasty
This query will take effect for any screen smaller than 760px
and also iPads specifically.
*/
.watch-table {
    /* make headings work on mobile */
    thead {
        display: block;
        tr {
            th {
                display: inline-block;
                // Hide the "Last" text for smaller screens
                @media (max-width: 768px) {
                    .hide-on-mobile {
                        display: none;
                    }
                }
            }
        }
        .empty-cell {
            display: none;
        }
    }

    /* Force table to not be like tables anymore */
    tbody {
        td,
        tr {
            display: block;
        }
    }

    tbody {
        tr {
            display: flex;
            flex-wrap: wrap;

            // The third child of each row will take up the remaining space
            // This is useful for the URL column, which should expand to fill the remaining space
            :nth-child(3) {
                flex-grow: 1;
            }
            // The last three children (from the end) of each row will take up the full width
            // This is useful for the "Last Checked", "Last Changed", and the action buttons columns, which should each take up the full width
            :nth-last-child(-n+3) {
                flex-basis: 100%;
            }
        }
    }

    .last-checked {
        >span {
            vertical-align: middle;
        }
    }

    .last-checked::before {
        color: var(--color-last-checked);
        content: "Last Checked ";
    }

    .last-changed::before {
        color: var(--color-last-checked);
        content: "Last Changed ";
    }

    /* Force table to not be like tables anymore */
    td.inline {
        display: inline-block;
    }

    .pure-table td,
    .pure-table th {
        border: none;
    }

    td {
        /* Behave like a "row" */
        border: none;
        border-bottom: 1px solid var(--color-border-watch-table-cell);
        vertical-align: middle;

        &:before {
            /* Top/left values mimic padding */
            top: 6px;
            left: 6px;
            width: 45%;
            padding-right: 10px;
            white-space: nowrap;
        }
    }

    &.pure-table-striped {
        tr {
            background-color: var(--color-table-background);
        }

        tr:nth-child(2n-1) {
            background-color: var(--color-table-stripe);
        }

        tr:nth-child(2n-1) td {
            background-color: inherit;
        }
    }

}
}

.pure-table {
@@ -940,6 +1056,8 @@ ul {
    }
}

@import "parts/_visualselector";

#webdriver_delay {
    width: 5em;
}
@@ -1057,23 +1175,17 @@ ul {


#quick-watch-processor-type {
    ul#processor {
        color: #fff;
        padding-left: 0px;
    color: #fff;
    ul {
        padding: 0.3rem;
        li {
            list-style: none;
            font-size: 0.9rem;
            display: grid;
            grid-template-columns: auto 1fr;
            align-items: center;
            gap: 0.5rem;
            margin-bottom: 0.5rem;
            > * {
                display: inline-block;
            }
        }
    }
    label, input {
        padding: 0;
        margin: 0;
    }
}

.restock-label {
@@ -1112,12 +1224,11 @@ ul {
}

#realtime-conn-error {
    position: fixed;
    position: absolute;
    bottom: 0;
    left: 0;
    left: 30px;
    background: var(--color-warning);
    padding: 10px;
    font-size: 0.8rem;
    color: #fff;
    opacity: 0.8;
}

File diff suppressed because one or more lines are too long
@@ -1,13 +1,11 @@
from changedetectionio.strtobool import strtobool

from changedetectionio.validate_url import is_safe_valid_url

from flask import (
    flash
)

from .html_tools import TRANSLATE_WHITESPACE_TABLE
from .model import App, Watch, USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
from . model import App, Watch
from copy import deepcopy, copy
from os import path, unlink
from threading import Lock
@@ -15,7 +13,6 @@ import json
import os
import re
import secrets
import sys
import threading
import time
import uuid as uuid_builder
@@ -42,24 +39,17 @@ class ChangeDetectionStore:
    needs_write_urgent = False

    __version_check = True
    save_data_thread = None

    def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
        # Should only be active for docker
        # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)

        self.__data = App.model()
        self.datastore_path = datastore_path
        self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
        logger.info(f"Datastore path is '{self.json_store_path}'")
        self.needs_write = False
        self.start_time = time.time()
        self.stop_thread = False
        self.reload_state(datastore_path=datastore_path, include_default_watches=include_default_watches, version_tag=version_tag)


    def reload_state(self, datastore_path, include_default_watches, version_tag):
        logger.info(f"Datastore path is '{datastore_path}'")

        self.__data = App.model()
        self.datastore_path = datastore_path
        self.json_store_path = os.path.join(self.datastore_path, "url-watches.json")
        # Base definition for all watchers
        # deepcopy part of #569 - not sure why its needed exactly
        self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
@@ -128,12 +118,14 @@ class ChangeDetectionStore:
        test_list = self.proxy_list

        # Helper to remove password protection
        password_reset_lockfile = os.path.join(self.datastore_path, "removepassword.lock")
        password_reset_lockfile = "{}/removepassword.lock".format(self.datastore_path)
        if path.isfile(password_reset_lockfile):
            self.__data['settings']['application']['password'] = False
            unlink(password_reset_lockfile)

        if not 'app_guid' in self.__data:
            import os
            import sys
            if "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ:
                self.__data['app_guid'] = "test-" + str(uuid_builder.uuid4())
            else:
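The removepassword.lock handling above gives operators a filesystem escape hatch: dropping that file into the datastore clears the stored password on the next reload, and the store deletes the lockfile afterwards. A minimal sketch of triggering it (the /datastore path is the Docker default from __init__ above; adjust for your deployment):

    from pathlib import Path

    # Create the lockfile; on the next datastore reload the application
    # sets the stored application password to False and unlinks this file.
    Path("/datastore", "removepassword.lock").touch()
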
@@ -152,10 +144,7 @@ class ChangeDetectionStore:
        self.needs_write = True

        # Finally start the thread that will manage periodic data saves to JSON
        # Only start if thread is not already running (reload_state might be called multiple times)
        if not self.save_data_thread or not self.save_data_thread.is_alive():
            self.save_data_thread = threading.Thread(target=self.save_datastore)
            self.save_data_thread.start()
        save_data_thread = threading.Thread(target=self.save_datastore).start()

    def rehydrate_entity(self, uuid, entity, processor_override=None):
        """Set the dict back to the dict Watch object"""
@@ -214,13 +203,14 @@ class ChangeDetectionStore:
        return seconds

    @property
    def unread_changes_count(self):
        unread_changes_count = 0
    def has_unviewed(self):
        if not self.__data.get('watching'):
            return None

        for uuid, watch in self.__data['watching'].items():
            if watch.history_n >= 2 and watch.viewed == False:
                unread_changes_count += 1

        return unread_changes_count
                return True
        return False

    @property
    def data(self):
@@ -240,37 +230,26 @@ class ChangeDetectionStore:
        d['settings']['application']['active_base_url'] = active_base_url.strip('" ')
        return d

    from pathlib import Path

    def delete_path(self, path: Path):
        import shutil
        """Delete a file or directory tree, including the path itself."""
        if not path.exists():
            return
        if path.is_file() or path.is_symlink():
            path.unlink(missing_ok=True)  # deletes a file or symlink
        else:
            shutil.rmtree(path, ignore_errors=True)  # deletes dir *and* its contents

    # Delete a single watch by UUID
    def delete(self, uuid):
        import pathlib
        import shutil

        with self.lock:
            if uuid == 'all':
                self.__data['watching'] = {}
                time.sleep(1)  # Mainly used for testing to allow all items to flush before running next test

                # GitHub #30 also delete history records
                for uuid in self.data['watching']:
                    path = pathlib.Path(
                        os.path.join(self.datastore_path, uuid))
                    path = pathlib.Path(os.path.join(self.datastore_path, uuid))
                    if os.path.exists(path):
                        self.delete(uuid)
                        shutil.rmtree(path)

            else:
                path = pathlib.Path(os.path.join(self.datastore_path, uuid))
                if os.path.exists(path):
                    self.delete_path(path)

                    shutil.rmtree(path)
                del self.data['watching'][uuid]

            self.needs_write_urgent = True
@@ -284,6 +263,11 @@ class ChangeDetectionStore:
        extras = deepcopy(self.data['watching'][uuid])
        new_uuid = self.add_watch(url=url, extras=extras)
        watch = self.data['watching'][new_uuid]

        if self.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
            # Because it will be recalculated on the next fetch
            self.data['watching'][new_uuid]['title'] = None

        return new_uuid

    def url_exists(self, url):
@@ -325,6 +309,7 @@ class ChangeDetectionStore:
            'browser_steps',
            'css_filter',
            'extract_text',
            'extract_title_as_title',
            'headers',
            'ignore_text',
            'include_filters',
@@ -339,7 +324,6 @@ class ChangeDetectionStore:
            'title',
            'trigger_text',
            'url',
            'use_page_title_in_list',
            'webdriver_js_execute_code',
        ]:
            if res.get(k):
@@ -353,10 +337,9 @@ class ChangeDetectionStore:
                logger.error(f"Error fetching metadata for shared watch link {url} {str(e)}")
                flash("Error fetching metadata for {}".format(url), 'error')
                return False

        if not is_safe_valid_url(url):
            flash('Watch protocol is not permitted or invalid URL format', 'error')

        from .model.Watch import is_safe_url
        if not is_safe_url(url):
            flash('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX', 'error')
            return None

        if tag and type(tag) == str:
@@ -403,9 +386,9 @@ class ChangeDetectionStore:
        return new_uuid

    def visualselector_data_is_ready(self, watch_uuid):
        output_path = os.path.join(self.datastore_path, watch_uuid)
        screenshot_filename = os.path.join(output_path, "last-screenshot.png")
        elements_index_filename = os.path.join(output_path, "elements.deflate")
        output_path = "{}/{}".format(self.datastore_path, watch_uuid)
        screenshot_filename = "{}/last-screenshot.png".format(output_path)
        elements_index_filename = "{}/elements.deflate".format(output_path)
        if path.isfile(screenshot_filename) and path.isfile(elements_index_filename) :
            return True

@@ -422,6 +405,7 @@ class ChangeDetectionStore:
            self.sync_to_json()
            return
        else:

            try:
                # Re #286 - First write to a temp file, then confirm it looks OK and rename it
                # This is a fairly basic strategy to deal with the case that the file is corrupted,
@@ -429,7 +413,7 @@ class ChangeDetectionStore:
                with open(self.json_store_path+".tmp", 'w') as json_file:
                    # Use compact JSON in production for better performance
                    json.dump(data, json_file, indent=2)
                os.replace(self.json_store_path+".tmp", self.json_store_path)
                os.replace(self.json_store_path+".tmp", self.json_store_path)
            except Exception as e:
                logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}")

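The comments in that hunk describe a write-then-rename strategy; isolated from the class, the same pattern looks roughly like this (a minimal sketch, not the project's exact code):

    import json
    import os

    def atomic_json_write(path, data):
        # Write the full document to a temporary sibling file first...
        tmp_path = path + ".tmp"
        with open(tmp_path, 'w') as f:
            json.dump(data, f, indent=2)
        # ...then swap it into place. os.replace() is an atomic rename on the
        # same filesystem, so a crash mid-write leaves the previous JSON
        # intact rather than a truncated/corrupted file.
        os.replace(tmp_path, path)
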
@@ -451,7 +435,7 @@ class ChangeDetectionStore:
            logger.remove()
            logger.add(sys.stderr)

            logger.info(f"Shutting down datastore '{self.datastore_path}' thread")
            logger.critical("Shutting down datastore thread")
            return

        if self.needs_write or self.needs_write_urgent:
@@ -490,7 +474,7 @@ class ChangeDetectionStore:

        # Load from external config file
        if path.isfile(proxy_list_file):
            with open(os.path.join(self.datastore_path, "proxies.json")) as f:
            with open("{}/proxies.json".format(self.datastore_path)) as f:
                proxy_list = json.load(f)

        # Mapping from UI config if available
@@ -748,10 +732,10 @@ class ChangeDetectionStore:
            logger.critical(f"Applying update_{update_n}")
            # Won't exist on fresh installs
            if os.path.exists(self.json_store_path):
                shutil.copyfile(self.json_store_path, os.path.join(self.datastore_path, f"url-watches-before-{update_n}.json"))
                shutil.copyfile(self.json_store_path, self.datastore_path+"/url-watches-before-{}.json".format(update_n))

            try:
                update_method = getattr(self, f"update_{update_n}")()
                update_method = getattr(self, "update_{}".format(update_n))()
            except Exception as e:
                logger.error(f"Error while trying update_{update_n}")
                logger.error(e)
@@ -990,55 +974,6 @@ class ChangeDetectionStore:
                    f_d.write(zlib.compress(f_j.read()))
                os.unlink(json_path)

    def update_20(self):
        for uuid, watch in self.data['watching'].items():
            if self.data['watching'][uuid].get('extract_title_as_title'):
                self.data['watching'][uuid]['use_page_title_in_list'] = self.data['watching'][uuid].get('extract_title_as_title')
                del self.data['watching'][uuid]['extract_title_as_title']

        if self.data['settings']['application'].get('extract_title_as_title'):
            self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title')

    def update_21(self):
        if self.data['settings']['application'].get('timezone'):
            self.data['settings']['application']['scheduler_timezone_default'] = self.data['settings']['application'].get('timezone')
            del self.data['settings']['application']['timezone']


    # Some notification formats got the wrong name type
    def update_23(self):

        def re_run(formats):
            sys_n_format = self.data['settings']['application'].get('notification_format')
            key_exists_as_value = next((k for k, v in formats.items() if v == sys_n_format), None)
            if key_exists_as_value:  # key of "Plain text"
                logger.success(f"['settings']['application']['notification_format'] '{sys_n_format}' -> '{key_exists_as_value}'")
                self.data['settings']['application']['notification_format'] = key_exists_as_value

            for uuid, watch in self.data['watching'].items():
                n_format = self.data['watching'][uuid].get('notification_format')
                key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
                if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text"
                    logger.success(f"['watching'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
                    self.data['watching'][uuid]['notification_format'] = key_exists_as_value  # should be 'text' or whatever

            for uuid, tag in self.data['settings']['application']['tags'].items():
                n_format = self.data['settings']['application']['tags'][uuid].get('notification_format')
                key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None)
                if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text"
                    logger.success(
                        f"['settings']['application']['tags'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'")
                    self.data['settings']['application']['tags'][uuid][
                        'notification_format'] = key_exists_as_value  # should be 'text' or whatever

        from .notification import valid_notification_formats
        formats = deepcopy(valid_notification_formats)
        re_run(formats)
        # And in previous versions, it was "text" instead of "Plain text", "Markdown" instead of "Markdown to HTML"
        formats['text'] = 'Text'
        formats['markdown'] = 'Markdown'
        re_run(formats)

    def add_notification_url(self, notification_url):

        logger.debug(f">>> Adding new notification_url - '{notification_url}'")

@@ -33,7 +33,7 @@
<div id="notification-test-log" style="display: none;"><span class="pure-form-message-inline">Processing..</span></div>
</div>
</div>
<div class="pure-control-group grey-form-border">
<div id="notification-customisation" class="pure-control-group">
    <div class="pure-control-group">
        {{ render_field(form.notification_title, class="m-d notification-title", placeholder=settings_application['notification_title']) }}
        <span class="pure-form-message-inline">Title for all notifications</span>
@@ -70,7 +70,7 @@
</tr>
<tr>
    <td><code>{{ '{{watch_title}}' }}</code></td>
    <td>The page title of the watch, uses <title> if not set, falls back to URL</td>
    <td>The title of the watch.</td>
</tr>
<tr>
    <td><code>{{ '{{watch_tag}}' }}</code></td>
@@ -134,12 +134,6 @@
<p>
    URL encoding, use <strong>|urlencode</strong>, for example - <code>gets://hook-website.com/test.php?title={{ '{{ watch_title|urlencode }}' }}</code>
</p>
<p>
    Regular-expression replace, use <strong>|regex_replace</strong>, for example - <code>{{ "{{ \"hello world 123\" | regex_replace('[0-9]+', 'no-more-numbers') }}" }}</code>
</p>
<p>
    For a complete reference of all Jinja2 built-in filters, see <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters">https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters</a>
</p>
</div>
</div>
<div class="pure-control-group">

Some files were not shown because too many files have changed in this diff