mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-02 07:37:25 +00:00
Compare commits
2 Commits
3376-clean
...
UI-eta-tim
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
01dd67b1ad | ||
|
|
316cca0901 |
@@ -29,35 +29,3 @@ venv/
|
|||||||
|
|
||||||
# Visual Studio
|
# Visual Studio
|
||||||
.vscode/
|
.vscode/
|
||||||
|
|
||||||
# Test and development files
|
|
||||||
test-datastore/
|
|
||||||
tests/
|
|
||||||
docs/
|
|
||||||
*.md
|
|
||||||
!README.md
|
|
||||||
|
|
||||||
# Temporary and log files
|
|
||||||
*.log
|
|
||||||
*.tmp
|
|
||||||
tmp/
|
|
||||||
temp/
|
|
||||||
|
|
||||||
# Training data and large files
|
|
||||||
train-data/
|
|
||||||
works-data/
|
|
||||||
|
|
||||||
# Container files
|
|
||||||
Dockerfile*
|
|
||||||
docker-compose*.yml
|
|
||||||
.dockerignore
|
|
||||||
|
|
||||||
# Development certificates and keys
|
|
||||||
*.pem
|
|
||||||
*.key
|
|
||||||
*.crt
|
|
||||||
profile_output.prof
|
|
||||||
|
|
||||||
# Large binary files that shouldn't be in container
|
|
||||||
*.pdf
|
|
||||||
chrome.json
|
|
||||||
6
.github/test/Dockerfile-alpine
vendored
6
.github/test/Dockerfile-alpine
vendored
@@ -2,7 +2,7 @@
|
|||||||
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
|
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
|
||||||
# Some packages wont install via pypi because they dont have a wheel available under this architecture.
|
# Some packages wont install via pypi because they dont have a wheel available under this architecture.
|
||||||
|
|
||||||
FROM ghcr.io/linuxserver/baseimage-alpine:3.22
|
FROM ghcr.io/linuxserver/baseimage-alpine:3.21
|
||||||
ENV PYTHONUNBUFFERED=1
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
COPY requirements.txt /requirements.txt
|
COPY requirements.txt /requirements.txt
|
||||||
@@ -18,19 +18,17 @@ RUN \
|
|||||||
libxslt-dev \
|
libxslt-dev \
|
||||||
openssl-dev \
|
openssl-dev \
|
||||||
python3-dev \
|
python3-dev \
|
||||||
file \
|
|
||||||
zip \
|
zip \
|
||||||
zlib-dev && \
|
zlib-dev && \
|
||||||
apk add --update --no-cache \
|
apk add --update --no-cache \
|
||||||
libjpeg \
|
libjpeg \
|
||||||
libxslt \
|
libxslt \
|
||||||
file \
|
|
||||||
nodejs \
|
nodejs \
|
||||||
poppler-utils \
|
poppler-utils \
|
||||||
python3 && \
|
python3 && \
|
||||||
echo "**** pip3 install test of changedetection.io ****" && \
|
echo "**** pip3 install test of changedetection.io ****" && \
|
||||||
python3 -m venv /lsiopy && \
|
python3 -m venv /lsiopy && \
|
||||||
pip install -U pip wheel setuptools && \
|
pip install -U pip wheel setuptools && \
|
||||||
pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements.txt && \
|
pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \
|
||||||
apk del --purge \
|
apk del --purge \
|
||||||
build-dependencies
|
build-dependencies
|
||||||
|
|||||||
2
.github/workflows/codeql-analysis.yml
vendored
2
.github/workflows/codeql-analysis.yml
vendored
@@ -30,7 +30,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Checkout repository
|
- name: Checkout repository
|
||||||
uses: actions/checkout@v5
|
uses: actions/checkout@v4
|
||||||
|
|
||||||
# Initializes the CodeQL tools for scanning.
|
# Initializes the CodeQL tools for scanning.
|
||||||
- name: Initialize CodeQL
|
- name: Initialize CodeQL
|
||||||
|
|||||||
16
.github/workflows/containers.yml
vendored
16
.github/workflows/containers.yml
vendored
@@ -39,7 +39,7 @@ jobs:
|
|||||||
# Or if we are in a tagged release scenario.
|
# Or if we are in a tagged release scenario.
|
||||||
if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
|
if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python 3.11
|
- name: Set up Python 3.11
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
@@ -103,13 +103,6 @@ jobs:
|
|||||||
# provenance: false
|
# provenance: false
|
||||||
|
|
||||||
# A new tagged release is required, which builds :tag and :latest
|
# A new tagged release is required, which builds :tag and :latest
|
||||||
- name: Debug release info
|
|
||||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
|
||||||
run: |
|
|
||||||
echo "Release tag: ${{ github.event.release.tag_name }}"
|
|
||||||
echo "Github ref: ${{ github.ref }}"
|
|
||||||
echo "Github ref name: ${{ github.ref_name }}"
|
|
||||||
|
|
||||||
- name: Docker meta :tag
|
- name: Docker meta :tag
|
||||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||||
uses: docker/metadata-action@v5
|
uses: docker/metadata-action@v5
|
||||||
@@ -119,10 +112,9 @@ jobs:
|
|||||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
|
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
|
||||||
ghcr.io/dgtlmoon/changedetection.io
|
ghcr.io/dgtlmoon/changedetection.io
|
||||||
tags: |
|
tags: |
|
||||||
type=semver,pattern={{version}},value=${{ github.event.release.tag_name }}
|
type=semver,pattern={{version}}
|
||||||
type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
|
type=semver,pattern={{major}}.{{minor}}
|
||||||
type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
|
type=semver,pattern={{major}}
|
||||||
type=raw,value=latest
|
|
||||||
|
|
||||||
- name: Build and push :tag
|
- name: Build and push :tag
|
||||||
id: docker_build_tag_release
|
id: docker_build_tag_release
|
||||||
|
|||||||
6
.github/workflows/pypi-release.yml
vendored
6
.github/workflows/pypi-release.yml
vendored
@@ -7,7 +7,7 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python
|
- name: Set up Python
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
@@ -34,7 +34,7 @@ jobs:
|
|||||||
- build
|
- build
|
||||||
steps:
|
steps:
|
||||||
- name: Download all the dists
|
- name: Download all the dists
|
||||||
uses: actions/download-artifact@v5
|
uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: python-package-distributions
|
name: python-package-distributions
|
||||||
path: dist/
|
path: dist/
|
||||||
@@ -72,7 +72,7 @@ jobs:
|
|||||||
|
|
||||||
steps:
|
steps:
|
||||||
- name: Download all the dists
|
- name: Download all the dists
|
||||||
uses: actions/download-artifact@v5
|
uses: actions/download-artifact@v4
|
||||||
with:
|
with:
|
||||||
name: python-package-distributions
|
name: python-package-distributions
|
||||||
path: dist/
|
path: dist/
|
||||||
|
|||||||
44
.github/workflows/test-container-build.yml
vendored
44
.github/workflows/test-container-build.yml
vendored
@@ -23,30 +23,10 @@ on:
|
|||||||
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
|
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
|
||||||
# @todo: some kind of path filter for requirements.txt and Dockerfile
|
# @todo: some kind of path filter for requirements.txt and Dockerfile
|
||||||
jobs:
|
jobs:
|
||||||
builder:
|
test-container-build:
|
||||||
name: Build ${{ matrix.platform }} (${{ matrix.dockerfile == './Dockerfile' && 'main' || 'alpine' }})
|
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
strategy:
|
|
||||||
matrix:
|
|
||||||
include:
|
|
||||||
# Main Dockerfile platforms
|
|
||||||
- platform: linux/amd64
|
|
||||||
dockerfile: ./Dockerfile
|
|
||||||
- platform: linux/arm64
|
|
||||||
dockerfile: ./Dockerfile
|
|
||||||
- platform: linux/arm/v7
|
|
||||||
dockerfile: ./Dockerfile
|
|
||||||
- platform: linux/arm/v8
|
|
||||||
dockerfile: ./Dockerfile
|
|
||||||
- platform: linux/arm64/v8
|
|
||||||
dockerfile: ./Dockerfile
|
|
||||||
# Alpine Dockerfile platforms (musl via alpine check)
|
|
||||||
- platform: linux/amd64
|
|
||||||
dockerfile: ./.github/test/Dockerfile-alpine
|
|
||||||
- platform: linux/arm64
|
|
||||||
dockerfile: ./.github/test/Dockerfile-alpine
|
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@v4
|
||||||
- name: Set up Python 3.11
|
- name: Set up Python 3.11
|
||||||
uses: actions/setup-python@v5
|
uses: actions/setup-python@v5
|
||||||
with:
|
with:
|
||||||
@@ -67,14 +47,24 @@ jobs:
|
|||||||
version: latest
|
version: latest
|
||||||
driver-opts: image=moby/buildkit:master
|
driver-opts: image=moby/buildkit:master
|
||||||
|
|
||||||
- name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
|
# https://github.com/dgtlmoon/changedetection.io/pull/1067
|
||||||
|
# Check we can still build under alpine/musl
|
||||||
|
- name: Test that the docker containers can build (musl via alpine check)
|
||||||
|
id: docker_build_musl
|
||||||
|
uses: docker/build-push-action@v6
|
||||||
|
with:
|
||||||
|
context: ./
|
||||||
|
file: ./.github/test/Dockerfile-alpine
|
||||||
|
platforms: linux/amd64,linux/arm64
|
||||||
|
|
||||||
|
- name: Test that the docker containers can build
|
||||||
id: docker_build
|
id: docker_build
|
||||||
uses: docker/build-push-action@v6
|
uses: docker/build-push-action@v6
|
||||||
# https://github.com/docker/build-push-action#customizing
|
# https://github.com/docker/build-push-action#customizing
|
||||||
with:
|
with:
|
||||||
context: ./
|
context: ./
|
||||||
file: ${{ matrix.dockerfile }}
|
file: ./Dockerfile
|
||||||
platforms: ${{ matrix.platform }}
|
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
|
||||||
cache-from: type=gha
|
cache-from: type=local,src=/tmp/.buildx-cache
|
||||||
cache-to: type=gha,mode=max
|
cache-to: type=local,dest=/tmp/.buildx-cache
|
||||||
|
|
||||||
|
|||||||
18
.github/workflows/test-only.yml
vendored
18
.github/workflows/test-only.yml
vendored
@@ -7,14 +7,14 @@ jobs:
|
|||||||
lint-code:
|
lint-code:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@v4
|
||||||
- name: Lint with Ruff
|
- name: Lint with flake8
|
||||||
run: |
|
run: |
|
||||||
pip install ruff
|
pip3 install flake8
|
||||||
# Check for syntax errors and undefined names
|
# stop the build if there are Python syntax errors or undefined names
|
||||||
ruff check . --select E9,F63,F7,F82
|
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||||
# Complete check with errors treated as warnings
|
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||||
ruff check . --exit-zero
|
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||||
|
|
||||||
test-application-3-10:
|
test-application-3-10:
|
||||||
needs: lint-code
|
needs: lint-code
|
||||||
@@ -28,6 +28,7 @@ jobs:
|
|||||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||||
with:
|
with:
|
||||||
python-version: '3.11'
|
python-version: '3.11'
|
||||||
|
skip-pypuppeteer: true
|
||||||
|
|
||||||
test-application-3-12:
|
test-application-3-12:
|
||||||
needs: lint-code
|
needs: lint-code
|
||||||
@@ -41,4 +42,5 @@ jobs:
|
|||||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||||
with:
|
with:
|
||||||
python-version: '3.13'
|
python-version: '3.13'
|
||||||
skip-pypuppeteer: true
|
skip-pypuppeteer: true
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ on:
|
|||||||
description: 'Python version to use'
|
description: 'Python version to use'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
default: '3.11'
|
default: '3.10'
|
||||||
skip-pypuppeteer:
|
skip-pypuppeteer:
|
||||||
description: 'Skip PyPuppeteer (not supported in 3.11/3.12)'
|
description: 'Skip PyPuppeteer (not supported in 3.11/3.12)'
|
||||||
required: false
|
required: false
|
||||||
@@ -20,7 +20,7 @@ jobs:
|
|||||||
env:
|
env:
|
||||||
PYTHON_VERSION: ${{ inputs.python-version }}
|
PYTHON_VERSION: ${{ inputs.python-version }}
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v5
|
- uses: actions/checkout@v4
|
||||||
|
|
||||||
# Mainly just for link/flake8
|
# Mainly just for link/flake8
|
||||||
- name: Set up Python ${{ env.PYTHON_VERSION }}
|
- name: Set up Python ${{ env.PYTHON_VERSION }}
|
||||||
@@ -86,10 +86,10 @@ jobs:
|
|||||||
run: |
|
run: |
|
||||||
# Playwright via Sockpuppetbrowser fetch
|
# Playwright via Sockpuppetbrowser fetch
|
||||||
# tests/visualselector/test_fetch_data.py will do browser steps
|
# tests/visualselector/test_fetch_data.py will do browser steps
|
||||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
|
||||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
|
||||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
|
||||||
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
|
||||||
|
|
||||||
|
|
||||||
- name: Playwright and SocketPuppetBrowser - Headers and requests
|
- name: Playwright and SocketPuppetBrowser - Headers and requests
|
||||||
@@ -172,33 +172,13 @@ jobs:
|
|||||||
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
|
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
|
||||||
|
|
||||||
# Check whether TRACE log is enabled.
|
# Check whether TRACE log is enabled.
|
||||||
# Also, check whether TRACE came from STDOUT
|
# Also, check whether TRACE is came from STDERR
|
||||||
docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
|
docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1
|
||||||
# Check whether DEBUG is came from STDOUT
|
# Check whether DEBUG is came from STDOUT
|
||||||
docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
|
docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
|
||||||
|
|
||||||
docker kill test-changedetectionio
|
docker kill test-changedetectionio
|
||||||
|
|
||||||
- name: Test HTTPS SSL mode
|
|
||||||
run: |
|
|
||||||
openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost"
|
|
||||||
docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio
|
|
||||||
sleep 3
|
|
||||||
# Should return 0 (no error) when grep finds it
|
|
||||||
# -k because its self-signed
|
|
||||||
curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid
|
|
||||||
|
|
||||||
docker kill test-changedetectionio-ssl
|
|
||||||
|
|
||||||
- name: Test IPv6 Mode
|
|
||||||
run: |
|
|
||||||
# IPv6 - :: bind to all interfaces inside container (like 0.0.0.0), ::1 would be localhost only
|
|
||||||
docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio
|
|
||||||
sleep 3
|
|
||||||
# Should return 0 (no error) when grep finds it on localhost
|
|
||||||
curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid
|
|
||||||
docker kill test-changedetectionio-ipv6
|
|
||||||
|
|
||||||
- name: Test changedetection.io SIGTERM and SIGINT signal shutdown
|
- name: Test changedetection.io SIGTERM and SIGINT signal shutdown
|
||||||
run: |
|
run: |
|
||||||
|
|
||||||
|
|||||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -16,7 +16,6 @@ dist/
|
|||||||
.env
|
.env
|
||||||
.venv/
|
.venv/
|
||||||
venv/
|
venv/
|
||||||
.python-version
|
|
||||||
|
|
||||||
# IDEs
|
# IDEs
|
||||||
.idea
|
.idea
|
||||||
|
|||||||
@@ -1,9 +0,0 @@
|
|||||||
repos:
|
|
||||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
|
||||||
rev: v0.11.2
|
|
||||||
hooks:
|
|
||||||
# Lint (and apply safe fixes)
|
|
||||||
- id: ruff
|
|
||||||
args: [--fix]
|
|
||||||
# Fomrat
|
|
||||||
- id: ruff-format
|
|
||||||
48
.ruff.toml
48
.ruff.toml
@@ -1,48 +0,0 @@
|
|||||||
# Minimum supported version
|
|
||||||
target-version = "py310"
|
|
||||||
|
|
||||||
# Formatting options
|
|
||||||
line-length = 100
|
|
||||||
indent-width = 4
|
|
||||||
|
|
||||||
exclude = [
|
|
||||||
"__pycache__",
|
|
||||||
".eggs",
|
|
||||||
".git",
|
|
||||||
".tox",
|
|
||||||
".venv",
|
|
||||||
"*.egg-info",
|
|
||||||
"*.pyc",
|
|
||||||
]
|
|
||||||
|
|
||||||
[lint]
|
|
||||||
# https://docs.astral.sh/ruff/rules/
|
|
||||||
select = [
|
|
||||||
"B", # flake8-bugbear
|
|
||||||
"B9",
|
|
||||||
"C",
|
|
||||||
"E", # pycodestyle
|
|
||||||
"F", # Pyflakes
|
|
||||||
"I", # isort
|
|
||||||
"N", # pep8-naming
|
|
||||||
"UP", # pyupgrade
|
|
||||||
"W", # pycodestyle
|
|
||||||
]
|
|
||||||
ignore = [
|
|
||||||
"B007", # unused-loop-control-variable
|
|
||||||
"B909", # loop-iterator-mutation
|
|
||||||
"E203", # whitespace-before-punctuation
|
|
||||||
"E266", # multiple-leading-hashes-for-block-comment
|
|
||||||
"E501", # redundant-backslash
|
|
||||||
"F403", # undefined-local-with-import-star
|
|
||||||
"N802", # invalid-function-name
|
|
||||||
"N806", # non-lowercase-variable-in-function
|
|
||||||
"N815", # mixed-case-variable-in-class-scope
|
|
||||||
]
|
|
||||||
|
|
||||||
[lint.mccabe]
|
|
||||||
max-complexity = 12
|
|
||||||
|
|
||||||
[format]
|
|
||||||
indent-style = "space"
|
|
||||||
quote-style = "preserve"
|
|
||||||
28
Dockerfile
28
Dockerfile
@@ -1,5 +1,8 @@
|
|||||||
# pip dependencies install stage
|
# pip dependencies install stage
|
||||||
|
|
||||||
|
# @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py
|
||||||
|
# If you know how to fix it, please do! and test it for both 3.10 and 3.11
|
||||||
|
|
||||||
ARG PYTHON_VERSION=3.11
|
ARG PYTHON_VERSION=3.11
|
||||||
|
|
||||||
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
|
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
|
||||||
@@ -16,7 +19,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
libssl-dev \
|
libssl-dev \
|
||||||
libxslt-dev \
|
libxslt-dev \
|
||||||
make \
|
make \
|
||||||
patch \
|
|
||||||
zlib1g-dev
|
zlib1g-dev
|
||||||
|
|
||||||
RUN mkdir /install
|
RUN mkdir /install
|
||||||
@@ -24,24 +26,13 @@ WORKDIR /install
|
|||||||
|
|
||||||
COPY requirements.txt /requirements.txt
|
COPY requirements.txt /requirements.txt
|
||||||
|
|
||||||
# Use cache mounts and multiple wheel sources for faster ARM builds
|
# --extra-index-url https://www.piwheels.org/simple is for cryptography module to be prebuilt (or rustc etc needs to be installed)
|
||||||
ENV PIP_CACHE_DIR=/tmp/pip-cache
|
RUN pip install --extra-index-url https://www.piwheels.org/simple --target=/dependencies -r /requirements.txt
|
||||||
RUN --mount=type=cache,target=/tmp/pip-cache \
|
|
||||||
pip install \
|
|
||||||
--extra-index-url https://www.piwheels.org/simple \
|
|
||||||
--extra-index-url https://pypi.anaconda.org/ARM-software/simple \
|
|
||||||
--cache-dir=/tmp/pip-cache \
|
|
||||||
--target=/dependencies \
|
|
||||||
-r /requirements.txt
|
|
||||||
|
|
||||||
# Playwright is an alternative to Selenium
|
# Playwright is an alternative to Selenium
|
||||||
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
|
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
|
||||||
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
|
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
|
||||||
RUN --mount=type=cache,target=/tmp/pip-cache \
|
RUN pip install --target=/dependencies playwright~=1.48.0 \
|
||||||
pip install \
|
|
||||||
--cache-dir=/tmp/pip-cache \
|
|
||||||
--target=/dependencies \
|
|
||||||
playwright~=1.48.0 \
|
|
||||||
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
||||||
|
|
||||||
# Final image stage
|
# Final image stage
|
||||||
@@ -54,8 +45,6 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
|
|||||||
locales \
|
locales \
|
||||||
# For pdftohtml
|
# For pdftohtml
|
||||||
poppler-utils \
|
poppler-utils \
|
||||||
# favicon type detection and other uses
|
|
||||||
file \
|
|
||||||
zlib1g \
|
zlib1g \
|
||||||
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
&& apt-get clean && rm -rf /var/lib/apt/lists/*
|
||||||
|
|
||||||
@@ -82,10 +71,7 @@ COPY changedetection.py /app/changedetection.py
|
|||||||
# Github Action test purpose(test-only.yml).
|
# Github Action test purpose(test-only.yml).
|
||||||
# On production, it is effectively LOGGER_LEVEL=''.
|
# On production, it is effectively LOGGER_LEVEL=''.
|
||||||
ARG LOGGER_LEVEL=''
|
ARG LOGGER_LEVEL=''
|
||||||
ENV LOGGER_LEVEL="$LOGGER_LEVEL"
|
ENV LOGGER_LEVEL "$LOGGER_LEVEL"
|
||||||
|
|
||||||
# Default
|
|
||||||
ENV LC_ALL=en_US.UTF-8
|
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
||||||
|
|||||||
2
LICENSE
2
LICENSE
@@ -186,7 +186,7 @@
|
|||||||
same "printed page" as the copyright notice for easier
|
same "printed page" as the copyright notice for easier
|
||||||
identification within third-party archives.
|
identification within third-party archives.
|
||||||
|
|
||||||
Copyright 2025 Web Technologies s.r.o.
|
Copyright [yyyy] [name of copyright owner]
|
||||||
|
|
||||||
Licensed under the Apache License, Version 2.0 (the "License");
|
Licensed under the Apache License, Version 2.0 (the "License");
|
||||||
you may not use this file except in compliance with the License.
|
you may not use this file except in compliance with the License.
|
||||||
|
|||||||
@@ -1,11 +1,10 @@
|
|||||||
recursive-include changedetectionio/api *
|
recursive-include changedetectionio/api *
|
||||||
|
recursive-include changedetectionio/apprise_plugin *
|
||||||
recursive-include changedetectionio/blueprint *
|
recursive-include changedetectionio/blueprint *
|
||||||
recursive-include changedetectionio/content_fetchers *
|
recursive-include changedetectionio/content_fetchers *
|
||||||
recursive-include changedetectionio/conditions *
|
recursive-include changedetectionio/conditions *
|
||||||
recursive-include changedetectionio/model *
|
recursive-include changedetectionio/model *
|
||||||
recursive-include changedetectionio/notification *
|
|
||||||
recursive-include changedetectionio/processors *
|
recursive-include changedetectionio/processors *
|
||||||
recursive-include changedetectionio/realtime *
|
|
||||||
recursive-include changedetectionio/static *
|
recursive-include changedetectionio/static *
|
||||||
recursive-include changedetectionio/templates *
|
recursive-include changedetectionio/templates *
|
||||||
recursive-include changedetectionio/tests *
|
recursive-include changedetectionio/tests *
|
||||||
|
|||||||
@@ -1,21 +1,11 @@
|
|||||||
# Monitor website changes
|
## Web Site Change Detection, Monitoring and Notification.
|
||||||
|
|
||||||
Detect WebPage Changes Automatically — Monitor Web Page Changes in Real Time
|
Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
|
||||||
|
|
||||||
Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more.
|
|
||||||
|
|
||||||
Detect web page content changes and get instant alerts.
|
|
||||||
|
|
||||||
|
|
||||||
[Changedetection.io is the best tool to monitor web-pages for changes](https://changedetection.io) Track website content changes and receive notifications via Discord, Email, Slack, Telegram and 90+ more
|
|
||||||
|
|
||||||
Ideal for monitoring price changes, content edits, conditional changes and more.
|
|
||||||
|
|
||||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes" title="Self-hosted web page change monitoring, list of websites with changes" />](https://changedetection.io)
|
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes" title="Self-hosted web page change monitoring, list of websites with changes" />](https://changedetection.io)
|
||||||
|
|
||||||
|
|
||||||
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
|
[**Don't have time? Let us host it for you! try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
### Target specific parts of the webpage using the Visual Selector tool.
|
### Target specific parts of the webpage using the Visual Selector tool.
|
||||||
|
|||||||
23
README.md
23
README.md
@@ -1,13 +1,11 @@
|
|||||||
# Detect Website Changes Automatically — Monitor Web Page Changes in Real Time
|
## Web Site Change Detection, Restock monitoring and notifications.
|
||||||
|
|
||||||
Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more.
|
**_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._**
|
||||||
|
|
||||||
**Detect web page content changes and get instant alerts.**
|
_Live your data-life pro-actively._
|
||||||
|
|
||||||
Ideal for monitoring price changes, content edits, conditional changes and more.
|
|
||||||
|
|
||||||
|
|
||||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Web site page change monitoring" title="Web site page change monitoring" />](https://changedetection.io?src=github)
|
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web site page change monitoring" title="Self-hosted web site page change monitoring" />](https://changedetection.io?src=github)
|
||||||
|
|
||||||
[![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)
|
[![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)
|
||||||
|
|
||||||
@@ -15,7 +13,6 @@ Ideal for monitoring price changes, content edits, conditional changes and more.
|
|||||||
|
|
||||||
[**Get started with website page change monitoring straight away. Don't have time? Try our $8.99/month subscription, use our proxies and support!**](https://changedetection.io) , _half the price of other website change monitoring services!_
|
[**Get started with website page change monitoring straight away. Don't have time? Try our $8.99/month subscription, use our proxies and support!**](https://changedetection.io) , _half the price of other website change monitoring services!_
|
||||||
|
|
||||||
|
|
||||||
- Chrome browser included.
|
- Chrome browser included.
|
||||||
- Nothing to install, access via browser login after signup.
|
- Nothing to install, access via browser login after signup.
|
||||||
- Super fast, no registration needed setup.
|
- Super fast, no registration needed setup.
|
||||||
@@ -92,7 +89,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
|||||||
#### Key Features
|
#### Key Features
|
||||||
|
|
||||||
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
||||||
- Target elements with xPath 1 and xPath 2, CSS Selectors, Easily monitor complex JSON with JSONPath or jq
|
- Target elements with xPath(1.0) and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
|
||||||
- Switch between fast non-JS and Chrome JS based "fetchers"
|
- Switch between fast non-JS and Chrome JS based "fetchers"
|
||||||
- Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums)
|
- Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums)
|
||||||
- Easily specify how often a site should be checked
|
- Easily specify how often a site should be checked
|
||||||
@@ -102,16 +99,12 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
|||||||
- Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
|
- Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
|
||||||
- Send a screenshot with the notification when a change is detected in the web page
|
- Send a screenshot with the notification when a change is detected in the web page
|
||||||
|
|
||||||
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $150 using our signup link.
|
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
|
||||||
|
|
||||||
|
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residental, ISP, Rotating and many other proxy types to suit your project.
|
||||||
|
|
||||||
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
|
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
|
||||||
|
|
||||||
### Conditional web page changes
|
|
||||||
|
|
||||||
Easily [configure conditional actions](https://changedetection.io/tutorial/conditional-actions-web-page-changes), for example, only trigger when a price is above or below a preset amount, or [when a web page includes (or does not include) a keyword](https://changedetection.io/tutorial/how-monitor-keywords-any-website)
|
|
||||||
|
|
||||||
<img src="./docs/web-page-change-conditions.png" style="max-width:80%;" alt="Conditional web page changes" title="Conditional web page changes" />
|
|
||||||
|
|
||||||
### Schedule web page watches in any timezone, limit by day of week and time.
|
### Schedule web page watches in any timezone, limit by day of week and time.
|
||||||
|
|
||||||
Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours.
|
Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours.
|
||||||
|
|||||||
@@ -3,6 +3,4 @@
|
|||||||
# Only exists for direct CLI usage
|
# Only exists for direct CLI usage
|
||||||
|
|
||||||
import changedetectionio
|
import changedetectionio
|
||||||
|
changedetectionio.main()
|
||||||
if __name__ == '__main__':
|
|
||||||
changedetectionio.main()
|
|
||||||
|
|||||||
@@ -1,98 +0,0 @@
|
|||||||
# Creating Plugins for changedetection.io
|
|
||||||
|
|
||||||
This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways.
|
|
||||||
|
|
||||||
## Plugin Types
|
|
||||||
|
|
||||||
### UI Stats Tab Plugins
|
|
||||||
|
|
||||||
These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch.
|
|
||||||
|
|
||||||
#### Creating a UI Stats Tab Plugin
|
|
||||||
|
|
||||||
1. Create a Python file in a directory that will be loaded by the plugin system.
|
|
||||||
|
|
||||||
2. Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import pluggy
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
|
|
||||||
|
|
||||||
@global_hookimpl
|
|
||||||
def ui_edit_stats_extras(watch):
|
|
||||||
"""Add custom content to the stats tab"""
|
|
||||||
# Calculate or retrieve your stats
|
|
||||||
my_stat = calculate_something(watch)
|
|
||||||
|
|
||||||
# Return HTML content as a string
|
|
||||||
html = f"""
|
|
||||||
<div class="my-plugin-stats">
|
|
||||||
<h4>My Plugin Statistics</h4>
|
|
||||||
<p>My statistic: {my_stat}</p>
|
|
||||||
</div>
|
|
||||||
"""
|
|
||||||
return html
|
|
||||||
```
|
|
||||||
|
|
||||||
3. The HTML you return will be included in the Stats tab.
|
|
||||||
|
|
||||||
## Plugin Loading
|
|
||||||
|
|
||||||
Plugins can be loaded from:
|
|
||||||
|
|
||||||
1. Built-in plugin directories in the codebase
|
|
||||||
2. External packages using setuptools entry points
|
|
||||||
|
|
||||||
To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`.
|
|
||||||
|
|
||||||
## Example Plugin
|
|
||||||
|
|
||||||
Here's a simple example of a plugin that adds a word count statistic to the Stats tab:
|
|
||||||
|
|
||||||
```python
|
|
||||||
import pluggy
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
|
|
||||||
|
|
||||||
def count_words_in_history(watch):
|
|
||||||
"""Count words in the latest snapshot"""
|
|
||||||
try:
|
|
||||||
if not watch.history.keys():
|
|
||||||
return 0
|
|
||||||
|
|
||||||
latest_key = list(watch.history.keys())[-1]
|
|
||||||
latest_content = watch.get_history_snapshot(latest_key)
|
|
||||||
return len(latest_content.split())
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error counting words: {str(e)}")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
@global_hookimpl
|
|
||||||
def ui_edit_stats_extras(watch):
|
|
||||||
"""Add word count to the Stats tab"""
|
|
||||||
word_count = count_words_in_history(watch)
|
|
||||||
|
|
||||||
html = f"""
|
|
||||||
<div class="word-count-stats">
|
|
||||||
<h4>Content Analysis</h4>
|
|
||||||
<table class="pure-table">
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>Word count (latest snapshot)</td>
|
|
||||||
<td>{word_count}</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
</div>
|
|
||||||
"""
|
|
||||||
return html
|
|
||||||
```
|
|
||||||
|
|
||||||
## Testing Your Plugin
|
|
||||||
|
|
||||||
1. Place your plugin in one of the directories scanned by the plugin system
|
|
||||||
2. Restart changedetection.io
|
|
||||||
3. Go to the Edit page of a watch and check the Stats tab to see your content
|
|
||||||
@@ -2,23 +2,24 @@
|
|||||||
|
|
||||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||||
|
|
||||||
__version__ = '0.50.9'
|
__version__ = '0.49.8'
|
||||||
|
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
from json.decoder import JSONDecodeError
|
from json.decoder import JSONDecodeError
|
||||||
import os
|
import os
|
||||||
|
os.environ['EVENTLET_NO_GREENDNS'] = 'yes'
|
||||||
|
import eventlet
|
||||||
|
import eventlet.wsgi
|
||||||
import getopt
|
import getopt
|
||||||
import platform
|
|
||||||
import signal
|
import signal
|
||||||
|
import socket
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
# Eventlet completely removed - using threading mode for SocketIO
|
|
||||||
# This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
|
|
||||||
from changedetectionio import store
|
from changedetectionio import store
|
||||||
from changedetectionio.flask_app import changedetection_app
|
from changedetectionio.flask_app import changedetection_app
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
|
||||||
# Only global so we can access it in the signal handler
|
# Only global so we can access it in the signal handler
|
||||||
app = None
|
app = None
|
||||||
datastore = None
|
datastore = None
|
||||||
@@ -28,44 +29,16 @@ def get_version():
|
|||||||
|
|
||||||
# Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
|
# Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
|
||||||
def sigshutdown_handler(_signo, _stack_frame):
|
def sigshutdown_handler(_signo, _stack_frame):
|
||||||
|
global app
|
||||||
|
global datastore
|
||||||
name = signal.Signals(_signo).name
|
name = signal.Signals(_signo).name
|
||||||
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')
|
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown')
|
||||||
|
datastore.sync_to_json()
|
||||||
# Set exit flag immediately to stop all loops
|
logger.success('Sync JSON to disk complete.')
|
||||||
app.config.exit.set()
|
# This will throw a SystemExit exception, because eventlet.wsgi.server doesn't know how to deal with it.
|
||||||
|
# Solution: move to gevent or other server in the future (#2014)
|
||||||
datastore.stop_thread = True
|
datastore.stop_thread = True
|
||||||
|
app.config.exit.set()
|
||||||
# Shutdown workers and queues immediately
|
|
||||||
try:
|
|
||||||
from changedetectionio import worker_handler
|
|
||||||
worker_handler.shutdown_workers()
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error shutting down workers: {str(e)}")
|
|
||||||
|
|
||||||
# Close janus queues properly
|
|
||||||
try:
|
|
||||||
from changedetectionio.flask_app import update_q, notification_q
|
|
||||||
update_q.close()
|
|
||||||
notification_q.close()
|
|
||||||
logger.debug("Janus queues closed successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to close janus queues: {e}")
|
|
||||||
|
|
||||||
# Shutdown socketio server fast
|
|
||||||
from changedetectionio.flask_app import socketio_server
|
|
||||||
if socketio_server and hasattr(socketio_server, 'shutdown'):
|
|
||||||
try:
|
|
||||||
socketio_server.shutdown()
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error shutting down Socket.IO server: {str(e)}")
|
|
||||||
|
|
||||||
# Save data quickly
|
|
||||||
try:
|
|
||||||
datastore.sync_to_json()
|
|
||||||
logger.success('Fast sync to disk complete.')
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error syncing to disk: {str(e)}")
|
|
||||||
|
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
@@ -74,8 +47,9 @@ def main():
|
|||||||
|
|
||||||
datastore_path = None
|
datastore_path = None
|
||||||
do_cleanup = False
|
do_cleanup = False
|
||||||
host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
|
host = ''
|
||||||
port = int(os.environ.get('PORT', 5000))
|
ipv6_enabled = False
|
||||||
|
port = os.environ.get('PORT') or 5000
|
||||||
ssl_mode = False
|
ssl_mode = False
|
||||||
|
|
||||||
# On Windows, create and use a default path.
|
# On Windows, create and use a default path.
|
||||||
@@ -116,6 +90,10 @@ def main():
|
|||||||
if opt == '-d':
|
if opt == '-d':
|
||||||
datastore_path = arg
|
datastore_path = arg
|
||||||
|
|
||||||
|
if opt == '-6':
|
||||||
|
logger.success("Enabling IPv6 listen support")
|
||||||
|
ipv6_enabled = True
|
||||||
|
|
||||||
# Cleanup (remove text files that arent in the index)
|
# Cleanup (remove text files that arent in the index)
|
||||||
if opt == '-c':
|
if opt == '-c':
|
||||||
do_cleanup = True
|
do_cleanup = True
|
||||||
@@ -127,24 +105,10 @@ def main():
|
|||||||
if opt == '-l':
|
if opt == '-l':
|
||||||
logger_level = int(arg) if arg.isdigit() else arg.upper()
|
logger_level = int(arg) if arg.isdigit() else arg.upper()
|
||||||
|
|
||||||
|
|
||||||
logger.success(f"changedetection.io version {get_version()} starting.")
|
|
||||||
# Launch using SocketIO run method for proper integration (if enabled)
|
|
||||||
ssl_cert_file = os.getenv("SSL_CERT_FILE", 'cert.pem')
|
|
||||||
ssl_privkey_file = os.getenv("SSL_PRIVKEY_FILE", 'privkey.pem')
|
|
||||||
if os.getenv("SSL_CERT_FILE") and os.getenv("SSL_PRIVKEY_FILE"):
|
|
||||||
ssl_mode = True
|
|
||||||
|
|
||||||
# SSL mode could have been set by -s too, therefor fallback to default values
|
|
||||||
if ssl_mode:
|
|
||||||
if not os.path.isfile(ssl_cert_file) or not os.path.isfile(ssl_privkey_file):
|
|
||||||
logger.critical(f"Cannot start SSL/HTTPS mode, Please be sure that {ssl_cert_file}' and '{ssl_privkey_file}' exist in in {os.getcwd()}")
|
|
||||||
os._exit(2)
|
|
||||||
|
|
||||||
# Without this, a logger will be duplicated
|
# Without this, a logger will be duplicated
|
||||||
logger.remove()
|
logger.remove()
|
||||||
try:
|
try:
|
||||||
log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' }
|
log_level_for_stdout = { 'DEBUG', 'SUCCESS' }
|
||||||
logger.configure(handlers=[
|
logger.configure(handlers=[
|
||||||
{"sink": sys.stdout, "level": logger_level,
|
{"sink": sys.stdout, "level": logger_level,
|
||||||
"filter" : lambda record: record['level'].name in log_level_for_stdout},
|
"filter" : lambda record: record['level'].name in log_level_for_stdout},
|
||||||
@@ -181,26 +145,8 @@ def main():
|
|||||||
|
|
||||||
app = changedetection_app(app_config, datastore)
|
app = changedetection_app(app_config, datastore)
|
||||||
|
|
||||||
# Get the SocketIO instance from the Flask app (created in flask_app.py)
|
|
||||||
from changedetectionio.flask_app import socketio_server
|
|
||||||
global socketio
|
|
||||||
socketio = socketio_server
|
|
||||||
|
|
||||||
signal.signal(signal.SIGTERM, sigshutdown_handler)
|
signal.signal(signal.SIGTERM, sigshutdown_handler)
|
||||||
signal.signal(signal.SIGINT, sigshutdown_handler)
|
signal.signal(signal.SIGINT, sigshutdown_handler)
|
||||||
|
|
||||||
# Custom signal handler for memory cleanup
|
|
||||||
def sigusr_clean_handler(_signo, _stack_frame):
|
|
||||||
from changedetectionio.gc_cleanup import memory_cleanup
|
|
||||||
logger.info('SIGUSR1 received: Running memory cleanup')
|
|
||||||
return memory_cleanup(app)
|
|
||||||
|
|
||||||
# Register the SIGUSR1 signal handler
|
|
||||||
# Only register the signal handler if running on Linux
|
|
||||||
if platform.system() == "Linux":
|
|
||||||
signal.signal(signal.SIGUSR1, sigusr_clean_handler)
|
|
||||||
else:
|
|
||||||
logger.info("SIGUSR1 handler only registered on Linux, skipped.")
|
|
||||||
|
|
||||||
# Go into cleanup mode
|
# Go into cleanup mode
|
||||||
if do_cleanup:
|
if do_cleanup:
|
||||||
@@ -210,11 +156,10 @@ def main():
|
|||||||
|
|
||||||
|
|
||||||
@app.context_processor
|
@app.context_processor
|
||||||
def inject_template_globals():
|
def inject_version():
|
||||||
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
return dict(right_sticky="v{}".format(datastore.data['version_tag']),
|
||||||
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
new_version_available=app.config['NEW_VERSION_AVAILABLE'],
|
||||||
has_password=datastore.data['settings']['application']['password'] != False,
|
has_password=datastore.data['settings']['application']['password'] != False
|
||||||
socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
# Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
|
# Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
|
||||||
@@ -238,21 +183,15 @@ def main():
|
|||||||
from werkzeug.middleware.proxy_fix import ProxyFix
|
from werkzeug.middleware.proxy_fix import ProxyFix
|
||||||
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
|
app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
|
||||||
|
|
||||||
|
s_type = socket.AF_INET6 if ipv6_enabled else socket.AF_INET
|
||||||
|
|
||||||
|
if ssl_mode:
|
||||||
|
# @todo finalise SSL config, but this should get you in the right direction if you need it.
|
||||||
|
eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), s_type),
|
||||||
|
certfile='cert.pem',
|
||||||
|
keyfile='privkey.pem',
|
||||||
|
server_side=True), app)
|
||||||
|
|
||||||
# SocketIO instance is already initialized in flask_app.py
|
|
||||||
if socketio_server:
|
|
||||||
if ssl_mode:
|
|
||||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
|
||||||
socketio.run(app, host=host, port=int(port), debug=False,
|
|
||||||
ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
|
|
||||||
else:
|
|
||||||
socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
|
|
||||||
else:
|
else:
|
||||||
# Run Flask app without Socket.IO if disabled
|
eventlet.wsgi.server(eventlet.listen((host, int(port)), s_type), app)
|
||||||
logger.info("Starting Flask app without Socket.IO server")
|
|
||||||
if ssl_mode:
|
|
||||||
logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
|
|
||||||
app.run(host=host, port=int(port), debug=False,
|
|
||||||
ssl_context=(ssl_cert_file, ssl_privkey_file))
|
|
||||||
else:
|
|
||||||
app.run(host=host, port=int(port), debug=False)
|
|
||||||
|
|||||||
@@ -1,145 +0,0 @@
|
|||||||
from flask_expects_json import expects_json
|
|
||||||
from flask_restful import Resource
|
|
||||||
from . import auth
|
|
||||||
from flask_restful import abort, Resource
|
|
||||||
from flask import request
|
|
||||||
from . import auth
|
|
||||||
from . import schema_create_notification_urls, schema_delete_notification_urls
|
|
||||||
|
|
||||||
class Notifications(Resource):
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
# datastore is a black box dependency
|
|
||||||
self.datastore = kwargs['datastore']
|
|
||||||
|
|
||||||
@auth.check_token
|
|
||||||
def get(self):
|
|
||||||
"""
|
|
||||||
@api {get} /api/v1/notifications Return Notification URL List
|
|
||||||
@apiDescription Return the Notification URL List from the configuration
|
|
||||||
@apiExample {curl} Example usage:
|
|
||||||
curl http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
|
||||||
HTTP/1.0 200
|
|
||||||
{
|
|
||||||
'notification_urls': ["notification-urls-list"]
|
|
||||||
}
|
|
||||||
@apiName Get
|
|
||||||
@apiGroup Notifications
|
|
||||||
"""
|
|
||||||
|
|
||||||
notification_urls = self.datastore.data.get('settings', {}).get('application', {}).get('notification_urls', [])
|
|
||||||
|
|
||||||
return {
|
|
||||||
'notification_urls': notification_urls,
|
|
||||||
}, 200
|
|
||||||
|
|
||||||
@auth.check_token
|
|
||||||
@expects_json(schema_create_notification_urls)
|
|
||||||
def post(self):
|
|
||||||
"""
|
|
||||||
@api {post} /api/v1/notifications Create Notification URLs
|
|
||||||
@apiDescription Add one or more notification URLs from the configuration
|
|
||||||
@apiExample {curl} Example usage:
|
|
||||||
curl http://localhost:5000/api/v1/notifications/batch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
|
|
||||||
@apiName CreateBatch
|
|
||||||
@apiGroup Notifications
|
|
||||||
@apiSuccess (201) {Object[]} notification_urls List of added notification URLs
|
|
||||||
@apiError (400) {String} Invalid input
|
|
||||||
"""
|
|
||||||
|
|
||||||
json_data = request.get_json()
|
|
||||||
notification_urls = json_data.get("notification_urls", [])
|
|
||||||
|
|
||||||
from wtforms import ValidationError
|
|
||||||
try:
|
|
||||||
validate_notification_urls(notification_urls)
|
|
||||||
except ValidationError as e:
|
|
||||||
return str(e), 400
|
|
||||||
|
|
||||||
added_urls = []
|
|
||||||
|
|
||||||
for url in notification_urls:
|
|
||||||
clean_url = url.strip()
|
|
||||||
added_url = self.datastore.add_notification_url(clean_url)
|
|
||||||
if added_url:
|
|
||||||
added_urls.append(added_url)
|
|
||||||
|
|
||||||
if not added_urls:
|
|
||||||
return "No valid notification URLs were added", 400
|
|
||||||
|
|
||||||
return {'notification_urls': added_urls}, 201
|
|
||||||
|
|
||||||
@auth.check_token
|
|
||||||
@expects_json(schema_create_notification_urls)
|
|
||||||
def put(self):
|
|
||||||
"""
|
|
||||||
@api {put} /api/v1/notifications Replace Notification URLs
|
|
||||||
@apiDescription Replace all notification URLs with the provided list (can be empty)
|
|
||||||
@apiExample {curl} Example usage:
|
|
||||||
curl -X PUT http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
|
|
||||||
@apiName Replace
|
|
||||||
@apiGroup Notifications
|
|
||||||
@apiSuccess (200) {Object[]} notification_urls List of current notification URLs
|
|
||||||
@apiError (400) {String} Invalid input
|
|
||||||
"""
|
|
||||||
json_data = request.get_json()
|
|
||||||
notification_urls = json_data.get("notification_urls", [])
|
|
||||||
|
|
||||||
from wtforms import ValidationError
|
|
||||||
try:
|
|
||||||
validate_notification_urls(notification_urls)
|
|
||||||
except ValidationError as e:
|
|
||||||
return str(e), 400
|
|
||||||
|
|
||||||
if not isinstance(notification_urls, list):
|
|
||||||
return "Invalid input format", 400
|
|
||||||
|
|
||||||
clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)]
|
|
||||||
self.datastore.data['settings']['application']['notification_urls'] = clean_urls
|
|
||||||
self.datastore.needs_write = True
|
|
||||||
|
|
||||||
return {'notification_urls': clean_urls}, 200
|
|
||||||
|
|
||||||
@auth.check_token
|
|
||||||
@expects_json(schema_delete_notification_urls)
|
|
||||||
def delete(self):
|
|
||||||
"""
|
|
||||||
@api {delete} /api/v1/notifications Delete Notification URLs
|
|
||||||
@apiDescription Deletes one or more notification URLs from the configuration
|
|
||||||
@apiExample {curl} Example usage:
|
|
||||||
curl http://localhost:5000/api/v1/notifications -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
|
|
||||||
@apiParam {String[]} notification_urls The notification URLs to delete.
|
|
||||||
@apiName Delete
|
|
||||||
@apiGroup Notifications
|
|
||||||
@apiSuccess (204) {String} OK Deleted
|
|
||||||
@apiError (400) {String} No matching notification URLs found.
|
|
||||||
"""
|
|
||||||
|
|
||||||
json_data = request.get_json()
|
|
||||||
urls_to_delete = json_data.get("notification_urls", [])
|
|
||||||
if not isinstance(urls_to_delete, list):
|
|
||||||
abort(400, message="Expected a list of notification URLs.")
|
|
||||||
|
|
||||||
notification_urls = self.datastore.data['settings']['application'].get('notification_urls', [])
|
|
||||||
deleted = []
|
|
||||||
|
|
||||||
for url in urls_to_delete:
|
|
||||||
clean_url = url.strip()
|
|
||||||
if clean_url in notification_urls:
|
|
||||||
notification_urls.remove(clean_url)
|
|
||||||
deleted.append(clean_url)
|
|
||||||
|
|
||||||
if not deleted:
|
|
||||||
abort(400, message="No matching notification URLs found.")
|
|
||||||
|
|
||||||
self.datastore.data['settings']['application']['notification_urls'] = notification_urls
|
|
||||||
self.datastore.needs_write = True
|
|
||||||
|
|
||||||
return 'OK', 204
|
|
||||||
|
|
||||||
def validate_notification_urls(notification_urls):
|
|
||||||
from changedetectionio.forms import ValidateAppRiseServers
|
|
||||||
validator = ValidateAppRiseServers()
|
|
||||||
class DummyForm: pass
|
|
||||||
dummy_form = DummyForm()
|
|
||||||
field = type("Field", (object,), {"data": notification_urls, "gettext": lambda self, x: x})()
|
|
||||||
validator(dummy_form, field)
|
|
||||||
@@ -3,9 +3,8 @@ from changedetectionio.strtobool import strtobool
|
|||||||
|
|
||||||
from flask_expects_json import expects_json
|
from flask_expects_json import expects_json
|
||||||
from changedetectionio import queuedWatchMetaData
|
from changedetectionio import queuedWatchMetaData
|
||||||
from changedetectionio import worker_handler
|
|
||||||
from flask_restful import abort, Resource
|
from flask_restful import abort, Resource
|
||||||
from flask import request, make_response, send_from_directory
|
from flask import request, make_response
|
||||||
import validators
|
import validators
|
||||||
from . import auth
|
from . import auth
|
||||||
import copy
|
import copy
|
||||||
@@ -48,7 +47,7 @@ class Watch(Resource):
|
|||||||
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
abort(404, message='No watch exists with the UUID of {}'.format(uuid))
|
||||||
|
|
||||||
if request.args.get('recheck'):
|
if request.args.get('recheck'):
|
||||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
return "OK", 200
|
return "OK", 200
|
||||||
if request.args.get('paused', '') == 'paused':
|
if request.args.get('paused', '') == 'paused':
|
||||||
self.datastore.data['watching'].get(uuid).pause()
|
self.datastore.data['watching'].get(uuid).pause()
|
||||||
@@ -191,47 +190,6 @@ class WatchSingleHistory(Resource):
|
|||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
class WatchFavicon(Resource):
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
# datastore is a black box dependency
|
|
||||||
self.datastore = kwargs['datastore']
|
|
||||||
|
|
||||||
@auth.check_token
|
|
||||||
def get(self, uuid):
|
|
||||||
"""
|
|
||||||
@api {get} /api/v1/watch/<string:uuid>/favicon Get Favicon for a watch
|
|
||||||
@apiDescription Requires watch `uuid`
|
|
||||||
@apiExample {curl} Example usage:
|
|
||||||
curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/favicon -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
|
||||||
@apiName Get latest Favicon
|
|
||||||
@apiGroup Watch History
|
|
||||||
@apiSuccess (200) {String} OK
|
|
||||||
@apiSuccess (404) {String} ERR Not found
|
|
||||||
"""
|
|
||||||
watch = self.datastore.data['watching'].get(uuid)
|
|
||||||
if not watch:
|
|
||||||
abort(404, message=f"No watch exists with the UUID of {uuid}")
|
|
||||||
|
|
||||||
favicon_filename = watch.get_favicon_filename()
|
|
||||||
if favicon_filename:
|
|
||||||
try:
|
|
||||||
import magic
|
|
||||||
mime = magic.from_file(
|
|
||||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
|
||||||
mime=True
|
|
||||||
)
|
|
||||||
except ImportError:
|
|
||||||
# Fallback, no python-magic
|
|
||||||
import mimetypes
|
|
||||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
|
||||||
|
|
||||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
|
||||||
response.headers['Content-type'] = mime
|
|
||||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
|
||||||
return response
|
|
||||||
|
|
||||||
abort(404, message=f'No Favicon available for {uuid}')
|
|
||||||
|
|
||||||
|
|
||||||
class CreateWatch(Resource):
|
class CreateWatch(Resource):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
@@ -278,7 +236,7 @@ class CreateWatch(Resource):
|
|||||||
|
|
||||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
|
||||||
if new_uuid:
|
if new_uuid:
|
||||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||||
return {'uuid': new_uuid}, 201
|
return {'uuid': new_uuid}, 201
|
||||||
else:
|
else:
|
||||||
return "Invalid or unsupported URL", 400
|
return "Invalid or unsupported URL", 400
|
||||||
@@ -333,7 +291,7 @@ class CreateWatch(Resource):
|
|||||||
|
|
||||||
if request.args.get('recheck_all'):
|
if request.args.get('recheck_all'):
|
||||||
for uuid in self.datastore.data['watching'].keys():
|
for uuid in self.datastore.data['watching'].keys():
|
||||||
worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
return {'status': "OK"}, 200
|
return {'status': "OK"}, 200
|
||||||
|
|
||||||
return list, 200
|
return list, 200
|
||||||
@@ -19,15 +19,8 @@ schema_create_tag['required'] = ['title']
|
|||||||
schema_update_tag = copy.deepcopy(schema_tag)
|
schema_update_tag = copy.deepcopy(schema_tag)
|
||||||
schema_update_tag['additionalProperties'] = False
|
schema_update_tag['additionalProperties'] = False
|
||||||
|
|
||||||
schema_notification_urls = copy.deepcopy(schema)
|
|
||||||
schema_create_notification_urls = copy.deepcopy(schema_notification_urls)
|
|
||||||
schema_create_notification_urls['required'] = ['notification_urls']
|
|
||||||
schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
|
|
||||||
schema_delete_notification_urls['required'] = ['notification_urls']
|
|
||||||
|
|
||||||
# Import all API resources
|
# Import all API resources
|
||||||
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch, WatchFavicon
|
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch
|
||||||
from .Tags import Tags, Tag
|
from .Tags import Tags, Tag
|
||||||
from .Import import Import
|
from .Import import Import
|
||||||
from .SystemInfo import SystemInfo
|
from .SystemInfo import SystemInfo
|
||||||
from .Notifications import Notifications
|
|
||||||
|
|||||||
@@ -1,7 +1,5 @@
|
|||||||
# Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API
|
# Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API
|
||||||
# Probably other ways to solve this when the backend switches to some ORM
|
# Probably other ways to solve this when the backend switches to some ORM
|
||||||
from changedetectionio.notification import valid_notification_formats
|
|
||||||
|
|
||||||
|
|
||||||
def build_time_between_check_json_schema():
|
def build_time_between_check_json_schema():
|
||||||
# Setup time between check schema
|
# Setup time between check schema
|
||||||
@@ -100,6 +98,8 @@ def build_watch_json_schema(d):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
from changedetectionio.notification import valid_notification_formats
|
||||||
|
|
||||||
schema['properties']['notification_format'] = {'type': 'string',
|
schema['properties']['notification_format'] = {'type': 'string',
|
||||||
'enum': list(valid_notification_formats.keys())
|
'enum': list(valid_notification_formats.keys())
|
||||||
}
|
}
|
||||||
|
|||||||
12
changedetectionio/apprise_asset.py
Normal file
12
changedetectionio/apprise_asset.py
Normal file
@@ -0,0 +1,12 @@
|
|||||||
|
from changedetectionio import apprise_plugin
|
||||||
|
import apprise
|
||||||
|
|
||||||
|
# Create our AppriseAsset and populate it with some of our new values:
|
||||||
|
# https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object
|
||||||
|
asset = apprise.AppriseAsset(
|
||||||
|
image_url_logo='https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
|
||||||
|
)
|
||||||
|
|
||||||
|
asset.app_id = "changedetection.io"
|
||||||
|
asset.app_desc = "ChangeDetection.io best and simplest website monitoring and change detection"
|
||||||
|
asset.app_url = "https://changedetection.io"
|
||||||
98
changedetectionio/apprise_plugin/__init__.py
Normal file
98
changedetectionio/apprise_plugin/__init__.py
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# include the decorator
|
||||||
|
from apprise.decorators import notify
|
||||||
|
from loguru import logger
|
||||||
|
from requests.structures import CaseInsensitiveDict
|
||||||
|
|
||||||
|
|
||||||
|
@notify(on="delete")
|
||||||
|
@notify(on="deletes")
|
||||||
|
@notify(on="get")
|
||||||
|
@notify(on="gets")
|
||||||
|
@notify(on="post")
|
||||||
|
@notify(on="posts")
|
||||||
|
@notify(on="put")
|
||||||
|
@notify(on="puts")
|
||||||
|
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
from urllib.parse import unquote_plus
|
||||||
|
from apprise.utils.parse import parse_url as apprise_parse_url
|
||||||
|
|
||||||
|
url = kwargs['meta'].get('url')
|
||||||
|
schema = kwargs['meta'].get('schema').lower().strip()
|
||||||
|
|
||||||
|
# Choose POST, GET etc from requests
|
||||||
|
method = re.sub(rf's$', '', schema)
|
||||||
|
requests_method = getattr(requests, method)
|
||||||
|
|
||||||
|
params = CaseInsensitiveDict({}) # Added to requests
|
||||||
|
auth = None
|
||||||
|
has_error = False
|
||||||
|
|
||||||
|
# Convert /foobar?+some-header=hello to proper header dictionary
|
||||||
|
results = apprise_parse_url(url)
|
||||||
|
|
||||||
|
# Add our headers that the user can potentially over-ride if they wish
|
||||||
|
# to to our returned result set and tidy entries by unquoting them
|
||||||
|
headers = CaseInsensitiveDict({unquote_plus(x): unquote_plus(y)
|
||||||
|
for x, y in results['qsd+'].items()})
|
||||||
|
|
||||||
|
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
|
||||||
|
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
|
||||||
|
# but here we are making straight requests, so we need todo convert this against apprise's logic
|
||||||
|
for k, v in results['qsd'].items():
|
||||||
|
if not k.strip('+-') in results['qsd+'].keys():
|
||||||
|
params[unquote_plus(k)] = unquote_plus(v)
|
||||||
|
|
||||||
|
# Determine Authentication
|
||||||
|
auth = ''
|
||||||
|
if results.get('user') and results.get('password'):
|
||||||
|
auth = (unquote_plus(results.get('user')), unquote_plus(results.get('user')))
|
||||||
|
elif results.get('user'):
|
||||||
|
auth = (unquote_plus(results.get('user')))
|
||||||
|
|
||||||
|
# If it smells like it could be JSON and no content-type was already set, offer a default content type.
|
||||||
|
if body and '{' in body[:100] and not headers.get('Content-Type'):
|
||||||
|
json_header = 'application/json; charset=utf-8'
|
||||||
|
try:
|
||||||
|
# Try if it's JSON
|
||||||
|
json.loads(body)
|
||||||
|
headers['Content-Type'] = json_header
|
||||||
|
except ValueError as e:
|
||||||
|
logger.warning(f"Could not automatically add '{json_header}' header to the notification because the document failed to parse as JSON: {e}")
|
||||||
|
pass
|
||||||
|
|
||||||
|
# POSTS -> HTTPS etc
|
||||||
|
if schema.lower().endswith('s'):
|
||||||
|
url = re.sub(rf'^{schema}', 'https', results.get('url'))
|
||||||
|
else:
|
||||||
|
url = re.sub(rf'^{schema}', 'http', results.get('url'))
|
||||||
|
|
||||||
|
status_str = ''
|
||||||
|
try:
|
||||||
|
r = requests_method(url,
|
||||||
|
auth=auth,
|
||||||
|
data=body.encode('utf-8') if type(body) is str else body,
|
||||||
|
headers=headers,
|
||||||
|
params=params
|
||||||
|
)
|
||||||
|
|
||||||
|
if not (200 <= r.status_code < 300):
|
||||||
|
status_str = f"Error sending '{method.upper()}' request to {url} - Status: {r.status_code}: '{r.reason}'"
|
||||||
|
logger.error(status_str)
|
||||||
|
has_error = True
|
||||||
|
else:
|
||||||
|
logger.info(f"Sent '{method.upper()}' request to {url}")
|
||||||
|
has_error = False
|
||||||
|
|
||||||
|
except requests.RequestException as e:
|
||||||
|
status_str = f"Error sending '{method.upper()}' request to {url} - {str(e)}"
|
||||||
|
logger.error(status_str)
|
||||||
|
has_error = True
|
||||||
|
|
||||||
|
if has_error:
|
||||||
|
raise TypeError(status_str)
|
||||||
|
|
||||||
|
return True
|
||||||
@@ -1,466 +0,0 @@
|
|||||||
from .processors.exceptions import ProcessorException
|
|
||||||
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
|
|
||||||
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
|
|
||||||
from changedetectionio import html_tools
|
|
||||||
from changedetectionio.flask_app import watch_check_update
|
|
||||||
|
|
||||||
import asyncio
|
|
||||||
import importlib
|
|
||||||
import os
|
|
||||||
import queue
|
|
||||||
import time
|
|
||||||
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
# Async version of update_worker
|
|
||||||
# Processes jobs from AsyncSignalPriorityQueue instead of threaded queue
|
|
||||||
|
|
||||||
async def async_update_worker(worker_id, q, notification_q, app, datastore):
|
|
||||||
"""
|
|
||||||
Async worker function that processes watch check jobs from the queue.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
worker_id: Unique identifier for this worker
|
|
||||||
q: AsyncSignalPriorityQueue containing jobs to process
|
|
||||||
notification_q: Standard queue for notifications
|
|
||||||
app: Flask application instance
|
|
||||||
datastore: Application datastore
|
|
||||||
"""
|
|
||||||
# Set a descriptive name for this task
|
|
||||||
task = asyncio.current_task()
|
|
||||||
if task:
|
|
||||||
task.set_name(f"async-worker-{worker_id}")
|
|
||||||
|
|
||||||
logger.info(f"Starting async worker {worker_id}")
|
|
||||||
|
|
||||||
while not app.config.exit.is_set():
|
|
||||||
update_handler = None
|
|
||||||
watch = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Use native janus async interface - no threads needed!
|
|
||||||
queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0)
|
|
||||||
|
|
||||||
except asyncio.TimeoutError:
|
|
||||||
# No jobs available, continue loop
|
|
||||||
continue
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}")
|
|
||||||
|
|
||||||
# Log queue health for debugging
|
|
||||||
try:
|
|
||||||
queue_size = q.qsize()
|
|
||||||
is_empty = q.empty()
|
|
||||||
logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}")
|
|
||||||
except Exception as health_e:
|
|
||||||
logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}")
|
|
||||||
|
|
||||||
await asyncio.sleep(0.1)
|
|
||||||
continue
|
|
||||||
|
|
||||||
uuid = queued_item_data.item.get('uuid')
|
|
||||||
fetch_start_time = round(time.time())
|
|
||||||
|
|
||||||
# Mark this UUID as being processed
|
|
||||||
from changedetectionio import worker_handler
|
|
||||||
worker_handler.set_uuid_processing(uuid, processing=True)
|
|
||||||
|
|
||||||
try:
|
|
||||||
if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
|
|
||||||
changed_detected = False
|
|
||||||
contents = b''
|
|
||||||
process_changedetection_results = True
|
|
||||||
update_obj = {}
|
|
||||||
|
|
||||||
# Clear last errors
|
|
||||||
datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
|
|
||||||
datastore.data['watching'][uuid]['last_checked'] = fetch_start_time
|
|
||||||
|
|
||||||
watch = datastore.data['watching'].get(uuid)
|
|
||||||
|
|
||||||
logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
watch_check_update.send(watch_uuid=uuid)
|
|
||||||
|
|
||||||
# Processor is what we are using for detecting the "Change"
|
|
||||||
processor = watch.get('processor', 'text_json_diff')
|
|
||||||
|
|
||||||
# Init a new 'difference_detection_processor'
|
|
||||||
processor_module_name = f"changedetectionio.processors.{processor}.processor"
|
|
||||||
try:
|
|
||||||
processor_module = importlib.import_module(processor_module_name)
|
|
||||||
except ModuleNotFoundError as e:
|
|
||||||
print(f"Processor module '{processor}' not found.")
|
|
||||||
raise e
|
|
||||||
|
|
||||||
update_handler = processor_module.perform_site_check(datastore=datastore,
|
|
||||||
watch_uuid=uuid)
|
|
||||||
|
|
||||||
# All fetchers are now async, so call directly
|
|
||||||
await update_handler.call_browser()
|
|
||||||
|
|
||||||
# Run change detection (this is synchronous)
|
|
||||||
changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)
|
|
||||||
|
|
||||||
except PermissionError as e:
|
|
||||||
logger.critical(f"File permission error updating file, watch: {uuid}")
|
|
||||||
logger.critical(str(e))
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except ProcessorException as e:
|
|
||||||
if e.screenshot:
|
|
||||||
watch.save_screenshot(screenshot=e.screenshot)
|
|
||||||
if e.xpath_data:
|
|
||||||
watch.save_xpath_data(data=e.xpath_data)
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.ReplyWithContentButNoText as e:
|
|
||||||
extra_help = ""
|
|
||||||
if e.has_filters:
|
|
||||||
has_img = html_tools.include_filters(include_filters='img',
|
|
||||||
html_content=e.html_content)
|
|
||||||
if has_img:
|
|
||||||
extra_help = ", it's possible that the filters you have give an empty result or contain only an image."
|
|
||||||
else:
|
|
||||||
extra_help = ", it's possible that the filters were found, but contained no usable text."
|
|
||||||
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={
|
|
||||||
'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}"
|
|
||||||
})
|
|
||||||
|
|
||||||
if e.screenshot:
|
|
||||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
|
||||||
|
|
||||||
if e.xpath_data:
|
|
||||||
watch.save_xpath_data(data=e.xpath_data)
|
|
||||||
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
|
|
||||||
if e.status_code == 403:
|
|
||||||
err_text = "Error - 403 (Access denied) received"
|
|
||||||
elif e.status_code == 404:
|
|
||||||
err_text = "Error - 404 (Page not found) received"
|
|
||||||
elif e.status_code == 407:
|
|
||||||
err_text = "Error - 407 (Proxy authentication required) received, did you need a username and password for the proxy?"
|
|
||||||
elif e.status_code == 500:
|
|
||||||
err_text = "Error - 500 (Internal server error) received from the web site"
|
|
||||||
else:
|
|
||||||
extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else ''
|
|
||||||
err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}"
|
|
||||||
|
|
||||||
if e.screenshot:
|
|
||||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
|
||||||
if e.xpath_data:
|
|
||||||
watch.save_xpath_data(data=e.xpath_data, as_error=True)
|
|
||||||
if e.page_text:
|
|
||||||
watch.save_error_text(contents=e.page_text)
|
|
||||||
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except FilterNotFoundInResponse as e:
|
|
||||||
if not datastore.data['watching'].get(uuid):
|
|
||||||
continue
|
|
||||||
|
|
||||||
err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
|
|
||||||
|
|
||||||
# Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
|
|
||||||
if e.screenshot:
|
|
||||||
watch.save_screenshot(screenshot=e.screenshot)
|
|
||||||
|
|
||||||
if e.xpath_data:
|
|
||||||
watch.save_xpath_data(data=e.xpath_data)
|
|
||||||
|
|
||||||
# Only when enabled, send the notification
|
|
||||||
if watch.get('filter_failure_notification_send', False):
|
|
||||||
c = watch.get('consecutive_filter_failures', 0)
|
|
||||||
c += 1
|
|
||||||
# Send notification if we reached the threshold?
|
|
||||||
threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
|
|
||||||
logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
|
|
||||||
if c >= threshold:
|
|
||||||
if not watch.get('notification_muted'):
|
|
||||||
logger.debug(f"Sending filter failed notification for {uuid}")
|
|
||||||
await send_filter_failure_notification(uuid, notification_q, datastore)
|
|
||||||
c = 0
|
|
||||||
logger.debug(f"Reset filter failure count back to zero")
|
|
||||||
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
|
|
||||||
else:
|
|
||||||
logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")
|
|
||||||
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
|
|
||||||
# Yes fine, so nothing todo, don't continue to process.
|
|
||||||
process_changedetection_results = False
|
|
||||||
changed_detected = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.BrowserConnectError as e:
|
|
||||||
datastore.update_watch(uuid=uuid,
|
|
||||||
update_obj={'last_error': e.msg})
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.BrowserFetchTimedOut as e:
|
|
||||||
datastore.update_watch(uuid=uuid,
|
|
||||||
update_obj={'last_error': e.msg})
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.BrowserStepsStepException as e:
|
|
||||||
if not datastore.data['watching'].get(uuid):
|
|
||||||
continue
|
|
||||||
|
|
||||||
error_step = e.step_n + 1
|
|
||||||
from playwright._impl._errors import TimeoutError, Error
|
|
||||||
|
|
||||||
# Generally enough info for TimeoutError (couldnt locate the element after default seconds)
|
|
||||||
err_text = f"Browser step at position {error_step} could not run, check the watch, add a delay if necessary, view Browser Steps to see screenshot at that step."
|
|
||||||
|
|
||||||
if e.original_e.name == "TimeoutError":
|
|
||||||
# Just the first line is enough, the rest is the stack trace
|
|
||||||
err_text += " Could not find the target."
|
|
||||||
else:
|
|
||||||
# Other Error, more info is good.
|
|
||||||
err_text += " " + str(e.original_e).splitlines()[0]
|
|
||||||
|
|
||||||
logger.debug(f"BrowserSteps exception at step {error_step} {str(e.original_e)}")
|
|
||||||
|
|
||||||
datastore.update_watch(uuid=uuid,
|
|
||||||
update_obj={'last_error': err_text,
|
|
||||||
'browser_steps_last_error_step': error_step})
|
|
||||||
|
|
||||||
if watch.get('filter_failure_notification_send', False):
|
|
||||||
c = watch.get('consecutive_filter_failures', 0)
|
|
||||||
c += 1
|
|
||||||
# Send notification if we reached the threshold?
|
|
||||||
threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
|
|
||||||
logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
|
|
||||||
if threshold > 0 and c >= threshold:
|
|
||||||
if not watch.get('notification_muted'):
|
|
||||||
await send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n, notification_q=notification_q, datastore=datastore)
|
|
||||||
c = 0
|
|
||||||
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
|
|
||||||
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.EmptyReply as e:
|
|
||||||
# Some kind of custom to-str handler in the exception handler that does this?
|
|
||||||
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
|
||||||
'last_check_status': e.status_code})
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.ScreenshotUnavailable as e:
|
|
||||||
err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
|
||||||
'last_check_status': e.status_code})
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.JSActionExceptions as e:
|
|
||||||
err_text = "Error running JS Actions - Page request - "+e.message
|
|
||||||
if e.screenshot:
|
|
||||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
|
||||||
'last_check_status': e.status_code})
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.PageUnloadable as e:
|
|
||||||
err_text = "Page request from server didnt respond correctly"
|
|
||||||
if e.message:
|
|
||||||
err_text = "{} - {}".format(err_text, e.message)
|
|
||||||
|
|
||||||
if e.screenshot:
|
|
||||||
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
|
|
||||||
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
|
||||||
'last_check_status': e.status_code,
|
|
||||||
'has_ldjson_price_data': None})
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
|
|
||||||
err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
|
|
||||||
process_changedetection_results = False
|
|
||||||
logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Worker {worker_id} exception processing watch UUID: {uuid}")
|
|
||||||
logger.error(str(e))
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
|
|
||||||
process_changedetection_results = False
|
|
||||||
|
|
||||||
else:
|
|
||||||
if not datastore.data['watching'].get(uuid):
|
|
||||||
continue
|
|
||||||
|
|
||||||
update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()
|
|
||||||
|
|
||||||
if not watch.get('ignore_status_codes'):
|
|
||||||
update_obj['consecutive_filter_failures'] = 0
|
|
||||||
|
|
||||||
update_obj['last_error'] = False
|
|
||||||
cleanup_error_artifacts(uuid, datastore)
|
|
||||||
|
|
||||||
if not datastore.data['watching'].get(uuid):
|
|
||||||
continue
|
|
||||||
|
|
||||||
if process_changedetection_results:
|
|
||||||
# Extract title if needed
|
|
||||||
if datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
|
|
||||||
if not watch['title'] or not len(watch['title']):
|
|
||||||
try:
|
|
||||||
update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content)
|
|
||||||
logger.info(f"UUID: {uuid} Extract <title> updated title to '{update_obj['title']}")
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.")
|
|
||||||
|
|
||||||
try:
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj=update_obj)
|
|
||||||
|
|
||||||
if changed_detected or not watch.history_n:
|
|
||||||
if update_handler.screenshot:
|
|
||||||
watch.save_screenshot(screenshot=update_handler.screenshot)
|
|
||||||
|
|
||||||
if update_handler.xpath_data:
|
|
||||||
watch.save_xpath_data(data=update_handler.xpath_data)
|
|
||||||
|
|
||||||
# Ensure unique timestamp for history
|
|
||||||
if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
|
|
||||||
logger.warning(f"Timestamp {fetch_start_time} already exists, waiting 1 seconds")
|
|
||||||
fetch_start_time += 1
|
|
||||||
await asyncio.sleep(1)
|
|
||||||
|
|
||||||
watch.save_history_text(contents=contents,
|
|
||||||
timestamp=int(fetch_start_time),
|
|
||||||
snapshot_id=update_obj.get('previous_md5', 'none'))
|
|
||||||
|
|
||||||
empty_pages_are_a_change = datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
|
||||||
if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
|
|
||||||
watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))
|
|
||||||
|
|
||||||
# Send notifications on second+ check
|
|
||||||
if watch.history_n >= 2:
|
|
||||||
logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
|
|
||||||
if not watch.get('notification_muted'):
|
|
||||||
await send_content_changed_notification(uuid, notification_q, datastore)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"Worker {worker_id} exception in process_changedetection_results")
|
|
||||||
logger.critical(str(e))
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
|
||||||
|
|
||||||
# Always record attempt count
|
|
||||||
count = watch.get('check_count', 0) + 1
|
|
||||||
|
|
||||||
# Record server header
|
|
||||||
try:
|
|
||||||
server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header})
|
|
||||||
except Exception as e:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Store favicon if necessary
|
|
||||||
if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
|
|
||||||
watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
|
|
||||||
favicon_base_64=update_handler.fetcher.favicon_blob.get('base64')
|
|
||||||
)
|
|
||||||
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
|
|
||||||
'check_count': count})
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
|
|
||||||
logger.error(f"Worker {worker_id} traceback:", exc_info=True)
|
|
||||||
|
|
||||||
# Also update the watch with error information
|
|
||||||
if datastore and uuid in datastore.data['watching']:
|
|
||||||
datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Worker error: {str(e)}"})
|
|
||||||
|
|
||||||
finally:
|
|
||||||
# Always cleanup - this runs whether there was an exception or not
|
|
||||||
if uuid:
|
|
||||||
try:
|
|
||||||
# Mark UUID as no longer being processed
|
|
||||||
worker_handler.set_uuid_processing(uuid, processing=False)
|
|
||||||
|
|
||||||
# Send completion signal
|
|
||||||
if watch:
|
|
||||||
#logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
|
|
||||||
watch_check_update.send(watch_uuid=watch['uuid'])
|
|
||||||
|
|
||||||
update_handler = None
|
|
||||||
logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
|
|
||||||
except Exception as cleanup_error:
|
|
||||||
logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")
|
|
||||||
|
|
||||||
# Brief pause before continuing to avoid tight error loops (only on error)
|
|
||||||
if 'e' in locals():
|
|
||||||
await asyncio.sleep(1.0)
|
|
||||||
else:
|
|
||||||
# Small yield for normal completion
|
|
||||||
await asyncio.sleep(0.01)
|
|
||||||
|
|
||||||
# Check if we should exit
|
|
||||||
if app.config.exit.is_set():
|
|
||||||
break
|
|
||||||
|
|
||||||
# Check if we're in pytest environment - if so, be more gentle with logging
|
|
||||||
import sys
|
|
||||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
|
||||||
|
|
||||||
if not in_pytest:
|
|
||||||
logger.info(f"Worker {worker_id} shutting down")
|
|
||||||
|
|
||||||
|
|
||||||
def cleanup_error_artifacts(uuid, datastore):
|
|
||||||
"""Helper function to clean up error artifacts"""
|
|
||||||
cleanup_files = ["last-error-screenshot.png", "last-error.txt"]
|
|
||||||
for f in cleanup_files:
|
|
||||||
full_path = os.path.join(datastore.datastore_path, uuid, f)
|
|
||||||
if os.path.isfile(full_path):
|
|
||||||
os.unlink(full_path)
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
async def send_content_changed_notification(watch_uuid, notification_q, datastore):
|
|
||||||
"""Helper function to queue notifications using the new notification service"""
|
|
||||||
try:
|
|
||||||
from changedetectionio.notification_service import create_notification_service
|
|
||||||
|
|
||||||
# Create notification service instance
|
|
||||||
notification_service = create_notification_service(datastore, notification_q)
|
|
||||||
|
|
||||||
notification_service.send_content_changed_notification(watch_uuid)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error sending notification for {watch_uuid}: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def send_filter_failure_notification(watch_uuid, notification_q, datastore):
|
|
||||||
"""Helper function to send filter failure notifications using the new notification service"""
|
|
||||||
try:
|
|
||||||
from changedetectionio.notification_service import create_notification_service
|
|
||||||
|
|
||||||
# Create notification service instance
|
|
||||||
notification_service = create_notification_service(datastore, notification_q)
|
|
||||||
|
|
||||||
notification_service.send_filter_failure_notification(watch_uuid)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error sending filter failure notification for {watch_uuid}: {e}")
|
|
||||||
|
|
||||||
|
|
||||||
async def send_step_failure_notification(watch_uuid, step_n, notification_q, datastore):
|
|
||||||
"""Helper function to send step failure notifications using the new notification service"""
|
|
||||||
try:
|
|
||||||
from changedetectionio.notification_service import create_notification_service
|
|
||||||
|
|
||||||
# Create notification service instance
|
|
||||||
notification_service = create_notification_service(datastore, notification_q)
|
|
||||||
|
|
||||||
notification_service.send_step_failure_notification(watch_uuid, step_n)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error sending step failure notification for {watch_uuid}: {e}")
|
|
||||||
@@ -20,7 +20,10 @@ def login_optionally_required(func):
|
|||||||
has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
|
has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
|
||||||
|
|
||||||
# Permitted
|
# Permitted
|
||||||
if request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'):
|
if request.endpoint and 'static_content' in request.endpoint and request.view_args and request.view_args.get('group') == 'styles':
|
||||||
|
return func(*args, **kwargs)
|
||||||
|
# Permitted
|
||||||
|
elif request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'):
|
||||||
return func(*args, **kwargs)
|
return func(*args, **kwargs)
|
||||||
elif request.method in flask_login.config.EXEMPT_METHODS:
|
elif request.method in flask_login.config.EXEMPT_METHODS:
|
||||||
return func(*args, **kwargs)
|
return func(*args, **kwargs)
|
||||||
|
|||||||
@@ -23,55 +23,47 @@ from loguru import logger
|
|||||||
browsersteps_sessions = {}
|
browsersteps_sessions = {}
|
||||||
io_interface_context = None
|
io_interface_context = None
|
||||||
import json
|
import json
|
||||||
|
import base64
|
||||||
import hashlib
|
import hashlib
|
||||||
from flask import Response
|
from flask import Response
|
||||||
import asyncio
|
|
||||||
import threading
|
|
||||||
|
|
||||||
def run_async_in_browser_loop(coro):
|
|
||||||
"""Run async coroutine using the existing async worker event loop"""
|
|
||||||
from changedetectionio import worker_handler
|
|
||||||
|
|
||||||
# Use the existing async worker event loop instead of creating a new one
|
|
||||||
if worker_handler.USE_ASYNC_WORKERS and worker_handler.async_loop and not worker_handler.async_loop.is_closed():
|
|
||||||
logger.debug("Browser steps using existing async worker event loop")
|
|
||||||
future = asyncio.run_coroutine_threadsafe(coro, worker_handler.async_loop)
|
|
||||||
return future.result()
|
|
||||||
else:
|
|
||||||
# Fallback: create a new event loop (for sync workers or if async loop not available)
|
|
||||||
logger.debug("Browser steps creating temporary event loop")
|
|
||||||
loop = asyncio.new_event_loop()
|
|
||||||
asyncio.set_event_loop(loop)
|
|
||||||
try:
|
|
||||||
return loop.run_until_complete(coro)
|
|
||||||
finally:
|
|
||||||
loop.close()
|
|
||||||
|
|
||||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||||
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
||||||
|
|
||||||
async def start_browsersteps_session(watch_uuid):
|
def start_browsersteps_session(watch_uuid):
|
||||||
|
from . import nonContext
|
||||||
from . import browser_steps
|
from . import browser_steps
|
||||||
import time
|
import time
|
||||||
from playwright.async_api import async_playwright
|
global browsersteps_sessions
|
||||||
|
global io_interface_context
|
||||||
|
|
||||||
|
|
||||||
# We keep the playwright session open for many minutes
|
# We keep the playwright session open for many minutes
|
||||||
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||||
|
|
||||||
browsersteps_start_session = {'start_time': time.time()}
|
browsersteps_start_session = {'start_time': time.time()}
|
||||||
|
|
||||||
# Create a new async playwright instance for browser steps
|
# You can only have one of these running
|
||||||
playwright_instance = async_playwright()
|
# This should be very fine to leave running for the life of the application
|
||||||
playwright_context = await playwright_instance.start()
|
# @idea - Make it global so the pool of watch fetchers can use it also
|
||||||
|
if not io_interface_context:
|
||||||
|
io_interface_context = nonContext.c_sync_playwright()
|
||||||
|
# Start the Playwright context, which is actually a nodejs sub-process and communicates over STDIN/STDOUT pipes
|
||||||
|
io_interface_context = io_interface_context.start()
|
||||||
|
|
||||||
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
keepalive_ms = ((keepalive_seconds + 3) * 1000)
|
||||||
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
|
||||||
a = "?" if not '?' in base_url else '&'
|
a = "?" if not '?' in base_url else '&'
|
||||||
base_url += a + f"timeout={keepalive_ms}"
|
base_url += a + f"timeout={keepalive_ms}"
|
||||||
|
|
||||||
browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
|
try:
|
||||||
browsersteps_start_session['browser'] = browser
|
browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
|
||||||
browsersteps_start_session['playwright_context'] = playwright_context
|
except Exception as e:
|
||||||
|
if 'ECONNREFUSED' in str(e):
|
||||||
|
return make_response('Unable to start the Playwright Browser session, is it running?', 401)
|
||||||
|
else:
|
||||||
|
# Other errors, bad URL syntax, bad reply etc
|
||||||
|
return make_response(str(e), 401)
|
||||||
|
|
||||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||||
proxy = None
|
proxy = None
|
||||||
@@ -93,20 +85,15 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
|
||||||
|
|
||||||
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
# Tell Playwright to connect to Chrome and setup a new session via our stepper interface
|
||||||
browserstepper = browser_steps.browsersteps_live_ui(
|
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
|
||||||
playwright_browser=browser,
|
playwright_browser=browsersteps_start_session['browser'],
|
||||||
proxy=proxy,
|
proxy=proxy,
|
||||||
start_url=datastore.data['watching'][watch_uuid].link,
|
start_url=datastore.data['watching'][watch_uuid].link,
|
||||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||||
)
|
)
|
||||||
|
|
||||||
# Initialize the async connection
|
|
||||||
await browserstepper.connect(proxy=proxy)
|
|
||||||
|
|
||||||
browsersteps_start_session['browserstepper'] = browserstepper
|
|
||||||
|
|
||||||
# For test
|
# For test
|
||||||
#await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
#browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
|
||||||
|
|
||||||
return browsersteps_start_session
|
return browsersteps_start_session
|
||||||
|
|
||||||
@@ -115,8 +102,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
@browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
|
||||||
def browsersteps_start_session():
|
def browsersteps_start_session():
|
||||||
# A new session was requested, return sessionID
|
# A new session was requested, return sessionID
|
||||||
import asyncio
|
|
||||||
import uuid
|
import uuid
|
||||||
|
global browsersteps_sessions
|
||||||
|
|
||||||
browsersteps_session_id = str(uuid.uuid4())
|
browsersteps_session_id = str(uuid.uuid4())
|
||||||
watch_uuid = request.args.get('uuid')
|
watch_uuid = request.args.get('uuid')
|
||||||
|
|
||||||
@@ -125,19 +114,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
logger.debug("Starting connection with playwright")
|
logger.debug("Starting connection with playwright")
|
||||||
logger.debug("browser_steps.py connecting")
|
logger.debug("browser_steps.py connecting")
|
||||||
|
browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
|
||||||
try:
|
|
||||||
# Run the async function in the dedicated browser steps event loop
|
|
||||||
browsersteps_sessions[browsersteps_session_id] = run_async_in_browser_loop(
|
|
||||||
start_browsersteps_session(watch_uuid)
|
|
||||||
)
|
|
||||||
except Exception as e:
|
|
||||||
if 'ECONNREFUSED' in str(e):
|
|
||||||
return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
|
|
||||||
else:
|
|
||||||
# Other errors, bad URL syntax, bad reply etc
|
|
||||||
return make_response(str(e), 401)
|
|
||||||
|
|
||||||
logger.debug("Starting connection with playwright - done")
|
logger.debug("Starting connection with playwright - done")
|
||||||
return {'browsersteps_session_id': browsersteps_session_id}
|
return {'browsersteps_session_id': browsersteps_session_id}
|
||||||
|
|
||||||
@@ -172,6 +149,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
def browsersteps_ui_update():
|
def browsersteps_ui_update():
|
||||||
import base64
|
import base64
|
||||||
import playwright._impl._errors
|
import playwright._impl._errors
|
||||||
|
global browsersteps_sessions
|
||||||
from changedetectionio.blueprint.browser_steps import browser_steps
|
from changedetectionio.blueprint.browser_steps import browser_steps
|
||||||
|
|
||||||
remaining =0
|
remaining =0
|
||||||
@@ -194,15 +172,12 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
step_optional_value = request.form.get('optional_value')
|
step_optional_value = request.form.get('optional_value')
|
||||||
is_last_step = strtobool(request.form.get('is_last_step'))
|
is_last_step = strtobool(request.form.get('is_last_step'))
|
||||||
|
|
||||||
|
# @todo try.. accept.. nice errors not popups..
|
||||||
try:
|
try:
|
||||||
# Run the async call_action method in the dedicated browser steps event loop
|
|
||||||
run_async_in_browser_loop(
|
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
|
||||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
|
selector=step_selector,
|
||||||
action_name=step_operation,
|
optional_value=step_optional_value)
|
||||||
selector=step_selector,
|
|
||||||
optional_value=step_optional_value
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
|
logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
|
||||||
@@ -216,11 +191,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
# Screenshots and other info only needed on requesting a step (POST)
|
# Screenshots and other info only needed on requesting a step (POST)
|
||||||
try:
|
try:
|
||||||
# Run the async get_current_state method in the dedicated browser steps event loop
|
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||||
(screenshot, xpath_data) = run_async_in_browser_loop(
|
|
||||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
|
||||||
)
|
|
||||||
|
|
||||||
if is_last_step:
|
if is_last_step:
|
||||||
watch = datastore.data['watching'].get(uuid)
|
watch = datastore.data['watching'].get(uuid)
|
||||||
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
||||||
@@ -228,10 +199,13 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
watch.save_screenshot(screenshot=screenshot)
|
watch.save_screenshot(screenshot=screenshot)
|
||||||
watch.save_xpath_data(data=xpath_data)
|
watch.save_xpath_data(data=xpath_data)
|
||||||
|
|
||||||
|
except playwright._impl._api_types.Error as e:
|
||||||
|
return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
return make_response(f"Error fetching screenshot and element data - {str(e)}", 401)
|
return make_response("Error fetching screenshot and element data - " + str(e), 401)
|
||||||
|
|
||||||
# SEND THIS BACK TO THE BROWSER
|
# SEND THIS BACK TO THE BROWSER
|
||||||
|
|
||||||
output = {
|
output = {
|
||||||
"screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
|
"screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
|
||||||
"xpath_data": xpath_data,
|
"xpath_data": xpath_data,
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import re
|
|||||||
from random import randint
|
from random import randint
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
|
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
|
||||||
from changedetectionio.content_fetchers.base import manage_user_agent
|
from changedetectionio.content_fetchers.base import manage_user_agent
|
||||||
from changedetectionio.safe_jinja import render as jinja_render
|
from changedetectionio.safe_jinja import render as jinja_render
|
||||||
|
|
||||||
@@ -35,7 +35,6 @@ browser_step_ui_config = {'Choose one': '0 0',
|
|||||||
'Make all child elements visible': '1 0',
|
'Make all child elements visible': '1 0',
|
||||||
'Press Enter': '0 0',
|
'Press Enter': '0 0',
|
||||||
'Select by label': '1 1',
|
'Select by label': '1 1',
|
||||||
'<select> by option text': '1 1',
|
|
||||||
'Scroll down': '0 0',
|
'Scroll down': '0 0',
|
||||||
'Uncheck checkbox': '1 0',
|
'Uncheck checkbox': '1 0',
|
||||||
'Wait for seconds': '0 1',
|
'Wait for seconds': '0 1',
|
||||||
@@ -55,17 +54,14 @@ browser_step_ui_config = {'Choose one': '0 0',
|
|||||||
class steppable_browser_interface():
|
class steppable_browser_interface():
|
||||||
page = None
|
page = None
|
||||||
start_url = None
|
start_url = None
|
||||||
|
|
||||||
action_timeout = 10 * 1000
|
action_timeout = 10 * 1000
|
||||||
|
|
||||||
def __init__(self, start_url):
|
def __init__(self, start_url):
|
||||||
self.start_url = start_url
|
self.start_url = start_url
|
||||||
|
|
||||||
# Convert and perform "Click Button" for example
|
# Convert and perform "Click Button" for example
|
||||||
async def call_action(self, action_name, selector=None, optional_value=None):
|
def call_action(self, action_name, selector=None, optional_value=None):
|
||||||
if self.page is None:
|
|
||||||
logger.warning("Cannot call action on None page object")
|
|
||||||
return
|
|
||||||
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower())
|
call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower())
|
||||||
if call_action_name == 'choose_one':
|
if call_action_name == 'choose_one':
|
||||||
@@ -76,216 +72,136 @@ class steppable_browser_interface():
|
|||||||
if selector and selector.startswith('/') and not selector.startswith('//'):
|
if selector and selector.startswith('/') and not selector.startswith('//'):
|
||||||
selector = "xpath=" + selector
|
selector = "xpath=" + selector
|
||||||
|
|
||||||
# Check if action handler exists
|
|
||||||
if not hasattr(self, "action_" + call_action_name):
|
|
||||||
logger.warning(f"Action handler for '{call_action_name}' not found")
|
|
||||||
return
|
|
||||||
|
|
||||||
action_handler = getattr(self, "action_" + call_action_name)
|
action_handler = getattr(self, "action_" + call_action_name)
|
||||||
|
|
||||||
# Support for Jinja2 variables in the value and selector
|
# Support for Jinja2 variables in the value and selector
|
||||||
|
|
||||||
if selector and ('{%' in selector or '{{' in selector):
|
if selector and ('{%' in selector or '{{' in selector):
|
||||||
selector = jinja_render(template_str=selector)
|
selector = jinja_render(template_str=selector)
|
||||||
|
|
||||||
if optional_value and ('{%' in optional_value or '{{' in optional_value):
|
if optional_value and ('{%' in optional_value or '{{' in optional_value):
|
||||||
optional_value = jinja_render(template_str=optional_value)
|
optional_value = jinja_render(template_str=optional_value)
|
||||||
|
|
||||||
# Trigger click and cautiously handle potential navigation
|
action_handler(selector, optional_value)
|
||||||
# This means the page redirects/reloads/changes JS etc etc
|
self.page.wait_for_timeout(1.5 * 1000)
|
||||||
if call_action_name.startswith('click_'):
|
|
||||||
try:
|
|
||||||
# Set up navigation expectation before the click (like sync version)
|
|
||||||
async with self.page.expect_event("framenavigated", timeout=3000) as navigation_info:
|
|
||||||
await action_handler(selector, optional_value)
|
|
||||||
|
|
||||||
# Check if navigation actually occurred
|
|
||||||
try:
|
|
||||||
await navigation_info.value # This waits for the navigation promise
|
|
||||||
logger.debug(f"Navigation occurred on {call_action_name}.")
|
|
||||||
except Exception:
|
|
||||||
logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
# If expect_event itself times out, that means no navigation occurred - that's OK
|
|
||||||
if "framenavigated" in str(e) and "exceeded" in str(e):
|
|
||||||
logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.")
|
|
||||||
else:
|
|
||||||
raise e
|
|
||||||
else:
|
|
||||||
# Some other action that probably a navigation is not expected
|
|
||||||
await action_handler(selector, optional_value)
|
|
||||||
|
|
||||||
|
|
||||||
# Safely wait for timeout
|
|
||||||
await self.page.wait_for_timeout(1.5 * 1000)
|
|
||||||
logger.debug(f"Call action done in {time.time()-now:.2f}s")
|
logger.debug(f"Call action done in {time.time()-now:.2f}s")
|
||||||
|
|
||||||
async def action_goto_url(self, selector=None, value=None):
|
def action_goto_url(self, selector=None, value=None):
|
||||||
if not value:
|
# self.page.set_viewport_size({"width": 1280, "height": 5000})
|
||||||
logger.warning("No URL provided for goto_url action")
|
|
||||||
return None
|
|
||||||
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
response = await self.page.goto(value, timeout=0, wait_until='load')
|
response = self.page.goto(value, timeout=0, wait_until='load')
|
||||||
|
# Should be the same as the puppeteer_fetch.js methods, means, load with no timeout set (skip timeout)
|
||||||
|
#and also wait for seconds ?
|
||||||
|
#await page.waitForTimeout(1000);
|
||||||
|
#await page.waitForTimeout(extra_wait_ms);
|
||||||
logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
|
logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
|
||||||
return response
|
return response
|
||||||
|
|
||||||
# Incase they request to go back to the start
|
# Incase they request to go back to the start
|
||||||
async def action_goto_site(self, selector=None, value=None):
|
def action_goto_site(self, selector=None, value=None):
|
||||||
return await self.action_goto_url(value=re.sub(r'^source:', '', self.start_url, flags=re.IGNORECASE))
|
return self.action_goto_url(value=self.start_url)
|
||||||
|
|
||||||
async def action_click_element_containing_text(self, selector=None, value=''):
|
def action_click_element_containing_text(self, selector=None, value=''):
|
||||||
logger.debug("Clicking element containing text")
|
logger.debug("Clicking element containing text")
|
||||||
if not value or not len(value.strip()):
|
if not len(value.strip()):
|
||||||
return
|
return
|
||||||
|
|
||||||
elem = self.page.get_by_text(value)
|
elem = self.page.get_by_text(value)
|
||||||
if await elem.count():
|
if elem.count():
|
||||||
await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||||
|
|
||||||
|
def action_click_element_containing_text_if_exists(self, selector=None, value=''):
|
||||||
async def action_click_element_containing_text_if_exists(self, selector=None, value=''):
|
|
||||||
logger.debug("Clicking element containing text if exists")
|
logger.debug("Clicking element containing text if exists")
|
||||||
if not value or not len(value.strip()):
|
if not len(value.strip()):
|
||||||
return
|
return
|
||||||
|
|
||||||
elem = self.page.get_by_text(value)
|
elem = self.page.get_by_text(value)
|
||||||
count = await elem.count()
|
logger.debug(f"Clicking element containing text - {elem.count()} elements found")
|
||||||
logger.debug(f"Clicking element containing text - {count} elements found")
|
if elem.count():
|
||||||
if count:
|
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||||
await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
else:
|
||||||
|
|
||||||
|
|
||||||
async def action_enter_text_in_field(self, selector, value):
|
|
||||||
if not selector or not len(selector.strip()):
|
|
||||||
return
|
return
|
||||||
|
|
||||||
await self.page.fill(selector, value, timeout=self.action_timeout)
|
def action_enter_text_in_field(self, selector, value):
|
||||||
|
if not len(selector.strip()):
|
||||||
|
return
|
||||||
|
|
||||||
async def action_execute_js(self, selector, value):
|
self.page.fill(selector, value, timeout=self.action_timeout)
|
||||||
if not value:
|
|
||||||
return None
|
|
||||||
|
|
||||||
return await self.page.evaluate(value)
|
|
||||||
|
|
||||||
async def action_click_element(self, selector, value):
|
def action_execute_js(self, selector, value):
|
||||||
|
response = self.page.evaluate(value)
|
||||||
|
return response
|
||||||
|
|
||||||
|
def action_click_element(self, selector, value):
|
||||||
logger.debug("Clicking element")
|
logger.debug("Clicking element")
|
||||||
if not selector or not len(selector.strip()):
|
if not len(selector.strip()):
|
||||||
return
|
return
|
||||||
|
|
||||||
await self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
|
self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
|
||||||
|
|
||||||
async def action_click_element_if_exists(self, selector, value):
|
def action_click_element_if_exists(self, selector, value):
|
||||||
import playwright._impl._errors as _api_types
|
import playwright._impl._errors as _api_types
|
||||||
logger.debug("Clicking element if exists")
|
logger.debug("Clicking element if exists")
|
||||||
if not selector or not len(selector.strip()):
|
if not len(selector.strip()):
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
await self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
|
self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
|
||||||
except _api_types.TimeoutError:
|
except _api_types.TimeoutError as e:
|
||||||
return
|
return
|
||||||
except _api_types.Error:
|
except _api_types.Error as e:
|
||||||
# Element was there, but page redrew and now its long long gone
|
# Element was there, but page redrew and now its long long gone
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
async def action_click_x_y(self, selector, value):
|
def action_click_x_y(self, selector, value):
|
||||||
if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
|
if not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
|
||||||
logger.warning("'Click X,Y' step should be in the format of '100 , 90'")
|
raise Exception("'Click X,Y' step should be in the format of '100 , 90'")
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
x, y = value.strip().split(',')
|
||||||
x, y = value.strip().split(',')
|
x = int(float(x.strip()))
|
||||||
x = int(float(x.strip()))
|
y = int(float(y.strip()))
|
||||||
y = int(float(y.strip()))
|
self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
|
||||||
|
|
||||||
await self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error parsing x,y coordinates: {str(e)}")
|
|
||||||
|
|
||||||
async def action__select_by_option_text(self, selector, value):
|
def action_scroll_down(self, selector, value):
|
||||||
if not selector or not len(selector.strip()):
|
|
||||||
return
|
|
||||||
|
|
||||||
await self.page.select_option(selector, label=value, timeout=self.action_timeout)
|
|
||||||
|
|
||||||
async def action_scroll_down(self, selector, value):
|
|
||||||
# Some sites this doesnt work on for some reason
|
# Some sites this doesnt work on for some reason
|
||||||
await self.page.mouse.wheel(0, 600)
|
self.page.mouse.wheel(0, 600)
|
||||||
await self.page.wait_for_timeout(1000)
|
self.page.wait_for_timeout(1000)
|
||||||
|
|
||||||
async def action_wait_for_seconds(self, selector, value):
|
def action_wait_for_seconds(self, selector, value):
|
||||||
try:
|
self.page.wait_for_timeout(float(value.strip()) * 1000)
|
||||||
seconds = float(value.strip()) if value else 1.0
|
|
||||||
await self.page.wait_for_timeout(seconds * 1000)
|
|
||||||
except (ValueError, TypeError) as e:
|
|
||||||
logger.error(f"Invalid value for wait_for_seconds: {str(e)}")
|
|
||||||
|
|
||||||
async def action_wait_for_text(self, selector, value):
|
def action_wait_for_text(self, selector, value):
|
||||||
if not value:
|
|
||||||
return
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
v = json.dumps(value)
|
v = json.dumps(value)
|
||||||
await self.page.wait_for_function(
|
self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000)
|
||||||
f'document.querySelector("body").innerText.includes({v});',
|
|
||||||
timeout=30000
|
|
||||||
)
|
|
||||||
|
|
||||||
|
|
||||||
async def action_wait_for_text_in_element(self, selector, value):
|
def action_wait_for_text_in_element(self, selector, value):
|
||||||
if not selector or not value:
|
|
||||||
return
|
|
||||||
|
|
||||||
import json
|
import json
|
||||||
s = json.dumps(selector)
|
s = json.dumps(selector)
|
||||||
v = json.dumps(value)
|
v = json.dumps(value)
|
||||||
|
self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000)
|
||||||
await self.page.wait_for_function(
|
|
||||||
f'document.querySelector({s}).innerText.includes({v});',
|
|
||||||
timeout=30000
|
|
||||||
)
|
|
||||||
|
|
||||||
# @todo - in the future make some popout interface to capture what needs to be set
|
# @todo - in the future make some popout interface to capture what needs to be set
|
||||||
# https://playwright.dev/python/docs/api/class-keyboard
|
# https://playwright.dev/python/docs/api/class-keyboard
|
||||||
async def action_press_enter(self, selector, value):
|
def action_press_enter(self, selector, value):
|
||||||
await self.page.keyboard.press("Enter", delay=randint(200, 500))
|
self.page.keyboard.press("Enter", delay=randint(200, 500))
|
||||||
|
|
||||||
|
|
||||||
async def action_press_page_up(self, selector, value):
|
def action_press_page_up(self, selector, value):
|
||||||
await self.page.keyboard.press("PageUp", delay=randint(200, 500))
|
self.page.keyboard.press("PageUp", delay=randint(200, 500))
|
||||||
|
|
||||||
async def action_press_page_down(self, selector, value):
|
def action_press_page_down(self, selector, value):
|
||||||
await self.page.keyboard.press("PageDown", delay=randint(200, 500))
|
self.page.keyboard.press("PageDown", delay=randint(200, 500))
|
||||||
|
|
||||||
async def action_check_checkbox(self, selector, value):
|
def action_check_checkbox(self, selector, value):
|
||||||
if not selector:
|
self.page.locator(selector).check(timeout=self.action_timeout)
|
||||||
return
|
|
||||||
|
|
||||||
await self.page.locator(selector).check(timeout=self.action_timeout)
|
def action_uncheck_checkbox(self, selector, value):
|
||||||
|
self.page.locator(selector).uncheck(timeout=self.action_timeout)
|
||||||
|
|
||||||
async def action_uncheck_checkbox(self, selector, value):
|
def action_remove_elements(self, selector, value):
|
||||||
if not selector:
|
|
||||||
return
|
|
||||||
|
|
||||||
await self.page.locator(selector).uncheck(timeout=self.action_timeout)
|
|
||||||
|
|
||||||
|
|
||||||
async def action_remove_elements(self, selector, value):
|
|
||||||
"""Removes all elements matching the given selector from the DOM."""
|
"""Removes all elements matching the given selector from the DOM."""
|
||||||
if not selector:
|
self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
|
||||||
return
|
|
||||||
|
|
||||||
await self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
|
|
||||||
|
|
||||||
async def action_make_all_child_elements_visible(self, selector, value):
|
def action_make_all_child_elements_visible(self, selector, value):
|
||||||
"""Recursively makes all child elements inside the given selector fully visible."""
|
"""Recursively makes all child elements inside the given selector fully visible."""
|
||||||
if not selector:
|
self.page.locator(selector).locator("*").evaluate_all("""
|
||||||
return
|
|
||||||
|
|
||||||
await self.page.locator(selector).locator("*").evaluate_all("""
|
|
||||||
els => els.forEach(el => {
|
els => els.forEach(el => {
|
||||||
el.style.display = 'block'; // Forces it to be displayed
|
el.style.display = 'block'; // Forces it to be displayed
|
||||||
el.style.visibility = 'visible'; // Ensures it's not hidden
|
el.style.visibility = 'visible'; // Ensures it's not hidden
|
||||||
@@ -308,9 +224,7 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
# bump and kill this if idle after X sec
|
# bump and kill this if idle after X sec
|
||||||
age_start = 0
|
age_start = 0
|
||||||
headers = {}
|
headers = {}
|
||||||
# Track if resources are properly cleaned up
|
|
||||||
_is_cleaned_up = False
|
|
||||||
|
|
||||||
# use a special driver, maybe locally etc
|
# use a special driver, maybe locally etc
|
||||||
command_executor = os.getenv(
|
command_executor = os.getenv(
|
||||||
"PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL"
|
"PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL"
|
||||||
@@ -329,23 +243,17 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
self.age_start = time.time()
|
self.age_start = time.time()
|
||||||
self.playwright_browser = playwright_browser
|
self.playwright_browser = playwright_browser
|
||||||
self.start_url = start_url
|
self.start_url = start_url
|
||||||
self._is_cleaned_up = False
|
if self.context is None:
|
||||||
self.proxy = proxy
|
self.connect(proxy=proxy)
|
||||||
# Note: connect() is now async and must be called separately
|
|
||||||
|
|
||||||
def __del__(self):
|
|
||||||
# Ensure cleanup happens if object is garbage collected
|
|
||||||
# Note: cleanup is now async, so we can only mark as cleaned up here
|
|
||||||
self._is_cleaned_up = True
|
|
||||||
|
|
||||||
# Connect and setup a new context
|
# Connect and setup a new context
|
||||||
async def connect(self, proxy=None):
|
def connect(self, proxy=None):
|
||||||
# Should only get called once - test that
|
# Should only get called once - test that
|
||||||
keep_open = 1000 * 60 * 5
|
keep_open = 1000 * 60 * 5
|
||||||
now = time.time()
|
now = time.time()
|
||||||
|
|
||||||
# @todo handle multiple contexts, bind a unique id from the browser on each req?
|
# @todo handle multiple contexts, bind a unique id from the browser on each req?
|
||||||
self.context = await self.playwright_browser.new_context(
|
self.context = self.playwright_browser.new_context(
|
||||||
accept_downloads=False, # Should never be needed
|
accept_downloads=False, # Should never be needed
|
||||||
bypass_csp=True, # This is needed to enable JavaScript execution on GitHub and others
|
bypass_csp=True, # This is needed to enable JavaScript execution on GitHub and others
|
||||||
extra_http_headers=self.headers,
|
extra_http_headers=self.headers,
|
||||||
@@ -356,142 +264,64 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
user_agent=manage_user_agent(headers=self.headers),
|
user_agent=manage_user_agent(headers=self.headers),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.page = await self.context.new_page()
|
|
||||||
|
self.page = self.context.new_page()
|
||||||
|
|
||||||
# self.page.set_default_navigation_timeout(keep_open)
|
# self.page.set_default_navigation_timeout(keep_open)
|
||||||
self.page.set_default_timeout(keep_open)
|
self.page.set_default_timeout(keep_open)
|
||||||
# Set event handlers
|
# @todo probably this doesnt work
|
||||||
self.page.on("close", self.mark_as_closed)
|
self.page.on(
|
||||||
|
"close",
|
||||||
|
self.mark_as_closed,
|
||||||
|
)
|
||||||
# Listen for all console events and handle errors
|
# Listen for all console events and handle errors
|
||||||
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
|
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
|
||||||
|
|
||||||
logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
|
logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
|
||||||
await self.page.wait_for_timeout(1 * 1000)
|
self.page.wait_for_timeout(1 * 1000)
|
||||||
|
|
||||||
|
|
||||||
def mark_as_closed(self):
|
def mark_as_closed(self):
|
||||||
logger.debug("Page closed, cleaning up..")
|
logger.debug("Page closed, cleaning up..")
|
||||||
# Note: This is called from a sync context (event handler)
|
|
||||||
# so we'll just mark as cleaned up and let __del__ handle the rest
|
|
||||||
self._is_cleaned_up = True
|
|
||||||
|
|
||||||
async def cleanup(self):
|
|
||||||
"""Properly clean up all resources to prevent memory leaks"""
|
|
||||||
if self._is_cleaned_up:
|
|
||||||
return
|
|
||||||
|
|
||||||
logger.debug("Cleaning up browser steps resources")
|
|
||||||
|
|
||||||
# Clean up page
|
|
||||||
if hasattr(self, 'page') and self.page is not None:
|
|
||||||
try:
|
|
||||||
# Force garbage collection before closing
|
|
||||||
await self.page.request_gc()
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Error during page garbage collection: {str(e)}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Remove event listeners before closing
|
|
||||||
self.page.remove_listener("close", self.mark_as_closed)
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Error removing event listeners: {str(e)}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
await self.page.close()
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Error closing page: {str(e)}")
|
|
||||||
|
|
||||||
self.page = None
|
|
||||||
|
|
||||||
# Clean up context
|
|
||||||
if hasattr(self, 'context') and self.context is not None:
|
|
||||||
try:
|
|
||||||
await self.context.close()
|
|
||||||
except Exception as e:
|
|
||||||
logger.debug(f"Error closing context: {str(e)}")
|
|
||||||
|
|
||||||
self.context = None
|
|
||||||
|
|
||||||
self._is_cleaned_up = True
|
|
||||||
logger.debug("Browser steps resources cleanup complete")
|
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def has_expired(self):
|
def has_expired(self):
|
||||||
if not self.page or self._is_cleaned_up:
|
if not self.page:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# Check if session has expired based on age
|
|
||||||
max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10)) # Default 10 minutes
|
|
||||||
if (time.time() - self.age_start) > max_age_seconds:
|
|
||||||
logger.debug(f"Browser steps session expired after {max_age_seconds} seconds")
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def get_current_state(self):
|
|
||||||
|
def get_current_state(self):
|
||||||
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
|
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
|
||||||
import importlib.resources
|
import importlib.resources
|
||||||
import json
|
|
||||||
# because we for now only run browser steps in playwright mode (not puppeteer mode)
|
|
||||||
from changedetectionio.content_fetchers.playwright import capture_full_page_async
|
|
||||||
|
|
||||||
# Safety check - don't proceed if resources are cleaned up
|
|
||||||
if self._is_cleaned_up or self.page is None:
|
|
||||||
logger.warning("Attempted to get current state after cleanup")
|
|
||||||
return (None, None)
|
|
||||||
|
|
||||||
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
||||||
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
await self.page.wait_for_timeout(1 * 1000)
|
self.page.wait_for_timeout(1 * 1000)
|
||||||
|
|
||||||
screenshot = None
|
|
||||||
xpath_data = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
# Get screenshot first
|
|
||||||
screenshot = await capture_full_page_async(page=self.page)
|
|
||||||
if not screenshot:
|
|
||||||
logger.error("No screenshot was retrieved :((")
|
|
||||||
|
|
||||||
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
|
full_height = self.page.evaluate("document.documentElement.scrollHeight")
|
||||||
|
|
||||||
# Then get interactive elements
|
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
|
||||||
now = time.time()
|
logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
|
||||||
await self.page.evaluate("var include_filters=''")
|
screenshot = capture_stitched_together_full_page(self.page)
|
||||||
await self.page.request_gc()
|
else:
|
||||||
|
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
|
||||||
|
|
||||||
scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
|
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
|
||||||
|
|
||||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
now = time.time()
|
||||||
xpath_data = json.loads(await self.page.evaluate(xpath_element_js, {
|
self.page.evaluate("var include_filters=''")
|
||||||
"visualselector_xpath_selectors": scan_elements,
|
# Go find the interactive elements
|
||||||
"max_height": MAX_TOTAL_HEIGHT
|
# @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
|
||||||
}))
|
elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
|
||||||
await self.page.request_gc()
|
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)
|
||||||
|
|
||||||
# Sort elements by size
|
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
|
||||||
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
|
# So the JS will find the smallest one first
|
||||||
logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
|
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
|
||||||
|
logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s")
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error getting current state: {str(e)}")
|
|
||||||
# If the page has navigated (common with logins) then the context is destroyed on navigation, continue
|
|
||||||
# I'm not sure that this is required anymore because we have the "expect navigation wrapper" at the top
|
|
||||||
if "Execution context was destroyed" in str(e):
|
|
||||||
logger.debug("Execution context was destroyed, most likely because of navigation, continuing...")
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Attempt recovery - force garbage collection
|
# playwright._impl._api_types.Error: Browser closed.
|
||||||
try:
|
# @todo show some countdown timer?
|
||||||
await self.page.request_gc()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Request garbage collection one final time
|
|
||||||
try:
|
|
||||||
await self.page.request_gc()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return (screenshot, xpath_data)
|
return (screenshot, xpath_data)
|
||||||
|
|
||||||
|
|||||||
17
changedetectionio/blueprint/browser_steps/nonContext.py
Normal file
17
changedetectionio/blueprint/browser_steps/nonContext.py
Normal file
@@ -0,0 +1,17 @@
|
|||||||
|
from playwright.sync_api import PlaywrightContextManager
|
||||||
|
|
||||||
|
# So playwright wants to run as a context manager, but we do something horrible and hacky
|
||||||
|
# we are holding the session open for as long as possible, then shutting it down, and opening a new one
|
||||||
|
# So it means we don't get to use PlaywrightContextManager' __enter__ __exit__
|
||||||
|
# To work around this, make goodbye() act the same as the __exit__()
|
||||||
|
#
|
||||||
|
# But actually I think this is because the context is opened correctly with __enter__() but we timeout the connection
|
||||||
|
# then theres some lock condition where we cant destroy it without it hanging
|
||||||
|
|
||||||
|
class c_PlaywrightContextManager(PlaywrightContextManager):
|
||||||
|
|
||||||
|
def goodbye(self) -> None:
|
||||||
|
self.__exit__()
|
||||||
|
|
||||||
|
def c_sync_playwright() -> PlaywrightContextManager:
|
||||||
|
return c_PlaywrightContextManager()
|
||||||
@@ -1,7 +1,6 @@
|
|||||||
from flask import Blueprint, request, redirect, url_for, flash, render_template
|
from flask import Blueprint, request, redirect, url_for, flash, render_template
|
||||||
from changedetectionio.store import ChangeDetectionStore
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
from changedetectionio.auth_decorator import login_optionally_required
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
from changedetectionio import worker_handler
|
|
||||||
from changedetectionio.blueprint.imports.importer import (
|
from changedetectionio.blueprint.imports.importer import (
|
||||||
import_url_list,
|
import_url_list,
|
||||||
import_distill_io_json,
|
import_distill_io_json,
|
||||||
@@ -25,7 +24,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
importer_handler = import_url_list()
|
importer_handler = import_url_list()
|
||||||
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||||
for uuid in importer_handler.new_uuids:
|
for uuid in importer_handler.new_uuids:
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
if len(importer_handler.remaining_data) == 0:
|
if len(importer_handler.remaining_data) == 0:
|
||||||
return redirect(url_for('watchlist.index'))
|
return redirect(url_for('watchlist.index'))
|
||||||
@@ -38,7 +37,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
d_importer = import_distill_io_json()
|
d_importer = import_distill_io_json()
|
||||||
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||||
for uuid in d_importer.new_uuids:
|
for uuid in d_importer.new_uuids:
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
# XLSX importer
|
# XLSX importer
|
||||||
if request.files and request.files.get('xlsx_file'):
|
if request.files and request.files.get('xlsx_file'):
|
||||||
@@ -61,7 +60,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
w_importer.run(data=file, flash=flash, datastore=datastore)
|
w_importer.run(data=file, flash=flash, datastore=datastore)
|
||||||
|
|
||||||
for uuid in w_importer.new_uuids:
|
for uuid in w_importer.new_uuids:
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
# Could be some remaining, or we could be on GET
|
# Could be some remaining, or we could be on GET
|
||||||
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
|
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ from flask import Blueprint, flash, redirect, url_for
|
|||||||
from flask_login import login_required
|
from flask_login import login_required
|
||||||
from changedetectionio.store import ChangeDetectionStore
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
from changedetectionio import queuedWatchMetaData
|
from changedetectionio import queuedWatchMetaData
|
||||||
from changedetectionio import worker_handler
|
|
||||||
from queue import PriorityQueue
|
from queue import PriorityQueue
|
||||||
|
|
||||||
PRICE_DATA_TRACK_ACCEPT = 'accepted'
|
PRICE_DATA_TRACK_ACCEPT = 'accepted'
|
||||||
@@ -20,7 +19,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
|||||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
|
||||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||||
datastore.data['watching'][uuid].clear_watch()
|
datastore.data['watching'][uuid].clear_watch()
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
return redirect(url_for("watchlist.index"))
|
return redirect(url_for("watchlist.index"))
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
|
|||||||
@@ -1 +1,102 @@
|
|||||||
RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')]
|
import time
|
||||||
|
import datetime
|
||||||
|
import pytz
|
||||||
|
from flask import Blueprint, make_response, request, url_for
|
||||||
|
from loguru import logger
|
||||||
|
from feedgen.feed import FeedGenerator
|
||||||
|
|
||||||
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
|
from changedetectionio.safe_jinja import render as jinja_render
|
||||||
|
|
||||||
|
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||||
|
rss_blueprint = Blueprint('rss', __name__)
|
||||||
|
|
||||||
|
# Import the login decorator if needed
|
||||||
|
# from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
@rss_blueprint.route("", methods=['GET'])
|
||||||
|
def feed():
|
||||||
|
now = time.time()
|
||||||
|
# Always requires token set
|
||||||
|
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
|
||||||
|
rss_url_token = request.args.get('token')
|
||||||
|
if rss_url_token != app_rss_token:
|
||||||
|
return "Access denied, bad token", 403
|
||||||
|
|
||||||
|
from changedetectionio import diff
|
||||||
|
limit_tag = request.args.get('tag', '').lower().strip()
|
||||||
|
# Be sure limit_tag is a uuid
|
||||||
|
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
|
||||||
|
if limit_tag == tag.get('title', '').lower().strip():
|
||||||
|
limit_tag = uuid
|
||||||
|
|
||||||
|
# Sort by last_changed and add the uuid which is usually the key..
|
||||||
|
sorted_watches = []
|
||||||
|
|
||||||
|
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
|
||||||
|
for uuid, watch in datastore.data['watching'].items():
|
||||||
|
# @todo tag notification_muted skip also (improve Watch model)
|
||||||
|
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
|
||||||
|
continue
|
||||||
|
if limit_tag and not limit_tag in watch['tags']:
|
||||||
|
continue
|
||||||
|
watch['uuid'] = uuid
|
||||||
|
sorted_watches.append(watch)
|
||||||
|
|
||||||
|
sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
|
||||||
|
|
||||||
|
fg = FeedGenerator()
|
||||||
|
fg.title('changedetection.io')
|
||||||
|
fg.description('Feed description')
|
||||||
|
fg.link(href='https://changedetection.io')
|
||||||
|
|
||||||
|
for watch in sorted_watches:
|
||||||
|
|
||||||
|
dates = list(watch.history.keys())
|
||||||
|
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
|
||||||
|
if len(dates) < 2:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not watch.viewed:
|
||||||
|
# Re #239 - GUID needs to be individual for each event
|
||||||
|
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
|
||||||
|
guid = "{}/{}".format(watch['uuid'], watch.last_changed)
|
||||||
|
fe = fg.add_entry()
|
||||||
|
|
||||||
|
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
|
||||||
|
# Description is the page you watch, link takes you to the diff JS UI page
|
||||||
|
# Dict val base_url will get overriden with the env var if it is set.
|
||||||
|
ext_base_url = datastore.data['settings']['application'].get('active_base_url')
|
||||||
|
|
||||||
|
# Because we are called via whatever web server, flask should figure out the right path (
|
||||||
|
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
|
||||||
|
|
||||||
|
fe.link(link=diff_link)
|
||||||
|
|
||||||
|
# @todo watch should be a getter - watch.get('title') (internally if URL else..)
|
||||||
|
|
||||||
|
watch_title = watch.get('title') if watch.get('title') else watch.get('url')
|
||||||
|
fe.title(title=watch_title)
|
||||||
|
|
||||||
|
html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
|
||||||
|
newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
|
||||||
|
include_equal=False,
|
||||||
|
line_feed_sep="<br>")
|
||||||
|
|
||||||
|
# @todo Make this configurable and also consider html-colored markup
|
||||||
|
# @todo User could decide if <link> goes to the diff page, or to the watch link
|
||||||
|
rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
|
||||||
|
content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link)
|
||||||
|
|
||||||
|
fe.content(content=content, type='CDATA')
|
||||||
|
|
||||||
|
fe.guid(guid, permalink=False)
|
||||||
|
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
|
||||||
|
dt = dt.replace(tzinfo=pytz.UTC)
|
||||||
|
fe.pubDate(dt)
|
||||||
|
|
||||||
|
response = make_response(fg.rss_str())
|
||||||
|
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
|
||||||
|
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
|
||||||
|
return response
|
||||||
|
|
||||||
|
return rss_blueprint
|
||||||
@@ -1,147 +0,0 @@
|
|||||||
|
|
||||||
from changedetectionio.safe_jinja import render as jinja_render
|
|
||||||
from changedetectionio.store import ChangeDetectionStore
|
|
||||||
from feedgen.feed import FeedGenerator
|
|
||||||
from flask import Blueprint, make_response, request, url_for, redirect
|
|
||||||
from loguru import logger
|
|
||||||
import datetime
|
|
||||||
import pytz
|
|
||||||
import re
|
|
||||||
import time
|
|
||||||
|
|
||||||
|
|
||||||
BAD_CHARS_REGEX=r'[\x00-\x08\x0B\x0C\x0E-\x1F]'
|
|
||||||
|
|
||||||
# Anything that is not text/UTF-8 should be stripped before it breaks feedgen (such as binary data etc)
|
|
||||||
def scan_invalid_chars_in_rss(content):
|
|
||||||
for match in re.finditer(BAD_CHARS_REGEX, content):
|
|
||||||
i = match.start()
|
|
||||||
bad_char = content[i]
|
|
||||||
hex_value = f"0x{ord(bad_char):02x}"
|
|
||||||
# Grab context
|
|
||||||
start = max(0, i - 20)
|
|
||||||
end = min(len(content), i + 21)
|
|
||||||
context = content[start:end].replace('\n', '\\n').replace('\r', '\\r')
|
|
||||||
logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...")
|
|
||||||
# First match is enough
|
|
||||||
return True
|
|
||||||
|
|
||||||
return False
|
|
||||||
|
|
||||||
|
|
||||||
def clean_entry_content(content):
|
|
||||||
cleaned = re.sub(BAD_CHARS_REGEX, '', content)
|
|
||||||
return cleaned
|
|
||||||
|
|
||||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
|
||||||
rss_blueprint = Blueprint('rss', __name__)
|
|
||||||
|
|
||||||
# Some RSS reader situations ended up with rss/ (forward slash after RSS) due
|
|
||||||
# to some earlier blueprint rerouting work, it should goto feed.
|
|
||||||
@rss_blueprint.route("/", methods=['GET'])
|
|
||||||
def extraslash():
|
|
||||||
return redirect(url_for('rss.feed'))
|
|
||||||
|
|
||||||
# Import the login decorator if needed
|
|
||||||
# from changedetectionio.auth_decorator import login_optionally_required
|
|
||||||
@rss_blueprint.route("", methods=['GET'])
|
|
||||||
def feed():
|
|
||||||
now = time.time()
|
|
||||||
# Always requires token set
|
|
||||||
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
|
|
||||||
rss_url_token = request.args.get('token')
|
|
||||||
if rss_url_token != app_rss_token:
|
|
||||||
return "Access denied, bad token", 403
|
|
||||||
|
|
||||||
from changedetectionio import diff
|
|
||||||
limit_tag = request.args.get('tag', '').lower().strip()
|
|
||||||
# Be sure limit_tag is a uuid
|
|
||||||
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
|
|
||||||
if limit_tag == tag.get('title', '').lower().strip():
|
|
||||||
limit_tag = uuid
|
|
||||||
|
|
||||||
# Sort by last_changed and add the uuid which is usually the key..
|
|
||||||
sorted_watches = []
|
|
||||||
|
|
||||||
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
|
|
||||||
for uuid, watch in datastore.data['watching'].items():
|
|
||||||
# @todo tag notification_muted skip also (improve Watch model)
|
|
||||||
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
|
|
||||||
continue
|
|
||||||
if limit_tag and not limit_tag in watch['tags']:
|
|
||||||
continue
|
|
||||||
watch['uuid'] = uuid
|
|
||||||
sorted_watches.append(watch)
|
|
||||||
|
|
||||||
sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
|
|
||||||
|
|
||||||
fg = FeedGenerator()
|
|
||||||
fg.title('changedetection.io')
|
|
||||||
fg.description('Feed description')
|
|
||||||
fg.link(href='https://changedetection.io')
|
|
||||||
|
|
||||||
html_colour_enable = False
|
|
||||||
if datastore.data['settings']['application'].get('rss_content_format') == 'html':
|
|
||||||
html_colour_enable = True
|
|
||||||
|
|
||||||
for watch in sorted_watches:
|
|
||||||
|
|
||||||
dates = list(watch.history.keys())
|
|
||||||
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
|
|
||||||
if len(dates) < 2:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if not watch.viewed:
|
|
||||||
# Re #239 - GUID needs to be individual for each event
|
|
||||||
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
|
|
||||||
guid = "{}/{}".format(watch['uuid'], watch.last_changed)
|
|
||||||
fe = fg.add_entry()
|
|
||||||
|
|
||||||
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
|
|
||||||
# Description is the page you watch, link takes you to the diff JS UI page
|
|
||||||
# Dict val base_url will get overriden with the env var if it is set.
|
|
||||||
ext_base_url = datastore.data['settings']['application'].get('active_base_url')
|
|
||||||
# @todo fix
|
|
||||||
|
|
||||||
# Because we are called via whatever web server, flask should figure out the right path (
|
|
||||||
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
|
|
||||||
|
|
||||||
fe.link(link=diff_link)
|
|
||||||
|
|
||||||
# @todo watch should be a getter - watch.get('title') (internally if URL else..)
|
|
||||||
|
|
||||||
watch_title = watch.get('title') if watch.get('title') else watch.get('url')
|
|
||||||
fe.title(title=watch_title)
|
|
||||||
try:
|
|
||||||
|
|
||||||
html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
|
|
||||||
newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
|
|
||||||
include_equal=False,
|
|
||||||
line_feed_sep="<br>",
|
|
||||||
html_colour=html_colour_enable
|
|
||||||
)
|
|
||||||
except FileNotFoundError as e:
|
|
||||||
html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."
|
|
||||||
|
|
||||||
# @todo Make this configurable and also consider html-colored markup
|
|
||||||
# @todo User could decide if <link> goes to the diff page, or to the watch link
|
|
||||||
rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
|
|
||||||
|
|
||||||
content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link)
|
|
||||||
|
|
||||||
# Out of range chars could also break feedgen
|
|
||||||
if scan_invalid_chars_in_rss(content):
|
|
||||||
content = clean_entry_content(content)
|
|
||||||
|
|
||||||
fe.content(content=content, type='CDATA')
|
|
||||||
fe.guid(guid, permalink=False)
|
|
||||||
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
|
|
||||||
dt = dt.replace(tzinfo=pytz.UTC)
|
|
||||||
fe.pubDate(dt)
|
|
||||||
|
|
||||||
response = make_response(fg.rss_str())
|
|
||||||
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
|
|
||||||
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
|
|
||||||
return response
|
|
||||||
|
|
||||||
return rss_blueprint
|
|
||||||
@@ -67,32 +67,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
del (app_update['password'])
|
del (app_update['password'])
|
||||||
|
|
||||||
datastore.data['settings']['application'].update(app_update)
|
datastore.data['settings']['application'].update(app_update)
|
||||||
|
|
||||||
# Handle dynamic worker count adjustment
|
|
||||||
old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
|
|
||||||
new_worker_count = form.data['requests'].get('workers', 1)
|
|
||||||
|
|
||||||
datastore.data['settings']['requests'].update(form.data['requests'])
|
datastore.data['settings']['requests'].update(form.data['requests'])
|
||||||
|
|
||||||
# Adjust worker count if it changed
|
|
||||||
if new_worker_count != old_worker_count:
|
|
||||||
from changedetectionio import worker_handler
|
|
||||||
from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds
|
|
||||||
|
|
||||||
result = worker_handler.adjust_async_worker_count(
|
|
||||||
new_count=new_worker_count,
|
|
||||||
update_q=update_q,
|
|
||||||
notification_q=notification_q,
|
|
||||||
app=app,
|
|
||||||
datastore=ds
|
|
||||||
)
|
|
||||||
|
|
||||||
if result['status'] == 'success':
|
|
||||||
flash(f"Worker count adjusted: {result['message']}", 'notice')
|
|
||||||
elif result['status'] == 'not_supported':
|
|
||||||
flash("Dynamic worker adjustment not supported for sync workers", 'warning')
|
|
||||||
elif result['status'] == 'error':
|
|
||||||
flash(f"Error adjusting workers: {result['message']}", 'error')
|
|
||||||
|
|
||||||
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
|
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
|
||||||
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
|
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
|
||||||
|
|||||||
@@ -22,7 +22,6 @@
|
|||||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||||
<li class="tab"><a href="#fetching">Fetching</a></li>
|
<li class="tab"><a href="#fetching">Fetching</a></li>
|
||||||
<li class="tab"><a href="#filters">Global Filters</a></li>
|
<li class="tab"><a href="#filters">Global Filters</a></li>
|
||||||
<li class="tab"><a href="#ui-options">UI Options</a></li>
|
|
||||||
<li class="tab"><a href="#api">API</a></li>
|
<li class="tab"><a href="#api">API</a></li>
|
||||||
<li class="tab"><a href="#timedate">Time & Date</a></li>
|
<li class="tab"><a href="#timedate">Time & Date</a></li>
|
||||||
<li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li>
|
<li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li>
|
||||||
@@ -79,10 +78,7 @@
|
|||||||
{{ render_field(form.application.form.pager_size) }}
|
{{ render_field(form.application.form.pager_size) }}
|
||||||
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
||||||
</div>
|
</div>
|
||||||
<div class="pure-control-group">
|
|
||||||
{{ render_field(form.application.form.rss_content_format) }}
|
|
||||||
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
|
|
||||||
</div>
|
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
|
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
|
||||||
<span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
|
<span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
|
||||||
@@ -135,12 +131,6 @@
|
|||||||
{{ render_field(form.application.form.webdriver_delay) }}
|
{{ render_field(form.application.form.webdriver_delay) }}
|
||||||
</div>
|
</div>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
<div class="pure-control-group">
|
|
||||||
{{ render_field(form.requests.form.workers) }}
|
|
||||||
{% set worker_info = get_worker_status_info() %}
|
|
||||||
<span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
|
|
||||||
Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
|
|
||||||
</div>
|
|
||||||
<div class="pure-control-group inline-radio">
|
<div class="pure-control-group inline-radio">
|
||||||
{{ render_field(form.requests.form.default_ua) }}
|
{{ render_field(form.requests.form.default_ua) }}
|
||||||
<span class="pure-form-message-inline">
|
<span class="pure-form-message-inline">
|
||||||
@@ -224,7 +214,7 @@ nav
|
|||||||
<a id="chrome-extension-link"
|
<a id="chrome-extension-link"
|
||||||
title="Try our new Chrome Extension!"
|
title="Try our new Chrome Extension!"
|
||||||
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
|
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
|
||||||
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" alt="Chrome">
|
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
|
||||||
Chrome Webstore
|
Chrome Webstore
|
||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
@@ -247,21 +237,6 @@ nav
|
|||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="tab-pane-inner" id="ui-options">
|
|
||||||
<div class="pure-control-group">
|
|
||||||
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
|
|
||||||
<span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
|
|
||||||
</div>
|
|
||||||
<div class="pure-control-group">
|
|
||||||
{{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }}
|
|
||||||
<span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span>
|
|
||||||
</div>
|
|
||||||
<div class="pure-control-group">
|
|
||||||
{{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }}
|
|
||||||
<span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div>
|
|
||||||
<div class="tab-pane-inner" id="proxies">
|
<div class="tab-pane-inner" id="proxies">
|
||||||
<div id="recommended-proxy">
|
<div id="recommended-proxy">
|
||||||
<div>
|
<div>
|
||||||
|
|||||||
@@ -104,9 +104,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()
|
uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()
|
||||||
|
|
||||||
default = datastore.data['settings']['application']['tags'].get(uuid)
|
default = datastore.data['settings']['application']['tags'].get(uuid)
|
||||||
if not default:
|
|
||||||
flash("Tag not found", "error")
|
|
||||||
return redirect(url_for('watchlist.index'))
|
|
||||||
|
|
||||||
form = group_restock_settings_form(
|
form = group_restock_settings_form(
|
||||||
formdata=request.form if request.method == 'POST' else None,
|
formdata=request.form if request.method == 'POST' else None,
|
||||||
|
|||||||
@@ -13,7 +13,6 @@
|
|||||||
/*const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');*/
|
/*const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');*/
|
||||||
/*{% endif %}*/
|
/*{% endif %}*/
|
||||||
|
|
||||||
{% set has_tag_filters_extra='' %}
|
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
@@ -47,12 +46,59 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||||
<p>These settings are <strong><i>added</i></strong> to any existing watch configurations.</p>
|
<div class="pure-control-group">
|
||||||
{% include "edit/include_subtract.html" %}
|
{% set field = render_field(form.include_filters,
|
||||||
<div class="text-filtering border-fieldset">
|
rows=5,
|
||||||
<h3>Text filtering</h3>
|
placeholder="#example
|
||||||
{% include "edit/text-options.html" %}
|
xpath://body/div/span[contains(@class, 'example-class')]",
|
||||||
</div>
|
class="m-d")
|
||||||
|
%}
|
||||||
|
{{ field }}
|
||||||
|
{% if '/text()' in field %}
|
||||||
|
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
|
||||||
|
{% endif %}
|
||||||
|
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
|
||||||
|
<div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
|
||||||
|
<ul id="advanced-help-selectors">
|
||||||
|
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||||
|
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
|
||||||
|
<ul>
|
||||||
|
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
|
||||||
|
{% if jq_support %}
|
||||||
|
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
|
||||||
|
{% else %}
|
||||||
|
<li>jq support not installed</li>
|
||||||
|
{% endif %}
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code>
|
||||||
|
<ul>
|
||||||
|
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a
|
||||||
|
href="http://xpather.com/" target="new">test your XPath here</a></li>
|
||||||
|
<li>Example: Get all titles from an RSS feed <code>//title/text()</code></li>
|
||||||
|
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
|
</ul>
|
||||||
|
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
|
||||||
|
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
|
||||||
|
</span>
|
||||||
|
</div>
|
||||||
|
<fieldset class="pure-control-group">
|
||||||
|
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
|
||||||
|
footer
|
||||||
|
nav
|
||||||
|
.stockticker
|
||||||
|
//*[contains(text(), 'Advertisement')]") }}
|
||||||
|
<span class="pure-form-message-inline">
|
||||||
|
<ul>
|
||||||
|
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
|
||||||
|
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
|
||||||
|
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
|
||||||
|
</ul>
|
||||||
|
</span>
|
||||||
|
</fieldset>
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{# rendered sub Template #}
|
{# rendered sub Template #}
|
||||||
@@ -66,7 +112,7 @@
|
|||||||
<div class="pure-control-group inline-radio">
|
<div class="pure-control-group inline-radio">
|
||||||
{{ render_checkbox_field(form.notification_muted) }}
|
{{ render_checkbox_field(form.notification_muted) }}
|
||||||
</div>
|
</div>
|
||||||
{% if 1 %}
|
{% if is_html_webdriver %}
|
||||||
<div class="pure-control-group inline-radio">
|
<div class="pure-control-group inline-radio">
|
||||||
{{ render_checkbox_field(form.notification_screenshot) }}
|
{{ render_checkbox_field(form.notification_screenshot) }}
|
||||||
<span class="pure-form-message-inline">
|
<span class="pure-form-message-inline">
|
||||||
|
|||||||
@@ -10,7 +10,7 @@
|
|||||||
<legend>Add a new organisational tag</legend>
|
<legend>Add a new organisational tag</legend>
|
||||||
<div id="watch-add-wrapper-zone">
|
<div id="watch-add-wrapper-zone">
|
||||||
<div>
|
<div>
|
||||||
{{ render_simple_field(form.name, placeholder="Watch group / tag") }}
|
{{ render_simple_field(form.name, placeholder="watch label / tag") }}
|
||||||
</div>
|
</div>
|
||||||
<div>
|
<div>
|
||||||
{{ render_simple_field(form.save_button, title="Save" ) }}
|
{{ render_simple_field(form.save_button, title="Save" ) }}
|
||||||
|
|||||||
@@ -1,112 +1,14 @@
|
|||||||
import time
|
import time
|
||||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
from functools import wraps
|
||||||
|
|
||||||
from changedetectionio.store import ChangeDetectionStore
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint
|
from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint
|
||||||
from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint
|
from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint
|
||||||
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
||||||
|
|
||||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
|
def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_update_threads, queuedWatchMetaData):
|
||||||
from flask import request, flash
|
|
||||||
|
|
||||||
if op == 'delete':
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
datastore.delete(uuid)
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches deleted")
|
|
||||||
|
|
||||||
elif op == 'pause':
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
datastore.data['watching'][uuid]['paused'] = True
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches paused")
|
|
||||||
|
|
||||||
elif op == 'unpause':
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
datastore.data['watching'][uuid.strip()]['paused'] = False
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches unpaused")
|
|
||||||
|
|
||||||
elif (op == 'mark-viewed'):
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
datastore.set_last_viewed(uuid, int(time.time()))
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches updated")
|
|
||||||
|
|
||||||
elif (op == 'mute'):
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
datastore.data['watching'][uuid]['notification_muted'] = True
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches muted")
|
|
||||||
|
|
||||||
elif (op == 'unmute'):
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
datastore.data['watching'][uuid]['notification_muted'] = False
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches un-muted")
|
|
||||||
|
|
||||||
elif (op == 'recheck'):
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
# Recheck and require a full reprocessing
|
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches queued for rechecking")
|
|
||||||
|
|
||||||
elif (op == 'clear-errors'):
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
datastore.data['watching'][uuid]["last_error"] = False
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches errors cleared")
|
|
||||||
|
|
||||||
elif (op == 'clear-history'):
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
datastore.clear_watch_history(uuid)
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches cleared/reset.")
|
|
||||||
|
|
||||||
elif (op == 'notification-default'):
|
|
||||||
from changedetectionio.notification import (
|
|
||||||
default_notification_format_for_watch
|
|
||||||
)
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
datastore.data['watching'][uuid]['notification_title'] = None
|
|
||||||
datastore.data['watching'][uuid]['notification_body'] = None
|
|
||||||
datastore.data['watching'][uuid]['notification_urls'] = []
|
|
||||||
datastore.data['watching'][uuid]['notification_format'] = default_notification_format_for_watch
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches set to use default notification settings")
|
|
||||||
|
|
||||||
elif (op == 'assign-tag'):
|
|
||||||
op_extradata = extra_data
|
|
||||||
if op_extradata:
|
|
||||||
tag_uuid = datastore.add_tag(title=op_extradata)
|
|
||||||
if op_extradata and tag_uuid:
|
|
||||||
for uuid in uuids:
|
|
||||||
if datastore.data['watching'].get(uuid):
|
|
||||||
# Bug in old versions caused by bad edit page/tag handler
|
|
||||||
if isinstance(datastore.data['watching'][uuid]['tags'], str):
|
|
||||||
datastore.data['watching'][uuid]['tags'] = []
|
|
||||||
|
|
||||||
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
|
|
||||||
if emit_flash:
|
|
||||||
flash(f"{len(uuids)} watches were tagged")
|
|
||||||
|
|
||||||
if uuids:
|
|
||||||
for uuid in uuids:
|
|
||||||
watch_check_update.send(watch_uuid=uuid)
|
|
||||||
|
|
||||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update):
|
|
||||||
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
|
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
|
||||||
|
|
||||||
# Register the edit blueprint
|
# Register the edit blueprint
|
||||||
@@ -118,9 +20,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
|||||||
ui_blueprint.register_blueprint(notification_blueprint)
|
ui_blueprint.register_blueprint(notification_blueprint)
|
||||||
|
|
||||||
# Register the views blueprint
|
# Register the views blueprint
|
||||||
views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData, watch_check_update)
|
views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData)
|
||||||
ui_blueprint.register_blueprint(views_blueprint)
|
ui_blueprint.register_blueprint(views_blueprint)
|
||||||
|
|
||||||
# Import the login decorator
|
# Import the login decorator
|
||||||
from changedetectionio.auth_decorator import login_optionally_required
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
|
||||||
@@ -133,6 +35,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
|||||||
flash('Watch not found', 'error')
|
flash('Watch not found', 'error')
|
||||||
else:
|
else:
|
||||||
flash("Cleared snapshot history for watch {}".format(uuid))
|
flash("Cleared snapshot history for watch {}".format(uuid))
|
||||||
|
|
||||||
return redirect(url_for('watchlist.index'))
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
@ui_blueprint.route("/clear_history", methods=['GET', 'POST'])
|
@ui_blueprint.route("/clear_history", methods=['GET', 'POST'])
|
||||||
@@ -144,6 +47,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
|||||||
if confirmtext == 'clear':
|
if confirmtext == 'clear':
|
||||||
for uuid in datastore.data['watching'].keys():
|
for uuid in datastore.data['watching'].keys():
|
||||||
datastore.clear_watch_history(uuid)
|
datastore.clear_watch_history(uuid)
|
||||||
|
|
||||||
flash("Cleared snapshot history for all watches")
|
flash("Cleared snapshot history for all watches")
|
||||||
else:
|
else:
|
||||||
flash('Incorrect confirmation text.', 'error')
|
flash('Incorrect confirmation text.', 'error')
|
||||||
@@ -159,20 +63,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
|||||||
def mark_all_viewed():
|
def mark_all_viewed():
|
||||||
# Save the current newest history as the most recently viewed
|
# Save the current newest history as the most recently viewed
|
||||||
with_errors = request.args.get('with_errors') == "1"
|
with_errors = request.args.get('with_errors') == "1"
|
||||||
tag_limit = request.args.get('tag')
|
|
||||||
logger.debug(f"Limiting to tag {tag_limit}")
|
|
||||||
now = int(time.time())
|
|
||||||
for watch_uuid, watch in datastore.data['watching'].items():
|
for watch_uuid, watch in datastore.data['watching'].items():
|
||||||
if with_errors and not watch.get('last_error'):
|
if with_errors and not watch.get('last_error'):
|
||||||
continue
|
continue
|
||||||
|
datastore.set_last_viewed(watch_uuid, int(time.time()))
|
||||||
|
|
||||||
if tag_limit and ( not watch.get('tags') or tag_limit not in watch['tags'] ):
|
return redirect(url_for('watchlist.index'))
|
||||||
logger.debug(f"Skipping watch {watch_uuid}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
datastore.set_last_viewed(watch_uuid, now)
|
|
||||||
|
|
||||||
return redirect(url_for('watchlist.index', tag=tag_limit))
|
|
||||||
|
|
||||||
@ui_blueprint.route("/delete", methods=['GET'])
|
@ui_blueprint.route("/delete", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
@@ -200,13 +96,12 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
|||||||
uuid = list(datastore.data['watching'].keys()).pop()
|
uuid = list(datastore.data['watching'].keys()).pop()
|
||||||
|
|
||||||
new_uuid = datastore.clone(uuid)
|
new_uuid = datastore.clone(uuid)
|
||||||
|
if new_uuid:
|
||||||
|
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||||
|
flash('Cloned.')
|
||||||
|
|
||||||
if not datastore.data['watching'].get(uuid).get('paused'):
|
return redirect(url_for('watchlist.index'))
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
|
||||||
|
|
||||||
flash('Cloned, you are editing the new watch.')
|
|
||||||
|
|
||||||
return redirect(url_for("ui.ui_edit.edit_page", uuid=new_uuid))
|
|
||||||
|
|
||||||
@ui_blueprint.route("/checknow", methods=['GET'])
|
@ui_blueprint.route("/checknow", methods=['GET'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
@@ -218,19 +113,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
|||||||
|
|
||||||
i = 0
|
i = 0
|
||||||
|
|
||||||
running_uuids = worker_handler.get_running_uuids()
|
running_uuids = []
|
||||||
|
for t in running_update_threads:
|
||||||
|
running_uuids.append(t.current_uuid)
|
||||||
|
|
||||||
if uuid:
|
if uuid:
|
||||||
if uuid not in running_uuids:
|
if uuid not in running_uuids:
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
else:
|
else:
|
||||||
# Recheck all, including muted
|
# Recheck all, including muted
|
||||||
# Get most overdue first
|
for watch_uuid, watch in datastore.data['watching'].items():
|
||||||
for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
|
|
||||||
watch_uuid = k[0]
|
|
||||||
watch = k[1]
|
|
||||||
if not watch['paused']:
|
if not watch['paused']:
|
||||||
if watch_uuid not in running_uuids:
|
if watch_uuid not in running_uuids:
|
||||||
if with_errors and not watch.get('last_error'):
|
if with_errors and not watch.get('last_error'):
|
||||||
@@ -239,13 +133,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
|||||||
if tag != None and tag not in watch['tags']:
|
if tag != None and tag not in watch['tags']:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||||
i += 1
|
i += 1
|
||||||
|
|
||||||
if i == 1:
|
if i == 1:
|
||||||
flash("Queued 1 watch for rechecking.")
|
flash("Queued 1 watch for rechecking.")
|
||||||
if i > 1:
|
if i > 1:
|
||||||
flash(f"Queued {i} watches for rechecking.")
|
flash("Queued {} watches for rechecking.".format(i))
|
||||||
if i == 0:
|
if i == 0:
|
||||||
flash("No watches available to recheck.")
|
flash("No watches available to recheck.")
|
||||||
|
|
||||||
@@ -255,18 +149,100 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handle
|
|||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def form_watch_list_checkbox_operations():
|
def form_watch_list_checkbox_operations():
|
||||||
op = request.form['op']
|
op = request.form['op']
|
||||||
uuids = [u.strip() for u in request.form.getlist('uuids') if u]
|
uuids = request.form.getlist('uuids')
|
||||||
extra_data = request.form.get('op_extradata', '').strip()
|
|
||||||
_handle_operations(
|
if (op == 'delete'):
|
||||||
datastore=datastore,
|
for uuid in uuids:
|
||||||
extra_data=extra_data,
|
uuid = uuid.strip()
|
||||||
queuedWatchMetaData=queuedWatchMetaData,
|
if datastore.data['watching'].get(uuid):
|
||||||
uuids=uuids,
|
datastore.delete(uuid.strip())
|
||||||
worker_handler=worker_handler,
|
flash("{} watches deleted".format(len(uuids)))
|
||||||
update_q=update_q,
|
|
||||||
watch_check_update=watch_check_update,
|
elif (op == 'pause'):
|
||||||
op=op,
|
for uuid in uuids:
|
||||||
)
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid.strip()]['paused'] = True
|
||||||
|
flash("{} watches paused".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'unpause'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||||
|
flash("{} watches unpaused".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'mark-viewed'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.set_last_viewed(uuid, int(time.time()))
|
||||||
|
flash("{} watches updated".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'mute'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_muted'] = True
|
||||||
|
flash("{} watches muted".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'unmute'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_muted'] = False
|
||||||
|
flash("{} watches un-muted".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'recheck'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
# Recheck and require a full reprocessing
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
flash("{} watches queued for rechecking".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'clear-errors'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid]["last_error"] = False
|
||||||
|
flash(f"{len(uuids)} watches errors cleared")
|
||||||
|
|
||||||
|
elif (op == 'clear-history'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.clear_watch_history(uuid)
|
||||||
|
flash("{} watches cleared/reset.".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'notification-default'):
|
||||||
|
from changedetectionio.notification import (
|
||||||
|
default_notification_format_for_watch
|
||||||
|
)
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_title'] = None
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_body'] = None
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_urls'] = []
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_format'] = default_notification_format_for_watch
|
||||||
|
flash("{} watches set to use default notification settings".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'assign-tag'):
|
||||||
|
op_extradata = request.form.get('op_extradata', '').strip()
|
||||||
|
if op_extradata:
|
||||||
|
tag_uuid = datastore.add_tag(title=op_extradata)
|
||||||
|
if op_extradata and tag_uuid:
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
# Bug in old versions caused by bad edit page/tag handler
|
||||||
|
if isinstance(datastore.data['watching'][uuid]['tags'], str):
|
||||||
|
datastore.data['watching'][uuid]['tags'] = []
|
||||||
|
|
||||||
|
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
|
||||||
|
|
||||||
|
flash(f"{len(uuids)} watches were tagged")
|
||||||
|
|
||||||
return redirect(url_for('watchlist.index'))
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
|||||||
@@ -9,7 +9,6 @@ from jinja2 import Environment, FileSystemLoader
|
|||||||
from changedetectionio.store import ChangeDetectionStore
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
from changedetectionio.auth_decorator import login_optionally_required
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
from changedetectionio.time_handler import is_within_schedule
|
from changedetectionio.time_handler import is_within_schedule
|
||||||
from changedetectionio import worker_handler
|
|
||||||
|
|
||||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||||
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
|
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
|
||||||
@@ -202,7 +201,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
#############################
|
#############################
|
||||||
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
|
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
|
||||||
# Queue the watch for immediate recheck, with a higher priority
|
# Queue the watch for immediate recheck, with a higher priority
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
# Diff page [edit] link should go back to diff page
|
# Diff page [edit] link should go back to diff page
|
||||||
if request.args.get("next") and request.args.get("next") == 'diff':
|
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||||
@@ -214,6 +213,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
if request.method == 'POST' and not form.validate():
|
if request.method == 'POST' and not form.validate():
|
||||||
flash("An error occurred, please see below.", "error")
|
flash("An error occurred, please see below.", "error")
|
||||||
|
|
||||||
|
visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid)
|
||||||
|
|
||||||
|
|
||||||
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
|
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
|
||||||
jq_support = True
|
jq_support = True
|
||||||
try:
|
try:
|
||||||
@@ -223,20 +225,16 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
|
|
||||||
watch = datastore.data['watching'].get(uuid)
|
watch = datastore.data['watching'].get(uuid)
|
||||||
|
|
||||||
# if system or watch is configured to need a chrome type browser
|
|
||||||
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||||
watch_needs_selenium_or_playwright = False
|
|
||||||
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
|
||||||
watch_needs_selenium_or_playwright = True
|
|
||||||
|
|
||||||
|
watch_uses_webdriver = False
|
||||||
|
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||||
|
watch_uses_webdriver = True
|
||||||
|
|
||||||
from zoneinfo import available_timezones
|
from zoneinfo import available_timezones
|
||||||
|
|
||||||
# Only works reliably with Playwright
|
# Only works reliably with Playwright
|
||||||
|
|
||||||
# Import the global plugin system
|
|
||||||
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras
|
|
||||||
|
|
||||||
template_args = {
|
template_args = {
|
||||||
'available_processors': processors.available_processors(),
|
'available_processors': processors.available_processors(),
|
||||||
'available_timezones': sorted(available_timezones()),
|
'available_timezones': sorted(available_timezones()),
|
||||||
@@ -249,18 +247,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||||
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
|
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
|
||||||
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
|
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
|
||||||
|
'watch_uses_webdriver': watch_uses_webdriver,
|
||||||
'jq_support': jq_support,
|
'jq_support': jq_support,
|
||||||
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||||
'settings_application': datastore.data['settings']['application'],
|
'settings_application': datastore.data['settings']['application'],
|
||||||
'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
|
|
||||||
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
|
|
||||||
'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
|
|
||||||
'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
|
|
||||||
'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
|
'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
|
||||||
'using_global_webdriver_wait': not default['webdriver_delay'],
|
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||||
'uuid': uuid,
|
'uuid': uuid,
|
||||||
'watch': watch,
|
'watch': watch
|
||||||
'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
included_content = None
|
included_content = None
|
||||||
|
|||||||
@@ -17,13 +17,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
# Watch_uuid could be unset in the case it`s used in tag editor, global settings
|
# Watch_uuid could be unset in the case it`s used in tag editor, global settings
|
||||||
import apprise
|
import apprise
|
||||||
from changedetectionio.notification.handler import process_notification
|
from changedetectionio.apprise_asset import asset
|
||||||
from changedetectionio.notification.apprise_plugin.assets import apprise_asset
|
apobj = apprise.Apprise(asset=asset)
|
||||||
|
|
||||||
from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler
|
|
||||||
|
|
||||||
apobj = apprise.Apprise(asset=apprise_asset)
|
|
||||||
|
|
||||||
|
# so that the custom endpoints are registered
|
||||||
|
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
|
||||||
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
|
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
|
||||||
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
|
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
|
||||||
|
|
||||||
@@ -92,6 +90,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
n_object['as_async'] = False
|
n_object['as_async'] = False
|
||||||
n_object.update(watch.extra_notification_token_values())
|
n_object.update(watch.extra_notification_token_values())
|
||||||
|
from changedetectionio.notification import process_notification
|
||||||
sent_obj = process_notification(n_object, datastore)
|
sent_obj = process_notification(n_object, datastore)
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
|
|||||||
@@ -1,14 +1,14 @@
|
|||||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
|
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
|
||||||
|
from flask_login import current_user
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
from loguru import logger
|
from copy import deepcopy
|
||||||
|
|
||||||
from changedetectionio.store import ChangeDetectionStore
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
from changedetectionio.auth_decorator import login_optionally_required
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
from changedetectionio import html_tools
|
from changedetectionio import html_tools
|
||||||
from changedetectionio import worker_handler
|
|
||||||
|
|
||||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
|
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||||
views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates")
|
views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates")
|
||||||
|
|
||||||
@views_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
@views_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
||||||
@@ -77,46 +77,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
@views_blueprint.route("/diff/<string:uuid>", methods=['POST'])
|
@views_blueprint.route("/diff/<string:uuid>", methods=['GET', 'POST'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
def diff_history_page_build_report(uuid):
|
def diff_history_page(uuid):
|
||||||
from changedetectionio import forms
|
|
||||||
|
|
||||||
# More for testing, possible to return the first/only
|
|
||||||
if uuid == 'first':
|
|
||||||
uuid = list(datastore.data['watching'].keys()).pop()
|
|
||||||
|
|
||||||
try:
|
|
||||||
watch = datastore.data['watching'][uuid]
|
|
||||||
except KeyError:
|
|
||||||
flash("No history found for the specified link, bad link?", "error")
|
|
||||||
return redirect(url_for('watchlist.index'))
|
|
||||||
|
|
||||||
# For submission of requesting an extract
|
|
||||||
extract_form = forms.extractDataForm(formdata=request.form,
|
|
||||||
data={'extract_regex': request.form.get('extract_regex', '')}
|
|
||||||
)
|
|
||||||
if not extract_form.validate():
|
|
||||||
flash("An error occurred, please see below.", "error")
|
|
||||||
return _render_diff_template(uuid, extract_form)
|
|
||||||
|
|
||||||
else:
|
|
||||||
extract_regex = request.form.get('extract_regex', '').strip()
|
|
||||||
output = watch.extract_regex_from_all_history(extract_regex)
|
|
||||||
if output:
|
|
||||||
watch_dir = os.path.join(datastore.datastore_path, uuid)
|
|
||||||
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
|
|
||||||
response.headers['Content-type'] = 'text/csv'
|
|
||||||
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
|
||||||
response.headers['Pragma'] = 'no-cache'
|
|
||||||
response.headers['Expires'] = "0"
|
|
||||||
return response
|
|
||||||
|
|
||||||
flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
|
|
||||||
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract')
|
|
||||||
|
|
||||||
def _render_diff_template(uuid, extract_form=None):
|
|
||||||
"""Helper function to render the diff template with all required data"""
|
|
||||||
from changedetectionio import forms
|
from changedetectionio import forms
|
||||||
|
|
||||||
# More for testing, possible to return the first/only
|
# More for testing, possible to return the first/only
|
||||||
@@ -130,36 +93,62 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
flash("No history found for the specified link, bad link?", "error")
|
flash("No history found for the specified link, bad link?", "error")
|
||||||
return redirect(url_for('watchlist.index'))
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
# Use provided form or create a new one
|
# For submission of requesting an extract
|
||||||
if extract_form is None:
|
extract_form = forms.extractDataForm(request.form)
|
||||||
extract_form = forms.extractDataForm(formdata=request.form,
|
if request.method == 'POST':
|
||||||
data={'extract_regex': request.form.get('extract_regex', '')}
|
if not extract_form.validate():
|
||||||
)
|
flash("An error occurred, please see below.", "error")
|
||||||
|
|
||||||
|
else:
|
||||||
|
extract_regex = request.form.get('extract_regex').strip()
|
||||||
|
output = watch.extract_regex_from_all_history(extract_regex)
|
||||||
|
if output:
|
||||||
|
watch_dir = os.path.join(datastore.datastore_path, uuid)
|
||||||
|
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
|
||||||
|
response.headers['Content-type'] = 'text/csv'
|
||||||
|
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||||
|
response.headers['Pragma'] = 'no-cache'
|
||||||
|
response.headers['Expires'] = 0
|
||||||
|
return response
|
||||||
|
|
||||||
|
flash('Nothing matches that RegEx', 'error')
|
||||||
|
redirect(url_for('ui_views.diff_history_page', uuid=uuid)+'#extract')
|
||||||
|
|
||||||
history = watch.history
|
history = watch.history
|
||||||
dates = list(history.keys())
|
dates = list(history.keys())
|
||||||
|
|
||||||
# If a "from_version" was requested, then find it (or the closest one)
|
if len(dates) < 2:
|
||||||
# Also set "from version" to be the closest version to the one that was last viewed.
|
flash("Not enough saved change detection snapshots to produce a report.", "error")
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed
|
# Save the current newest history as the most recently viewed
|
||||||
from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2]
|
datastore.set_last_viewed(uuid, time.time())
|
||||||
from_version = request.args.get('from_version', from_version_timestamp )
|
|
||||||
|
|
||||||
# Use the current one if nothing was specified
|
# Read as binary and force decode as UTF-8
|
||||||
to_version = request.args.get('to_version', str(dates[-1]))
|
# Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
|
||||||
|
from_version = request.args.get('from_version')
|
||||||
|
from_version_index = -2 # second newest
|
||||||
|
if from_version and from_version in dates:
|
||||||
|
from_version_index = dates.index(from_version)
|
||||||
|
else:
|
||||||
|
from_version = dates[from_version_index]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
to_version_file_contents = watch.get_history_snapshot(timestamp=to_version)
|
from_version_file_contents = watch.get_history_snapshot(dates[from_version_index])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}")
|
from_version_file_contents = f"Unable to read to-version at index {dates[from_version_index]}.\n"
|
||||||
to_version_file_contents = f"Unable to read to-version at {to_version}.\n"
|
|
||||||
|
to_version = request.args.get('to_version')
|
||||||
|
to_version_index = -1
|
||||||
|
if to_version and to_version in dates:
|
||||||
|
to_version_index = dates.index(to_version)
|
||||||
|
else:
|
||||||
|
to_version = dates[to_version_index]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from_version_file_contents = watch.get_history_snapshot(timestamp=from_version)
|
to_version_file_contents = watch.get_history_snapshot(dates[to_version_index])
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}")
|
to_version_file_contents = "Unable to read to-version at index{}.\n".format(dates[to_version_index])
|
||||||
from_version_file_contents = f"Unable to read to-version {from_version}.\n"
|
|
||||||
|
|
||||||
screenshot_url = watch.get_screenshot()
|
screenshot_url = watch.get_screenshot()
|
||||||
|
|
||||||
@@ -173,9 +162,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||||
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
|
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
|
||||||
|
|
||||||
datastore.set_last_viewed(uuid, time.time())
|
output = render_template("diff.html",
|
||||||
|
|
||||||
return render_template("diff.html",
|
|
||||||
current_diff_url=watch['url'],
|
current_diff_url=watch['url'],
|
||||||
from_version=str(from_version),
|
from_version=str(from_version),
|
||||||
to_version=str(to_version),
|
to_version=str(to_version),
|
||||||
@@ -198,10 +185,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
watch_a=watch
|
watch_a=watch
|
||||||
)
|
)
|
||||||
|
|
||||||
@views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
|
return output
|
||||||
@login_optionally_required
|
|
||||||
def diff_history_page(uuid):
|
|
||||||
return _render_diff_template(uuid)
|
|
||||||
|
|
||||||
@views_blueprint.route("/form/add/quickwatch", methods=['POST'])
|
@views_blueprint.route("/form/add/quickwatch", methods=['POST'])
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
@@ -228,7 +212,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
|
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
|
||||||
else:
|
else:
|
||||||
# Straight into the queue.
|
# Straight into the queue.
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||||
flash("Watch added.")
|
flash("Watch added.")
|
||||||
|
|
||||||
return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
|
return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
|
||||||
|
|||||||
@@ -72,32 +72,31 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
|
|||||||
per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")
|
per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")
|
||||||
|
|
||||||
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||||
|
|
||||||
output = render_template(
|
output = render_template(
|
||||||
"watch-overview.html",
|
"watch-overview.html",
|
||||||
active_tag=active_tag,
|
active_tag=active_tag,
|
||||||
active_tag_uuid=active_tag_uuid,
|
active_tag_uuid=active_tag_uuid,
|
||||||
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||||
datastore=datastore,
|
datastore=datastore,
|
||||||
errored_count=errored_count,
|
errored_count=errored_count,
|
||||||
form=form,
|
form=form,
|
||||||
guid=datastore.data['app_guid'],
|
guid=datastore.data['app_guid'],
|
||||||
has_proxies=datastore.proxy_list,
|
has_proxies=datastore.proxy_list,
|
||||||
has_unviewed=datastore.has_unviewed,
|
has_unviewed=datastore.has_unviewed,
|
||||||
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||||
now_time_server=round(time.time()),
|
now_time_server=time.time(),
|
||||||
pagination=pagination,
|
pagination=pagination,
|
||||||
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
||||||
search_q=request.args.get('q', '').strip(),
|
search_q=request.args.get('q', '').strip(),
|
||||||
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||||
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
||||||
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
||||||
tags=sorted_tags,
|
tags=sorted_tags,
|
||||||
watches=sorted_watches
|
watches=sorted_watches
|
||||||
)
|
)
|
||||||
|
|
||||||
if session.get('share-link'):
|
if session.get('share-link'):
|
||||||
del (session['share-link'])
|
del(session['share-link'])
|
||||||
|
|
||||||
resp = make_response(output)
|
resp = make_response(output)
|
||||||
|
|
||||||
|
|||||||
@@ -1,16 +1,10 @@
|
|||||||
{%- extends 'base.html' -%}
|
{% extends 'base.html' %}
|
||||||
{%- block content -%}
|
{% block content %}
|
||||||
{%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
|
{% from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title %}
|
||||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||||
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||||
<script>let nowtimeserver={{ now_time_server }};</script>
|
<script>let nowtimeserver={{ now_time_server }};</script>
|
||||||
<script>let favicon_baseURL="{{ url_for('static_content', group='favicon', filename="PLACEHOLDER")}}";</script>
|
|
||||||
<script>
|
|
||||||
// Initialize Feather icons after the page loads
|
|
||||||
document.addEventListener('DOMContentLoaded', function() {
|
|
||||||
feather.replace();
|
|
||||||
});
|
|
||||||
</script>
|
|
||||||
<style>
|
<style>
|
||||||
.checking-now .last-checked {
|
.checking-now .last-checked {
|
||||||
background-image: linear-gradient(to bottom, transparent 0%, rgba(0,0,0,0.05) 40%, rgba(0,0,0,0.1) 100%);
|
background-image: linear-gradient(to bottom, transparent 0%, rgba(0,0,0,0.05) 40%, rgba(0,0,0,0.1) 100%);
|
||||||
@@ -19,20 +13,19 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||||||
transition: background-size 0.9s ease
|
transition: background-size 0.9s ease
|
||||||
}
|
}
|
||||||
</style>
|
</style>
|
||||||
<div class="box" id="form-quick-watch-add">
|
<div class="box">
|
||||||
|
|
||||||
<form class="pure-form" action="{{ url_for('ui.ui_views.form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form">
|
<form class="pure-form" action="{{ url_for('ui.ui_views.form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form">
|
||||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<legend>Add a new web page change detection watch</legend>
|
<legend>Add a new change detection watch</legend>
|
||||||
<div id="watch-add-wrapper-zone">
|
<div id="watch-add-wrapper-zone">
|
||||||
|
|
||||||
{{ render_nolabel_field(form.url, placeholder="https://...", required=true) }}
|
{{ render_nolabel_field(form.url, placeholder="https://...", required=true) }}
|
||||||
|
{{ render_nolabel_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="watch label / tag") }}
|
||||||
{{ render_nolabel_field(form.watch_submit_button, title="Watch this URL!" ) }}
|
{{ render_nolabel_field(form.watch_submit_button, title="Watch this URL!" ) }}
|
||||||
{{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
|
{{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
|
||||||
</div>
|
</div>
|
||||||
<div id="watch-group-tag">
|
|
||||||
{{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="Watch group / tag", class="transparent-field") }}
|
|
||||||
</div>
|
|
||||||
<div id="quick-watch-processor-type">
|
<div id="quick-watch-processor-type">
|
||||||
{{ render_simple_field(form.processor) }}
|
{{ render_simple_field(form.processor) }}
|
||||||
</div>
|
</div>
|
||||||
@@ -40,221 +33,219 @@ document.addEventListener('DOMContentLoaded', function() {
|
|||||||
</fieldset>
|
</fieldset>
|
||||||
<span style="color:#eee; font-size: 80%;"><img alt="Create a shareable link" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></span>
|
<span style="color:#eee; font-size: 80%;"><img alt="Create a shareable link" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></span>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
|
||||||
<div class="box">
|
|
||||||
<form class="pure-form" action="{{ url_for('ui.form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
|
<form class="pure-form" action="{{ url_for('ui.form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
|
||||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||||
<input type="hidden" id="op_extradata" name="op_extradata" value="" >
|
<input type="hidden" id="op_extradata" name="op_extradata" value="" >
|
||||||
<div id="checkbox-operations">
|
<div id="checkbox-operations">
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="pause"><i data-feather="pause" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Pause</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="pause">Pause</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause"><i data-feather="play" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnPause</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="unpause">UnPause</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mute"><i data-feather="volume-x" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mute</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="mute">Mute</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute"><i data-feather="volume-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnMute</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="unmute">UnMute</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck"><i data-feather="refresh-cw" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Recheck</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag"><i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Tag</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag">Tag</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed"><i data-feather="eye" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mark viewed</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed">Mark viewed</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default"><i data-feather="bell" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Use default notification</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors"><i data-feather="x-circle" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear errors</button>
|
<button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors">Clear errors</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear/reset history</button>
|
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history">Clear/reset history</button>
|
||||||
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Delete</button>
|
<button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete">Delete</button>
|
||||||
</div>
|
</div>
|
||||||
{%- if watches|length >= pagination.per_page -%}
|
{% if watches|length >= pagination.per_page %}
|
||||||
{{ pagination.info }}
|
{{ pagination.info }}
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
{%- if search_q -%}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%}
|
{% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %}
|
||||||
<div>
|
<div>
|
||||||
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a>
|
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a>
|
||||||
|
|
||||||
<!-- tag list -->
|
<!-- tag list -->
|
||||||
{%- for uuid, tag in tags -%}
|
{% for uuid, tag in tags %}
|
||||||
{%- if tag != "" -%}
|
{% if tag != "" %}
|
||||||
<a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
|
<a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
{%- endfor -%}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{%- set sort_order = sort_order or 'asc' -%}
|
{% set sort_order = sort_order or 'asc' %}
|
||||||
{%- set sort_attribute = sort_attribute or 'last_changed' -%}
|
{% set sort_attribute = sort_attribute or 'last_changed' %}
|
||||||
{%- set pagination_page = request.args.get('page', 0) -%}
|
{% set pagination_page = request.args.get('page', 0) %}
|
||||||
{%- set cols_required = 6 -%}
|
{% set cols_required = 6 %}
|
||||||
{%- set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") -%}
|
{% set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") %}
|
||||||
{%- if any_has_restock_price_processor -%}
|
{% if any_has_restock_price_processor %}
|
||||||
{%- set cols_required = cols_required + 1 -%}
|
{% set cols_required = cols_required + 1 %}
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
{%- set ui_settings = datastore.data['settings']['application']['ui'] -%}
|
|
||||||
|
|
||||||
<div id="watch-table-wrapper">
|
<div id="watch-table-wrapper">
|
||||||
{%- set table_classes = [
|
|
||||||
'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled',
|
<table class="pure-table pure-table-striped watch-table">
|
||||||
] -%}
|
|
||||||
<table class="pure-table pure-table-striped watch-table {{ table_classes | reject('equalto', '') | join(' ') }}">
|
|
||||||
<thead>
|
<thead>
|
||||||
<tr>
|
<tr>
|
||||||
{%- set link_order = "desc" if sort_order == 'asc' else "asc" -%}
|
{% set link_order = "desc" if sort_order == 'asc' else "asc" %}
|
||||||
{%- set arrow_span = "" -%}
|
{% set arrow_span = "" %}
|
||||||
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('watchlist.index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
|
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('watchlist.index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
|
||||||
<th>
|
<th class="empty-cell"></th>
|
||||||
<a class="{{ 'active '+link_order if sort_attribute == 'paused' else 'inactive' }}" href="{{url_for('watchlist.index', sort='paused', order=link_order, tag=active_tag_uuid)}}"><i data-feather="pause" style="vertical-align: bottom; width: 14px; height: 14px; margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a>
|
|
||||||
|
|
||||||
<a class="{{ 'active '+link_order if sort_attribute == 'notification_muted' else 'inactive' }}" href="{{url_for('watchlist.index', sort='notification_muted', order=link_order, tag=active_tag_uuid)}}"><i data-feather="volume-2" style="vertical-align: bottom; width: 14px; height: 14px; margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a>
|
|
||||||
</th>
|
|
||||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
|
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
|
||||||
{%- if any_has_restock_price_processor -%}
|
{% if any_has_restock_price_processor %}
|
||||||
<th>Restock & Price</th>
|
<th>Restock & Price</th>
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th>
|
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th>
|
||||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th>
|
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th>
|
||||||
<th class="empty-cell"></th>
|
<th class="empty-cell"></th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{%- if not watches|length -%}
|
{% if not watches|length %}
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td>
|
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td>
|
||||||
</tr>
|
</tr>
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
|
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
|
||||||
|
|
||||||
{%- for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) -%}
|
{% set is_unviewed = watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}
|
||||||
{%- set checking_now = is_checking_now(watch) -%}
|
{% set checking_now = is_checking_now(watch) %}
|
||||||
{%- set history_n = watch.history_n -%}
|
<tr id="{{ watch.uuid }}"
|
||||||
{%- set favicon = watch.get_favicon_filename() -%}
|
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
|
||||||
{# Mirror in changedetectionio/static/js/realtime.js for the frontend #}
|
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
|
||||||
{%- set row_classes = [
|
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
|
||||||
loop.cycle('pure-table-odd', 'pure-table-even'),
|
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
|
||||||
'processor-' ~ watch['processor'],
|
{% if is_unviewed %}unviewed{% endif %}
|
||||||
'has-error' if watch.compile_error_texts()|length > 2 else '',
|
{% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %}
|
||||||
'paused' if watch.paused is defined and watch.paused != False else '',
|
{% if watch.uuid in queued_uuids %}queued{% endif %}
|
||||||
'unviewed' if watch.has_unviewed else '',
|
{% if checking_now %}checking-now{% endif %}
|
||||||
'has-restock-info' if watch.has_restock_info else 'no-restock-info',
|
">
|
||||||
'has-favicon' if favicon else '',
|
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
|
||||||
'in-stock' if watch.has_restock_info and watch['restock']['in_stock'] else '',
|
|
||||||
'not-in-stock' if watch.has_restock_info and not watch['restock']['in_stock'] else '',
|
|
||||||
'queued' if watch.uuid in queued_uuids else '',
|
|
||||||
'checking-now' if checking_now else '',
|
|
||||||
'notification_muted' if watch.notification_muted else '',
|
|
||||||
'single-history' if history_n == 1 else '',
|
|
||||||
'multiple-history' if history_n >= 2 else '',
|
|
||||||
] -%}
|
|
||||||
<tr id="{{ watch.uuid }}" data-watch-uuid="{{ watch.uuid }}" class="{{ row_classes | reject('equalto', '') | join(' ') }}">
|
|
||||||
<td class="inline checkbox-uuid" ><div><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td>
|
|
||||||
<td class="inline watch-controls">
|
<td class="inline watch-controls">
|
||||||
<div>
|
{% if not watch.paused %}
|
||||||
<a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
|
<a class="state-off" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
|
||||||
<a class="ajax-op state-on pause-toggle" data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
|
{% else %}
|
||||||
<a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a>
|
<a class="state-on" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
|
||||||
<a class="ajax-op state-on mute-toggle" data-op="mute" style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a>
|
{% endif %}
|
||||||
</div>
|
{% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %}
|
||||||
|
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a>
|
||||||
</td>
|
</td>
|
||||||
|
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
|
||||||
|
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
|
||||||
|
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
||||||
|
|
||||||
<td class="title-col inline">
|
{% if watch.get_fetch_backend == "html_webdriver"
|
||||||
<div class="flex-wrapper">
|
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
|
||||||
{% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %}
|
or "extra_browser_" in watch.get_fetch_backend
|
||||||
<div>{# A page might have hundreds of these images, set IMG options for lazy loading, don't set SRC if we dont have it so it doesnt fetch the placeholder' #}
|
%}
|
||||||
<img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {% endif %} />
|
<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
|
||||||
</div>
|
{% endif %}
|
||||||
{% endif %}
|
|
||||||
<div>
|
|
||||||
<span class="watch-title">
|
|
||||||
{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}} <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
|
|
||||||
</span>
|
|
||||||
<div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div>
|
|
||||||
{%- if watch['processor'] == 'text_json_diff' -%}
|
|
||||||
{%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] -%}
|
|
||||||
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
|
||||||
{%- endif -%}
|
|
||||||
{%- endif -%}
|
|
||||||
{%- if watch['processor'] == 'restock_diff' -%}
|
|
||||||
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
|
|
||||||
{%- endif -%}
|
|
||||||
{%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
|
|
||||||
<span class="watch-tag-list">{{ watch_tag.title }}</span>
|
|
||||||
{%- endfor -%}
|
|
||||||
</div>
|
|
||||||
<div class="status-icons">
|
|
||||||
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
|
||||||
{%- if watch.get_fetch_backend == "html_webdriver"
|
|
||||||
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
|
|
||||||
or "extra_browser_" in watch.get_fetch_backend
|
|
||||||
-%}
|
|
||||||
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
|
|
||||||
{%- endif -%}
|
|
||||||
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
|
|
||||||
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
|
|
||||||
|
|
||||||
|
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
|
||||||
|
{% if watch.has_browser_steps %}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" title="Browser Steps is enabled" >{% endif %}
|
||||||
|
{% if watch.last_error is defined and watch.last_error != False %}
|
||||||
|
<div class="fetch-error">{{ watch.last_error }}
|
||||||
|
|
||||||
|
{% if '403' in watch.last_error %}
|
||||||
|
{% if has_proxies %}
|
||||||
|
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>
|
||||||
|
{% endif %}
|
||||||
|
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
|
||||||
|
|
||||||
|
{% endif %}
|
||||||
|
{% if 'empty result or contain only an image' in watch.last_error %}
|
||||||
|
<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images">more help here</a>.
|
||||||
|
{% endif %}
|
||||||
</div>
|
</div>
|
||||||
</div>
|
{% endif %}
|
||||||
|
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
|
||||||
|
<div class="fetch-error notification-error"><a href="{{url_for('settings.notification_logs')}}">{{ watch.last_notification_error }}</a></div>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% if watch['processor'] == 'text_json_diff' %}
|
||||||
|
{% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data'] %}
|
||||||
|
<div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
|
||||||
|
{% endif %}
|
||||||
|
{% endif %}
|
||||||
|
{% if watch['processor'] == 'restock_diff' %}
|
||||||
|
<span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}" class="status-icon price-follow-tag-icon" > Price</span>
|
||||||
|
{% endif %}
|
||||||
|
{% for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() %}
|
||||||
|
<span class="watch-tag-list">{{ watch_tag.title }}</span>
|
||||||
|
{% endfor %}
|
||||||
</td>
|
</td>
|
||||||
{%- if any_has_restock_price_processor -%}
|
<!-- @todo make it so any watch handler obj can expose this --->
|
||||||
|
{% if any_has_restock_price_processor %}
|
||||||
<td class="restock-and-price">
|
<td class="restock-and-price">
|
||||||
{%- if watch['processor'] == 'restock_diff' -%}
|
{% if watch['processor'] == 'restock_diff' %}
|
||||||
{%- if watch.has_restock_info -%}
|
{% if watch.has_restock_info %}
|
||||||
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
|
<span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
|
||||||
<!-- maybe some object watch['processor'][restock_diff] or.. -->
|
<!-- maybe some object watch['processor'][restock_diff] or.. -->
|
||||||
{%- if watch['restock']['in_stock']-%} In stock {%- else-%} Not in stock {%- endif -%}
|
{% if watch['restock']['in_stock'] %} In stock {% else %} Not in stock {% endif %}
|
||||||
</span>
|
</span>
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
|
|
||||||
{%- if watch.get('restock') and watch['restock']['price'] != None -%}
|
{% if watch.get('restock') and watch['restock']['price'] != None %}
|
||||||
{%- if watch['restock']['price'] != None -%}
|
{% if watch['restock']['price'] != None %}
|
||||||
<span class="restock-label price" title="Price">
|
<span class="restock-label price" title="Price">
|
||||||
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
|
{{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
|
||||||
</span>
|
</span>
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
{%- elif not watch.has_restock_info -%}
|
{% elif not watch.has_restock_info %}
|
||||||
<span class="restock-label error">No information</span>
|
<span class="restock-label error">No information</span>
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
</td>
|
</td>
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
{#last_checked becomes fetch-start-time#}
|
{#last_checked becomes fetch-start-time#}
|
||||||
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" >
|
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" {% if checking_now %} data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" {% endif %} >
|
||||||
<div class="spinner-wrapper" style="display:none;" >
|
{% if checking_now %}
|
||||||
<span class="spinner"></span><span> Checking now</span>
|
<span class="spinner"></span><span> Checking now</span>
|
||||||
</div>
|
{% else %}
|
||||||
<span class="innertext">{{watch|format_last_checked_time|safe}}</span>
|
{{watch|format_last_checked_time|safe}}</td>
|
||||||
</td>
|
{% endif %}
|
||||||
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
|
|
||||||
|
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %}
|
||||||
{{watch.last_changed|format_timestamp_timeago}}
|
{{watch.last_changed|format_timestamp_timeago}}
|
||||||
{%- else -%}
|
{% else %}
|
||||||
Not yet
|
Not yet
|
||||||
{%- endif -%}
|
{% endif %}
|
||||||
</td>
|
</td>
|
||||||
<td class="buttons">
|
<td>
|
||||||
<div>
|
<a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
|
||||||
{%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%}
|
class="recheck pure-button pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
|
||||||
<a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a>
|
|
||||||
<a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a>
|
|
||||||
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
|
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
|
||||||
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a>
|
{% if watch.history_n >= 2 %}
|
||||||
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a>
|
|
||||||
</div>
|
{% if is_unviewed %}
|
||||||
|
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
|
||||||
|
{% else %}
|
||||||
|
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
|
{% else %}
|
||||||
|
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
|
||||||
|
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary">Preview</a>
|
||||||
|
{% endif %}
|
||||||
|
{% endif %}
|
||||||
</td>
|
</td>
|
||||||
</tr>
|
</tr>
|
||||||
{%- endfor -%}
|
{% endfor %}
|
||||||
</tbody>
|
</tbody>
|
||||||
</table>
|
</table>
|
||||||
<ul id="post-list-buttons">
|
<ul id="post-list-buttons">
|
||||||
<li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" >
|
{% if errored_count %}
|
||||||
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a>
|
|
||||||
</li>
|
|
||||||
<li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" >
|
|
||||||
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a>
|
|
||||||
</li>
|
|
||||||
{%- if active_tag_uuid -%}
|
|
||||||
<li id="post-list-mark-views-tag">
|
|
||||||
<a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a>
|
|
||||||
</li>
|
|
||||||
{%- endif -%}
|
|
||||||
<li>
|
<li>
|
||||||
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck
|
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error ">With errors ({{ errored_count }})</a>
|
||||||
all {% if active_tag_uuid %} in '{{active_tag.title}}'{%endif%}</a>
|
</li>
|
||||||
|
{% endif %}
|
||||||
|
{% if has_unviewed %}
|
||||||
|
<li>
|
||||||
|
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Mark all viewed</a>
|
||||||
|
</li>
|
||||||
|
{% endif %}
|
||||||
|
<li>
|
||||||
|
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Recheck
|
||||||
|
all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a>
|
||||||
</li>
|
</li>
|
||||||
<li>
|
<li>
|
||||||
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
|
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='Generic_Feed-icon.svg')}}" height="15"></a>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
{{ pagination.links }}
|
{{ pagination.links }}
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
</div>
|
</div>
|
||||||
{%- endblock -%}
|
{% endblock %}
|
||||||
|
|||||||
@@ -1,12 +1,14 @@
|
|||||||
|
from flask import Blueprint
|
||||||
|
|
||||||
from json_logic.builtins import BUILTINS
|
from json_logic.builtins import BUILTINS
|
||||||
|
|
||||||
from .exceptions import EmptyConditionRuleRowNotUsable
|
from .exceptions import EmptyConditionRuleRowNotUsable
|
||||||
from .pluggy_interface import plugin_manager # Import the pluggy plugin manager
|
from .pluggy_interface import plugin_manager # Import the pluggy plugin manager
|
||||||
from . import default_plugin
|
from . import default_plugin
|
||||||
from loguru import logger
|
|
||||||
# List of all supported JSON Logic operators
|
# List of all supported JSON Logic operators
|
||||||
operator_choices = [
|
operator_choices = [
|
||||||
(None, "Choose one - Operator"),
|
(None, "Choose one"),
|
||||||
(">", "Greater Than"),
|
(">", "Greater Than"),
|
||||||
("<", "Less Than"),
|
("<", "Less Than"),
|
||||||
(">=", "Greater Than or Equal To"),
|
(">=", "Greater Than or Equal To"),
|
||||||
@@ -14,11 +16,12 @@ operator_choices = [
|
|||||||
("==", "Equals"),
|
("==", "Equals"),
|
||||||
("!=", "Not Equals"),
|
("!=", "Not Equals"),
|
||||||
("in", "Contains"),
|
("in", "Contains"),
|
||||||
|
("!in", "Does Not Contain"),
|
||||||
]
|
]
|
||||||
|
|
||||||
# Fields available in the rules
|
# Fields available in the rules
|
||||||
field_choices = [
|
field_choices = [
|
||||||
(None, "Choose one - Field"),
|
(None, "Choose one"),
|
||||||
]
|
]
|
||||||
|
|
||||||
# The data we will feed the JSON Rules to see if it passes the test/conditions or not
|
# The data we will feed the JSON Rules to see if it passes the test/conditions or not
|
||||||
@@ -91,41 +94,20 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat
|
|||||||
EXECUTE_DATA = {}
|
EXECUTE_DATA = {}
|
||||||
result = True
|
result = True
|
||||||
|
|
||||||
watch = application_datastruct['watching'].get(current_watch_uuid)
|
ruleset_settings = application_datastruct['watching'].get(current_watch_uuid)
|
||||||
|
|
||||||
if watch and watch.get("conditions"):
|
if ruleset_settings.get("conditions"):
|
||||||
logic_operator = "and" if watch.get("conditions_match_logic", "ALL") == "ALL" else "or"
|
logic_operator = "and" if ruleset_settings.get("conditions_match_logic", "ALL") == "ALL" else "or"
|
||||||
complete_rules = filter_complete_rules(watch['conditions'])
|
complete_rules = filter_complete_rules(ruleset_settings['conditions'])
|
||||||
if complete_rules:
|
if complete_rules:
|
||||||
# Give all plugins a chance to update the data dict again (that we will test the conditions against)
|
# Give all plugins a chance to update the data dict again (that we will test the conditions against)
|
||||||
for plugin in plugin_manager.get_plugins():
|
for plugin in plugin_manager.get_plugins():
|
||||||
try:
|
new_execute_data = plugin.add_data(current_watch_uuid=current_watch_uuid,
|
||||||
import concurrent.futures
|
application_datastruct=application_datastruct,
|
||||||
import time
|
ephemeral_data=ephemeral_data)
|
||||||
|
|
||||||
with concurrent.futures.ThreadPoolExecutor() as executor:
|
|
||||||
future = executor.submit(
|
|
||||||
plugin.add_data,
|
|
||||||
current_watch_uuid=current_watch_uuid,
|
|
||||||
application_datastruct=application_datastruct,
|
|
||||||
ephemeral_data=ephemeral_data
|
|
||||||
)
|
|
||||||
logger.debug(f"Trying plugin {plugin}....")
|
|
||||||
|
|
||||||
# Set a timeout of 10 seconds
|
if new_execute_data and isinstance(new_execute_data, dict):
|
||||||
try:
|
EXECUTE_DATA.update(new_execute_data)
|
||||||
new_execute_data = future.result(timeout=10)
|
|
||||||
if new_execute_data and isinstance(new_execute_data, dict):
|
|
||||||
EXECUTE_DATA.update(new_execute_data)
|
|
||||||
|
|
||||||
except concurrent.futures.TimeoutError:
|
|
||||||
# The plugin took too long, abort processing for this watch
|
|
||||||
raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.")
|
|
||||||
except Exception as e:
|
|
||||||
# Log the error but continue with the next plugin
|
|
||||||
import logging
|
|
||||||
logging.error(f"Error executing plugin {plugin.__class__.__name__}: {str(e)}")
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Create the ruleset
|
# Create the ruleset
|
||||||
ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules)
|
ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules)
|
||||||
@@ -134,7 +116,8 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat
|
|||||||
if not jsonLogic(logic=ruleset, data=EXECUTE_DATA, operations=CUSTOM_OPERATIONS):
|
if not jsonLogic(logic=ruleset, data=EXECUTE_DATA, operations=CUSTOM_OPERATIONS):
|
||||||
result = False
|
result = False
|
||||||
|
|
||||||
return {'executed_data': EXECUTE_DATA, 'result': result}
|
return result
|
||||||
|
|
||||||
|
|
||||||
# Load plugins dynamically
|
# Load plugins dynamically
|
||||||
for plugin in plugin_manager.get_plugins():
|
for plugin in plugin_manager.get_plugins():
|
||||||
@@ -150,18 +133,3 @@ for plugin in plugin_manager.get_plugins():
|
|||||||
if isinstance(new_field_choices, list):
|
if isinstance(new_field_choices, list):
|
||||||
field_choices.extend(new_field_choices)
|
field_choices.extend(new_field_choices)
|
||||||
|
|
||||||
def collect_ui_edit_stats_extras(watch):
|
|
||||||
"""Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
|
|
||||||
extras_content = []
|
|
||||||
|
|
||||||
for plugin in plugin_manager.get_plugins():
|
|
||||||
try:
|
|
||||||
content = plugin.ui_edit_stats_extras(watch=watch)
|
|
||||||
if content:
|
|
||||||
extras_content.append(content)
|
|
||||||
except Exception as e:
|
|
||||||
# Skip plugins that don't implement the hook or have errors
|
|
||||||
pass
|
|
||||||
|
|
||||||
return "\n".join(extras_content) if extras_content else ""
|
|
||||||
|
|
||||||
|
|||||||
@@ -67,8 +67,7 @@ def construct_blueprint(datastore):
|
|||||||
|
|
||||||
return jsonify({
|
return jsonify({
|
||||||
'status': 'success',
|
'status': 'success',
|
||||||
'result': result.get('result'),
|
'result': result,
|
||||||
'data': result.get('executed_data'),
|
|
||||||
'message': 'Condition passes' if result else 'Condition does not pass'
|
'message': 'Condition passes' if result else 'Condition does not pass'
|
||||||
})
|
})
|
||||||
|
|
||||||
|
|||||||
@@ -21,21 +21,17 @@ def register_operators():
|
|||||||
def length_max(_, text, strlen):
|
def length_max(_, text, strlen):
|
||||||
return len(text) <= int(strlen)
|
return len(text) <= int(strlen)
|
||||||
|
|
||||||
# Custom function for case-insensitive regex matching
|
# ✅ Custom function for case-insensitive regex matching
|
||||||
def contains_regex(_, text, pattern):
|
def contains_regex(_, text, pattern):
|
||||||
"""Returns True if `text` contains `pattern` (case-insensitive regex match)."""
|
"""Returns True if `text` contains `pattern` (case-insensitive regex match)."""
|
||||||
return bool(re.search(pattern, str(text), re.IGNORECASE))
|
return bool(re.search(pattern, str(text), re.IGNORECASE))
|
||||||
|
|
||||||
# Custom function for NOT matching case-insensitive regex
|
# ✅ Custom function for NOT matching case-insensitive regex
|
||||||
def not_contains_regex(_, text, pattern):
|
def not_contains_regex(_, text, pattern):
|
||||||
"""Returns True if `text` does NOT contain `pattern` (case-insensitive regex match)."""
|
"""Returns True if `text` does NOT contain `pattern` (case-insensitive regex match)."""
|
||||||
return not bool(re.search(pattern, str(text), re.IGNORECASE))
|
return not bool(re.search(pattern, str(text), re.IGNORECASE))
|
||||||
|
|
||||||
def not_contains(_, text, pattern):
|
|
||||||
return not pattern in text
|
|
||||||
|
|
||||||
return {
|
return {
|
||||||
"!in": not_contains,
|
|
||||||
"!contains_regex": not_contains_regex,
|
"!contains_regex": not_contains_regex,
|
||||||
"contains_regex": contains_regex,
|
"contains_regex": contains_regex,
|
||||||
"ends_with": ends_with,
|
"ends_with": ends_with,
|
||||||
@@ -47,7 +43,6 @@ def register_operators():
|
|||||||
@hookimpl
|
@hookimpl
|
||||||
def register_operator_choices():
|
def register_operator_choices():
|
||||||
return [
|
return [
|
||||||
("!in", "Does NOT Contain"),
|
|
||||||
("starts_with", "Text Starts With"),
|
("starts_with", "Text Starts With"),
|
||||||
("ends_with", "Text Ends With"),
|
("ends_with", "Text Ends With"),
|
||||||
("length_min", "Length minimum"),
|
("length_min", "Length minimum"),
|
||||||
|
|||||||
@@ -19,7 +19,7 @@ class ConditionFormRow(Form):
|
|||||||
validators=[validators.Optional()]
|
validators=[validators.Optional()]
|
||||||
)
|
)
|
||||||
|
|
||||||
value = StringField("Value", validators=[validators.Optional()], render_kw={"placeholder": "A value"})
|
value = StringField("Value", validators=[validators.Optional()])
|
||||||
|
|
||||||
def validate(self, extra_validators=None):
|
def validate(self, extra_validators=None):
|
||||||
# First, run the default validators
|
# First, run the default validators
|
||||||
|
|||||||
@@ -1,8 +1,5 @@
|
|||||||
import pluggy
|
import pluggy
|
||||||
import os
|
from . import default_plugin # Import the default plugin
|
||||||
import importlib
|
|
||||||
import sys
|
|
||||||
from . import default_plugin
|
|
||||||
|
|
||||||
# ✅ Ensure that the namespace in HookspecMarker matches PluginManager
|
# ✅ Ensure that the namespace in HookspecMarker matches PluginManager
|
||||||
PLUGIN_NAMESPACE = "changedetectionio_conditions"
|
PLUGIN_NAMESPACE = "changedetectionio_conditions"
|
||||||
@@ -33,11 +30,6 @@ class ConditionsSpec:
|
|||||||
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
|
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
|
||||||
"""Add to the datadict"""
|
"""Add to the datadict"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@hookspec
|
|
||||||
def ui_edit_stats_extras(watch):
|
|
||||||
"""Return HTML content to add to the stats tab in the edit view"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
# ✅ Set up Pluggy Plugin Manager
|
# ✅ Set up Pluggy Plugin Manager
|
||||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||||
@@ -48,27 +40,5 @@ plugin_manager.add_hookspecs(ConditionsSpec)
|
|||||||
# ✅ Register built-in plugins manually
|
# ✅ Register built-in plugins manually
|
||||||
plugin_manager.register(default_plugin, "default_plugin")
|
plugin_manager.register(default_plugin, "default_plugin")
|
||||||
|
|
||||||
# ✅ Load plugins from the plugins directory
|
|
||||||
def load_plugins_from_directory():
|
|
||||||
plugins_dir = os.path.join(os.path.dirname(__file__), 'plugins')
|
|
||||||
if not os.path.exists(plugins_dir):
|
|
||||||
return
|
|
||||||
|
|
||||||
# Get all Python files (excluding __init__.py)
|
|
||||||
for filename in os.listdir(plugins_dir):
|
|
||||||
if filename.endswith(".py") and filename != "__init__.py":
|
|
||||||
module_name = filename[:-3] # Remove .py extension
|
|
||||||
module_path = f"changedetectionio.conditions.plugins.{module_name}"
|
|
||||||
|
|
||||||
try:
|
|
||||||
module = importlib.import_module(module_path)
|
|
||||||
# Register the plugin with pluggy
|
|
||||||
plugin_manager.register(module, module_name)
|
|
||||||
except (ImportError, AttributeError) as e:
|
|
||||||
print(f"Error loading plugin {module_name}: {e}")
|
|
||||||
|
|
||||||
# Load plugins from the plugins directory
|
|
||||||
load_plugins_from_directory()
|
|
||||||
|
|
||||||
# ✅ Discover installed plugins from external packages (if any)
|
# ✅ Discover installed plugins from external packages (if any)
|
||||||
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
|
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
|
||||||
|
|||||||
@@ -1 +0,0 @@
|
|||||||
# Import plugins package to make them discoverable
|
|
||||||
@@ -1,119 +0,0 @@
|
|||||||
import pluggy
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS=100000
|
|
||||||
|
|
||||||
# Support both plugin systems
|
|
||||||
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
|
|
||||||
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
|
|
||||||
|
|
||||||
def levenshtein_ratio_recent_history(watch, incoming_text=None):
|
|
||||||
try:
|
|
||||||
from Levenshtein import ratio, distance
|
|
||||||
k = list(watch.history.keys())
|
|
||||||
a = None
|
|
||||||
b = None
|
|
||||||
|
|
||||||
# When called from ui_edit_stats_extras, we don't have incoming_text
|
|
||||||
if incoming_text is None:
|
|
||||||
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest snapshot
|
|
||||||
b = watch.get_history_snapshot(timestamp=k[-2]) # Previous snapshot
|
|
||||||
|
|
||||||
# Needs atleast one snapshot
|
|
||||||
elif len(k) >= 1: # Should be atleast one snapshot to compare against
|
|
||||||
a = watch.get_history_snapshot(timestamp=k[-1]) # Latest saved snapshot
|
|
||||||
b = incoming_text if incoming_text else k[-2]
|
|
||||||
|
|
||||||
if a and b:
|
|
||||||
distance_value = distance(a, b)
|
|
||||||
ratio_value = ratio(a, b)
|
|
||||||
return {
|
|
||||||
'distance': distance_value,
|
|
||||||
'ratio': ratio_value,
|
|
||||||
'percent_similar': round(ratio_value * 100, 2)
|
|
||||||
}
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Unable to calc similarity: {str(e)}")
|
|
||||||
|
|
||||||
return ''
|
|
||||||
|
|
||||||
@conditions_hookimpl
|
|
||||||
def register_operators():
|
|
||||||
pass
|
|
||||||
|
|
||||||
@conditions_hookimpl
|
|
||||||
def register_operator_choices():
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
@conditions_hookimpl
|
|
||||||
def register_field_choices():
|
|
||||||
return [
|
|
||||||
("levenshtein_ratio", "Levenshtein - Text similarity ratio"),
|
|
||||||
("levenshtein_distance", "Levenshtein - Text change distance"),
|
|
||||||
]
|
|
||||||
|
|
||||||
@conditions_hookimpl
|
|
||||||
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
|
|
||||||
res = {}
|
|
||||||
watch = application_datastruct['watching'].get(current_watch_uuid)
|
|
||||||
# ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc
|
|
||||||
|
|
||||||
if watch and 'text' in ephemeral_data:
|
|
||||||
lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text',''))
|
|
||||||
if isinstance(lev_data, dict):
|
|
||||||
res['levenshtein_ratio'] = lev_data.get('ratio', 0)
|
|
||||||
res['levenshtein_similarity'] = lev_data.get('percent_similar', 0)
|
|
||||||
res['levenshtein_distance'] = lev_data.get('distance', 0)
|
|
||||||
|
|
||||||
return res
|
|
||||||
|
|
||||||
@global_hookimpl
|
|
||||||
def ui_edit_stats_extras(watch):
|
|
||||||
"""Add Levenshtein stats to the UI using the global plugin system"""
|
|
||||||
"""Generate the HTML for Levenshtein stats - shared by both plugin systems"""
|
|
||||||
if len(watch.history.keys()) < 2:
|
|
||||||
return "<p>Not enough history to calculate Levenshtein metrics</p>"
|
|
||||||
|
|
||||||
|
|
||||||
# Protection against the algorithm getting stuck on huge documents
|
|
||||||
k = list(watch.history.keys())
|
|
||||||
if any(
|
|
||||||
len(watch.get_history_snapshot(timestamp=k[idx])) > LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS
|
|
||||||
for idx in (-1, -2)
|
|
||||||
if len(k) >= abs(idx)
|
|
||||||
):
|
|
||||||
return "<p>Snapshot too large for edit statistics, skipping.</p>"
|
|
||||||
|
|
||||||
try:
|
|
||||||
lev_data = levenshtein_ratio_recent_history(watch)
|
|
||||||
if not lev_data or not isinstance(lev_data, dict):
|
|
||||||
return "<p>Unable to calculate Levenshtein metrics</p>"
|
|
||||||
|
|
||||||
html = f"""
|
|
||||||
<div class="levenshtein-stats">
|
|
||||||
<h4>Levenshtein Text Similarity Details</h4>
|
|
||||||
<table class="pure-table">
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>Raw distance (edits needed)</td>
|
|
||||||
<td>{lev_data['distance']}</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Similarity ratio</td>
|
|
||||||
<td>{lev_data['ratio']:.4f}</td>
|
|
||||||
</tr>
|
|
||||||
<tr>
|
|
||||||
<td>Percent similar</td>
|
|
||||||
<td>{lev_data['percent_similar']}%</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
<p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
|
|
||||||
</div>
|
|
||||||
"""
|
|
||||||
return html
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
|
|
||||||
return "<p>Error calculating Levenshtein metrics</p>"
|
|
||||||
|
|
||||||
@@ -1,82 +0,0 @@
|
|||||||
import pluggy
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
# Support both plugin systems
|
|
||||||
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
|
|
||||||
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
|
|
||||||
|
|
||||||
def count_words_in_history(watch, incoming_text=None):
|
|
||||||
"""Count words in snapshot text"""
|
|
||||||
try:
|
|
||||||
if incoming_text is not None:
|
|
||||||
# When called from add_data with incoming text
|
|
||||||
return len(incoming_text.split())
|
|
||||||
elif watch.history.keys():
|
|
||||||
# When called from UI extras to count latest snapshot
|
|
||||||
latest_key = list(watch.history.keys())[-1]
|
|
||||||
latest_content = watch.get_history_snapshot(latest_key)
|
|
||||||
return len(latest_content.split())
|
|
||||||
return 0
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error counting words: {str(e)}")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
# Implement condition plugin hooks
|
|
||||||
@conditions_hookimpl
|
|
||||||
def register_operators():
|
|
||||||
# No custom operators needed
|
|
||||||
return {}
|
|
||||||
|
|
||||||
@conditions_hookimpl
|
|
||||||
def register_operator_choices():
|
|
||||||
# No custom operator choices needed
|
|
||||||
return []
|
|
||||||
|
|
||||||
@conditions_hookimpl
|
|
||||||
def register_field_choices():
|
|
||||||
# Add a field that will be available in conditions
|
|
||||||
return [
|
|
||||||
("word_count", "Word count of content"),
|
|
||||||
]
|
|
||||||
|
|
||||||
@conditions_hookimpl
|
|
||||||
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
|
|
||||||
"""Add word count data for conditions"""
|
|
||||||
result = {}
|
|
||||||
watch = application_datastruct['watching'].get(current_watch_uuid)
|
|
||||||
|
|
||||||
if watch and 'text' in ephemeral_data:
|
|
||||||
word_count = count_words_in_history(watch, ephemeral_data['text'])
|
|
||||||
result['word_count'] = word_count
|
|
||||||
|
|
||||||
return result
|
|
||||||
|
|
||||||
def _generate_stats_html(watch):
|
|
||||||
"""Generate the HTML content for the stats tab"""
|
|
||||||
word_count = count_words_in_history(watch)
|
|
||||||
|
|
||||||
html = f"""
|
|
||||||
<div class="word-count-stats">
|
|
||||||
<h4>Content Analysis</h4>
|
|
||||||
<table class="pure-table">
|
|
||||||
<tbody>
|
|
||||||
<tr>
|
|
||||||
<td>Word count (latest snapshot)</td>
|
|
||||||
<td>{word_count}</td>
|
|
||||||
</tr>
|
|
||||||
</tbody>
|
|
||||||
</table>
|
|
||||||
<p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p>
|
|
||||||
</div>
|
|
||||||
"""
|
|
||||||
return html
|
|
||||||
|
|
||||||
@conditions_hookimpl
|
|
||||||
def ui_edit_stats_extras(watch):
|
|
||||||
"""Add word count stats to the UI through conditions plugin system"""
|
|
||||||
return _generate_stats_html(watch)
|
|
||||||
|
|
||||||
@global_hookimpl
|
|
||||||
def ui_edit_stats_extras(watch):
|
|
||||||
"""Add word count stats to the UI using the global plugin system"""
|
|
||||||
return _generate_stats_html(watch)
|
|
||||||
@@ -7,30 +7,11 @@ import os
|
|||||||
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
|
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
|
||||||
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
|
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
|
||||||
|
|
||||||
SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000
|
|
||||||
SCREENSHOT_DEFAULT_QUALITY = 40
|
|
||||||
|
|
||||||
# Maximum total height for the final image (When in stitch mode).
|
|
||||||
# We limit this to 16000px due to the huge amount of RAM that was being used
|
|
||||||
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
|
|
||||||
SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
|
||||||
|
|
||||||
# The size at which we will switch to stitching method, when below this (and
|
|
||||||
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
|
|
||||||
# screenshot method.
|
|
||||||
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
|
|
||||||
|
|
||||||
# available_fetchers() will scan this implementation looking for anything starting with html_
|
# available_fetchers() will scan this implementation looking for anything starting with html_
|
||||||
# this information is used in the form selections
|
# this information is used in the form selections
|
||||||
from changedetectionio.content_fetchers.requests import fetcher as html_requests
|
from changedetectionio.content_fetchers.requests import fetcher as html_requests
|
||||||
|
|
||||||
|
|
||||||
import importlib.resources
|
|
||||||
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
|
|
||||||
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
|
|
||||||
FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8')
|
|
||||||
|
|
||||||
|
|
||||||
def available_fetchers():
|
def available_fetchers():
|
||||||
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
|
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
|
||||||
import inspect
|
import inspect
|
||||||
|
|||||||
@@ -48,7 +48,6 @@ class Fetcher():
|
|||||||
error = None
|
error = None
|
||||||
fetcher_description = "No description"
|
fetcher_description = "No description"
|
||||||
headers = {}
|
headers = {}
|
||||||
favicon_blob = None
|
|
||||||
instock_data = None
|
instock_data = None
|
||||||
instock_data_js = ""
|
instock_data_js = ""
|
||||||
status_code = None
|
status_code = None
|
||||||
@@ -64,12 +63,17 @@ class Fetcher():
|
|||||||
# Time ONTOP of the system defined env minimum time
|
# Time ONTOP of the system defined env minimum time
|
||||||
render_extract_delay = 0
|
render_extract_delay = 0
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
import importlib.resources
|
||||||
|
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
|
||||||
|
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_error(self):
|
def get_error(self):
|
||||||
return self.error
|
return self.error
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
async def run(self,
|
def run(self,
|
||||||
url,
|
url,
|
||||||
timeout,
|
timeout,
|
||||||
request_headers,
|
request_headers,
|
||||||
@@ -83,7 +87,7 @@ class Fetcher():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def quit(self, watch=None):
|
def quit(self):
|
||||||
return
|
return
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@@ -123,7 +127,7 @@ class Fetcher():
|
|||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
||||||
async def iterate_browser_steps(self, start_url=None):
|
def iterate_browser_steps(self, start_url=None):
|
||||||
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
||||||
from playwright._impl._errors import TimeoutError, Error
|
from playwright._impl._errors import TimeoutError, Error
|
||||||
from changedetectionio.safe_jinja import render as jinja_render
|
from changedetectionio.safe_jinja import render as jinja_render
|
||||||
@@ -137,9 +141,8 @@ class Fetcher():
|
|||||||
for step in valid_steps:
|
for step in valid_steps:
|
||||||
step_n += 1
|
step_n += 1
|
||||||
logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
|
logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
|
||||||
await self.screenshot_step("before-" + str(step_n))
|
self.screenshot_step("before-" + str(step_n))
|
||||||
await self.save_step_html("before-" + str(step_n))
|
self.save_step_html("before-" + str(step_n))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
optional_value = step['optional_value']
|
optional_value = step['optional_value']
|
||||||
selector = step['selector']
|
selector = step['selector']
|
||||||
@@ -149,11 +152,11 @@ class Fetcher():
|
|||||||
if '{%' in step['selector'] or '{{' in step['selector']:
|
if '{%' in step['selector'] or '{{' in step['selector']:
|
||||||
selector = jinja_render(template_str=step['selector'])
|
selector = jinja_render(template_str=step['selector'])
|
||||||
|
|
||||||
await getattr(interface, "call_action")(action_name=step['operation'],
|
getattr(interface, "call_action")(action_name=step['operation'],
|
||||||
selector=selector,
|
selector=selector,
|
||||||
optional_value=optional_value)
|
optional_value=optional_value)
|
||||||
await self.screenshot_step(step_n)
|
self.screenshot_step(step_n)
|
||||||
await self.save_step_html(step_n)
|
self.save_step_html(step_n)
|
||||||
except (Error, TimeoutError) as e:
|
except (Error, TimeoutError) as e:
|
||||||
logger.debug(str(e))
|
logger.debug(str(e))
|
||||||
# Stop processing here
|
# Stop processing here
|
||||||
|
|||||||
104
changedetectionio/content_fetchers/helpers.py
Normal file
104
changedetectionio/content_fetchers/helpers.py
Normal file
@@ -0,0 +1,104 @@
|
|||||||
|
|
||||||
|
# Pages with a vertical height longer than this will use the 'stitch together' method.
|
||||||
|
|
||||||
|
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
|
||||||
|
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
|
||||||
|
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
|
||||||
|
|
||||||
|
|
||||||
|
# The size at which we will switch to stitching method
|
||||||
|
SCREENSHOT_SIZE_STITCH_THRESHOLD=8000
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
def capture_stitched_together_full_page(page):
|
||||||
|
import io
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4 # Maximum total height for the final image (When in stitch mode)
|
||||||
|
MAX_CHUNK_HEIGHT = 4000 # Height per screenshot chunk
|
||||||
|
WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay
|
||||||
|
|
||||||
|
# Save the original viewport size
|
||||||
|
original_viewport = page.viewport_size
|
||||||
|
now = time.time()
|
||||||
|
|
||||||
|
try:
|
||||||
|
viewport = page.viewport_size
|
||||||
|
page_height = page.evaluate("document.documentElement.scrollHeight")
|
||||||
|
|
||||||
|
# Limit the total capture height
|
||||||
|
capture_height = min(page_height, MAX_TOTAL_HEIGHT)
|
||||||
|
|
||||||
|
images = []
|
||||||
|
total_captured_height = 0
|
||||||
|
|
||||||
|
for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
|
||||||
|
# Ensure we do not exceed the total height limit
|
||||||
|
chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)
|
||||||
|
|
||||||
|
# Adjust viewport size for this chunk
|
||||||
|
page.set_viewport_size({"width": viewport["width"], "height": chunk_height})
|
||||||
|
|
||||||
|
# Scroll to the correct position
|
||||||
|
page.evaluate(f"window.scrollTo(0, {offset})")
|
||||||
|
|
||||||
|
# Capture screenshot chunk
|
||||||
|
screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
||||||
|
images.append(Image.open(io.BytesIO(screenshot_bytes)))
|
||||||
|
|
||||||
|
total_captured_height += chunk_height
|
||||||
|
|
||||||
|
# Stop if we reached the maximum total height
|
||||||
|
if total_captured_height >= MAX_TOTAL_HEIGHT:
|
||||||
|
break
|
||||||
|
|
||||||
|
# Create the final stitched image
|
||||||
|
stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
|
||||||
|
y_offset = 0
|
||||||
|
|
||||||
|
# Stitch the screenshot chunks together
|
||||||
|
for img in images:
|
||||||
|
stitched_image.paste(img, (0, y_offset))
|
||||||
|
y_offset += img.height
|
||||||
|
|
||||||
|
logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")
|
||||||
|
|
||||||
|
# Overlay warning text if the screenshot was trimmed
|
||||||
|
if page_height > MAX_TOTAL_HEIGHT:
|
||||||
|
draw = ImageDraw.Draw(stitched_image)
|
||||||
|
warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"
|
||||||
|
|
||||||
|
# Load font (default system font if Arial is unavailable)
|
||||||
|
try:
|
||||||
|
font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT) # Arial (Windows/Mac)
|
||||||
|
except IOError:
|
||||||
|
font = ImageFont.load_default() # Default font if Arial not found
|
||||||
|
|
||||||
|
# Get text bounding box (correct method for newer Pillow versions)
|
||||||
|
text_bbox = draw.textbbox((0, 0), warning_text, font=font)
|
||||||
|
text_width = text_bbox[2] - text_bbox[0] # Calculate text width
|
||||||
|
text_height = text_bbox[3] - text_bbox[1] # Calculate text height
|
||||||
|
|
||||||
|
# Define background rectangle (top of the image)
|
||||||
|
draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")
|
||||||
|
|
||||||
|
# Center text horizontally within the warning area
|
||||||
|
text_x = (viewport["width"] - text_width) // 2
|
||||||
|
text_y = (WARNING_TEXT_HEIGHT - text_height) // 2
|
||||||
|
|
||||||
|
# Draw the warning text in red
|
||||||
|
draw.text((text_x, text_y), warning_text, fill="red", font=font)
|
||||||
|
|
||||||
|
# Save or return the final image
|
||||||
|
output = io.BytesIO()
|
||||||
|
stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
||||||
|
screenshot = output.getvalue()
|
||||||
|
|
||||||
|
finally:
|
||||||
|
# Restore the original viewport size
|
||||||
|
page.set_viewport_size(original_viewport)
|
||||||
|
|
||||||
|
return screenshot
|
||||||
@@ -4,75 +4,10 @@ from urllib.parse import urlparse
|
|||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
|
||||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS
|
|
||||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||||
|
|
||||||
async def capture_full_page_async(page):
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from multiprocessing import Process, Pipe
|
|
||||||
|
|
||||||
start = time.time()
|
|
||||||
|
|
||||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
|
||||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
|
||||||
original_viewport = page.viewport_size
|
|
||||||
|
|
||||||
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
|
|
||||||
|
|
||||||
# Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
|
|
||||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
|
|
||||||
screenshot_chunks = []
|
|
||||||
y = 0
|
|
||||||
|
|
||||||
if page_height > page.viewport_size['height']:
|
|
||||||
if page_height < step_size:
|
|
||||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
|
||||||
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
|
||||||
# Set viewport to a larger size to capture more content at once
|
|
||||||
await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
|
||||||
|
|
||||||
# Capture screenshots in chunks up to the max total height
|
|
||||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
|
||||||
await page.request_gc()
|
|
||||||
await page.evaluate(f"window.scrollTo(0, {y})")
|
|
||||||
await page.request_gc()
|
|
||||||
screenshot_chunks.append(await page.screenshot(
|
|
||||||
type="jpeg",
|
|
||||||
full_page=False,
|
|
||||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
|
|
||||||
))
|
|
||||||
y += step_size
|
|
||||||
await page.request_gc()
|
|
||||||
|
|
||||||
# Restore original viewport size
|
|
||||||
await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
|
|
||||||
|
|
||||||
# If we have multiple chunks, stitch them together
|
|
||||||
if len(screenshot_chunks) > 1:
|
|
||||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
|
||||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
|
||||||
parent_conn, child_conn = Pipe()
|
|
||||||
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
|
||||||
p.start()
|
|
||||||
screenshot = parent_conn.recv_bytes()
|
|
||||||
p.join()
|
|
||||||
logger.debug(
|
|
||||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
|
||||||
# Explicit cleanup
|
|
||||||
del screenshot_chunks
|
|
||||||
del p
|
|
||||||
del parent_conn, child_conn
|
|
||||||
screenshot_chunks = None
|
|
||||||
return screenshot
|
|
||||||
|
|
||||||
logger.debug(
|
|
||||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
|
||||||
|
|
||||||
return screenshot_chunks[0]
|
|
||||||
|
|
||||||
class fetcher(Fetcher):
|
class fetcher(Fetcher):
|
||||||
fetcher_description = "Playwright {}/Javascript".format(
|
fetcher_description = "Playwright {}/Javascript".format(
|
||||||
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
|
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
|
||||||
@@ -123,10 +58,9 @@ class fetcher(Fetcher):
|
|||||||
self.proxy['username'] = parsed.username
|
self.proxy['username'] = parsed.username
|
||||||
self.proxy['password'] = parsed.password
|
self.proxy['password'] = parsed.password
|
||||||
|
|
||||||
async def screenshot_step(self, step_n=''):
|
def screenshot_step(self, step_n=''):
|
||||||
super().screenshot_step(step_n=step_n)
|
super().screenshot_step(step_n=step_n)
|
||||||
screenshot = await capture_full_page_async(page=self.page)
|
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||||
|
|
||||||
|
|
||||||
if self.browser_steps_screenshot_path is not None:
|
if self.browser_steps_screenshot_path is not None:
|
||||||
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
|
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
|
||||||
@@ -134,15 +68,15 @@ class fetcher(Fetcher):
|
|||||||
with open(destination, 'wb') as f:
|
with open(destination, 'wb') as f:
|
||||||
f.write(screenshot)
|
f.write(screenshot)
|
||||||
|
|
||||||
async def save_step_html(self, step_n):
|
def save_step_html(self, step_n):
|
||||||
super().save_step_html(step_n=step_n)
|
super().save_step_html(step_n=step_n)
|
||||||
content = await self.page.content()
|
content = self.page.content()
|
||||||
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
|
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
|
||||||
logger.debug(f"Saving step HTML to {destination}")
|
logger.debug(f"Saving step HTML to {destination}")
|
||||||
with open(destination, 'w') as f:
|
with open(destination, 'w') as f:
|
||||||
f.write(content)
|
f.write(content)
|
||||||
|
|
||||||
async def run(self,
|
def run(self,
|
||||||
url,
|
url,
|
||||||
timeout,
|
timeout,
|
||||||
request_headers,
|
request_headers,
|
||||||
@@ -153,26 +87,27 @@ class fetcher(Fetcher):
|
|||||||
is_binary=False,
|
is_binary=False,
|
||||||
empty_pages_are_a_change=False):
|
empty_pages_are_a_change=False):
|
||||||
|
|
||||||
from playwright.async_api import async_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
import playwright._impl._errors
|
import playwright._impl._errors
|
||||||
|
from changedetectionio.content_fetchers import visualselector_xpath_selectors
|
||||||
import time
|
import time
|
||||||
self.delete_browser_steps_screenshots()
|
self.delete_browser_steps_screenshots()
|
||||||
response = None
|
response = None
|
||||||
|
|
||||||
async with async_playwright() as p:
|
with sync_playwright() as p:
|
||||||
browser_type = getattr(p, self.browser_type)
|
browser_type = getattr(p, self.browser_type)
|
||||||
|
|
||||||
# Seemed to cause a connection Exception even tho I can see it connect
|
# Seemed to cause a connection Exception even tho I can see it connect
|
||||||
# self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
|
# self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
|
||||||
# 60,000 connection timeout only
|
# 60,000 connection timeout only
|
||||||
browser = await browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000)
|
browser = browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000)
|
||||||
|
|
||||||
# SOCKS5 with authentication is not supported (yet)
|
# SOCKS5 with authentication is not supported (yet)
|
||||||
# https://github.com/microsoft/playwright/issues/10567
|
# https://github.com/microsoft/playwright/issues/10567
|
||||||
|
|
||||||
# Set user agent to prevent Cloudflare from blocking the browser
|
# Set user agent to prevent Cloudflare from blocking the browser
|
||||||
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
|
||||||
context = await browser.new_context(
|
context = browser.new_context(
|
||||||
accept_downloads=False, # Should never be needed
|
accept_downloads=False, # Should never be needed
|
||||||
bypass_csp=True, # This is needed to enable JavaScript execution on GitHub and others
|
bypass_csp=True, # This is needed to enable JavaScript execution on GitHub and others
|
||||||
extra_http_headers=request_headers,
|
extra_http_headers=request_headers,
|
||||||
@@ -182,47 +117,41 @@ class fetcher(Fetcher):
|
|||||||
user_agent=manage_user_agent(headers=request_headers),
|
user_agent=manage_user_agent(headers=request_headers),
|
||||||
)
|
)
|
||||||
|
|
||||||
self.page = await context.new_page()
|
self.page = context.new_page()
|
||||||
|
|
||||||
# Listen for all console events and handle errors
|
# Listen for all console events and handle errors
|
||||||
self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
|
||||||
|
|
||||||
# Re-use as much code from browser steps as possible so its the same
|
# Re-use as much code from browser steps as possible so its the same
|
||||||
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
|
||||||
browsersteps_interface = steppable_browser_interface(start_url=url)
|
browsersteps_interface = steppable_browser_interface(start_url=url)
|
||||||
browsersteps_interface.page = self.page
|
browsersteps_interface.page = self.page
|
||||||
|
|
||||||
response = await browsersteps_interface.action_goto_url(value=url)
|
response = browsersteps_interface.action_goto_url(value=url)
|
||||||
|
self.headers = response.all_headers()
|
||||||
|
|
||||||
if response is None:
|
if response is None:
|
||||||
await context.close()
|
context.close()
|
||||||
await browser.close()
|
browser.close()
|
||||||
logger.debug("Content Fetcher > Response object from the browser communication was none")
|
logger.debug("Content Fetcher > Response object from the browser communication was none")
|
||||||
raise EmptyReply(url=url, status_code=None)
|
raise EmptyReply(url=url, status_code=None)
|
||||||
|
|
||||||
# In async_playwright, all_headers() returns a coroutine
|
|
||||||
try:
|
|
||||||
self.headers = await response.all_headers()
|
|
||||||
except TypeError:
|
|
||||||
# Fallback for sync version
|
|
||||||
self.headers = response.all_headers()
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
|
if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
|
||||||
await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
|
browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
|
||||||
except playwright._impl._errors.TimeoutError as e:
|
except playwright._impl._errors.TimeoutError as e:
|
||||||
await context.close()
|
context.close()
|
||||||
await browser.close()
|
browser.close()
|
||||||
# This can be ok, we will try to grab what we could retrieve
|
# This can be ok, we will try to grab what we could retrieve
|
||||||
pass
|
pass
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}")
|
logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}")
|
||||||
await context.close()
|
context.close()
|
||||||
await browser.close()
|
browser.close()
|
||||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||||
|
|
||||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
self.page.wait_for_timeout(extra_wait * 1000)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.status_code = response.status
|
self.status_code = response.status
|
||||||
@@ -230,56 +159,41 @@ class fetcher(Fetcher):
|
|||||||
# https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
|
# https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
|
||||||
logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
|
logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
|
||||||
logger.critical(response)
|
logger.critical(response)
|
||||||
await context.close()
|
context.close()
|
||||||
await browser.close()
|
browser.close()
|
||||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||||
|
|
||||||
try:
|
|
||||||
self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
|
|
||||||
await self.page.request_gc()
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
|
||||||
|
|
||||||
if self.status_code != 200 and not ignore_status_codes:
|
if self.status_code != 200 and not ignore_status_codes:
|
||||||
screenshot = await capture_full_page_async(self.page)
|
screenshot = self.page.screenshot(type='jpeg', full_page=True,
|
||||||
|
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||||
|
|
||||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||||
|
|
||||||
if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
|
if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
|
||||||
logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
|
logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
|
||||||
await context.close()
|
context.close()
|
||||||
await browser.close()
|
browser.close()
|
||||||
raise EmptyReply(url=url, status_code=response.status)
|
raise EmptyReply(url=url, status_code=response.status)
|
||||||
|
|
||||||
# Run Browser Steps here
|
# Run Browser Steps here
|
||||||
if self.browser_steps_get_valid_steps():
|
if self.browser_steps_get_valid_steps():
|
||||||
await self.iterate_browser_steps(start_url=url)
|
self.iterate_browser_steps(start_url=url)
|
||||||
|
|
||||||
await self.page.wait_for_timeout(extra_wait * 1000)
|
self.page.wait_for_timeout(extra_wait * 1000)
|
||||||
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
||||||
if current_include_filters is not None:
|
if current_include_filters is not None:
|
||||||
await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
||||||
else:
|
else:
|
||||||
await self.page.evaluate("var include_filters=''")
|
self.page.evaluate("var include_filters=''")
|
||||||
await self.page.request_gc()
|
|
||||||
|
|
||||||
# request_gc before and after evaluate to free up memory
|
self.xpath_data = self.page.evaluate(
|
||||||
# @todo browsersteps etc
|
"async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
|
||||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
|
||||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
|
||||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
|
||||||
"max_height": MAX_TOTAL_HEIGHT
|
|
||||||
})
|
|
||||||
await self.page.request_gc()
|
|
||||||
|
|
||||||
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
|
|
||||||
await self.page.request_gc()
|
|
||||||
|
|
||||||
self.content = await self.page.content()
|
|
||||||
await self.page.request_gc()
|
|
||||||
logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
|
|
||||||
|
|
||||||
|
self.content = self.page.content()
|
||||||
|
logger.debug(f"Time to scrape xpath element data in browser {time.time() - now:.2f}s")
|
||||||
|
|
||||||
# Bug 3 in Playwright screenshot handling
|
# Bug 3 in Playwright screenshot handling
|
||||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||||
@@ -290,41 +204,18 @@ class fetcher(Fetcher):
|
|||||||
# acceptable screenshot quality here
|
# acceptable screenshot quality here
|
||||||
try:
|
try:
|
||||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||||
self.screenshot = await capture_full_page_async(page=self.page)
|
full_height = self.page.evaluate("document.documentElement.scrollHeight")
|
||||||
|
|
||||||
|
if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
|
||||||
|
logger.warning(
|
||||||
|
f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
|
||||||
|
self.screenshot = capture_stitched_together_full_page(self.page)
|
||||||
|
else:
|
||||||
|
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# It's likely the screenshot was too long/big and something crashed
|
# It's likely the screenshot was too long/big and something crashed
|
||||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||||
finally:
|
finally:
|
||||||
# Request garbage collection one more time before closing
|
context.close()
|
||||||
try:
|
browser.close()
|
||||||
await self.page.request_gc()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Clean up resources properly
|
|
||||||
try:
|
|
||||||
await self.page.request_gc()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
|
|
||||||
try:
|
|
||||||
await self.page.close()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
self.page = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
await context.close()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
context = None
|
|
||||||
|
|
||||||
try:
|
|
||||||
await browser.close()
|
|
||||||
except:
|
|
||||||
pass
|
|
||||||
browser = None
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -6,85 +6,8 @@ from urllib.parse import urlparse
|
|||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
|
||||||
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \
|
|
||||||
SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS
|
|
||||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, \
|
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
|
||||||
BrowserConnectError
|
|
||||||
|
|
||||||
|
|
||||||
# Bug 3 in Playwright screenshot handling
|
|
||||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
|
||||||
|
|
||||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
|
||||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
|
||||||
# acceptable screenshot quality here
|
|
||||||
async def capture_full_page(page):
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
from multiprocessing import Process, Pipe
|
|
||||||
|
|
||||||
start = time.time()
|
|
||||||
|
|
||||||
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
|
||||||
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
|
||||||
original_viewport = page.viewport
|
|
||||||
|
|
||||||
logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")
|
|
||||||
|
|
||||||
# Bug 3 in Playwright screenshot handling
|
|
||||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
|
||||||
# JPEG is better here because the screenshots can be very very large
|
|
||||||
|
|
||||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
|
||||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
|
||||||
# acceptable screenshot quality here
|
|
||||||
|
|
||||||
|
|
||||||
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Something that will not cause the GPU to overflow when taking the screenshot
|
|
||||||
screenshot_chunks = []
|
|
||||||
y = 0
|
|
||||||
if page_height > page.viewport['height']:
|
|
||||||
if page_height < step_size:
|
|
||||||
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
|
||||||
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
|
||||||
|
|
||||||
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
|
||||||
# better than scrollTo incase they override it in the page
|
|
||||||
await page.evaluate(
|
|
||||||
"""(y) => {
|
|
||||||
document.documentElement.scrollTop = y;
|
|
||||||
document.body.scrollTop = y;
|
|
||||||
}""",
|
|
||||||
y
|
|
||||||
)
|
|
||||||
|
|
||||||
screenshot_chunks.append(await page.screenshot(type_='jpeg',
|
|
||||||
fullPage=False,
|
|
||||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
|
|
||||||
y += step_size
|
|
||||||
|
|
||||||
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
|
||||||
|
|
||||||
if len(screenshot_chunks) > 1:
|
|
||||||
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
|
||||||
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
|
||||||
parent_conn, child_conn = Pipe()
|
|
||||||
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
|
||||||
p.start()
|
|
||||||
screenshot = parent_conn.recv_bytes()
|
|
||||||
p.join()
|
|
||||||
logger.debug(
|
|
||||||
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
|
||||||
|
|
||||||
screenshot_chunks = None
|
|
||||||
return screenshot
|
|
||||||
|
|
||||||
logger.debug(
|
|
||||||
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
|
||||||
return screenshot_chunks[0]
|
|
||||||
|
|
||||||
|
|
||||||
class fetcher(Fetcher):
|
class fetcher(Fetcher):
|
||||||
fetcher_description = "Puppeteer/direct {}/Javascript".format(
|
fetcher_description = "Puppeteer/direct {}/Javascript".format(
|
||||||
@@ -155,13 +78,10 @@ class fetcher(Fetcher):
|
|||||||
is_binary,
|
is_binary,
|
||||||
empty_pages_are_a_change
|
empty_pages_are_a_change
|
||||||
):
|
):
|
||||||
import re
|
|
||||||
|
from changedetectionio.content_fetchers import visualselector_xpath_selectors
|
||||||
self.delete_browser_steps_screenshots()
|
self.delete_browser_steps_screenshots()
|
||||||
|
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||||
n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
|
||||||
extra_wait = min(n, 15)
|
|
||||||
|
|
||||||
logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.")
|
|
||||||
|
|
||||||
from pyppeteer import Pyppeteer
|
from pyppeteer import Pyppeteer
|
||||||
pyppeteer_instance = Pyppeteer()
|
pyppeteer_instance = Pyppeteer()
|
||||||
@@ -177,21 +97,12 @@ class fetcher(Fetcher):
|
|||||||
except websockets.exceptions.InvalidURI:
|
except websockets.exceptions.InvalidURI:
|
||||||
raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://")
|
raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://")
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
|
raise BrowserConnectError(msg=f"Error connecting to the browser {str(e)}")
|
||||||
|
|
||||||
# more reliable is to just request a new page
|
# Better is to launch chrome with the URL as arg
|
||||||
self.page = await browser.newPage()
|
# non-headless - newPage() will launch an extra tab/window, .browser should already contain 1 page/tab
|
||||||
|
# headless - ask a new page
|
||||||
if '--window-size' in self.browser_connection_url:
|
self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
|
||||||
# Be sure the viewport is always the window-size, this is often not the same thing
|
|
||||||
match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url)
|
|
||||||
if match:
|
|
||||||
logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}")
|
|
||||||
await self.page.setViewport({
|
|
||||||
"width": int(match.group(1)),
|
|
||||||
"height": int(match.group(2))
|
|
||||||
})
|
|
||||||
logger.debug(f"Puppeteer viewport size {self.page.viewport}")
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from pyppeteerstealth import inject_evasions_into_page
|
from pyppeteerstealth import inject_evasions_into_page
|
||||||
@@ -237,35 +148,14 @@ class fetcher(Fetcher):
|
|||||||
# browsersteps_interface = steppable_browser_interface()
|
# browsersteps_interface = steppable_browser_interface()
|
||||||
# browsersteps_interface.page = self.page
|
# browsersteps_interface.page = self.page
|
||||||
|
|
||||||
async def handle_frame_navigation(event):
|
response = await self.page.goto(url, waitUntil="load")
|
||||||
logger.debug(f"Frame navigated: {event}")
|
|
||||||
w = extra_wait - 2 if extra_wait > 4 else 2
|
|
||||||
logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
|
|
||||||
await asyncio.sleep(w)
|
|
||||||
logger.debug("Issuing stopLoading command...")
|
|
||||||
await self.page._client.send('Page.stopLoading')
|
|
||||||
logger.debug("stopLoading command sent!")
|
|
||||||
|
|
||||||
self.page._client.on('Page.frameStartedNavigating', lambda event: asyncio.create_task(handle_frame_navigation(event)))
|
|
||||||
self.page._client.on('Page.frameStartedLoading', lambda event: asyncio.create_task(handle_frame_navigation(event)))
|
|
||||||
self.page._client.on('Page.frameStoppedLoading', lambda event: logger.debug(f"Frame stopped loading: {event}"))
|
|
||||||
|
|
||||||
response = None
|
if response is None:
|
||||||
attempt=0
|
await self.page.close()
|
||||||
while not response:
|
await browser.close()
|
||||||
logger.debug(f"Attempting page fetch {url} attempt {attempt}")
|
logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)")
|
||||||
response = await self.page.goto(url)
|
raise EmptyReply(url=url, status_code=None)
|
||||||
await asyncio.sleep(1 + extra_wait)
|
|
||||||
if response:
|
|
||||||
break
|
|
||||||
if not response:
|
|
||||||
logger.warning("Page did not fetch! trying again!")
|
|
||||||
if response is None and attempt>=2:
|
|
||||||
await self.page.close()
|
|
||||||
await browser.close()
|
|
||||||
logger.warning(f"Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content) exiting attmpt {attempt}")
|
|
||||||
raise EmptyReply(url=url, status_code=None)
|
|
||||||
attempt+=1
|
|
||||||
|
|
||||||
self.headers = response.headers
|
self.headers = response.headers
|
||||||
|
|
||||||
@@ -290,16 +180,12 @@ class fetcher(Fetcher):
|
|||||||
await browser.close()
|
await browser.close()
|
||||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||||
|
|
||||||
try:
|
|
||||||
self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
|
|
||||||
|
|
||||||
if self.status_code != 200 and not ignore_status_codes:
|
if self.status_code != 200 and not ignore_status_codes:
|
||||||
screenshot = await capture_full_page(page=self.page)
|
screenshot = await self.page.screenshot(type_='jpeg',
|
||||||
|
fullPage=True,
|
||||||
|
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||||
|
|
||||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||||
|
|
||||||
content = await self.page.content
|
content = await self.page.content
|
||||||
|
|
||||||
if not empty_pages_are_a_change and len(content.strip()) == 0:
|
if not empty_pages_are_a_change and len(content.strip()) == 0:
|
||||||
@@ -313,48 +199,64 @@ class fetcher(Fetcher):
|
|||||||
# if self.browser_steps_get_valid_steps():
|
# if self.browser_steps_get_valid_steps():
|
||||||
# self.iterate_browser_steps()
|
# self.iterate_browser_steps()
|
||||||
|
|
||||||
|
await asyncio.sleep(1 + extra_wait)
|
||||||
|
|
||||||
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
||||||
# Setup the xPath/VisualSelector scraper
|
# Setup the xPath/VisualSelector scraper
|
||||||
if current_include_filters:
|
if current_include_filters is not None:
|
||||||
js = json.dumps(current_include_filters)
|
js = json.dumps(current_include_filters)
|
||||||
await self.page.evaluate(f"var include_filters={js}")
|
await self.page.evaluate(f"var include_filters={js}")
|
||||||
else:
|
else:
|
||||||
await self.page.evaluate(f"var include_filters=''")
|
await self.page.evaluate(f"var include_filters=''")
|
||||||
|
|
||||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
self.xpath_data = await self.page.evaluate(
|
||||||
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
"async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
|
||||||
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
self.instock_data = await self.page.evaluate("async () => {" + self.instock_data_js + "}")
|
||||||
"max_height": MAX_TOTAL_HEIGHT
|
|
||||||
})
|
|
||||||
if not self.xpath_data:
|
|
||||||
raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)")
|
|
||||||
|
|
||||||
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
|
|
||||||
|
|
||||||
self.content = await self.page.content
|
self.content = await self.page.content
|
||||||
|
# Bug 3 in Playwright screenshot handling
|
||||||
|
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||||
|
# JPEG is better here because the screenshots can be very very large
|
||||||
|
|
||||||
self.screenshot = await capture_full_page(page=self.page)
|
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||||
|
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||||
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
|
# acceptable screenshot quality here
|
||||||
logger.success(f"Fetching '{url}' complete, closing page")
|
try:
|
||||||
await self.page.close()
|
self.screenshot = await self.page.screenshot(type_='jpeg',
|
||||||
logger.success(f"Fetching '{url}' complete, closing browser")
|
fullPage=True,
|
||||||
await browser.close()
|
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||||
|
except Exception as e:
|
||||||
|
logger.error("Error fetching screenshot")
|
||||||
|
# // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
|
||||||
|
# // @ todo after text extract, we can place some overlay text with red background to say 'croppped'
|
||||||
|
logger.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot')
|
||||||
|
try:
|
||||||
|
self.screenshot = await self.page.screenshot(type_='jpeg',
|
||||||
|
fullPage=False,
|
||||||
|
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
||||||
|
except Exception as e:
|
||||||
|
logger.error('ERROR: Failed to get viewport-only reduced screenshot :(')
|
||||||
|
pass
|
||||||
|
finally:
|
||||||
|
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
|
||||||
|
logger.success(f"Fetching '{url}' complete, closing page")
|
||||||
|
await self.page.close()
|
||||||
|
logger.success(f"Fetching '{url}' complete, closing browser")
|
||||||
|
await browser.close()
|
||||||
logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.")
|
logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.")
|
||||||
|
|
||||||
async def main(self, **kwargs):
|
async def main(self, **kwargs):
|
||||||
await self.fetch_page(**kwargs)
|
await self.fetch_page(**kwargs)
|
||||||
|
|
||||||
async def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
|
def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
|
||||||
current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):
|
current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):
|
||||||
|
|
||||||
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
|
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
|
||||||
max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180))
|
max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
|
||||||
|
|
||||||
# Now we run this properly in async context since we're called from async worker
|
# This will work in 3.10 but not >= 3.11 because 3.11 wants tasks only
|
||||||
try:
|
try:
|
||||||
await asyncio.wait_for(self.main(
|
asyncio.run(asyncio.wait_for(self.main(
|
||||||
url=url,
|
url=url,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
request_headers=request_headers,
|
request_headers=request_headers,
|
||||||
@@ -364,7 +266,7 @@ class fetcher(Fetcher):
|
|||||||
current_include_filters=current_include_filters,
|
current_include_filters=current_include_filters,
|
||||||
is_binary=is_binary,
|
is_binary=is_binary,
|
||||||
empty_pages_are_a_change=empty_pages_are_a_change
|
empty_pages_are_a_change=empty_pages_are_a_change
|
||||||
), timeout=max_time)
|
), timeout=max_time))
|
||||||
except asyncio.TimeoutError:
|
except asyncio.TimeoutError:
|
||||||
raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||||
|
|
||||||
|
|||||||
@@ -1,7 +1,6 @@
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
import hashlib
|
import hashlib
|
||||||
import os
|
import os
|
||||||
import asyncio
|
|
||||||
from changedetectionio import strtobool
|
from changedetectionio import strtobool
|
||||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
|
||||||
from changedetectionio.content_fetchers.base import Fetcher
|
from changedetectionio.content_fetchers.base import Fetcher
|
||||||
@@ -16,7 +15,7 @@ class fetcher(Fetcher):
|
|||||||
self.proxy_override = proxy_override
|
self.proxy_override = proxy_override
|
||||||
# browser_connection_url is none because its always 'launched locally'
|
# browser_connection_url is none because its always 'launched locally'
|
||||||
|
|
||||||
def _run_sync(self,
|
def run(self,
|
||||||
url,
|
url,
|
||||||
timeout,
|
timeout,
|
||||||
request_headers,
|
request_headers,
|
||||||
@@ -26,11 +25,9 @@ class fetcher(Fetcher):
|
|||||||
current_include_filters=None,
|
current_include_filters=None,
|
||||||
is_binary=False,
|
is_binary=False,
|
||||||
empty_pages_are_a_change=False):
|
empty_pages_are_a_change=False):
|
||||||
"""Synchronous version of run - the original requests implementation"""
|
|
||||||
|
|
||||||
import chardet
|
import chardet
|
||||||
import requests
|
import requests
|
||||||
from requests.exceptions import ProxyError, ConnectionError, RequestException
|
|
||||||
|
|
||||||
if self.browser_steps_get_valid_steps():
|
if self.browser_steps_get_valid_steps():
|
||||||
raise BrowserStepsInUnsupportedFetcher(url=url)
|
raise BrowserStepsInUnsupportedFetcher(url=url)
|
||||||
@@ -38,6 +35,7 @@ class fetcher(Fetcher):
|
|||||||
proxies = {}
|
proxies = {}
|
||||||
|
|
||||||
# Allows override the proxy on a per-request basis
|
# Allows override the proxy on a per-request basis
|
||||||
|
|
||||||
# https://requests.readthedocs.io/en/latest/user/advanced/#socks
|
# https://requests.readthedocs.io/en/latest/user/advanced/#socks
|
||||||
# Should also work with `socks5://user:pass@host:port` type syntax.
|
# Should also work with `socks5://user:pass@host:port` type syntax.
|
||||||
|
|
||||||
@@ -54,19 +52,14 @@ class fetcher(Fetcher):
|
|||||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||||
from requests_file import FileAdapter
|
from requests_file import FileAdapter
|
||||||
session.mount('file://', FileAdapter())
|
session.mount('file://', FileAdapter())
|
||||||
try:
|
|
||||||
r = session.request(method=request_method,
|
r = session.request(method=request_method,
|
||||||
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
||||||
url=url,
|
url=url,
|
||||||
headers=request_headers,
|
headers=request_headers,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
proxies=proxies,
|
proxies=proxies,
|
||||||
verify=False)
|
verify=False)
|
||||||
except Exception as e:
|
|
||||||
msg = str(e)
|
|
||||||
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
|
|
||||||
msg = f"Proxy connection failed? {msg}"
|
|
||||||
raise Exception(msg) from e
|
|
||||||
|
|
||||||
# If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
|
# If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
|
||||||
# For example - some sites don't tell us it's utf-8, but return utf-8 content
|
# For example - some sites don't tell us it's utf-8, but return utf-8 content
|
||||||
@@ -101,48 +94,5 @@ class fetcher(Fetcher):
|
|||||||
else:
|
else:
|
||||||
self.content = r.text
|
self.content = r.text
|
||||||
|
|
||||||
|
|
||||||
self.raw_content = r.content
|
self.raw_content = r.content
|
||||||
|
|
||||||
async def run(self,
|
|
||||||
url,
|
|
||||||
timeout,
|
|
||||||
request_headers,
|
|
||||||
request_body,
|
|
||||||
request_method,
|
|
||||||
ignore_status_codes=False,
|
|
||||||
current_include_filters=None,
|
|
||||||
is_binary=False,
|
|
||||||
empty_pages_are_a_change=False):
|
|
||||||
"""Async wrapper that runs the synchronous requests code in a thread pool"""
|
|
||||||
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
|
|
||||||
# Run the synchronous _run_sync in a thread pool to avoid blocking the event loop
|
|
||||||
await loop.run_in_executor(
|
|
||||||
None, # Use default ThreadPoolExecutor
|
|
||||||
lambda: self._run_sync(
|
|
||||||
url=url,
|
|
||||||
timeout=timeout,
|
|
||||||
request_headers=request_headers,
|
|
||||||
request_body=request_body,
|
|
||||||
request_method=request_method,
|
|
||||||
ignore_status_codes=ignore_status_codes,
|
|
||||||
current_include_filters=current_include_filters,
|
|
||||||
is_binary=is_binary,
|
|
||||||
empty_pages_are_a_change=empty_pages_are_a_change
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
def quit(self, watch=None):
|
|
||||||
|
|
||||||
# In case they switched to `requests` fetcher from something else
|
|
||||||
# Then the screenshot could be old, in any case, it's not used here.
|
|
||||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing
|
|
||||||
if strtobool(os.getenv("REMOVE_REQUESTS_OLD_SCREENSHOTS", 'true')):
|
|
||||||
screenshot = watch.get_screenshot()
|
|
||||||
if screenshot:
|
|
||||||
try:
|
|
||||||
os.unlink(screenshot)
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}")
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,79 +0,0 @@
|
|||||||
(async () => {
|
|
||||||
const links = Array.from(document.querySelectorAll(
|
|
||||||
'link[rel~="apple-touch-icon"], link[rel~="icon"]'
|
|
||||||
));
|
|
||||||
|
|
||||||
const icons = links.map(link => {
|
|
||||||
const sizesStr = link.getAttribute('sizes');
|
|
||||||
let size = 0;
|
|
||||||
if (sizesStr) {
|
|
||||||
const [w] = sizesStr.split('x').map(Number);
|
|
||||||
if (!isNaN(w)) size = w;
|
|
||||||
} else {
|
|
||||||
size = 16;
|
|
||||||
}
|
|
||||||
return {
|
|
||||||
size,
|
|
||||||
rel: link.getAttribute('rel'),
|
|
||||||
href: link.href
|
|
||||||
};
|
|
||||||
});
|
|
||||||
|
|
||||||
// If no icons found, add fallback favicon.ico
|
|
||||||
if (icons.length === 0) {
|
|
||||||
icons.push({
|
|
||||||
size: 16,
|
|
||||||
rel: 'icon',
|
|
||||||
href: '/favicon.ico'
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
// sort preference
|
|
||||||
icons.sort((a, b) => {
|
|
||||||
const isAppleA = /apple-touch-icon/.test(a.rel);
|
|
||||||
const isAppleB = /apple-touch-icon/.test(b.rel);
|
|
||||||
if (isAppleA && !isAppleB) return -1;
|
|
||||||
if (!isAppleA && isAppleB) return 1;
|
|
||||||
return b.size - a.size;
|
|
||||||
});
|
|
||||||
|
|
||||||
const timeoutMs = 2000;
|
|
||||||
|
|
||||||
for (const icon of icons) {
|
|
||||||
try {
|
|
||||||
const controller = new AbortController();
|
|
||||||
const timeout = setTimeout(() => controller.abort(), timeoutMs);
|
|
||||||
|
|
||||||
const resp = await fetch(icon.href, {
|
|
||||||
signal: controller.signal,
|
|
||||||
redirect: 'follow'
|
|
||||||
});
|
|
||||||
|
|
||||||
clearTimeout(timeout);
|
|
||||||
|
|
||||||
if (!resp.ok) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
const blob = await resp.blob();
|
|
||||||
|
|
||||||
// Convert blob to base64
|
|
||||||
const reader = new FileReader();
|
|
||||||
return await new Promise(resolve => {
|
|
||||||
reader.onloadend = () => {
|
|
||||||
resolve({
|
|
||||||
url: icon.href,
|
|
||||||
base64: reader.result.split(",")[1]
|
|
||||||
});
|
|
||||||
};
|
|
||||||
reader.readAsDataURL(blob);
|
|
||||||
});
|
|
||||||
|
|
||||||
} catch (e) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// nothing found
|
|
||||||
return null;
|
|
||||||
})();
|
|
||||||
190
changedetectionio/content_fetchers/res/puppeteer_fetch.js
Normal file
190
changedetectionio/content_fetchers/res/puppeteer_fetch.js
Normal file
@@ -0,0 +1,190 @@
|
|||||||
|
module.exports = async ({page, context}) => {
|
||||||
|
|
||||||
|
var {
|
||||||
|
url,
|
||||||
|
execute_js,
|
||||||
|
user_agent,
|
||||||
|
extra_wait_ms,
|
||||||
|
req_headers,
|
||||||
|
include_filters,
|
||||||
|
xpath_element_js,
|
||||||
|
screenshot_quality,
|
||||||
|
proxy_username,
|
||||||
|
proxy_password,
|
||||||
|
disk_cache_dir,
|
||||||
|
no_cache_list,
|
||||||
|
block_url_list,
|
||||||
|
} = context;
|
||||||
|
|
||||||
|
await page.setBypassCSP(true)
|
||||||
|
await page.setExtraHTTPHeaders(req_headers);
|
||||||
|
|
||||||
|
if (user_agent) {
|
||||||
|
await page.setUserAgent(user_agent);
|
||||||
|
}
|
||||||
|
// https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
|
||||||
|
|
||||||
|
await page.setDefaultNavigationTimeout(0);
|
||||||
|
|
||||||
|
if (proxy_username) {
|
||||||
|
// Setting Proxy-Authentication header is deprecated, and doing so can trigger header change errors from Puppeteer
|
||||||
|
// https://github.com/puppeteer/puppeteer/issues/676 ?
|
||||||
|
// https://help.brightdata.com/hc/en-us/articles/12632549957649-Proxy-Manager-How-to-Guides#h_01HAKWR4Q0AFS8RZTNYWRDFJC2
|
||||||
|
// https://cri.dev/posts/2020-03-30-How-to-solve-Puppeteer-Chrome-Error-ERR_INVALID_ARGUMENT/
|
||||||
|
await page.authenticate({
|
||||||
|
username: proxy_username,
|
||||||
|
password: proxy_password
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
await page.setViewport({
|
||||||
|
width: 1024,
|
||||||
|
height: 768,
|
||||||
|
deviceScaleFactor: 1,
|
||||||
|
});
|
||||||
|
|
||||||
|
await page.setRequestInterception(true);
|
||||||
|
if (disk_cache_dir) {
|
||||||
|
console.log(">>>>>>>>>>>>>>> LOCAL DISK CACHE ENABLED <<<<<<<<<<<<<<<<<<<<<");
|
||||||
|
}
|
||||||
|
const fs = require('fs');
|
||||||
|
const crypto = require('crypto');
|
||||||
|
|
||||||
|
function file_is_expired(file_path) {
|
||||||
|
if (!fs.existsSync(file_path)) {
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
var stats = fs.statSync(file_path);
|
||||||
|
const now_date = new Date();
|
||||||
|
const expire_seconds = 300;
|
||||||
|
if ((now_date / 1000) - (stats.mtime.getTime() / 1000) > expire_seconds) {
|
||||||
|
console.log("CACHE EXPIRED: " + file_path);
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
return false;
|
||||||
|
|
||||||
|
}
|
||||||
|
|
||||||
|
page.on('request', async (request) => {
|
||||||
|
// General blocking of requests that waste traffic
|
||||||
|
if (block_url_list.some(substring => request.url().toLowerCase().includes(substring))) return request.abort();
|
||||||
|
|
||||||
|
if (disk_cache_dir) {
|
||||||
|
const url = request.url();
|
||||||
|
const key = crypto.createHash('md5').update(url).digest("hex");
|
||||||
|
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
|
||||||
|
|
||||||
|
// https://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js
|
||||||
|
|
||||||
|
if (fs.existsSync(dir_path + key)) {
|
||||||
|
console.log("* CACHE HIT , using - " + dir_path + key + " - " + url);
|
||||||
|
const cached_data = fs.readFileSync(dir_path + key);
|
||||||
|
// @todo headers can come from dir_path+key+".meta" json file
|
||||||
|
request.respond({
|
||||||
|
status: 200,
|
||||||
|
//contentType: 'text/html', //@todo
|
||||||
|
body: cached_data
|
||||||
|
});
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
request.continue();
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
if (disk_cache_dir) {
|
||||||
|
page.on('response', async (response) => {
|
||||||
|
const url = response.url();
|
||||||
|
// Basic filtering for sane responses
|
||||||
|
if (response.request().method() != 'GET' || response.request().resourceType() == 'xhr' || response.request().resourceType() == 'document' || response.status() != 200) {
|
||||||
|
console.log("Skipping (not useful) - Status:" + response.status() + " Method:" + response.request().method() + " ResourceType:" + response.request().resourceType() + " " + url);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (no_cache_list.some(substring => url.toLowerCase().includes(substring))) {
|
||||||
|
console.log("Skipping (no_cache_list) - " + url);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
if (url.toLowerCase().includes('data:')) {
|
||||||
|
console.log("Skipping (embedded-data) - " + url);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
response.buffer().then(buffer => {
|
||||||
|
if (buffer.length > 100) {
|
||||||
|
console.log("Cache - Saving " + response.request().method() + " - " + url + " - " + response.request().resourceType());
|
||||||
|
|
||||||
|
const key = crypto.createHash('md5').update(url).digest("hex");
|
||||||
|
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
|
||||||
|
|
||||||
|
if (!fs.existsSync(dir_path)) {
|
||||||
|
fs.mkdirSync(dir_path, {recursive: true})
|
||||||
|
}
|
||||||
|
|
||||||
|
if (fs.existsSync(dir_path + key)) {
|
||||||
|
if (file_is_expired(dir_path + key)) {
|
||||||
|
fs.writeFileSync(dir_path + key, buffer);
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
fs.writeFileSync(dir_path + key, buffer);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
const r = await page.goto(url, {
|
||||||
|
waitUntil: 'load'
|
||||||
|
});
|
||||||
|
|
||||||
|
await page.waitForTimeout(1000);
|
||||||
|
await page.waitForTimeout(extra_wait_ms);
|
||||||
|
|
||||||
|
if (execute_js) {
|
||||||
|
await page.evaluate(execute_js);
|
||||||
|
await page.waitForTimeout(200);
|
||||||
|
}
|
||||||
|
|
||||||
|
var xpath_data;
|
||||||
|
var instock_data;
|
||||||
|
try {
|
||||||
|
// Not sure the best way here, in the future this should be a new package added to npm then run in evaluatedCode
|
||||||
|
// (Once the old playwright is removed)
|
||||||
|
xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters);
|
||||||
|
instock_data = await page.evaluate(() => {%instock_scrape_code%});
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Protocol error (Page.captureScreenshot): Cannot take screenshot with 0 width can come from a proxy auth failure
|
||||||
|
// Wrap it here (for now)
|
||||||
|
|
||||||
|
var b64s = false;
|
||||||
|
try {
|
||||||
|
b64s = await page.screenshot({encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg'});
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
|
||||||
|
if (!b64s) {
|
||||||
|
// @todo after text extract, we can place some overlay text with red background to say 'croppped'
|
||||||
|
console.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot');
|
||||||
|
try {
|
||||||
|
b64s = await page.screenshot({encoding: "base64", quality: screenshot_quality, type: 'jpeg'});
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
var html = await page.content();
|
||||||
|
return {
|
||||||
|
data: {
|
||||||
|
'content': html,
|
||||||
|
'headers': r.headers(),
|
||||||
|
'instock_data': instock_data,
|
||||||
|
'screenshot': b64s,
|
||||||
|
'status_code': r.status(),
|
||||||
|
'xpath_data': xpath_data
|
||||||
|
},
|
||||||
|
type: 'application/json',
|
||||||
|
};
|
||||||
|
};
|
||||||
@@ -1,238 +1,223 @@
|
|||||||
async () => {
|
// Restock Detector
|
||||||
|
// (c) Leigh Morresi dgtlmoon@gmail.com
|
||||||
|
//
|
||||||
|
// Assumes the product is in stock to begin with, unless the following appears above the fold ;
|
||||||
|
// - outOfStockTexts appears above the fold (out of stock)
|
||||||
|
// - negateOutOfStockRegex (really is in stock)
|
||||||
|
|
||||||
function isItemInStock() {
|
function isItemInStock() {
|
||||||
// @todo Pass these in so the same list can be used in non-JS fetchers
|
// @todo Pass these in so the same list can be used in non-JS fetchers
|
||||||
const outOfStockTexts = [
|
const outOfStockTexts = [
|
||||||
' أخبرني عندما يتوفر',
|
' أخبرني عندما يتوفر',
|
||||||
'0 in stock',
|
'0 in stock',
|
||||||
'actuellement indisponible',
|
'actuellement indisponible',
|
||||||
'agotado',
|
'agotado',
|
||||||
'article épuisé',
|
'article épuisé',
|
||||||
'artikel zurzeit vergriffen',
|
'artikel zurzeit vergriffen',
|
||||||
'as soon as stock is available',
|
'as soon as stock is available',
|
||||||
'aucune offre n\'est disponible',
|
'ausverkauft', // sold out
|
||||||
'ausverkauft', // sold out
|
'available for back order',
|
||||||
'available for back order',
|
'awaiting stock',
|
||||||
'awaiting stock',
|
'back in stock soon',
|
||||||
'back in stock soon',
|
'back-order or out of stock',
|
||||||
'back-order or out of stock',
|
'backordered',
|
||||||
'backordered',
|
'benachrichtigt mich', // notify me
|
||||||
'backorder',
|
'brak na stanie',
|
||||||
'benachrichtigt mich', // notify me
|
'brak w magazynie',
|
||||||
'binnenkort leverbaar', // coming soon
|
'coming soon',
|
||||||
'brak na stanie',
|
'currently have any tickets for this',
|
||||||
'brak w magazynie',
|
'currently unavailable',
|
||||||
'coming soon',
|
'dieser artikel ist bald wieder verfügbar',
|
||||||
'currently have any tickets for this',
|
'dostępne wkrótce',
|
||||||
'currently unavailable',
|
'en rupture',
|
||||||
'dieser artikel ist bald wieder verfügbar',
|
'en rupture de stock',
|
||||||
'dostępne wkrótce',
|
'épuisé',
|
||||||
'en rupture',
|
'esgotado',
|
||||||
'esgotado',
|
'indisponible',
|
||||||
'in kürze lieferbar',
|
'indisponível',
|
||||||
'indisponible',
|
'isn\'t in stock right now',
|
||||||
'indisponível',
|
'isnt in stock right now',
|
||||||
'isn\'t in stock right now',
|
'isn’t in stock right now',
|
||||||
'isnt in stock right now',
|
'item is no longer available',
|
||||||
'isn’t in stock right now',
|
'let me know when it\'s available',
|
||||||
'item is no longer available',
|
'mail me when available',
|
||||||
'let me know when it\'s available',
|
'message if back in stock',
|
||||||
'mail me when available',
|
'mevcut değil',
|
||||||
'message if back in stock',
|
'nachricht bei',
|
||||||
'mevcut değil',
|
'nicht auf lager',
|
||||||
'more on order',
|
'nicht lagernd',
|
||||||
'nachricht bei',
|
'nicht lieferbar',
|
||||||
'nicht auf lager',
|
'nicht verfügbar',
|
||||||
'nicht lagernd',
|
'nicht vorrätig',
|
||||||
'nicht lieferbar',
|
'nicht zur verfügung',
|
||||||
'nicht verfügbar',
|
'nie znaleziono produktów',
|
||||||
'nicht vorrätig',
|
'niet beschikbaar',
|
||||||
'nicht zur verfügung',
|
'niet leverbaar',
|
||||||
'nie znaleziono produktów',
|
'niet op voorraad',
|
||||||
'niet beschikbaar',
|
'no disponible',
|
||||||
'niet leverbaar',
|
'non disponibile',
|
||||||
'niet op voorraad',
|
'non disponible',
|
||||||
'no disponible',
|
'no longer in stock',
|
||||||
'no featured offers available',
|
'no tickets available',
|
||||||
'no longer available',
|
'not available',
|
||||||
'no longer in stock',
|
'not currently available',
|
||||||
'no tickets available',
|
'not in stock',
|
||||||
'non disponibile',
|
'notify me when available',
|
||||||
'non disponible',
|
'notify me',
|
||||||
'not available',
|
'notify when available',
|
||||||
'not currently available',
|
'não disponível',
|
||||||
'not in stock',
|
'não estamos a aceitar encomendas',
|
||||||
'notify me when available',
|
'out of stock',
|
||||||
'notify me',
|
'out-of-stock',
|
||||||
'notify when available',
|
'plus disponible',
|
||||||
'não disponível',
|
'prodotto esaurito',
|
||||||
'não estamos a aceitar encomendas',
|
'produkt niedostępny',
|
||||||
'out of stock',
|
'rupture',
|
||||||
'out-of-stock',
|
'sold out',
|
||||||
'plus disponible',
|
'sold-out',
|
||||||
'prodotto esaurito',
|
'stokta yok',
|
||||||
'produkt niedostępny',
|
'temporarily out of stock',
|
||||||
'rupture',
|
'temporarily unavailable',
|
||||||
'sold out',
|
'there were no search results for',
|
||||||
'sold-out',
|
'this item is currently unavailable',
|
||||||
'stok habis',
|
'tickets unavailable',
|
||||||
'stok kosong',
|
'tijdelijk uitverkocht',
|
||||||
'stok varian ini habis',
|
'tükendi',
|
||||||
'stokta yok',
|
'unavailable nearby',
|
||||||
'temporarily out of stock',
|
'unavailable tickets',
|
||||||
'temporarily unavailable',
|
'vergriffen',
|
||||||
'there were no search results for',
|
'vorbestellen',
|
||||||
'this item is currently unavailable',
|
'vorbestellung ist bald möglich',
|
||||||
'tickets unavailable',
|
'we don\'t currently have any',
|
||||||
'tidak dijual',
|
'we couldn\'t find any products that match',
|
||||||
'tidak tersedia',
|
'we do not currently have an estimate of when this product will be back in stock.',
|
||||||
'tijdelijk uitverkocht',
|
'we don\'t know when or if this item will be back in stock.',
|
||||||
'tiket tidak tersedia',
|
'we were not able to find a match',
|
||||||
'to subscribe to back in stock',
|
'when this arrives in stock',
|
||||||
'tükendi',
|
'zur zeit nicht an lager',
|
||||||
'unavailable nearby',
|
'品切れ',
|
||||||
'unavailable tickets',
|
'已售',
|
||||||
'vergriffen',
|
'已售完',
|
||||||
'vorbestellen',
|
'품절'
|
||||||
'vorbestellung ist bald möglich',
|
];
|
||||||
'we couldn\'t find any products that match',
|
|
||||||
'we do not currently have an estimate of when this product will be back in stock.',
|
|
||||||
'we don\'t currently have any',
|
|
||||||
'we don\'t know when or if this item will be back in stock.',
|
|
||||||
'we were not able to find a match',
|
|
||||||
'when this arrives in stock',
|
|
||||||
'when this item is available to order',
|
|
||||||
'zur zeit nicht an lager',
|
|
||||||
'épuisé',
|
|
||||||
'品切れ',
|
|
||||||
'已售',
|
|
||||||
'已售完',
|
|
||||||
'품절'
|
|
||||||
];
|
|
||||||
|
|
||||||
|
|
||||||
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
|
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
|
||||||
|
|
||||||
function getElementBaseText(element) {
|
function getElementBaseText(element) {
|
||||||
// .textContent can include text from children which may give the wrong results
|
// .textContent can include text from children which may give the wrong results
|
||||||
// scan only immediate TEXT_NODEs, which will be a child of the element
|
// scan only immediate TEXT_NODEs, which will be a child of the element
|
||||||
var text = "";
|
var text = "";
|
||||||
for (var i = 0; i < element.childNodes.length; ++i)
|
for (var i = 0; i < element.childNodes.length; ++i)
|
||||||
if (element.childNodes[i].nodeType === Node.TEXT_NODE)
|
if (element.childNodes[i].nodeType === Node.TEXT_NODE)
|
||||||
text += element.childNodes[i].textContent;
|
text += element.childNodes[i].textContent;
|
||||||
return text.toLowerCase().trim();
|
return text.toLowerCase().trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock|arrives approximately)', 'ig');
|
const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
|
||||||
// The out-of-stock or in-stock-text is generally always above-the-fold
|
|
||||||
// and often below-the-fold is a list of related products that may or may not contain trigger text
|
// The out-of-stock or in-stock-text is generally always above-the-fold
|
||||||
// so it's good to filter to just the 'above the fold' elements
|
// and often below-the-fold is a list of related products that may or may not contain trigger text
|
||||||
// and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
|
// so it's good to filter to just the 'above the fold' elements
|
||||||
|
// and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
|
||||||
|
|
||||||
function elementIsInEyeBallRange(element) {
|
|
||||||
// outside the 'fold' or some weird text in the heading area
|
|
||||||
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
|
|
||||||
// Note: theres also an automated test that places the 'out of stock' text fairly low down
|
|
||||||
// Skip text that could be in the header area
|
|
||||||
if (element.getBoundingClientRect().bottom + window.scrollY <= 300 ) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
// Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in there
|
|
||||||
if (element.getBoundingClientRect().bottom + window.scrollY >= 1300 ) {
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
|
|
||||||
// @todo - if it's SVG or IMG, go into image diff mode
|
// @todo - if it's SVG or IMG, go into image diff mode
|
||||||
|
// %ELEMENTS% replaced at injection time because different interfaces use it with different settings
|
||||||
|
|
||||||
function collectVisibleElements(parent, visibleElements) {
|
console.log("Scanning %ELEMENTS%");
|
||||||
if (!parent) return; // Base case: if parent is null or undefined, return
|
|
||||||
|
|
||||||
// Add the parent itself to the visible elements array if it's of the specified types
|
function collectVisibleElements(parent, visibleElements) {
|
||||||
visibleElements.push(parent);
|
if (!parent) return; // Base case: if parent is null or undefined, return
|
||||||
|
|
||||||
// Iterate over the parent's children
|
// Add the parent itself to the visible elements array if it's of the specified types
|
||||||
const children = parent.children;
|
visibleElements.push(parent);
|
||||||
for (let i = 0; i < children.length; i++) {
|
|
||||||
const child = children[i];
|
// Iterate over the parent's children
|
||||||
if (
|
const children = parent.children;
|
||||||
child.nodeType === Node.ELEMENT_NODE &&
|
for (let i = 0; i < children.length; i++) {
|
||||||
window.getComputedStyle(child).display !== 'none' &&
|
const child = children[i];
|
||||||
window.getComputedStyle(child).visibility !== 'hidden' &&
|
if (
|
||||||
child.offsetWidth >= 0 &&
|
child.nodeType === Node.ELEMENT_NODE &&
|
||||||
child.offsetHeight >= 0 &&
|
window.getComputedStyle(child).display !== 'none' &&
|
||||||
window.getComputedStyle(child).contentVisibility !== 'hidden'
|
window.getComputedStyle(child).visibility !== 'hidden' &&
|
||||||
) {
|
child.offsetWidth >= 0 &&
|
||||||
// If the child is an element and is visible, recursively collect visible elements
|
child.offsetHeight >= 0 &&
|
||||||
collectVisibleElements(child, visibleElements);
|
window.getComputedStyle(child).contentVisibility !== 'hidden'
|
||||||
}
|
) {
|
||||||
|
// If the child is an element and is visible, recursively collect visible elements
|
||||||
|
collectVisibleElements(child, visibleElements);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
const elementsToScan = [];
|
const elementsToScan = [];
|
||||||
collectVisibleElements(document.body, elementsToScan);
|
collectVisibleElements(document.body, elementsToScan);
|
||||||
|
|
||||||
var elementText = "";
|
var elementText = "";
|
||||||
|
|
||||||
// REGEXS THAT REALLY MEAN IT'S IN STOCK
|
// REGEXS THAT REALLY MEAN IT'S IN STOCK
|
||||||
for (let i = elementsToScan.length - 1; i >= 0; i--) {
|
for (let i = elementsToScan.length - 1; i >= 0; i--) {
|
||||||
const element = elementsToScan[i];
|
const element = elementsToScan[i];
|
||||||
|
|
||||||
if (!elementIsInEyeBallRange(element)) {
|
// outside the 'fold' or some weird text in the heading area
|
||||||
continue
|
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
|
||||||
}
|
if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
|
||||||
|
continue
|
||||||
elementText = "";
|
|
||||||
try {
|
|
||||||
if (element.tagName.toLowerCase() === "input") {
|
|
||||||
elementText = element.value.toLowerCase().trim();
|
|
||||||
} else {
|
|
||||||
elementText = getElementBaseText(element);
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
|
|
||||||
}
|
|
||||||
if (elementText.length) {
|
|
||||||
// try which ones could mean its in stock
|
|
||||||
if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
|
|
||||||
console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
|
|
||||||
element.style.border = "2px solid green"; // highlight the element that was detected as in stock
|
|
||||||
return 'Possibly in stock';
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
|
elementText = "";
|
||||||
for (let i = elementsToScan.length - 1; i >= 0; i--) {
|
try {
|
||||||
const element = elementsToScan[i];
|
|
||||||
|
|
||||||
if (!elementIsInEyeBallRange(element)) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
elementText = "";
|
|
||||||
if (element.tagName.toLowerCase() === "input") {
|
if (element.tagName.toLowerCase() === "input") {
|
||||||
elementText = element.value.toLowerCase().trim();
|
elementText = element.value.toLowerCase().trim();
|
||||||
} else {
|
} else {
|
||||||
elementText = getElementBaseText(element);
|
elementText = getElementBaseText(element);
|
||||||
}
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
|
||||||
|
}
|
||||||
|
|
||||||
if (elementText.length) {
|
if (elementText.length) {
|
||||||
// and these mean its out of stock
|
// try which ones could mean its in stock
|
||||||
for (const outOfStockText of outOfStockTexts) {
|
if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
|
||||||
if (elementText.includes(outOfStockText)) {
|
console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
|
||||||
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
|
return 'Possibly in stock';
|
||||||
element.style.border = "2px solid red"; // highlight the element that was detected as out of stock
|
}
|
||||||
return outOfStockText; // item is out of stock
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
|
||||||
|
for (let i = elementsToScan.length - 1; i >= 0; i--) {
|
||||||
|
const element = elementsToScan[i];
|
||||||
|
// outside the 'fold' or some weird text in the heading area
|
||||||
|
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
|
||||||
|
// Note: theres also an automated test that places the 'out of stock' text fairly low down
|
||||||
|
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
elementText = "";
|
||||||
|
if (element.tagName.toLowerCase() === "input") {
|
||||||
|
elementText = element.value.toLowerCase().trim();
|
||||||
|
} else {
|
||||||
|
elementText = getElementBaseText(element);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (elementText.length) {
|
||||||
|
// and these mean its out of stock
|
||||||
|
for (const outOfStockText of outOfStockTexts) {
|
||||||
|
if (elementText.includes(outOfStockText)) {
|
||||||
|
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
|
||||||
|
return outOfStockText; // item is out of stock
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`)
|
|
||||||
return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// returns the element text that makes it think it's out of stock
|
console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`)
|
||||||
return isItemInStock().trim()
|
return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// returns the element text that makes it think it's out of stock
|
||||||
|
return isItemInStock().trim()
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,284 +1,285 @@
|
|||||||
async (options) => {
|
// Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
|
||||||
|
// All rights reserved.
|
||||||
|
|
||||||
let visualselector_xpath_selectors = options.visualselector_xpath_selectors
|
// @file Scrape the page looking for elements of concern (%ELEMENTS%)
|
||||||
let max_height = options.max_height
|
// http://matatk.agrip.org.uk/tests/position-and-width/
|
||||||
|
// https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate
|
||||||
|
//
|
||||||
|
// Some pages like https://www.londonstockexchange.com/stock/NCCL/ncondezi-energy-limited/analysis
|
||||||
|
// will automatically force a scroll somewhere, so include the position offset
|
||||||
|
// Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing
|
||||||
|
var scroll_y = 0;
|
||||||
|
try {
|
||||||
|
scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
|
||||||
var scroll_y = 0;
|
|
||||||
try {
|
|
||||||
scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
|
|
||||||
} catch (e) {
|
|
||||||
console.log(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Include the getXpath script directly, easier than fetching
|
// Include the getXpath script directly, easier than fetching
|
||||||
function getxpath(e) {
|
function getxpath(e) {
|
||||||
var n = e;
|
var n = e;
|
||||||
if (n && n.id) return '//*[@id="' + n.id + '"]';
|
if (n && n.id) return '//*[@id="' + n.id + '"]';
|
||||||
for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
|
for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
|
||||||
for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
|
for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
|
||||||
for (d = n.nextSibling; d;) {
|
for (d = n.nextSibling; d;) {
|
||||||
if (d.nodeName === n.nodeName) {
|
if (d.nodeName === n.nodeName) {
|
||||||
r = !0;
|
r = !0;
|
||||||
break
|
break
|
||||||
}
|
|
||||||
d = d.nextSibling
|
|
||||||
}
|
}
|
||||||
o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
|
d = d.nextSibling
|
||||||
}
|
}
|
||||||
return o.length ? "/" + o.reverse().join("/") : ""
|
o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
|
||||||
}
|
}
|
||||||
|
return o.length ? "/" + o.reverse().join("/") : ""
|
||||||
|
}
|
||||||
|
|
||||||
const findUpTag = (el) => {
|
const findUpTag = (el) => {
|
||||||
let r = el
|
let r = el
|
||||||
chained_css = [];
|
chained_css = [];
|
||||||
depth = 0;
|
depth = 0;
|
||||||
|
|
||||||
// Strategy 1: If it's an input, with name, and there's only one, prefer that
|
// Strategy 1: If it's an input, with name, and there's only one, prefer that
|
||||||
if (el.name !== undefined && el.name.length) {
|
if (el.name !== undefined && el.name.length) {
|
||||||
var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
|
var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
|
||||||
var proposed_element = window.document.querySelectorAll(proposed);
|
var proposed_element = window.document.querySelectorAll(proposed);
|
||||||
if (proposed_element.length) {
|
if (proposed_element.length) {
|
||||||
if (proposed_element.length === 1) {
|
if (proposed_element.length === 1) {
|
||||||
return proposed;
|
return proposed;
|
||||||
} else {
|
|
||||||
// Some sites change ID but name= stays the same, we can hit it if we know the index
|
|
||||||
// Find all the elements that match and work out the input[n]
|
|
||||||
var n = Array.from(proposed_element).indexOf(el);
|
|
||||||
// Return a Playwright selector for nthinput[name=zipcode]
|
|
||||||
return proposed + " >> nth=" + n;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
|
|
||||||
while (r.parentNode) {
|
|
||||||
if (depth === 5) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if ('' !== r.id) {
|
|
||||||
chained_css.unshift("#" + CSS.escape(r.id));
|
|
||||||
final_selector = chained_css.join(' > ');
|
|
||||||
// Be sure theres only one, some sites have multiples of the same ID tag :-(
|
|
||||||
if (window.document.querySelectorAll(final_selector).length === 1) {
|
|
||||||
return final_selector;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
} else {
|
} else {
|
||||||
chained_css.unshift(r.tagName.toLowerCase());
|
// Some sites change ID but name= stays the same, we can hit it if we know the index
|
||||||
|
// Find all the elements that match and work out the input[n]
|
||||||
|
var n = Array.from(proposed_element).indexOf(el);
|
||||||
|
// Return a Playwright selector for nthinput[name=zipcode]
|
||||||
|
return proposed + " >> nth=" + n;
|
||||||
}
|
}
|
||||||
r = r.parentNode;
|
|
||||||
depth += 1;
|
|
||||||
}
|
}
|
||||||
return null;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
|
||||||
|
while (r.parentNode) {
|
||||||
|
if (depth === 5) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if ('' !== r.id) {
|
||||||
|
chained_css.unshift("#" + CSS.escape(r.id));
|
||||||
|
final_selector = chained_css.join(' > ');
|
||||||
|
// Be sure theres only one, some sites have multiples of the same ID tag :-(
|
||||||
|
if (window.document.querySelectorAll(final_selector).length === 1) {
|
||||||
|
return final_selector;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
chained_css.unshift(r.tagName.toLowerCase());
|
||||||
|
}
|
||||||
|
r = r.parentNode;
|
||||||
|
depth += 1;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
// @todo - if it's SVG or IMG, go into image diff mode
|
// @todo - if it's SVG or IMG, go into image diff mode
|
||||||
|
// %ELEMENTS% replaced at injection time because different interfaces use it with different settings
|
||||||
|
|
||||||
var size_pos = [];
|
var size_pos = [];
|
||||||
// after page fetch, inject this JS
|
// after page fetch, inject this JS
|
||||||
// build a map of all elements and their positions (maybe that only include text?)
|
// build a map of all elements and their positions (maybe that only include text?)
|
||||||
var bbox;
|
var bbox;
|
||||||
console.log(`Scanning for "${visualselector_xpath_selectors}"`);
|
console.log("Scanning %ELEMENTS%");
|
||||||
|
|
||||||
function collectVisibleElements(parent, visibleElements) {
|
function collectVisibleElements(parent, visibleElements) {
|
||||||
if (!parent) return; // Base case: if parent is null or undefined, return
|
if (!parent) return; // Base case: if parent is null or undefined, return
|
||||||
|
|
||||||
|
|
||||||
// Add the parent itself to the visible elements array if it's of the specified types
|
// Add the parent itself to the visible elements array if it's of the specified types
|
||||||
const tagName = parent.tagName.toLowerCase();
|
const tagName = parent.tagName.toLowerCase();
|
||||||
if (visualselector_xpath_selectors.split(',').includes(tagName)) {
|
if ("%ELEMENTS%".split(',').includes(tagName)) {
|
||||||
visibleElements.push(parent);
|
visibleElements.push(parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate over the parent's children
|
||||||
|
const children = parent.children;
|
||||||
|
for (let i = 0; i < children.length; i++) {
|
||||||
|
const child = children[i];
|
||||||
|
const computedStyle = window.getComputedStyle(child);
|
||||||
|
|
||||||
|
if (
|
||||||
|
child.nodeType === Node.ELEMENT_NODE &&
|
||||||
|
computedStyle.display !== 'none' &&
|
||||||
|
computedStyle.visibility !== 'hidden' &&
|
||||||
|
child.offsetWidth >= 0 &&
|
||||||
|
child.offsetHeight >= 0 &&
|
||||||
|
computedStyle.contentVisibility !== 'hidden'
|
||||||
|
) {
|
||||||
|
// If the child is an element and is visible, recursively collect visible elements
|
||||||
|
collectVisibleElements(child, visibleElements);
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Iterate over the parent's children
|
// Create an array to hold the visible elements
|
||||||
const children = parent.children;
|
const visibleElementsArray = [];
|
||||||
for (let i = 0; i < children.length; i++) {
|
|
||||||
const child = children[i];
|
|
||||||
const computedStyle = window.getComputedStyle(child);
|
|
||||||
|
|
||||||
if (
|
// Call collectVisibleElements with the starting parent element
|
||||||
child.nodeType === Node.ELEMENT_NODE &&
|
collectVisibleElements(document.body, visibleElementsArray);
|
||||||
computedStyle.display !== 'none' &&
|
|
||||||
computedStyle.visibility !== 'hidden' &&
|
|
||||||
child.offsetWidth >= 0 &&
|
visibleElementsArray.forEach(function (element) {
|
||||||
child.offsetHeight >= 0 &&
|
|
||||||
computedStyle.contentVisibility !== 'hidden'
|
bbox = element.getBoundingClientRect();
|
||||||
) {
|
|
||||||
// If the child is an element and is visible, recursively collect visible elements
|
// Skip really small ones, and where width or height ==0
|
||||||
collectVisibleElements(child, visibleElements);
|
if (bbox['width'] * bbox['height'] < 10) {
|
||||||
}
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// Don't include elements that are offset from canvas
|
||||||
|
if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
|
||||||
|
// it should not traverse when we know we can anchor off just an ID one level up etc..
|
||||||
|
// maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
|
||||||
|
|
||||||
|
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
|
||||||
|
xpath_result = false;
|
||||||
|
try {
|
||||||
|
var d = findUpTag(element);
|
||||||
|
if (d) {
|
||||||
|
xpath_result = d;
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
// You could swap it and default to getXpath and then try the smarter one
|
||||||
|
// default back to the less intelligent one
|
||||||
|
if (!xpath_result) {
|
||||||
|
try {
|
||||||
|
// I've seen on FB and eBay that this doesnt work
|
||||||
|
// ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
|
||||||
|
xpath_result = getxpath(element);
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
return
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
// Create an array to hold the visible elements
|
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
|
||||||
const visibleElementsArray = [];
|
|
||||||
|
|
||||||
// Call collectVisibleElements with the starting parent element
|
let text = element.textContent.trim().slice(0, 30).trim();
|
||||||
collectVisibleElements(document.body, visibleElementsArray);
|
while (/\n{2,}|\t{2,}/.test(text)) {
|
||||||
|
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
|
||||||
|
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;
|
||||||
|
const computedStyle = window.getComputedStyle(element);
|
||||||
|
|
||||||
visibleElementsArray.forEach(function (element) {
|
size_pos.push({
|
||||||
|
xpath: xpath_result,
|
||||||
bbox = element.getBoundingClientRect();
|
width: Math.round(bbox['width']),
|
||||||
|
height: Math.round(bbox['height']),
|
||||||
// Skip really small ones, and where width or height ==0
|
left: Math.floor(bbox['left']),
|
||||||
if (bbox['width'] * bbox['height'] < 10) {
|
top: Math.floor(bbox['top']) + scroll_y,
|
||||||
return
|
// tagName used by Browser Steps
|
||||||
}
|
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
|
||||||
|
// tagtype used by Browser Steps
|
||||||
// Don't include elements that are offset from canvas
|
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
|
||||||
if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) {
|
isClickable: computedStyle.cursor === "pointer",
|
||||||
return
|
// Used by the keras trainer
|
||||||
}
|
fontSize: computedStyle.getPropertyValue('font-size'),
|
||||||
|
fontWeight: computedStyle.getPropertyValue('font-weight'),
|
||||||
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
|
hasDigitCurrency: hasDigitCurrency,
|
||||||
// it should not traverse when we know we can anchor off just an ID one level up etc..
|
label: label,
|
||||||
// maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
|
|
||||||
|
|
||||||
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
|
|
||||||
xpath_result = false;
|
|
||||||
try {
|
|
||||||
var d = findUpTag(element);
|
|
||||||
if (d) {
|
|
||||||
xpath_result = d;
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.log(e);
|
|
||||||
}
|
|
||||||
// You could swap it and default to getXpath and then try the smarter one
|
|
||||||
// default back to the less intelligent one
|
|
||||||
if (!xpath_result) {
|
|
||||||
try {
|
|
||||||
// I've seen on FB and eBay that this doesnt work
|
|
||||||
// ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
|
|
||||||
xpath_result = getxpath(element);
|
|
||||||
} catch (e) {
|
|
||||||
console.log(e);
|
|
||||||
return
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
|
|
||||||
|
|
||||||
let text = element.textContent.trim().slice(0, 30).trim();
|
|
||||||
while (/\n{2,}|\t{2,}/.test(text)) {
|
|
||||||
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
|
|
||||||
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6))) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text);
|
|
||||||
const computedStyle = window.getComputedStyle(element);
|
|
||||||
|
|
||||||
if (Math.floor(bbox['top']) + scroll_y > max_height) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
size_pos.push({
|
|
||||||
xpath: xpath_result,
|
|
||||||
width: Math.round(bbox['width']),
|
|
||||||
height: Math.round(bbox['height']),
|
|
||||||
left: Math.floor(bbox['left']),
|
|
||||||
top: Math.floor(bbox['top']) + scroll_y,
|
|
||||||
// tagName used by Browser Steps
|
|
||||||
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
|
|
||||||
// tagtype used by Browser Steps
|
|
||||||
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
|
|
||||||
isClickable: computedStyle.cursor === "pointer",
|
|
||||||
// Used by the keras trainer
|
|
||||||
fontSize: computedStyle.getPropertyValue('font-size'),
|
|
||||||
fontWeight: computedStyle.getPropertyValue('font-weight'),
|
|
||||||
hasDigitCurrency: hasDigitCurrency,
|
|
||||||
label: label,
|
|
||||||
});
|
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
// Inject the current one set in the include_filters, which may be a CSS rule
|
// Inject the current one set in the include_filters, which may be a CSS rule
|
||||||
// used for displaying the current one in VisualSelector, where its not one we generated.
|
// used for displaying the current one in VisualSelector, where its not one we generated.
|
||||||
if (include_filters.length) {
|
if (include_filters.length) {
|
||||||
let results;
|
let results;
|
||||||
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again
|
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again
|
||||||
for (const f of include_filters) {
|
for (const f of include_filters) {
|
||||||
bbox = false;
|
bbox = false;
|
||||||
|
q = false;
|
||||||
|
|
||||||
if (!f.length) {
|
if (!f.length) {
|
||||||
console.log("xpath_element_scraper: Empty filter, skipping");
|
console.log("xpath_element_scraper: Empty filter, skipping");
|
||||||
continue;
|
continue;
|
||||||
}
|
}
|
||||||
|
|
||||||
try {
|
try {
|
||||||
// is it xpath?
|
// is it xpath?
|
||||||
if (f.startsWith('/') || f.startsWith('xpath')) {
|
if (f.startsWith('/') || f.startsWith('xpath')) {
|
||||||
var qry_f = f.replace(/xpath(:|\d:)/, '')
|
var qry_f = f.replace(/xpath(:|\d:)/, '')
|
||||||
console.log("[xpath] Scanning for included filter " + qry_f)
|
console.log("[xpath] Scanning for included filter " + qry_f)
|
||||||
let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||||
results = [];
|
results = [];
|
||||||
for (let i = 0; i < xpathResult.snapshotLength; i++) {
|
for (let i = 0; i < xpathResult.snapshotLength; i++) {
|
||||||
results.push(xpathResult.snapshotItem(i));
|
results.push(xpathResult.snapshotItem(i));
|
||||||
}
|
|
||||||
} else {
|
|
||||||
console.log("[css] Scanning for included filter " + f)
|
|
||||||
console.log("[css] Scanning for included filter " + f);
|
|
||||||
results = document.querySelectorAll(f);
|
|
||||||
}
|
}
|
||||||
} catch (e) {
|
} else {
|
||||||
// Maybe catch DOMException and alert?
|
console.log("[css] Scanning for included filter " + f)
|
||||||
console.log("xpath_element_scraper: Exception selecting element from filter " + f);
|
console.log("[css] Scanning for included filter " + f);
|
||||||
console.log(e);
|
results = document.querySelectorAll(f);
|
||||||
}
|
}
|
||||||
|
} catch (e) {
|
||||||
|
// Maybe catch DOMException and alert?
|
||||||
|
console.log("xpath_element_scraper: Exception selecting element from filter " + f);
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
|
||||||
if (results != null && results.length) {
|
if (results != null && results.length) {
|
||||||
|
|
||||||
// Iterate over the results
|
// Iterate over the results
|
||||||
results.forEach(node => {
|
results.forEach(node => {
|
||||||
// Try to resolve //something/text() back to its /something so we can atleast get the bounding box
|
// Try to resolve //something/text() back to its /something so we can atleast get the bounding box
|
||||||
|
try {
|
||||||
|
if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
|
||||||
|
node = node.parentElement
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e)
|
||||||
|
console.log("xpath_element_scraper: #text resolver")
|
||||||
|
}
|
||||||
|
|
||||||
|
// #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
|
||||||
|
if (typeof node.getBoundingClientRect == 'function') {
|
||||||
|
bbox = node.getBoundingClientRect();
|
||||||
|
console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
|
||||||
|
} else {
|
||||||
try {
|
try {
|
||||||
if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
|
// Try and see we can find its ownerElement
|
||||||
node = node.parentElement
|
bbox = node.ownerElement.getBoundingClientRect();
|
||||||
}
|
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log(e)
|
console.log(e)
|
||||||
console.log("xpath_element_scraper: #text resolver")
|
console.log("xpath_element_scraper: error looking up q.ownerElement")
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
|
if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
|
||||||
if (typeof node.getBoundingClientRect == 'function') {
|
size_pos.push({
|
||||||
bbox = node.getBoundingClientRect();
|
xpath: f,
|
||||||
console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
|
width: parseInt(bbox['width']),
|
||||||
} else {
|
height: parseInt(bbox['height']),
|
||||||
try {
|
left: parseInt(bbox['left']),
|
||||||
// Try and see we can find its ownerElement
|
top: parseInt(bbox['top']) + scroll_y,
|
||||||
bbox = node.ownerElement.getBoundingClientRect();
|
highlight_as_custom_filter: true
|
||||||
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
|
});
|
||||||
} catch (e) {
|
}
|
||||||
console.log(e)
|
});
|
||||||
console.log("xpath_element_scraper: error looking up node.ownerElement")
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
|
|
||||||
size_pos.push({
|
|
||||||
xpath: f,
|
|
||||||
width: parseInt(bbox['width']),
|
|
||||||
height: parseInt(bbox['height']),
|
|
||||||
left: parseInt(bbox['left']),
|
|
||||||
top: parseInt(bbox['top']) + scroll_y,
|
|
||||||
highlight_as_custom_filter: true
|
|
||||||
});
|
|
||||||
}
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
}
|
||||||
|
|
||||||
// Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area
|
// Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area
|
||||||
// so that we dont select the wrapping element by mistake and be unable to select what we want
|
// so that we dont select the wrapping element by mistake and be unable to select what we want
|
||||||
size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1)
|
size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1)
|
||||||
|
|
||||||
// browser_width required for proper scaling in the frontend
|
|
||||||
// Return as a string to save playwright for juggling thousands of objects
|
|
||||||
return JSON.stringify({'size_pos': size_pos, 'browser_width': window.innerWidth});
|
|
||||||
}
|
|
||||||
|
|
||||||
|
// Window.width required for proper scaling in the frontend
|
||||||
|
return {'size_pos': size_pos, 'browser_width': window.innerWidth};
|
||||||
|
|||||||
@@ -1,73 +0,0 @@
|
|||||||
# Pages with a vertical height longer than this will use the 'stitch together' method.
|
|
||||||
|
|
||||||
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
|
|
||||||
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
|
|
||||||
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
|
|
||||||
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
|
|
||||||
|
|
||||||
|
|
||||||
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
|
|
||||||
import os
|
|
||||||
import io
|
|
||||||
from PIL import Image, ImageDraw, ImageFont
|
|
||||||
|
|
||||||
try:
|
|
||||||
|
|
||||||
# Load images from byte chunks
|
|
||||||
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
|
||||||
total_height = sum(im.height for im in images)
|
|
||||||
max_width = max(im.width for im in images)
|
|
||||||
|
|
||||||
# Create stitched image
|
|
||||||
stitched = Image.new('RGB', (max_width, total_height))
|
|
||||||
y_offset = 0
|
|
||||||
for im in images:
|
|
||||||
stitched.paste(im, (0, y_offset))
|
|
||||||
y_offset += im.height
|
|
||||||
|
|
||||||
# Draw caption on top (overlaid, not extending canvas)
|
|
||||||
draw = ImageDraw.Draw(stitched)
|
|
||||||
|
|
||||||
if original_page_height > capture_height:
|
|
||||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
|
||||||
padding = 10
|
|
||||||
font_size = 35
|
|
||||||
font_color = (255, 0, 0)
|
|
||||||
background_color = (255, 255, 255)
|
|
||||||
|
|
||||||
|
|
||||||
# Try to load a proper font
|
|
||||||
try:
|
|
||||||
font = ImageFont.truetype("arial.ttf", font_size)
|
|
||||||
except IOError:
|
|
||||||
font = ImageFont.load_default()
|
|
||||||
|
|
||||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
|
||||||
text_width = bbox[2] - bbox[0]
|
|
||||||
text_height = bbox[3] - bbox[1]
|
|
||||||
|
|
||||||
# Draw white rectangle background behind text
|
|
||||||
rect_top = 0
|
|
||||||
rect_bottom = text_height + 2 * padding
|
|
||||||
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
|
|
||||||
|
|
||||||
# Draw text centered horizontally, 10px padding from top of the rectangle
|
|
||||||
text_x = (max_width - text_width) // 2
|
|
||||||
text_y = padding
|
|
||||||
draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
|
|
||||||
|
|
||||||
# Encode and send image
|
|
||||||
output = io.BytesIO()
|
|
||||||
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)))
|
|
||||||
pipe_conn.send_bytes(output.getvalue())
|
|
||||||
|
|
||||||
stitched.close()
|
|
||||||
except Exception as e:
|
|
||||||
pipe_conn.send(f"error:{e}")
|
|
||||||
finally:
|
|
||||||
pipe_conn.close()
|
|
||||||
|
|
||||||
|
|
||||||
@@ -10,13 +10,16 @@ class fetcher(Fetcher):
|
|||||||
else:
|
else:
|
||||||
fetcher_description = "WebDriver Chrome/Javascript"
|
fetcher_description = "WebDriver Chrome/Javascript"
|
||||||
|
|
||||||
|
# Configs for Proxy setup
|
||||||
|
# In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
|
||||||
|
selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
|
||||||
|
'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
|
||||||
|
'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
|
||||||
proxy = None
|
proxy = None
|
||||||
proxy_url = None
|
|
||||||
|
|
||||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||||
super().__init__()
|
super().__init__()
|
||||||
from urllib.parse import urlparse
|
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
|
||||||
from selenium.webdriver.common.proxy import Proxy
|
|
||||||
|
|
||||||
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
||||||
if not custom_browser_connection_url:
|
if not custom_browser_connection_url:
|
||||||
@@ -25,29 +28,27 @@ class fetcher(Fetcher):
|
|||||||
self.browser_connection_is_custom = True
|
self.browser_connection_is_custom = True
|
||||||
self.browser_connection_url = custom_browser_connection_url
|
self.browser_connection_url = custom_browser_connection_url
|
||||||
|
|
||||||
|
# If any proxy settings are enabled, then we should setup the proxy object
|
||||||
|
proxy_args = {}
|
||||||
|
for k in self.selenium_proxy_settings_mappings:
|
||||||
|
v = os.getenv('webdriver_' + k, False)
|
||||||
|
if v:
|
||||||
|
proxy_args[k] = v.strip('"')
|
||||||
|
|
||||||
##### PROXY SETUP #####
|
# Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
|
||||||
|
if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
|
||||||
|
proxy_args['httpProxy'] = self.system_http_proxy
|
||||||
|
if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
|
||||||
|
proxy_args['httpsProxy'] = self.system_https_proxy
|
||||||
|
|
||||||
proxy_sources = [
|
# Allows override the proxy on a per-request basis
|
||||||
self.system_http_proxy,
|
if proxy_override is not None:
|
||||||
self.system_https_proxy,
|
proxy_args['httpProxy'] = proxy_override
|
||||||
os.getenv('webdriver_proxySocks'),
|
|
||||||
os.getenv('webdriver_socksProxy'),
|
|
||||||
os.getenv('webdriver_proxyHttp'),
|
|
||||||
os.getenv('webdriver_httpProxy'),
|
|
||||||
os.getenv('webdriver_proxyHttps'),
|
|
||||||
os.getenv('webdriver_httpsProxy'),
|
|
||||||
os.getenv('webdriver_sslProxy'),
|
|
||||||
proxy_override, # last one should override
|
|
||||||
]
|
|
||||||
# The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server=
|
|
||||||
for k in filter(None, proxy_sources):
|
|
||||||
if not k:
|
|
||||||
continue
|
|
||||||
self.proxy_url = k.strip()
|
|
||||||
|
|
||||||
|
if proxy_args:
|
||||||
|
self.proxy = SeleniumProxy(raw=proxy_args)
|
||||||
|
|
||||||
async def run(self,
|
def run(self,
|
||||||
url,
|
url,
|
||||||
timeout,
|
timeout,
|
||||||
request_headers,
|
request_headers,
|
||||||
@@ -58,86 +59,62 @@ class fetcher(Fetcher):
|
|||||||
is_binary=False,
|
is_binary=False,
|
||||||
empty_pages_are_a_change=False):
|
empty_pages_are_a_change=False):
|
||||||
|
|
||||||
import asyncio
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||||
# Wrap the entire selenium operation in a thread executor
|
from selenium.common.exceptions import WebDriverException
|
||||||
def _run_sync():
|
# request_body, request_method unused for now, until some magic in the future happens.
|
||||||
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
|
||||||
# request_body, request_method unused for now, until some magic in the future happens.
|
|
||||||
|
|
||||||
options = ChromeOptions()
|
options = ChromeOptions()
|
||||||
|
if self.proxy:
|
||||||
|
options.proxy = self.proxy
|
||||||
|
|
||||||
# Load Chrome options from env
|
self.driver = webdriver.Remote(
|
||||||
CHROME_OPTIONS = [
|
command_executor=self.browser_connection_url,
|
||||||
line.strip()
|
options=options)
|
||||||
for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
|
|
||||||
if line.strip()
|
|
||||||
]
|
|
||||||
|
|
||||||
for opt in CHROME_OPTIONS:
|
try:
|
||||||
options.add_argument(opt)
|
self.driver.get(url)
|
||||||
|
except WebDriverException as e:
|
||||||
|
# Be sure we close the session window
|
||||||
|
self.quit()
|
||||||
|
raise
|
||||||
|
|
||||||
# 1. proxy_config /Proxy(proxy_config) selenium object is REALLY unreliable
|
self.driver.set_window_size(1280, 1024)
|
||||||
# 2. selenium-wire cant be used because the websocket version conflicts with pypeteer-ng
|
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||||
# 3. selenium only allows ONE runner at a time by default!
|
|
||||||
# 4. driver must use quit() or it will continue to block/hold the selenium process!!
|
|
||||||
|
|
||||||
if self.proxy_url:
|
if self.webdriver_js_execute_code is not None:
|
||||||
options.add_argument(f'--proxy-server={self.proxy_url}')
|
self.driver.execute_script(self.webdriver_js_execute_code)
|
||||||
|
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
||||||
|
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||||
|
|
||||||
from selenium.webdriver.remote.remote_connection import RemoteConnection
|
# @todo - how to check this? is it possible?
|
||||||
from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
|
self.status_code = 200
|
||||||
driver = None
|
# @todo somehow we should try to get this working for WebDriver
|
||||||
|
# raise EmptyReply(url=url, status_code=r.status_code)
|
||||||
|
|
||||||
|
# @todo - dom wait loaded?
|
||||||
|
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
|
||||||
|
self.content = self.driver.page_source
|
||||||
|
self.headers = {}
|
||||||
|
|
||||||
|
self.screenshot = self.driver.get_screenshot_as_png()
|
||||||
|
|
||||||
|
# Does the connection to the webdriver work? run a test connection.
|
||||||
|
def is_ready(self):
|
||||||
|
from selenium import webdriver
|
||||||
|
from selenium.webdriver.chrome.options import Options as ChromeOptions
|
||||||
|
|
||||||
|
self.driver = webdriver.Remote(
|
||||||
|
command_executor=self.command_executor,
|
||||||
|
options=ChromeOptions())
|
||||||
|
|
||||||
|
# driver.quit() seems to cause better exceptions
|
||||||
|
self.quit()
|
||||||
|
return True
|
||||||
|
|
||||||
|
def quit(self):
|
||||||
|
if self.driver:
|
||||||
try:
|
try:
|
||||||
# Create the RemoteConnection and set timeout (e.g., 30 seconds)
|
self.driver.quit()
|
||||||
remote_connection = RemoteConnection(
|
|
||||||
self.browser_connection_url,
|
|
||||||
)
|
|
||||||
remote_connection.set_timeout(30) # seconds
|
|
||||||
|
|
||||||
# Now create the driver with the RemoteConnection
|
|
||||||
driver = RemoteWebDriver(
|
|
||||||
command_executor=remote_connection,
|
|
||||||
options=options
|
|
||||||
)
|
|
||||||
|
|
||||||
driver.set_page_load_timeout(int(os.getenv("WEBDRIVER_PAGELOAD_TIMEOUT", 45)))
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
if driver:
|
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
|
||||||
driver.quit()
|
|
||||||
raise e
|
|
||||||
|
|
||||||
try:
|
|
||||||
driver.get(url)
|
|
||||||
|
|
||||||
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
|
|
||||||
driver.set_window_size(1280, 1024)
|
|
||||||
|
|
||||||
driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
|
||||||
|
|
||||||
if self.webdriver_js_execute_code is not None:
|
|
||||||
driver.execute_script(self.webdriver_js_execute_code)
|
|
||||||
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
|
||||||
driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
|
||||||
|
|
||||||
# @todo - how to check this? is it possible?
|
|
||||||
self.status_code = 200
|
|
||||||
# @todo somehow we should try to get this working for WebDriver
|
|
||||||
# raise EmptyReply(url=url, status_code=r.status_code)
|
|
||||||
|
|
||||||
# @todo - dom wait loaded?
|
|
||||||
import time
|
|
||||||
time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
|
|
||||||
self.content = driver.page_source
|
|
||||||
self.headers = {}
|
|
||||||
self.screenshot = driver.get_screenshot_as_png()
|
|
||||||
except Exception as e:
|
|
||||||
driver.quit()
|
|
||||||
raise e
|
|
||||||
|
|
||||||
driver.quit()
|
|
||||||
|
|
||||||
# Run the selenium operations in a thread pool to avoid blocking the event loop
|
|
||||||
loop = asyncio.get_event_loop()
|
|
||||||
await loop.run_in_executor(None, _run_sync)
|
|
||||||
|
|
||||||
@@ -1,535 +0,0 @@
|
|||||||
import queue
|
|
||||||
import asyncio
|
|
||||||
from blinker import signal
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
|
|
||||||
class NotificationQueue(queue.Queue):
|
|
||||||
"""
|
|
||||||
Extended Queue that sends a 'notification_event' signal when notifications are added.
|
|
||||||
|
|
||||||
This class extends the standard Queue and adds a signal emission after a notification
|
|
||||||
is put into the queue. The signal includes the watch UUID if available.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, maxsize=0):
|
|
||||||
super().__init__(maxsize)
|
|
||||||
try:
|
|
||||||
self.notification_event_signal = signal('notification_event')
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"Exception creating notification_event signal: {e}")
|
|
||||||
|
|
||||||
def put(self, item, block=True, timeout=None):
|
|
||||||
# Call the parent's put method first
|
|
||||||
super().put(item, block, timeout)
|
|
||||||
|
|
||||||
# After putting the notification in the queue, emit signal with watch UUID
|
|
||||||
try:
|
|
||||||
if self.notification_event_signal and isinstance(item, dict):
|
|
||||||
watch_uuid = item.get('uuid')
|
|
||||||
if watch_uuid:
|
|
||||||
# Send the notification_event signal with the watch UUID
|
|
||||||
self.notification_event_signal.send(watch_uuid=watch_uuid)
|
|
||||||
logger.trace(f"NotificationQueue: Emitted notification_event signal for watch UUID {watch_uuid}")
|
|
||||||
else:
|
|
||||||
# Send signal without UUID for system notifications
|
|
||||||
self.notification_event_signal.send()
|
|
||||||
logger.trace("NotificationQueue: Emitted notification_event signal for system notification")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Exception emitting notification_event signal: {e}")
|
|
||||||
|
|
||||||
class SignalPriorityQueue(queue.PriorityQueue):
    """
    Extended PriorityQueue that sends a signal when items with a UUID are added.

    This class extends the standard PriorityQueue and adds a signal emission
    after an item is put into the queue. If the item contains a UUID, the signal
    is sent with that UUID as a parameter.
    """

    def __init__(self, maxsize=0):
        super().__init__(maxsize)
        # Default to None so put()/get() can safely test the attribute even if
        # signal creation below fails (previously it would be left undefined
        # and raise AttributeError later).
        self.queue_length_signal = None
        try:
            # blinker signal used to push live queue-size updates to the UI
            self.queue_length_signal = signal('queue_length')
        except Exception as e:
            logger.critical(f"Exception: {e}")

    def put(self, item, block=True, timeout=None):
        """Put an item on the queue, then emit UI update signals.

        If the queued item wraps a dict containing a 'uuid' key, a
        'watch_check_update' signal is sent with that UUID, followed by a
        'queue_length' signal carrying the new queue size.
        """
        # Call the parent's put method first
        super().put(item, block, timeout)

        # After putting the item in the queue, check if it has a UUID and emit signal
        if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
            uuid = item.item['uuid']
            # Get the signal and send it if it exists
            watch_check_update = signal('watch_check_update')
            if watch_check_update:
                # Send the watch_uuid parameter
                watch_check_update.send(watch_uuid=uuid)

        # Send queue_length signal with current queue size
        try:
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())
        except Exception as e:
            logger.critical(f"Exception: {e}")

    def get(self, block=True, timeout=None):
        """Remove and return an item, then emit the 'queue_length' signal."""
        # Call the parent's get method first
        item = super().get(block, timeout)

        # Send queue_length signal with current queue size
        try:
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())
        except Exception as e:
            logger.critical(f"Exception: {e}")
        return item

    def get_uuid_position(self, target_uuid):
        """
        Find the position of a watch UUID in the priority queue.
        Optimized for large queues - O(n) complexity instead of O(n log n).

        Args:
            target_uuid: The UUID to search for

        Returns:
            dict: Contains position info or None if not found
            - position: 0-based position in queue (0 = next to be processed)
            - total_items: total number of items in queue
            - priority: the priority value of the found item
            - found: whether the UUID was located
        """
        # Snapshot the heap under the queue's own mutex for a consistent view
        with self.mutex:
            queue_list = list(self.queue)
            total_items = len(queue_list)

            if total_items == 0:
                return {
                    'position': None,
                    'total_items': 0,
                    'priority': None,
                    'found': False
                }

            # Find the target item and its priority first - O(n)
            target_item = None
            target_priority = None

            for item in queue_list:
                if (hasattr(item, 'item') and
                        isinstance(item.item, dict) and
                        item.item.get('uuid') == target_uuid):
                    target_item = item
                    target_priority = item.priority
                    break

            if target_item is None:
                return {
                    'position': None,
                    'total_items': total_items,
                    'priority': None,
                    'found': False
                }

            # Count how many items have higher priority (lower numbers) - O(n)
            position = 0
            for item in queue_list:
                # Items with lower priority numbers are processed first
                if item.priority < target_priority:
                    position += 1
                elif item.priority == target_priority and item != target_item:
                    # For same priority, count items that come before this one
                    # (Note: this is approximate since heap order isn't guaranteed for equal priorities)
                    position += 1

            return {
                'position': position,
                'total_items': total_items,
                'priority': target_priority,
                'found': True
            }

    def get_all_queued_uuids(self, limit=None, offset=0):
        """
        Get UUIDs currently in the queue with their positions.
        For large queues, use limit/offset for pagination.

        Args:
            limit: Maximum number of items to return (None = all)
            offset: Number of items to skip (for pagination)

        Returns:
            dict: Contains items and metadata
            - items: List of dicts with uuid, position, and priority
            - total_items: Total number of items in queue
            - returned_items: Number of items returned
            - has_more: Whether there are more items after this page
        """
        with self.mutex:
            queue_list = list(self.queue)
            total_items = len(queue_list)

            if total_items == 0:
                return {
                    'items': [],
                    'total_items': 0,
                    'returned_items': 0,
                    'has_more': False
                }

            # For very large queues, warn about performance
            if total_items > 1000 and limit is None:
                logger.warning(f"Getting all {total_items} queued items without limit - this may be slow")

            # Sort only if we need exact positions (expensive for large queues)
            if limit is not None and limit <= 100:
                # For small requests, we can afford to sort
                queue_items = sorted(queue_list)
                end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items)
                items_to_process = queue_items[offset:end_idx]

                result = []
                for position, item in enumerate(items_to_process, start=offset):
                    if (hasattr(item, 'item') and
                            isinstance(item.item, dict) and
                            'uuid' in item.item):
                        result.append({
                            'uuid': item.item['uuid'],
                            'position': position,
                            'priority': item.priority
                        })

                return {
                    'items': result,
                    'total_items': total_items,
                    'returned_items': len(result),
                    'has_more': (offset + len(result)) < total_items
                }
            else:
                # For large requests, return items with approximate positions
                # This is much faster O(n) instead of O(n log n)
                result = []
                processed = 0
                skipped = 0

                for item in queue_list:
                    if (hasattr(item, 'item') and
                            isinstance(item.item, dict) and
                            'uuid' in item.item):

                        if skipped < offset:
                            skipped += 1
                            continue

                        if limit and processed >= limit:
                            break

                        # Approximate position based on priority comparison
                        approx_position = sum(1 for other in queue_list if other.priority < item.priority)

                        result.append({
                            'uuid': item.item['uuid'],
                            'position': approx_position,  # Approximate
                            'priority': item.priority
                        })
                        processed += 1

                return {
                    'items': result,
                    'total_items': total_items,
                    'returned_items': len(result),
                    'has_more': (offset + len(result)) < total_items,
                    'note': 'Positions are approximate for performance with large queues'
                }

    def get_queue_summary(self):
        """
        Get a quick summary of queue state without expensive operations.
        O(n) complexity - fast even for large queues.

        Returns:
            dict: Queue summary statistics
        """
        with self.mutex:
            queue_list = list(self.queue)
            total_items = len(queue_list)

            if total_items == 0:
                return {
                    'total_items': 0,
                    'priority_breakdown': {},
                    'immediate_items': 0,
                    'clone_items': 0,
                    'scheduled_items': 0
                }

            # Count items by priority type - O(n)
            immediate_items = 0   # priority 1
            clone_items = 0       # priority 5
            scheduled_items = 0   # priority > 100 (timestamps)
            priority_counts = {}

            for item in queue_list:
                priority = item.priority
                priority_counts[priority] = priority_counts.get(priority, 0) + 1

                if priority == 1:
                    immediate_items += 1
                elif priority == 5:
                    clone_items += 1
                elif priority > 100:
                    scheduled_items += 1

            return {
                'total_items': total_items,
                'priority_breakdown': priority_counts,
                'immediate_items': immediate_items,
                'clone_items': clone_items,
                'scheduled_items': scheduled_items,
                'min_priority': min(priority_counts.keys()) if priority_counts else None,
                'max_priority': max(priority_counts.keys()) if priority_counts else None
            }
class AsyncSignalPriorityQueue(asyncio.PriorityQueue):
    """
    Async version of SignalPriorityQueue that sends signals when items are added/removed.

    This class extends asyncio.PriorityQueue and maintains the same signal behavior
    as the synchronous version for real-time UI updates.
    """

    def __init__(self, maxsize=0):
        super().__init__(maxsize)
        # Default to None so put()/get() can safely test the attribute even if
        # signal creation below fails (previously it would be left undefined
        # and raise AttributeError later).
        self.queue_length_signal = None
        try:
            # blinker signal used to push live queue-size updates to the UI
            self.queue_length_signal = signal('queue_length')
        except Exception as e:
            logger.critical(f"Exception: {e}")

    async def put(self, item):
        """Put an item on the queue, then emit UI update signals.

        If the queued item wraps a dict containing a 'uuid' key, a
        'watch_check_update' signal is sent with that UUID, followed by a
        'queue_length' signal carrying the new queue size.
        """
        # Call the parent's put method first
        await super().put(item)

        # After putting the item in the queue, check if it has a UUID and emit signal
        if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
            uuid = item.item['uuid']
            # Get the signal and send it if it exists
            watch_check_update = signal('watch_check_update')
            if watch_check_update:
                # Send the watch_uuid parameter
                watch_check_update.send(watch_uuid=uuid)

        # Send queue_length signal with current queue size
        try:
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())
        except Exception as e:
            logger.critical(f"Exception: {e}")

    async def get(self):
        """Remove and return an item, then emit the 'queue_length' signal."""
        # Call the parent's get method first
        item = await super().get()

        # Send queue_length signal with current queue size
        try:
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())
        except Exception as e:
            logger.critical(f"Exception: {e}")
        return item

    @property
    def queue(self):
        """
        Provide compatibility with sync PriorityQueue.queue access
        Returns the internal queue for template access
        """
        return self._queue if hasattr(self, '_queue') else []

    def get_uuid_position(self, target_uuid):
        """
        Find the position of a watch UUID in the async priority queue.
        Optimized for large queues - O(n) complexity instead of O(n log n).

        Args:
            target_uuid: The UUID to search for

        Returns:
            dict: Contains position info or None if not found
            - position: 0-based position in queue (0 = next to be processed)
            - total_items: total number of items in queue
            - priority: the priority value of the found item
            - found: whether the UUID was located
        """
        # No mutex here: asyncio queues are only touched from the event loop thread
        queue_list = list(self._queue)
        total_items = len(queue_list)

        if total_items == 0:
            return {
                'position': None,
                'total_items': 0,
                'priority': None,
                'found': False
            }

        # Find the target item and its priority first - O(n)
        target_item = None
        target_priority = None

        for item in queue_list:
            if (hasattr(item, 'item') and
                    isinstance(item.item, dict) and
                    item.item.get('uuid') == target_uuid):
                target_item = item
                target_priority = item.priority
                break

        if target_item is None:
            return {
                'position': None,
                'total_items': total_items,
                'priority': None,
                'found': False
            }

        # Count how many items have higher priority (lower numbers) - O(n)
        position = 0
        for item in queue_list:
            if item.priority < target_priority:
                position += 1
            elif item.priority == target_priority and item != target_item:
                position += 1

        return {
            'position': position,
            'total_items': total_items,
            'priority': target_priority,
            'found': True
        }

    def get_all_queued_uuids(self, limit=None, offset=0):
        """
        Get UUIDs currently in the async queue with their positions.
        For large queues, use limit/offset for pagination.

        Args:
            limit: Maximum number of items to return (None = all)
            offset: Number of items to skip (for pagination)

        Returns:
            dict: Contains items and metadata (same structure as sync version)
        """
        queue_list = list(self._queue)
        total_items = len(queue_list)

        if total_items == 0:
            return {
                'items': [],
                'total_items': 0,
                'returned_items': 0,
                'has_more': False
            }

        # Same logic as sync version but without mutex
        if limit is not None and limit <= 100:
            queue_items = sorted(queue_list)
            end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items)
            items_to_process = queue_items[offset:end_idx]

            result = []
            for position, item in enumerate(items_to_process, start=offset):
                if (hasattr(item, 'item') and
                        isinstance(item.item, dict) and
                        'uuid' in item.item):
                    result.append({
                        'uuid': item.item['uuid'],
                        'position': position,
                        'priority': item.priority
                    })

            return {
                'items': result,
                'total_items': total_items,
                'returned_items': len(result),
                'has_more': (offset + len(result)) < total_items
            }
        else:
            # Fast approximate positions for large queues
            result = []
            processed = 0
            skipped = 0

            for item in queue_list:
                if (hasattr(item, 'item') and
                        isinstance(item.item, dict) and
                        'uuid' in item.item):

                    if skipped < offset:
                        skipped += 1
                        continue

                    if limit and processed >= limit:
                        break

                    approx_position = sum(1 for other in queue_list if other.priority < item.priority)

                    result.append({
                        'uuid': item.item['uuid'],
                        'position': approx_position,
                        'priority': item.priority
                    })
                    processed += 1

            return {
                'items': result,
                'total_items': total_items,
                'returned_items': len(result),
                'has_more': (offset + len(result)) < total_items,
                'note': 'Positions are approximate for performance with large queues'
            }

    def get_queue_summary(self):
        """
        Get a quick summary of async queue state.
        O(n) complexity - fast even for large queues.
        """
        queue_list = list(self._queue)
        total_items = len(queue_list)

        if total_items == 0:
            return {
                'total_items': 0,
                'priority_breakdown': {},
                'immediate_items': 0,
                'clone_items': 0,
                'scheduled_items': 0
            }

        immediate_items = 0   # priority 1
        clone_items = 0       # priority 5
        scheduled_items = 0   # priority > 100 (timestamps)
        priority_counts = {}

        for item in queue_list:
            priority = item.priority
            priority_counts[priority] = priority_counts.get(priority, 0) + 1

            if priority == 1:
                immediate_items += 1
            elif priority == 5:
                clone_items += 1
            elif priority > 100:
                scheduled_items += 1

        return {
            'total_items': total_items,
            'priority_breakdown': priority_counts,
            'immediate_items': immediate_items,
            'clone_items': clone_items,
            'scheduled_items': scheduled_items,
            'min_priority': min(priority_counts.keys()) if priority_counts else None,
            'max_priority': max(priority_counts.keys()) if priority_counts else None
        }
@@ -4,53 +4,49 @@ import flask_login
|
|||||||
import locale
|
import locale
|
||||||
import os
|
import os
|
||||||
import queue
|
import queue
|
||||||
import sys
|
|
||||||
import threading
|
import threading
|
||||||
import time
|
import time
|
||||||
import timeago
|
import timeago
|
||||||
from blinker import signal
|
|
||||||
|
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
from threading import Event
|
from threading import Event
|
||||||
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
|
|
||||||
from changedetectionio import worker_handler
|
|
||||||
|
|
||||||
from flask import (
|
from flask import (
|
||||||
Flask,
|
Flask,
|
||||||
abort,
|
abort,
|
||||||
flash,
|
flash,
|
||||||
|
make_response,
|
||||||
redirect,
|
redirect,
|
||||||
render_template,
|
render_template,
|
||||||
request,
|
request,
|
||||||
send_from_directory,
|
send_from_directory,
|
||||||
|
session,
|
||||||
url_for,
|
url_for,
|
||||||
)
|
)
|
||||||
from flask_compress import Compress as FlaskCompress
|
from flask_compress import Compress as FlaskCompress
|
||||||
from flask_login import current_user
|
from flask_login import current_user
|
||||||
|
from flask_paginate import Pagination, get_page_parameter
|
||||||
from flask_restful import abort, Api
|
from flask_restful import abort, Api
|
||||||
from flask_cors import CORS
|
from flask_cors import CORS
|
||||||
|
|
||||||
# Create specific signals for application events
|
|
||||||
# Make this a global singleton to avoid multiple signal objects
|
|
||||||
watch_check_update = signal('watch_check_update', doc='Signal sent when a watch check is completed')
|
|
||||||
from flask_wtf import CSRFProtect
|
from flask_wtf import CSRFProtect
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from changedetectionio import __version__
|
from changedetectionio import __version__
|
||||||
from changedetectionio import queuedWatchMetaData
|
from changedetectionio import queuedWatchMetaData
|
||||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
|
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags
|
||||||
from changedetectionio.api.Search import Search
|
from changedetectionio.api.Search import Search
|
||||||
from .time_handler import is_within_schedule
|
from .time_handler import is_within_schedule
|
||||||
|
|
||||||
datastore = None
|
datastore = None
|
||||||
|
|
||||||
# Local
|
# Local
|
||||||
|
running_update_threads = []
|
||||||
ticker_thread = None
|
ticker_thread = None
|
||||||
|
|
||||||
extra_stylesheets = []
|
extra_stylesheets = []
|
||||||
|
|
||||||
# Use bulletproof janus-based queues for sync/async reliability
|
update_q = queue.PriorityQueue()
|
||||||
update_q = RecheckPriorityQueue()
|
notification_q = queue.Queue()
|
||||||
notification_q = NotificationQueue()
|
|
||||||
MAX_QUEUE_SIZE = 2000
|
MAX_QUEUE_SIZE = 2000
|
||||||
|
|
||||||
app = Flask(__name__,
|
app = Flask(__name__,
|
||||||
@@ -58,9 +54,6 @@ app = Flask(__name__,
|
|||||||
static_folder="static",
|
static_folder="static",
|
||||||
template_folder="templates")
|
template_folder="templates")
|
||||||
|
|
||||||
# Will be initialized in changedetection_app
|
|
||||||
socketio_server = None
|
|
||||||
|
|
||||||
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
|
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
|
||||||
CORS(app)
|
CORS(app)
|
||||||
|
|
||||||
@@ -98,7 +91,7 @@ watch_api = Api(app, decorators=[csrf.exempt])
|
|||||||
def init_app_secret(datastore_path):
|
def init_app_secret(datastore_path):
|
||||||
secret = ""
|
secret = ""
|
||||||
|
|
||||||
path = os.path.join(datastore_path, "secret.txt")
|
path = "{}/secret.txt".format(datastore_path)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(path, "r") as f:
|
with open(path, "r") as f:
|
||||||
@@ -122,18 +115,6 @@ def get_darkmode_state():
|
|||||||
def get_css_version():
|
def get_css_version():
|
||||||
return __version__
|
return __version__
|
||||||
|
|
||||||
@app.template_global()
|
|
||||||
def get_socketio_path():
|
|
||||||
"""Generate the correct Socket.IO path prefix for the client"""
|
|
||||||
# If behind a proxy with a sub-path, we need to respect that path
|
|
||||||
prefix = ""
|
|
||||||
if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers:
|
|
||||||
prefix = request.headers['X-Forwarded-Prefix']
|
|
||||||
|
|
||||||
# Socket.IO will be available at {prefix}/socket.io/
|
|
||||||
return prefix
|
|
||||||
|
|
||||||
|
|
||||||
@app.template_filter('format_number_locale')
|
@app.template_filter('format_number_locale')
|
||||||
def _jinja2_filter_format_number_locale(value: float) -> str:
|
def _jinja2_filter_format_number_locale(value: float) -> str:
|
||||||
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
"Formats for example 4000.10 to the local locale default of 4,000.10"
|
||||||
@@ -144,32 +125,10 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
|
|||||||
|
|
||||||
@app.template_global('is_checking_now')
|
@app.template_global('is_checking_now')
|
||||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
|
||||||
return worker_handler.is_watch_running(watch_obj['uuid'])
|
# Worker thread tells us which UUID it is currently processing.
|
||||||
|
for t in running_update_threads:
|
||||||
@app.template_global('get_watch_queue_position')
|
if t.current_uuid == watch_obj['uuid']:
|
||||||
def _get_watch_queue_position(watch_obj):
|
return True
|
||||||
"""Get the position of a watch in the queue"""
|
|
||||||
uuid = watch_obj['uuid']
|
|
||||||
return update_q.get_uuid_position(uuid)
|
|
||||||
|
|
||||||
@app.template_global('get_current_worker_count')
|
|
||||||
def _get_current_worker_count():
|
|
||||||
"""Get the current number of operational workers"""
|
|
||||||
return worker_handler.get_worker_count()
|
|
||||||
|
|
||||||
@app.template_global('get_worker_status_info')
|
|
||||||
def _get_worker_status_info():
|
|
||||||
"""Get detailed worker status information for display"""
|
|
||||||
status = worker_handler.get_worker_status()
|
|
||||||
running_uuids = worker_handler.get_running_uuids()
|
|
||||||
|
|
||||||
return {
|
|
||||||
'count': status['worker_count'],
|
|
||||||
'type': status['worker_type'],
|
|
||||||
'active_workers': len(running_uuids),
|
|
||||||
'processing_watches': running_uuids,
|
|
||||||
'loop_running': status.get('async_loop_running', None)
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
|
# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
|
||||||
@@ -256,15 +215,12 @@ class User(flask_login.UserMixin):
|
|||||||
def changedetection_app(config=None, datastore_o=None):
|
def changedetection_app(config=None, datastore_o=None):
|
||||||
logger.trace("TRACE log is enabled")
|
logger.trace("TRACE log is enabled")
|
||||||
|
|
||||||
global datastore, socketio_server
|
global datastore
|
||||||
datastore = datastore_o
|
datastore = datastore_o
|
||||||
|
|
||||||
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
# so far just for read-only via tests, but this will be moved eventually to be the main source
|
||||||
# (instead of the global var)
|
# (instead of the global var)
|
||||||
app.config['DATASTORE'] = datastore_o
|
app.config['DATASTORE'] = datastore_o
|
||||||
|
|
||||||
# Store the signal in the app config to ensure it's accessible everywhere
|
|
||||||
app.config['watch_check_update_SIGNAL'] = watch_check_update
|
|
||||||
|
|
||||||
login_manager = flask_login.LoginManager(app)
|
login_manager = flask_login.LoginManager(app)
|
||||||
login_manager.login_view = 'login'
|
login_manager.login_view = 'login'
|
||||||
@@ -277,8 +233,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
if has_password_enabled and not flask_login.current_user.is_authenticated:
|
if has_password_enabled and not flask_login.current_user.is_authenticated:
|
||||||
# Permitted
|
# Permitted
|
||||||
if request.endpoint and request.endpoint == 'static_content' and request.view_args:
|
if request.endpoint and request.endpoint == 'static_content' and request.view_args and request.view_args.get('group') in ['styles', 'js', 'images', 'favicons']:
|
||||||
# Handled by static_content handler
|
|
||||||
return None
|
return None
|
||||||
# Permitted
|
# Permitted
|
||||||
elif request.endpoint and 'login' in request.endpoint:
|
elif request.endpoint and 'login' in request.endpoint:
|
||||||
@@ -292,9 +247,6 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
# RSS access with token is allowed
|
# RSS access with token is allowed
|
||||||
elif request.endpoint and 'rss.feed' in request.endpoint:
|
elif request.endpoint and 'rss.feed' in request.endpoint:
|
||||||
return None
|
return None
|
||||||
# Socket.IO routes - need separate handling
|
|
||||||
elif request.path.startswith('/socket.io/'):
|
|
||||||
return None
|
|
||||||
# API routes - use their own auth mechanism (@auth.check_token)
|
# API routes - use their own auth mechanism (@auth.check_token)
|
||||||
elif request.path.startswith('/api/'):
|
elif request.path.startswith('/api/'):
|
||||||
return None
|
return None
|
||||||
@@ -305,9 +257,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
watch_api.add_resource(WatchSingleHistory,
|
watch_api.add_resource(WatchSingleHistory,
|
||||||
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
'/api/v1/watch/<string:uuid>/history/<string:timestamp>',
|
||||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||||
watch_api.add_resource(WatchFavicon,
|
|
||||||
'/api/v1/watch/<string:uuid>/favicon',
|
|
||||||
resource_class_kwargs={'datastore': datastore})
|
|
||||||
watch_api.add_resource(WatchHistory,
|
watch_api.add_resource(WatchHistory,
|
||||||
'/api/v1/watch/<string:uuid>/history',
|
'/api/v1/watch/<string:uuid>/history',
|
||||||
resource_class_kwargs={'datastore': datastore})
|
resource_class_kwargs={'datastore': datastore})
|
||||||
@@ -334,8 +284,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
watch_api.add_resource(Search, '/api/v1/search',
|
watch_api.add_resource(Search, '/api/v1/search',
|
||||||
resource_class_kwargs={'datastore': datastore})
|
resource_class_kwargs={'datastore': datastore})
|
||||||
|
|
||||||
watch_api.add_resource(Notifications, '/api/v1/notifications',
|
|
||||||
resource_class_kwargs={'datastore': datastore})
|
|
||||||
|
|
||||||
@login_manager.user_loader
|
@login_manager.user_loader
|
||||||
def user_loader(email):
|
def user_loader(email):
|
||||||
@@ -402,15 +351,11 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
|
@app.route("/static/<string:group>/<string:filename>", methods=['GET'])
|
||||||
def static_content(group, filename):
|
def static_content(group, filename):
|
||||||
from flask import make_response
|
from flask import make_response
|
||||||
import re
|
|
||||||
group = re.sub(r'[^\w.-]+', '', group.lower())
|
|
||||||
filename = re.sub(r'[^\w.-]+', '', filename.lower())
|
|
||||||
|
|
||||||
if group == 'screenshot':
|
if group == 'screenshot':
|
||||||
# Could be sensitive, follow password requirements
|
# Could be sensitive, follow password requirements
|
||||||
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
|
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
|
||||||
if not datastore.data['settings']['application'].get('shared_diff_access'):
|
abort(403)
|
||||||
abort(403)
|
|
||||||
|
|
||||||
screenshot_filename = "last-screenshot.png" if not request.args.get('error_screenshot') else "last-error-screenshot.png"
|
screenshot_filename = "last-screenshot.png" if not request.args.get('error_screenshot') else "last-error-screenshot.png"
|
||||||
|
|
||||||
@@ -427,32 +372,6 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
abort(404)
|
abort(404)
|
||||||
|
|
||||||
if group == 'favicon':
|
|
||||||
# Could be sensitive, follow password requirements
|
|
||||||
if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
|
|
||||||
abort(403)
|
|
||||||
# Get the watch object
|
|
||||||
watch = datastore.data['watching'].get(filename)
|
|
||||||
if not watch:
|
|
||||||
abort(404)
|
|
||||||
|
|
||||||
favicon_filename = watch.get_favicon_filename()
|
|
||||||
if favicon_filename:
|
|
||||||
try:
|
|
||||||
import magic
|
|
||||||
mime = magic.from_file(
|
|
||||||
os.path.join(watch.watch_data_dir, favicon_filename),
|
|
||||||
mime=True
|
|
||||||
)
|
|
||||||
except ImportError:
|
|
||||||
# Fallback, no python-magic
|
|
||||||
import mimetypes
|
|
||||||
mime, encoding = mimetypes.guess_type(favicon_filename)
|
|
||||||
|
|
||||||
response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
|
|
||||||
response.headers['Content-type'] = mime
|
|
||||||
response.headers['Cache-Control'] = 'max-age=300, must-revalidate' # Cache for 5 minutes, then revalidate
|
|
||||||
return response
|
|
||||||
|
|
||||||
if group == 'visual_selector_data':
|
if group == 'visual_selector_data':
|
||||||
# Could be sensitive, follow password requirements
|
# Could be sensitive, follow password requirements
|
||||||
@@ -470,7 +389,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
response.headers['Content-Type'] = 'application/json'
|
response.headers['Content-Type'] = 'application/json'
|
||||||
response.headers['Content-Encoding'] = 'deflate'
|
response.headers['Content-Encoding'] = 'deflate'
|
||||||
else:
|
else:
|
||||||
logger.error(f'Request elements.deflate at "{watch_directory}" but was not found.')
|
logger.error(f'Request elements.deflate at "{watch_directory}" but was notfound.')
|
||||||
abort(404)
|
abort(404)
|
||||||
|
|
||||||
if response:
|
if response:
|
||||||
@@ -485,7 +404,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
# These files should be in our subdirectory
|
# These files should be in our subdirectory
|
||||||
try:
|
try:
|
||||||
return send_from_directory(f"static/{group}", path=filename)
|
return send_from_directory("static/{}".format(group), path=filename)
|
||||||
except FileNotFoundError:
|
except FileNotFoundError:
|
||||||
abort(404)
|
abort(404)
|
||||||
|
|
||||||
@@ -514,126 +433,24 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
import changedetectionio.conditions.blueprint as conditions
|
import changedetectionio.conditions.blueprint as conditions
|
||||||
app.register_blueprint(conditions.construct_blueprint(datastore), url_prefix='/conditions')
|
app.register_blueprint(conditions.construct_blueprint(datastore), url_prefix='/conditions')
|
||||||
|
|
||||||
import changedetectionio.blueprint.rss.blueprint as rss
|
import changedetectionio.blueprint.rss as rss
|
||||||
app.register_blueprint(rss.construct_blueprint(datastore), url_prefix='/rss')
|
app.register_blueprint(rss.construct_blueprint(datastore), url_prefix='/rss')
|
||||||
|
|
||||||
# watchlist UI buttons etc
|
# watchlist UI buttons etc
|
||||||
import changedetectionio.blueprint.ui as ui
|
import changedetectionio.blueprint.ui as ui
|
||||||
app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_handler, queuedWatchMetaData, watch_check_update))
|
app.register_blueprint(ui.construct_blueprint(datastore, update_q, running_update_threads, queuedWatchMetaData))
|
||||||
|
|
||||||
import changedetectionio.blueprint.watchlist as watchlist
|
import changedetectionio.blueprint.watchlist as watchlist
|
||||||
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
|
app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
|
||||||
|
|
||||||
# Initialize Socket.IO server conditionally based on settings
|
|
||||||
socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
|
|
||||||
if socket_io_enabled:
|
|
||||||
from changedetectionio.realtime.socket_server import init_socketio
|
|
||||||
global socketio_server
|
|
||||||
socketio_server = init_socketio(app, datastore)
|
|
||||||
logger.info("Socket.IO server initialized")
|
|
||||||
else:
|
|
||||||
logger.info("Socket.IO server disabled via settings")
|
|
||||||
socketio_server = None
|
|
||||||
|
|
||||||
# Memory cleanup endpoint
|
|
||||||
@app.route('/gc-cleanup', methods=['GET'])
|
|
||||||
@login_optionally_required
|
|
||||||
def gc_cleanup():
|
|
||||||
from changedetectionio.gc_cleanup import memory_cleanup
|
|
||||||
from flask import jsonify
|
|
||||||
|
|
||||||
result = memory_cleanup(app)
|
|
||||||
return jsonify({"status": "success", "message": "Memory cleanup completed", "result": result})
|
|
||||||
|
|
||||||
# Worker health check endpoint
|
|
||||||
@app.route('/worker-health', methods=['GET'])
|
|
||||||
@login_optionally_required
|
|
||||||
def worker_health():
|
|
||||||
from flask import jsonify
|
|
||||||
|
|
||||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
|
||||||
|
|
||||||
# Get basic status
|
|
||||||
status = worker_handler.get_worker_status()
|
|
||||||
|
|
||||||
# Perform health check
|
|
||||||
health_result = worker_handler.check_worker_health(
|
|
||||||
expected_count=expected_workers,
|
|
||||||
update_q=update_q,
|
|
||||||
notification_q=notification_q,
|
|
||||||
app=app,
|
|
||||||
datastore=datastore
|
|
||||||
)
|
|
||||||
|
|
||||||
return jsonify({
|
|
||||||
"status": "success",
|
|
||||||
"worker_status": status,
|
|
||||||
"health_check": health_result,
|
|
||||||
"expected_workers": expected_workers
|
|
||||||
})
|
|
||||||
|
|
||||||
# Queue status endpoint
|
|
||||||
@app.route('/queue-status', methods=['GET'])
|
|
||||||
@login_optionally_required
|
|
||||||
def queue_status():
|
|
||||||
from flask import jsonify, request
|
|
||||||
|
|
||||||
# Get specific UUID position if requested
|
|
||||||
target_uuid = request.args.get('uuid')
|
|
||||||
|
|
||||||
if target_uuid:
|
|
||||||
position_info = update_q.get_uuid_position(target_uuid)
|
|
||||||
return jsonify({
|
|
||||||
"status": "success",
|
|
||||||
"uuid": target_uuid,
|
|
||||||
"queue_position": position_info
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
# Get pagination parameters
|
|
||||||
limit = request.args.get('limit', type=int)
|
|
||||||
offset = request.args.get('offset', type=int, default=0)
|
|
||||||
summary_only = request.args.get('summary', type=bool, default=False)
|
|
||||||
|
|
||||||
if summary_only:
|
|
||||||
# Fast summary for large queues
|
|
||||||
summary = update_q.get_queue_summary()
|
|
||||||
return jsonify({
|
|
||||||
"status": "success",
|
|
||||||
"queue_summary": summary
|
|
||||||
})
|
|
||||||
else:
|
|
||||||
# Get queued items with pagination support
|
|
||||||
if limit is None:
|
|
||||||
# Default limit for large queues to prevent performance issues
|
|
||||||
queue_size = update_q.qsize()
|
|
||||||
if queue_size > 100:
|
|
||||||
limit = 50
|
|
||||||
logger.warning(f"Large queue ({queue_size} items) detected, limiting to {limit} items. Use ?limit=N for more.")
|
|
||||||
|
|
||||||
all_queued = update_q.get_all_queued_uuids(limit=limit, offset=offset)
|
|
||||||
return jsonify({
|
|
||||||
"status": "success",
|
|
||||||
"queue_size": update_q.qsize(),
|
|
||||||
"queued_data": all_queued
|
|
||||||
})
|
|
||||||
|
|
||||||
# Start the async workers during app initialization
|
|
||||||
# Can be overridden by ENV or use the default settings
|
|
||||||
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
|
||||||
logger.info(f"Starting {n_workers} workers during app initialization")
|
|
||||||
worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
|
|
||||||
|
|
||||||
# @todo handle ctrl break
|
# @todo handle ctrl break
|
||||||
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
|
ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
|
||||||
threading.Thread(target=notification_runner).start()
|
threading.Thread(target=notification_runner).start()
|
||||||
|
|
||||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
|
||||||
# Check for new release version, but not when running in test/build or pytest
|
# Check for new release version, but not when running in test/build or pytest
|
||||||
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
|
if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')):
|
||||||
threading.Thread(target=check_for_new_version).start()
|
threading.Thread(target=check_for_new_version).start()
|
||||||
|
|
||||||
# Return the Flask app - the Socket.IO will be attached to it but initialized separately
|
|
||||||
# This avoids circular dependencies
|
|
||||||
return app
|
return app
|
||||||
|
|
||||||
|
|
||||||
@@ -669,87 +486,72 @@ def notification_runner():
|
|||||||
global notification_debug_log
|
global notification_debug_log
|
||||||
from datetime import datetime
|
from datetime import datetime
|
||||||
import json
|
import json
|
||||||
with app.app_context():
|
while not app.config.exit.is_set():
|
||||||
while not app.config.exit.is_set():
|
try:
|
||||||
|
# At the moment only one thread runs (single runner)
|
||||||
|
n_object = notification_q.get(block=False)
|
||||||
|
except queue.Empty:
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
else:
|
||||||
|
|
||||||
|
now = datetime.now()
|
||||||
|
sent_obj = None
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# At the moment only one thread runs (single runner)
|
from changedetectionio import notification
|
||||||
n_object = notification_q.get(block=False)
|
# Fallback to system config if not set
|
||||||
except queue.Empty:
|
if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
|
||||||
time.sleep(1)
|
n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
|
||||||
|
|
||||||
else:
|
if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'):
|
||||||
|
n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
|
||||||
|
|
||||||
now = datetime.now()
|
if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'):
|
||||||
sent_obj = None
|
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
|
||||||
|
|
||||||
try:
|
sent_obj = notification.process_notification(n_object, datastore)
|
||||||
from changedetectionio.notification.handler import process_notification
|
|
||||||
|
|
||||||
# Fallback to system config if not set
|
except Exception as e:
|
||||||
if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
|
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
|
||||||
n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
|
|
||||||
|
|
||||||
if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'):
|
# UUID wont be present when we submit a 'test' from the global settings
|
||||||
n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
|
if 'uuid' in n_object:
|
||||||
|
datastore.update_watch(uuid=n_object['uuid'],
|
||||||
if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'):
|
update_obj={'last_notification_error': "Notification error detected, goto notification log."})
|
||||||
n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
|
|
||||||
if n_object.get('notification_urls', {}):
|
|
||||||
sent_obj = process_notification(n_object, datastore)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Watch URL: {n_object['watch_url']} Error {str(e)}")
|
|
||||||
|
|
||||||
# UUID wont be present when we submit a 'test' from the global settings
|
|
||||||
if 'uuid' in n_object:
|
|
||||||
datastore.update_watch(uuid=n_object['uuid'],
|
|
||||||
update_obj={'last_notification_error': "Notification error detected, goto notification log."})
|
|
||||||
|
|
||||||
log_lines = str(e).splitlines()
|
|
||||||
notification_debug_log += log_lines
|
|
||||||
|
|
||||||
with app.app_context():
|
|
||||||
app.config['watch_check_update_SIGNAL'].send(app_context=app, watch_uuid=n_object.get('uuid'))
|
|
||||||
|
|
||||||
# Process notifications
|
|
||||||
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
|
|
||||||
# Trim the log length
|
|
||||||
notification_debug_log = notification_debug_log[-100:]
|
|
||||||
|
|
||||||
|
log_lines = str(e).splitlines()
|
||||||
|
notification_debug_log += log_lines
|
||||||
|
|
||||||
|
# Process notifications
|
||||||
|
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
|
||||||
|
# Trim the log length
|
||||||
|
notification_debug_log = notification_debug_log[-100:]
|
||||||
|
|
||||||
# Threaded runner, look for new watches to feed into the Queue.
|
# Threaded runner, look for new watches to feed into the Queue.
|
||||||
def ticker_thread_check_time_launch_checks():
|
def ticker_thread_check_time_launch_checks():
|
||||||
import random
|
import random
|
||||||
|
from changedetectionio import update_worker
|
||||||
proxy_last_called_time = {}
|
proxy_last_called_time = {}
|
||||||
last_health_check = 0
|
|
||||||
|
|
||||||
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
|
||||||
logger.debug(f"System env MINIMUM_SECONDS_RECHECK_TIME {recheck_time_minimum_seconds}")
|
logger.debug(f"System env MINIMUM_SECONDS_RECHECK_TIME {recheck_time_minimum_seconds}")
|
||||||
|
|
||||||
# Workers are now started during app initialization, not here
|
# Spin up Workers that do the fetching
|
||||||
|
# Can be overriden by ENV or use the default settings
|
||||||
|
n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
||||||
|
for _ in range(n_workers):
|
||||||
|
new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
|
||||||
|
running_update_threads.append(new_worker)
|
||||||
|
new_worker.start()
|
||||||
|
|
||||||
while not app.config.exit.is_set():
|
while not app.config.exit.is_set():
|
||||||
|
|
||||||
# Periodic worker health check (every 60 seconds)
|
|
||||||
now = time.time()
|
|
||||||
if now - last_health_check > 60:
|
|
||||||
expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
|
|
||||||
health_result = worker_handler.check_worker_health(
|
|
||||||
expected_count=expected_workers,
|
|
||||||
update_q=update_q,
|
|
||||||
notification_q=notification_q,
|
|
||||||
app=app,
|
|
||||||
datastore=datastore
|
|
||||||
)
|
|
||||||
|
|
||||||
if health_result['status'] != 'healthy':
|
|
||||||
logger.warning(f"Worker health check: {health_result['message']}")
|
|
||||||
|
|
||||||
last_health_check = now
|
|
||||||
|
|
||||||
# Get a list of watches by UUID that are currently fetching data
|
# Get a list of watches by UUID that are currently fetching data
|
||||||
running_uuids = worker_handler.get_running_uuids()
|
running_uuids = []
|
||||||
|
for t in running_update_threads:
|
||||||
|
if t.current_uuid:
|
||||||
|
running_uuids.append(t.current_uuid)
|
||||||
|
|
||||||
# Re #232 - Deepcopy the data incase it changes while we're iterating through it all
|
# Re #232 - Deepcopy the data incase it changes while we're iterating through it all
|
||||||
watch_uuid_list = []
|
watch_uuid_list = []
|
||||||
@@ -844,22 +646,16 @@ def ticker_thread_check_time_launch_checks():
|
|||||||
|
|
||||||
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
||||||
priority = int(time.time())
|
priority = int(time.time())
|
||||||
|
logger.debug(
|
||||||
|
f"> Queued watch UUID {uuid} "
|
||||||
|
f"last checked at {watch['last_checked']} "
|
||||||
|
f"queued at {now:0.2f} priority {priority} "
|
||||||
|
f"jitter {watch.jitter_seconds:0.2f}s, "
|
||||||
|
f"{now - watch['last_checked']:0.2f}s since last checked")
|
||||||
|
|
||||||
# Into the queue with you
|
# Into the queue with you
|
||||||
queued_successfully = worker_handler.queue_item_async_safe(update_q,
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
|
||||||
queuedWatchMetaData.PrioritizedItem(priority=priority,
|
|
||||||
item={'uuid': uuid})
|
|
||||||
)
|
|
||||||
if queued_successfully:
|
|
||||||
logger.debug(
|
|
||||||
f"> Queued watch UUID {uuid} "
|
|
||||||
f"last checked at {watch['last_checked']} "
|
|
||||||
f"queued at {now:0.2f} priority {priority} "
|
|
||||||
f"jitter {watch.jitter_seconds:0.2f}s, "
|
|
||||||
f"{now - watch['last_checked']:0.2f}s since last checked")
|
|
||||||
else:
|
|
||||||
logger.critical(f"CRITICAL: Failed to queue watch UUID {uuid} in ticker thread!")
|
|
||||||
|
|
||||||
# Reset for next time
|
# Reset for next time
|
||||||
watch.jitter_seconds = 0
|
watch.jitter_seconds = 0
|
||||||
|
|
||||||
|
|||||||
@@ -3,7 +3,6 @@ import re
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from wtforms.widgets.core import TimeInput
|
from wtforms.widgets.core import TimeInput
|
||||||
|
|
||||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
|
|
||||||
from changedetectionio.conditions.form import ConditionFormRow
|
from changedetectionio.conditions.form import ConditionFormRow
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
|
|
||||||
@@ -224,37 +223,27 @@ class StringDictKeyValue(StringField):
|
|||||||
|
|
||||||
def _value(self):
|
def _value(self):
|
||||||
if self.data:
|
if self.data:
|
||||||
output = ''
|
output = u''
|
||||||
for k, v in self.data.items():
|
for k in self.data.keys():
|
||||||
output += f"{k}: {v}\r\n"
|
output += "{}: {}\r\n".format(k, self.data[k])
|
||||||
|
|
||||||
return output
|
return output
|
||||||
else:
|
else:
|
||||||
return ''
|
return u''
|
||||||
|
|
||||||
# incoming data processing + validation
|
# incoming
|
||||||
def process_formdata(self, valuelist):
|
def process_formdata(self, valuelist):
|
||||||
self.data = {}
|
|
||||||
errors = []
|
|
||||||
if valuelist:
|
if valuelist:
|
||||||
# Remove empty strings (blank lines)
|
self.data = {}
|
||||||
cleaned = [line.strip() for line in valuelist[0].split("\n") if line.strip()]
|
# Remove empty strings
|
||||||
for idx, s in enumerate(cleaned, start=1):
|
cleaned = list(filter(None, valuelist[0].split("\n")))
|
||||||
if ':' not in s:
|
for s in cleaned:
|
||||||
errors.append(f"Line {idx} is missing a ':' separator.")
|
parts = s.strip().split(':', 1)
|
||||||
continue
|
if len(parts) == 2:
|
||||||
parts = s.split(':', 1)
|
self.data.update({parts[0].strip(): parts[1].strip()})
|
||||||
key = parts[0].strip()
|
|
||||||
value = parts[1].strip()
|
|
||||||
|
|
||||||
if not key:
|
else:
|
||||||
errors.append(f"Line {idx} has an empty key.")
|
self.data = {}
|
||||||
if not value:
|
|
||||||
errors.append(f"Line {idx} has an empty value.")
|
|
||||||
|
|
||||||
self.data[key] = value
|
|
||||||
|
|
||||||
if errors:
|
|
||||||
raise ValidationError("Invalid input:\n" + "\n".join(errors))
|
|
||||||
|
|
||||||
class ValidateContentFetcherIsReady(object):
|
class ValidateContentFetcherIsReady(object):
|
||||||
"""
|
"""
|
||||||
@@ -316,10 +305,10 @@ class ValidateAppRiseServers(object):
|
|||||||
|
|
||||||
def __call__(self, form, field):
|
def __call__(self, form, field):
|
||||||
import apprise
|
import apprise
|
||||||
from .notification.apprise_plugin.assets import apprise_asset
|
apobj = apprise.Apprise()
|
||||||
from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler # noqa: F401
|
|
||||||
|
|
||||||
apobj = apprise.Apprise(asset=apprise_asset)
|
# so that the custom endpoints are registered
|
||||||
|
from .apprise_asset import asset
|
||||||
|
|
||||||
for server_url in field.data:
|
for server_url in field.data:
|
||||||
url = server_url.strip()
|
url = server_url.strip()
|
||||||
@@ -396,19 +385,6 @@ def validate_url(test_url):
|
|||||||
# This should be wtforms.validators.
|
# This should be wtforms.validators.
|
||||||
raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format')
|
raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format')
|
||||||
|
|
||||||
|
|
||||||
class ValidateSinglePythonRegexString(object):
|
|
||||||
def __init__(self, message=None):
|
|
||||||
self.message = message
|
|
||||||
|
|
||||||
def __call__(self, form, field):
|
|
||||||
try:
|
|
||||||
re.compile(field.data)
|
|
||||||
except re.error:
|
|
||||||
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
|
|
||||||
raise ValidationError(message % (field.data))
|
|
||||||
|
|
||||||
|
|
||||||
class ValidateListRegex(object):
|
class ValidateListRegex(object):
|
||||||
"""
|
"""
|
||||||
Validates that anything that looks like a regex passes as a regex
|
Validates that anything that looks like a regex passes as a regex
|
||||||
@@ -427,7 +403,6 @@ class ValidateListRegex(object):
|
|||||||
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
|
message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
|
||||||
raise ValidationError(message % (line))
|
raise ValidationError(message % (line))
|
||||||
|
|
||||||
|
|
||||||
class ValidateCSSJSONXPATHInput(object):
|
class ValidateCSSJSONXPATHInput(object):
|
||||||
"""
|
"""
|
||||||
Filter validation
|
Filter validation
|
||||||
@@ -610,7 +585,7 @@ class processor_text_json_diff_form(commonSettingsForm):
|
|||||||
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
|
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
|
||||||
filter_text_removed = BooleanField('Removed lines', default=True)
|
filter_text_removed = BooleanField('Removed lines', default=True)
|
||||||
|
|
||||||
trigger_text = StringListField('Keyword triggers - Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
||||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||||
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
||||||
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
|
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
|
||||||
@@ -733,12 +708,6 @@ class globalSettingsRequestForm(Form):
|
|||||||
jitter_seconds = IntegerField('Random jitter seconds ± check',
|
jitter_seconds = IntegerField('Random jitter seconds ± check',
|
||||||
render_kw={"style": "width: 5em;"},
|
render_kw={"style": "width: 5em;"},
|
||||||
validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
|
||||||
|
|
||||||
workers = IntegerField('Number of fetch workers',
|
|
||||||
render_kw={"style": "width: 5em;"},
|
|
||||||
validators=[validators.NumberRange(min=1, max=50,
|
|
||||||
message="Should be between 1 and 50")])
|
|
||||||
|
|
||||||
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
|
extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
|
||||||
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
|
extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
|
||||||
|
|
||||||
@@ -751,10 +720,6 @@ class globalSettingsRequestForm(Form):
|
|||||||
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
|
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
class globalSettingsApplicationUIForm(Form):
|
|
||||||
open_diff_in_new_tab = BooleanField("Open 'History' page in a new tab", default=True, validators=[validators.Optional()])
|
|
||||||
socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
|
|
||||||
favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])
|
|
||||||
|
|
||||||
# datastore.data['settings']['application']..
|
# datastore.data['settings']['application']..
|
||||||
class globalSettingsApplicationForm(commonSettingsForm):
|
class globalSettingsApplicationForm(commonSettingsForm):
|
||||||
@@ -774,9 +739,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
|||||||
render_kw={"style": "width: 5em;"},
|
render_kw={"style": "width: 5em;"},
|
||||||
validators=[validators.NumberRange(min=0,
|
validators=[validators.NumberRange(min=0,
|
||||||
message="Should be atleast zero (disabled)")])
|
message="Should be atleast zero (disabled)")])
|
||||||
|
|
||||||
rss_content_format = SelectField('RSS Content format', choices=RSS_FORMAT_TYPES)
|
|
||||||
|
|
||||||
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
||||||
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
||||||
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
|
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
|
||||||
@@ -786,7 +748,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
|||||||
render_kw={"style": "width: 5em;"},
|
render_kw={"style": "width: 5em;"},
|
||||||
validators=[validators.NumberRange(min=0,
|
validators=[validators.NumberRange(min=0,
|
||||||
message="Should contain zero or more attempts")])
|
message="Should contain zero or more attempts")])
|
||||||
ui = FormField(globalSettingsApplicationUIForm)
|
|
||||||
|
|
||||||
|
|
||||||
class globalSettingsForm(Form):
|
class globalSettingsForm(Form):
|
||||||
@@ -805,5 +766,5 @@ class globalSettingsForm(Form):
|
|||||||
|
|
||||||
|
|
||||||
class extractDataForm(Form):
|
class extractDataForm(Form):
|
||||||
extract_regex = StringField('RegEx to extract', validators=[validators.DataRequired(), ValidateSinglePythonRegexString()])
|
extract_regex = StringField('RegEx to extract', validators=[validators.Length(min=1, message="Needs a RegEx")])
|
||||||
extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"})
|
extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"})
|
||||||
|
|||||||
@@ -1,162 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
import ctypes
|
|
||||||
import gc
|
|
||||||
import re
|
|
||||||
import psutil
|
|
||||||
import sys
|
|
||||||
import threading
|
|
||||||
import importlib
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
def memory_cleanup(app=None):
|
|
||||||
"""
|
|
||||||
Perform comprehensive memory cleanup operations and log memory usage
|
|
||||||
at each step with nicely formatted numbers.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
app: Optional Flask app instance for clearing Flask-specific caches
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: Status message
|
|
||||||
"""
|
|
||||||
# Get current process
|
|
||||||
process = psutil.Process()
|
|
||||||
|
|
||||||
# Log initial memory usage with nicely formatted numbers
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"Memory cleanup started - Current memory usage: {current_memory:,.2f} MB")
|
|
||||||
|
|
||||||
# 1. Standard garbage collection - force full collection on all generations
|
|
||||||
gc.collect(0) # Collect youngest generation
|
|
||||||
gc.collect(1) # Collect middle generation
|
|
||||||
gc.collect(2) # Collect oldest generation
|
|
||||||
|
|
||||||
# Run full collection again to ensure maximum cleanup
|
|
||||||
gc.collect()
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"After full gc.collect() - Memory usage: {current_memory:,.2f} MB")
|
|
||||||
|
|
||||||
|
|
||||||
# 3. Call libc's malloc_trim to release memory back to the OS
|
|
||||||
libc = ctypes.CDLL("libc.so.6")
|
|
||||||
libc.malloc_trim(0)
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"After malloc_trim(0) - Memory usage: {current_memory:,.2f} MB")
|
|
||||||
|
|
||||||
# 4. Clear Python's regex cache
|
|
||||||
re.purge()
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"After re.purge() - Memory usage: {current_memory:,.2f} MB")
|
|
||||||
|
|
||||||
# 5. Reset thread-local storage
|
|
||||||
# Create a new thread local object to encourage cleanup of old ones
|
|
||||||
threading.local()
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"After threading.local() - Memory usage: {current_memory:,.2f} MB")
|
|
||||||
|
|
||||||
# 6. Clear sys.intern cache if Python version supports it
|
|
||||||
try:
|
|
||||||
sys.intern.clear()
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"After sys.intern.clear() - Memory usage: {current_memory:,.2f} MB")
|
|
||||||
except (AttributeError, TypeError):
|
|
||||||
logger.debug("sys.intern.clear() not supported in this Python version")
|
|
||||||
|
|
||||||
# 7. Clear XML/lxml caches if available
|
|
||||||
try:
|
|
||||||
# Check if lxml.etree is in use
|
|
||||||
lxml_etree = sys.modules.get('lxml.etree')
|
|
||||||
if lxml_etree:
|
|
||||||
# Clear module-level caches
|
|
||||||
if hasattr(lxml_etree, 'clear_error_log'):
|
|
||||||
lxml_etree.clear_error_log()
|
|
||||||
|
|
||||||
# Check for _ErrorLog and _RotatingErrorLog objects and clear them
|
|
||||||
for obj in gc.get_objects():
|
|
||||||
if hasattr(obj, '__class__') and hasattr(obj.__class__, '__name__'):
|
|
||||||
class_name = obj.__class__.__name__
|
|
||||||
if class_name in ('_ErrorLog', '_RotatingErrorLog', '_DomainErrorLog') and hasattr(obj, 'clear'):
|
|
||||||
try:
|
|
||||||
obj.clear()
|
|
||||||
except (AttributeError, TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
# Clear Element objects which can hold references to documents
|
|
||||||
elif class_name in ('_Element', 'ElementBase') and hasattr(obj, 'clear'):
|
|
||||||
try:
|
|
||||||
obj.clear()
|
|
||||||
except (AttributeError, TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"After lxml.etree cleanup - Memory usage: {current_memory:,.2f} MB")
|
|
||||||
|
|
||||||
# Check if lxml.html is in use
|
|
||||||
lxml_html = sys.modules.get('lxml.html')
|
|
||||||
if lxml_html:
|
|
||||||
# Clear HTML-specific element types
|
|
||||||
for obj in gc.get_objects():
|
|
||||||
if hasattr(obj, '__class__') and hasattr(obj.__class__, '__name__'):
|
|
||||||
class_name = obj.__class__.__name__
|
|
||||||
if class_name in ('HtmlElement', 'FormElement', 'InputElement',
|
|
||||||
'SelectElement', 'TextareaElement', 'CheckboxGroup',
|
|
||||||
'RadioGroup', 'MultipleSelectOptions', 'FieldsDict') and hasattr(obj, 'clear'):
|
|
||||||
try:
|
|
||||||
obj.clear()
|
|
||||||
except (AttributeError, TypeError):
|
|
||||||
pass
|
|
||||||
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"After lxml.html cleanup - Memory usage: {current_memory:,.2f} MB")
|
|
||||||
except (ImportError, AttributeError):
|
|
||||||
logger.debug("lxml cleanup not applicable")
|
|
||||||
|
|
||||||
# 8. Clear JSON parser caches if applicable
|
|
||||||
try:
|
|
||||||
# Check if json module is being used and try to clear its cache
|
|
||||||
json_module = sys.modules.get('json')
|
|
||||||
if json_module and hasattr(json_module, '_default_encoder'):
|
|
||||||
json_module._default_encoder.markers.clear()
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"After JSON parser cleanup - Memory usage: {current_memory:,.2f} MB")
|
|
||||||
except (AttributeError, KeyError):
|
|
||||||
logger.debug("JSON cleanup not applicable")
|
|
||||||
|
|
||||||
# 9. Force Python's memory allocator to release unused memory
|
|
||||||
try:
|
|
||||||
if hasattr(sys, 'pypy_version_info'):
|
|
||||||
# PyPy has different memory management
|
|
||||||
gc.collect()
|
|
||||||
else:
|
|
||||||
# CPython - try to release unused memory
|
|
||||||
ctypes.pythonapi.PyGC_Collect()
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"After PyGC_Collect - Memory usage: {current_memory:,.2f} MB")
|
|
||||||
except (AttributeError, TypeError):
|
|
||||||
logger.debug("PyGC_Collect not supported")
|
|
||||||
|
|
||||||
# 10. Clear Flask-specific caches if applicable
|
|
||||||
if app:
|
|
||||||
try:
|
|
||||||
# Clear Flask caches if they exist
|
|
||||||
for key in list(app.config.get('_cache', {}).keys()):
|
|
||||||
app.config['_cache'].pop(key, None)
|
|
||||||
|
|
||||||
# Clear Jinja2 template cache if available
|
|
||||||
if hasattr(app, 'jinja_env') and hasattr(app.jinja_env, 'cache'):
|
|
||||||
app.jinja_env.cache.clear()
|
|
||||||
|
|
||||||
current_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.debug(f"After Flask cache clear - Memory usage: {current_memory:,.2f} MB")
|
|
||||||
except (AttributeError, KeyError):
|
|
||||||
logger.debug("No Flask cache to clear")
|
|
||||||
|
|
||||||
# Final garbage collection pass
|
|
||||||
gc.collect()
|
|
||||||
libc.malloc_trim(0)
|
|
||||||
|
|
||||||
# Log final memory usage
|
|
||||||
final_memory = process.memory_info().rss / 1024 / 1024
|
|
||||||
logger.info(f"Memory cleanup completed - Final memory usage: {final_memory:,.2f} MB")
|
|
||||||
return "cleaned"
|
|
||||||
@@ -309,10 +309,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
|||||||
soup = BeautifulSoup(content, 'html.parser')
|
soup = BeautifulSoup(content, 'html.parser')
|
||||||
|
|
||||||
if ensure_is_ldjson_info_type:
|
if ensure_is_ldjson_info_type:
|
||||||
bs_result = soup.find_all('script', {"type": "application/ld+json"})
|
bs_result = soup.findAll('script', {"type": "application/ld+json"})
|
||||||
else:
|
else:
|
||||||
bs_result = soup.find_all('script')
|
bs_result = soup.findAll('script')
|
||||||
bs_result += soup.find_all('body')
|
bs_result += soup.findAll('body')
|
||||||
|
|
||||||
bs_jsons = []
|
bs_jsons = []
|
||||||
for result in bs_result:
|
for result in bs_result:
|
||||||
@@ -366,41 +366,22 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
|||||||
# wordlist - list of regex's (str) or words (str)
|
# wordlist - list of regex's (str) or words (str)
|
||||||
# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
|
# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
|
||||||
def strip_ignore_text(content, wordlist, mode="content"):
|
def strip_ignore_text(content, wordlist, mode="content"):
|
||||||
|
i = 0
|
||||||
|
output = []
|
||||||
ignore_text = []
|
ignore_text = []
|
||||||
ignore_regex = []
|
ignore_regex = []
|
||||||
ignore_regex_multiline = []
|
ignored_line_numbers = []
|
||||||
ignored_lines = []
|
|
||||||
|
|
||||||
for k in wordlist:
|
for k in wordlist:
|
||||||
# Is it a regex?
|
# Is it a regex?
|
||||||
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
|
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
|
||||||
if res:
|
if res:
|
||||||
res = re.compile(perl_style_slash_enclosed_regex_to_options(k))
|
ignore_regex.append(re.compile(perl_style_slash_enclosed_regex_to_options(k)))
|
||||||
if res.flags & re.DOTALL or res.flags & re.MULTILINE:
|
|
||||||
ignore_regex_multiline.append(res)
|
|
||||||
else:
|
|
||||||
ignore_regex.append(res)
|
|
||||||
else:
|
else:
|
||||||
ignore_text.append(k.strip())
|
ignore_text.append(k.strip())
|
||||||
|
|
||||||
for r in ignore_regex_multiline:
|
for line in content.splitlines(keepends=True):
|
||||||
for match in r.finditer(content):
|
i += 1
|
||||||
content_lines = content[:match.end()].splitlines(keepends=True)
|
|
||||||
match_lines = content[match.start():match.end()].splitlines(keepends=True)
|
|
||||||
|
|
||||||
end_line = len(content_lines)
|
|
||||||
start_line = end_line - len(match_lines)
|
|
||||||
|
|
||||||
if end_line - start_line <= 1:
|
|
||||||
# Match is empty or in the middle of the line
|
|
||||||
ignored_lines.append(start_line)
|
|
||||||
else:
|
|
||||||
for i in range(start_line, end_line):
|
|
||||||
ignored_lines.append(i)
|
|
||||||
|
|
||||||
line_index = 0
|
|
||||||
lines = content.splitlines(keepends=True)
|
|
||||||
for line in lines:
|
|
||||||
# Always ignore blank lines in this mode. (when this function gets called)
|
# Always ignore blank lines in this mode. (when this function gets called)
|
||||||
got_match = False
|
got_match = False
|
||||||
for l in ignore_text:
|
for l in ignore_text:
|
||||||
@@ -412,19 +393,17 @@ def strip_ignore_text(content, wordlist, mode="content"):
|
|||||||
if r.search(line):
|
if r.search(line):
|
||||||
got_match = True
|
got_match = True
|
||||||
|
|
||||||
if got_match:
|
if not got_match:
|
||||||
ignored_lines.append(line_index)
|
# Not ignored, and should preserve "keepends"
|
||||||
|
output.append(line)
|
||||||
line_index += 1
|
else:
|
||||||
|
ignored_line_numbers.append(i)
|
||||||
ignored_lines = set([i for i in ignored_lines if i >= 0 and i < len(lines)])
|
|
||||||
|
|
||||||
# Used for finding out what to highlight
|
# Used for finding out what to highlight
|
||||||
if mode == "line numbers":
|
if mode == "line numbers":
|
||||||
return [i + 1 for i in ignored_lines]
|
return ignored_line_numbers
|
||||||
|
|
||||||
output_lines = set(range(len(lines))) - ignored_lines
|
return ''.join(output)
|
||||||
return ''.join([lines[i] for i in output_lines])
|
|
||||||
|
|
||||||
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
|
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
|
||||||
from xml.sax.saxutils import escape as xml_escape
|
from xml.sax.saxutils import escape as xml_escape
|
||||||
@@ -435,36 +414,50 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
|
|||||||
|
|
||||||
return re.sub(pattern, repl, html_content)
|
return re.sub(pattern, repl, html_content)
|
||||||
|
|
||||||
|
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
|
||||||
# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
|
|
||||||
|
|
||||||
|
|
||||||
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
|
|
||||||
from inscriptis import get_text
|
from inscriptis import get_text
|
||||||
from inscriptis.model.config import ParserConfig
|
from inscriptis.model.config import ParserConfig
|
||||||
|
|
||||||
|
"""Converts html string to a string with just the text. If ignoring
|
||||||
|
rendering anchor tag content is enable, anchor tag content are also
|
||||||
|
included in the text
|
||||||
|
|
||||||
|
:param html_content: string with html content
|
||||||
|
:param render_anchor_tag_content: boolean flag indicating whether to extract
|
||||||
|
hyperlinks (the anchor tag content) together with text. This refers to the
|
||||||
|
'href' inside 'a' tags.
|
||||||
|
Anchor tag content is rendered in the following manner:
|
||||||
|
'[ text ](anchor tag content)'
|
||||||
|
:return: extracted text from the HTML
|
||||||
|
"""
|
||||||
|
# if anchor tag content flag is set to True define a config for
|
||||||
|
# extracting this content
|
||||||
if render_anchor_tag_content:
|
if render_anchor_tag_content:
|
||||||
parser_config = ParserConfig(
|
parser_config = ParserConfig(
|
||||||
annotation_rules={"a": ["hyperlink"]},
|
annotation_rules={"a": ["hyperlink"]},
|
||||||
display_links=True
|
display_links=True
|
||||||
)
|
)
|
||||||
|
# otherwise set config to None/default
|
||||||
else:
|
else:
|
||||||
parser_config = None
|
parser_config = None
|
||||||
|
|
||||||
|
# RSS Mode - Inscriptis will treat `title` as something else.
|
||||||
|
# Make it as a regular block display element (//item/title)
|
||||||
|
# This is a bit of a hack - the real way it to use XSLT to convert it to HTML #1874
|
||||||
if is_rss:
|
if is_rss:
|
||||||
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
|
html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
|
||||||
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
||||||
|
|
||||||
text_content = get_text(html_content, config=parser_config)
|
text_content = get_text(html_content, config=parser_config)
|
||||||
|
|
||||||
return text_content
|
return text_content
|
||||||
|
|
||||||
|
|
||||||
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
|
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
|
||||||
def has_ldjson_product_info(content):
|
def has_ldjson_product_info(content):
|
||||||
try:
|
try:
|
||||||
# Better than .lower() which can use a lot of ram
|
lc = content.lower()
|
||||||
if (re.search(r'application/ld\+json', content, re.IGNORECASE) and
|
if 'application/ld+json' in lc and lc.count('"price"') == 1 and '"pricecurrency"' in lc:
|
||||||
re.search(r'"price"', content, re.IGNORECASE) and
|
|
||||||
re.search(r'"pricecurrency"', content, re.IGNORECASE)):
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# On some pages this is really terribly expensive when they dont really need it
|
# On some pages this is really terribly expensive when they dont really need it
|
||||||
|
|||||||
@@ -1,7 +1,4 @@
|
|||||||
from os import getenv
|
from os import getenv
|
||||||
|
|
||||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
|
|
||||||
|
|
||||||
from changedetectionio.notification import (
|
from changedetectionio.notification import (
|
||||||
default_notification_body,
|
default_notification_body,
|
||||||
default_notification_format,
|
default_notification_format,
|
||||||
@@ -12,8 +9,6 @@ from changedetectionio.notification import (
|
|||||||
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
|
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
|
||||||
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
|
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class model(dict):
|
class model(dict):
|
||||||
base_config = {
|
base_config = {
|
||||||
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
||||||
@@ -53,18 +48,12 @@ class model(dict):
|
|||||||
'password': False,
|
'password': False,
|
||||||
'render_anchor_tag_content': False,
|
'render_anchor_tag_content': False,
|
||||||
'rss_access_token': None,
|
'rss_access_token': None,
|
||||||
'rss_content_format': RSS_FORMAT_TYPES[0][0],
|
|
||||||
'rss_hide_muted_watches': True,
|
'rss_hide_muted_watches': True,
|
||||||
'schema_version' : 0,
|
'schema_version' : 0,
|
||||||
'shared_diff_access': False,
|
'shared_diff_access': False,
|
||||||
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
||||||
'tags': {}, #@todo use Tag.model initialisers
|
'tags': {}, #@todo use Tag.model initialisers
|
||||||
'timezone': None, # Default IANA timezone name
|
'timezone': None, # Default IANA timezone name
|
||||||
'ui': {
|
|
||||||
'open_diff_in_new_tab': True,
|
|
||||||
'socket_io_enabled': True,
|
|
||||||
'favicons_enabled': True
|
|
||||||
},
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -1,5 +1,3 @@
|
|||||||
from blinker import signal
|
|
||||||
|
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
from changedetectionio.safe_jinja import render as jinja_render
|
from changedetectionio.safe_jinja import render as jinja_render
|
||||||
from . import watch_base
|
from . import watch_base
|
||||||
@@ -8,7 +6,6 @@ import re
|
|||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
from .. import safe_jinja
|
|
||||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
|
||||||
|
|
||||||
# Allowable protocols, protects against javascript: etc
|
# Allowable protocols, protects against javascript: etc
|
||||||
@@ -44,7 +41,6 @@ class model(watch_base):
|
|||||||
self.__datastore_path = kw.get('datastore_path')
|
self.__datastore_path = kw.get('datastore_path')
|
||||||
if kw.get('datastore_path'):
|
if kw.get('datastore_path'):
|
||||||
del kw['datastore_path']
|
del kw['datastore_path']
|
||||||
|
|
||||||
super(model, self).__init__(*arg, **kw)
|
super(model, self).__init__(*arg, **kw)
|
||||||
if kw.get('default'):
|
if kw.get('default'):
|
||||||
self.update(kw['default'])
|
self.update(kw['default'])
|
||||||
@@ -64,10 +60,6 @@ class model(watch_base):
|
|||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
@property
|
|
||||||
def has_unviewed(self):
|
|
||||||
return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2
|
|
||||||
|
|
||||||
def ensure_data_dir_exists(self):
|
def ensure_data_dir_exists(self):
|
||||||
if not os.path.isdir(self.watch_data_dir):
|
if not os.path.isdir(self.watch_data_dir):
|
||||||
logger.debug(f"> Creating data dir {self.watch_data_dir}")
|
logger.debug(f"> Creating data dir {self.watch_data_dir}")
|
||||||
@@ -103,13 +95,6 @@ class model(watch_base):
|
|||||||
return 'DISABLED'
|
return 'DISABLED'
|
||||||
return ready_url
|
return ready_url
|
||||||
|
|
||||||
@property
|
|
||||||
def domain_only_from_link(self):
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
parsed = urlparse(self.link)
|
|
||||||
domain = parsed.hostname
|
|
||||||
return domain
|
|
||||||
|
|
||||||
def clear_watch(self):
|
def clear_watch(self):
|
||||||
import pathlib
|
import pathlib
|
||||||
|
|
||||||
@@ -135,10 +120,6 @@ class model(watch_base):
|
|||||||
'remote_server_reply': None,
|
'remote_server_reply': None,
|
||||||
'track_ldjson_price_data': None
|
'track_ldjson_price_data': None
|
||||||
})
|
})
|
||||||
watch_check_update = signal('watch_check_update')
|
|
||||||
if watch_check_update:
|
|
||||||
watch_check_update.send(watch_uuid=self.get('uuid'))
|
|
||||||
|
|
||||||
return
|
return
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -420,132 +401,6 @@ class model(watch_base):
|
|||||||
# False is not an option for AppRise, must be type None
|
# False is not an option for AppRise, must be type None
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def bump_favicon(self, url, favicon_base_64: str) -> None:
|
|
||||||
from urllib.parse import urlparse
|
|
||||||
import base64
|
|
||||||
import binascii
|
|
||||||
decoded = None
|
|
||||||
|
|
||||||
if url:
|
|
||||||
try:
|
|
||||||
parsed = urlparse(url)
|
|
||||||
filename = os.path.basename(parsed.path)
|
|
||||||
(base, extension) = filename.lower().strip().rsplit('.', 1)
|
|
||||||
except ValueError:
|
|
||||||
logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'")
|
|
||||||
return None
|
|
||||||
else:
|
|
||||||
# Assume favicon.ico
|
|
||||||
base = "favicon"
|
|
||||||
extension = "ico"
|
|
||||||
|
|
||||||
fname = os.path.join(self.watch_data_dir, f"favicon.{extension}")
|
|
||||||
|
|
||||||
try:
|
|
||||||
# validate=True makes sure the string only contains valid base64 chars
|
|
||||||
decoded = base64.b64decode(favicon_base_64, validate=True)
|
|
||||||
except (binascii.Error, ValueError) as e:
|
|
||||||
logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}")
|
|
||||||
else:
|
|
||||||
if decoded:
|
|
||||||
try:
|
|
||||||
with open(fname, 'wb') as f:
|
|
||||||
f.write(decoded)
|
|
||||||
# A signal that could trigger the socket server to update the browser also
|
|
||||||
watch_check_update = signal('watch_favicon_bump')
|
|
||||||
if watch_check_update:
|
|
||||||
watch_check_update.send(watch_uuid=self.get('uuid'))
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
|
|
||||||
|
|
||||||
# @todo - Store some checksum and only write when its different
|
|
||||||
logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}")
|
|
||||||
|
|
||||||
def get_favicon_filename(self) -> str | None:
|
|
||||||
"""
|
|
||||||
Find any favicon.* file in the current working directory
|
|
||||||
and return the contents of the newest one.
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
bytes: Contents of the newest favicon file, or None if not found.
|
|
||||||
"""
|
|
||||||
import glob
|
|
||||||
|
|
||||||
# Search for all favicon.* files
|
|
||||||
files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
|
|
||||||
|
|
||||||
if not files:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Find the newest by modification time
|
|
||||||
newest_file = max(files, key=os.path.getmtime)
|
|
||||||
return os.path.basename(newest_file)
|
|
||||||
|
|
||||||
def get_screenshot_as_thumbnail(self, max_age=3200):
|
|
||||||
"""Return path to a square thumbnail of the most recent screenshot.
|
|
||||||
|
|
||||||
Creates a 150x150 pixel thumbnail from the top portion of the screenshot.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
max_age: Maximum age in seconds before recreating thumbnail
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
Path to thumbnail or None if no screenshot exists
|
|
||||||
"""
|
|
||||||
import os
|
|
||||||
import time
|
|
||||||
|
|
||||||
thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
|
|
||||||
top_trim = 500 # Pixels from top of screenshot to use
|
|
||||||
|
|
||||||
screenshot_path = self.get_screenshot()
|
|
||||||
if not screenshot_path:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# Reuse thumbnail if it's fresh and screenshot hasn't changed
|
|
||||||
if os.path.isfile(thumbnail_path):
|
|
||||||
thumbnail_mtime = os.path.getmtime(thumbnail_path)
|
|
||||||
screenshot_mtime = os.path.getmtime(screenshot_path)
|
|
||||||
|
|
||||||
if screenshot_mtime <= thumbnail_mtime and time.time() - thumbnail_mtime < max_age:
|
|
||||||
return thumbnail_path
|
|
||||||
|
|
||||||
try:
|
|
||||||
from PIL import Image
|
|
||||||
|
|
||||||
with Image.open(screenshot_path) as img:
|
|
||||||
# Crop top portion first (full width, top_trim height)
|
|
||||||
top_crop_height = min(top_trim, img.height)
|
|
||||||
img = img.crop((0, 0, img.width, top_crop_height))
|
|
||||||
|
|
||||||
# Create a smaller intermediate image (to reduce memory usage)
|
|
||||||
aspect = img.width / img.height
|
|
||||||
interim_width = min(top_trim, img.width)
|
|
||||||
interim_height = int(interim_width / aspect) if aspect > 0 else top_trim
|
|
||||||
img = img.resize((interim_width, interim_height), Image.NEAREST)
|
|
||||||
|
|
||||||
# Convert to RGB if needed
|
|
||||||
if img.mode != 'RGB':
|
|
||||||
img = img.convert('RGB')
|
|
||||||
|
|
||||||
# Crop to square from top center
|
|
||||||
square_size = min(img.width, img.height)
|
|
||||||
left = (img.width - square_size) // 2
|
|
||||||
img = img.crop((left, 0, left + square_size, square_size))
|
|
||||||
|
|
||||||
# Final resize to exact thumbnail size with better filter
|
|
||||||
img = img.resize((350, 350), Image.BILINEAR)
|
|
||||||
|
|
||||||
# Save with optimized settings
|
|
||||||
img.save(thumbnail_path, "JPEG", quality=75, optimize=True)
|
|
||||||
|
|
||||||
return thumbnail_path
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error creating thumbnail for {self.get('uuid')}: {str(e)}")
|
|
||||||
return None
|
|
||||||
|
|
||||||
def __get_file_ctime(self, filename):
|
def __get_file_ctime(self, filename):
|
||||||
fname = os.path.join(self.watch_data_dir, filename)
|
fname = os.path.join(self.watch_data_dir, filename)
|
||||||
if os.path.isfile(fname):
|
if os.path.isfile(fname):
|
||||||
@@ -639,7 +494,7 @@ class model(watch_base):
|
|||||||
if res:
|
if res:
|
||||||
if not csv_writer:
|
if not csv_writer:
|
||||||
# A file on the disk can be transferred much faster via flask than a string reply
|
# A file on the disk can be transferred much faster via flask than a string reply
|
||||||
csv_output_filename = f"report-{self.get('uuid')}.csv"
|
csv_output_filename = 'report.csv'
|
||||||
f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
|
f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
|
||||||
# @todo some headers in the future
|
# @todo some headers in the future
|
||||||
#fieldnames = ['Epoch seconds', 'Date']
|
#fieldnames = ['Epoch seconds', 'Date']
|
||||||
@@ -698,10 +553,7 @@ class model(watch_base):
|
|||||||
self.ensure_data_dir_exists()
|
self.ensure_data_dir_exists()
|
||||||
|
|
||||||
with open(target_path, 'wb') as f:
|
with open(target_path, 'wb') as f:
|
||||||
if not isinstance(data, str):
|
f.write(zlib.compress(json.dumps(data).encode()))
|
||||||
f.write(zlib.compress(json.dumps(data).encode()))
|
|
||||||
else:
|
|
||||||
f.write(zlib.compress(data.encode()))
|
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
# Save as PNG, PNG is larger but better for doing visual diff in the future
|
# Save as PNG, PNG is larger but better for doing visual diff in the future
|
||||||
@@ -723,7 +575,7 @@ class model(watch_base):
|
|||||||
import brotli
|
import brotli
|
||||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||||
|
|
||||||
if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
|
if not os.path.isfile(filepath):
|
||||||
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
||||||
dates = list(self.history.keys())
|
dates = list(self.history.keys())
|
||||||
if len(dates):
|
if len(dates):
|
||||||
@@ -793,44 +645,3 @@ class model(watch_base):
|
|||||||
if step_n:
|
if step_n:
|
||||||
available.append(step_n.group(1))
|
available.append(step_n.group(1))
|
||||||
return available
|
return available
|
||||||
|
|
||||||
def compile_error_texts(self, has_proxies=None):
|
|
||||||
"""Compile error texts for this watch.
|
|
||||||
Accepts has_proxies parameter to ensure it works even outside app context"""
|
|
||||||
from flask import url_for
|
|
||||||
from markupsafe import Markup
|
|
||||||
|
|
||||||
output = [] # Initialize as list since we're using append
|
|
||||||
last_error = self.get('last_error','')
|
|
||||||
|
|
||||||
try:
|
|
||||||
url_for('settings.settings_page')
|
|
||||||
except Exception as e:
|
|
||||||
has_app_context = False
|
|
||||||
else:
|
|
||||||
has_app_context = True
|
|
||||||
|
|
||||||
# has app+request context, we can use url_for()
|
|
||||||
if has_app_context:
|
|
||||||
if last_error:
|
|
||||||
if '403' in last_error:
|
|
||||||
if has_proxies:
|
|
||||||
output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try other proxies/location</a> '")))
|
|
||||||
else:
|
|
||||||
output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try adding external proxies/locations</a> '")))
|
|
||||||
else:
|
|
||||||
output.append(str(Markup(last_error)))
|
|
||||||
|
|
||||||
if self.get('last_notification_error'):
|
|
||||||
output.append(str(Markup(f"<div class=\"notification-error\"><a href=\"{url_for('settings.notification_logs')}\">{ self.get('last_notification_error') }</a></div>")))
|
|
||||||
|
|
||||||
else:
|
|
||||||
# Lo_Fi version - no app context, cant rely on Jinja2 Markup
|
|
||||||
if last_error:
|
|
||||||
output.append(safe_jinja.render_fully_escaped(last_error))
|
|
||||||
if self.get('last_notification_error'):
|
|
||||||
output.append(safe_jinja.render_fully_escaped(self.get('last_notification_error')))
|
|
||||||
|
|
||||||
res = "\n".join(output)
|
|
||||||
return res
|
|
||||||
|
|
||||||
|
|||||||
@@ -2,8 +2,7 @@ import os
|
|||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from changedetectionio import strtobool
|
from changedetectionio import strtobool
|
||||||
default_notification_format_for_watch = 'System default'
|
from changedetectionio.notification import default_notification_format_for_watch
|
||||||
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
|
|
||||||
|
|
||||||
class watch_base(dict):
|
class watch_base(dict):
|
||||||
|
|
||||||
@@ -16,8 +15,6 @@ class watch_base(dict):
|
|||||||
'body': None,
|
'body': None,
|
||||||
'browser_steps': [],
|
'browser_steps': [],
|
||||||
'browser_steps_last_error_step': None,
|
'browser_steps_last_error_step': None,
|
||||||
'conditions' : {},
|
|
||||||
'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT,
|
|
||||||
'check_count': 0,
|
'check_count': 0,
|
||||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||||
@@ -39,7 +36,6 @@ class watch_base(dict):
|
|||||||
'include_filters': [],
|
'include_filters': [],
|
||||||
'last_checked': 0,
|
'last_checked': 0,
|
||||||
'last_error': False,
|
'last_error': False,
|
||||||
'last_notification_error': None,
|
|
||||||
'last_viewed': 0, # history key value of the last viewed via the [diff] link
|
'last_viewed': 0, # history key value of the last viewed via the [diff] link
|
||||||
'method': 'GET',
|
'method': 'GET',
|
||||||
'notification_alert_count': 0,
|
'notification_alert_count': 0,
|
||||||
|
|||||||
@@ -1,15 +1,48 @@
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
|
from apprise import NotifyFormat
|
||||||
import apprise
|
import apprise
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
|
|
||||||
|
|
||||||
|
valid_tokens = {
|
||||||
|
'base_url': '',
|
||||||
|
'current_snapshot': '',
|
||||||
|
'diff': '',
|
||||||
|
'diff_added': '',
|
||||||
|
'diff_full': '',
|
||||||
|
'diff_patch': '',
|
||||||
|
'diff_removed': '',
|
||||||
|
'diff_url': '',
|
||||||
|
'preview_url': '',
|
||||||
|
'triggered_text': '',
|
||||||
|
'watch_tag': '',
|
||||||
|
'watch_title': '',
|
||||||
|
'watch_url': '',
|
||||||
|
'watch_uuid': '',
|
||||||
|
}
|
||||||
|
|
||||||
|
default_notification_format_for_watch = 'System default'
|
||||||
|
default_notification_format = 'HTML Color'
|
||||||
|
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
|
||||||
|
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
|
||||||
|
|
||||||
|
valid_notification_formats = {
|
||||||
|
'Text': NotifyFormat.TEXT,
|
||||||
|
'Markdown': NotifyFormat.MARKDOWN,
|
||||||
|
'HTML': NotifyFormat.HTML,
|
||||||
|
'HTML Color': 'htmlcolor',
|
||||||
|
# Used only for editing a watch (not for global)
|
||||||
|
default_notification_format_for_watch: default_notification_format_for_watch
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def process_notification(n_object, datastore):
|
def process_notification(n_object, datastore):
|
||||||
from changedetectionio.safe_jinja import render as jinja_render
|
# so that the custom endpoints are registered
|
||||||
from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats
|
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
|
||||||
# be sure its registered
|
|
||||||
from .apprise_plugin.custom_handlers import apprise_http_custom_handler
|
|
||||||
|
|
||||||
|
from .safe_jinja import render as jinja_render
|
||||||
now = time.time()
|
now = time.time()
|
||||||
if n_object.get('notification_timestamp'):
|
if n_object.get('notification_timestamp'):
|
||||||
logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s")
|
logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s")
|
||||||
@@ -26,18 +59,19 @@ def process_notification(n_object, datastore):
|
|||||||
# Initially text or whatever
|
# Initially text or whatever
|
||||||
n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format])
|
n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format])
|
||||||
|
|
||||||
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.2f}s")
|
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.3f}s")
|
||||||
|
|
||||||
# https://github.com/caronc/apprise/wiki/Development_LogCapture
|
# https://github.com/caronc/apprise/wiki/Development_LogCapture
|
||||||
# Anything higher than or equal to WARNING (which covers things like Connection errors)
|
# Anything higher than or equal to WARNING (which covers things like Connection errors)
|
||||||
# raise it as an exception
|
# raise it as an exception
|
||||||
|
|
||||||
sent_objs = []
|
sent_objs = []
|
||||||
|
from .apprise_asset import asset
|
||||||
|
|
||||||
if 'as_async' in n_object:
|
if 'as_async' in n_object:
|
||||||
apprise_asset.async_mode = n_object.get('as_async')
|
asset.async_mode = n_object.get('as_async')
|
||||||
|
|
||||||
apobj = apprise.Apprise(debug=True, asset=apprise_asset)
|
apobj = apprise.Apprise(debug=True, asset=asset)
|
||||||
|
|
||||||
if not n_object.get('notification_urls'):
|
if not n_object.get('notification_urls'):
|
||||||
return None
|
return None
|
||||||
@@ -78,7 +112,7 @@ def process_notification(n_object, datastore):
|
|||||||
and not url.startswith('get') \
|
and not url.startswith('get') \
|
||||||
and not url.startswith('delete') \
|
and not url.startswith('delete') \
|
||||||
and not url.startswith('put'):
|
and not url.startswith('put'):
|
||||||
url += k + f"avatar_url={APPRISE_AVATAR_URL}"
|
url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
|
||||||
|
|
||||||
if url.startswith('tgram://'):
|
if url.startswith('tgram://'):
|
||||||
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
|
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
|
||||||
@@ -143,7 +177,6 @@ def process_notification(n_object, datastore):
|
|||||||
# ( Where we prepare the tokens in the notification to be replaced with actual values )
|
# ( Where we prepare the tokens in the notification to be replaced with actual values )
|
||||||
def create_notification_parameters(n_object, datastore):
|
def create_notification_parameters(n_object, datastore):
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from . import valid_tokens
|
|
||||||
|
|
||||||
# in the case we send a test notification from the main settings, there is no UUID.
|
# in the case we send a test notification from the main settings, there is no UUID.
|
||||||
uuid = n_object['uuid'] if 'uuid' in n_object else ''
|
uuid = n_object['uuid'] if 'uuid' in n_object else ''
|
||||||
@@ -1,35 +0,0 @@
|
|||||||
from changedetectionio.model import default_notification_format_for_watch
|
|
||||||
|
|
||||||
ult_notification_format_for_watch = 'System default'
|
|
||||||
default_notification_format = 'HTML Color'
|
|
||||||
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
|
|
||||||
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
|
|
||||||
|
|
||||||
# The values (markdown etc) are from apprise NotifyFormat,
|
|
||||||
# But to avoid importing the whole heavy module just use the same strings here.
|
|
||||||
valid_notification_formats = {
|
|
||||||
'Text': 'text',
|
|
||||||
'Markdown': 'markdown',
|
|
||||||
'HTML': 'html',
|
|
||||||
'HTML Color': 'htmlcolor',
|
|
||||||
# Used only for editing a watch (not for global)
|
|
||||||
default_notification_format_for_watch: default_notification_format_for_watch
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
valid_tokens = {
|
|
||||||
'base_url': '',
|
|
||||||
'current_snapshot': '',
|
|
||||||
'diff': '',
|
|
||||||
'diff_added': '',
|
|
||||||
'diff_full': '',
|
|
||||||
'diff_patch': '',
|
|
||||||
'diff_removed': '',
|
|
||||||
'diff_url': '',
|
|
||||||
'preview_url': '',
|
|
||||||
'triggered_text': '',
|
|
||||||
'watch_tag': '',
|
|
||||||
'watch_title': '',
|
|
||||||
'watch_url': '',
|
|
||||||
'watch_uuid': '',
|
|
||||||
}
|
|
||||||
@@ -1,16 +0,0 @@
|
|||||||
from apprise import AppriseAsset
|
|
||||||
|
|
||||||
# Refer to:
|
|
||||||
# https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object
|
|
||||||
|
|
||||||
APPRISE_APP_ID = "changedetection.io"
|
|
||||||
APPRISE_APP_DESC = "ChangeDetection.io best and simplest website monitoring and change detection"
|
|
||||||
APPRISE_APP_URL = "https://changedetection.io"
|
|
||||||
APPRISE_AVATAR_URL = "https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png"
|
|
||||||
|
|
||||||
apprise_asset = AppriseAsset(
|
|
||||||
app_id=APPRISE_APP_ID,
|
|
||||||
app_desc=APPRISE_APP_DESC,
|
|
||||||
app_url=APPRISE_APP_URL,
|
|
||||||
image_url_logo=APPRISE_AVATAR_URL,
|
|
||||||
)
|
|
||||||
@@ -1,112 +0,0 @@
|
|||||||
import json
|
|
||||||
import re
|
|
||||||
from urllib.parse import unquote_plus
|
|
||||||
|
|
||||||
import requests
|
|
||||||
from apprise.decorators import notify
|
|
||||||
from apprise.utils.parse import parse_url as apprise_parse_url
|
|
||||||
from loguru import logger
|
|
||||||
from requests.structures import CaseInsensitiveDict
|
|
||||||
|
|
||||||
SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"}
|
|
||||||
|
|
||||||
|
|
||||||
def notify_supported_methods(func):
|
|
||||||
for method in SUPPORTED_HTTP_METHODS:
|
|
||||||
func = notify(on=method)(func)
|
|
||||||
# Add support for https, for each supported http method
|
|
||||||
func = notify(on=f"{method}s")(func)
|
|
||||||
return func
|
|
||||||
|
|
||||||
|
|
||||||
def _get_auth(parsed_url: dict) -> str | tuple[str, str]:
|
|
||||||
user: str | None = parsed_url.get("user")
|
|
||||||
password: str | None = parsed_url.get("password")
|
|
||||||
|
|
||||||
if user is not None and password is not None:
|
|
||||||
return (unquote_plus(user), unquote_plus(password))
|
|
||||||
|
|
||||||
if user is not None:
|
|
||||||
return unquote_plus(user)
|
|
||||||
|
|
||||||
return ""
|
|
||||||
|
|
||||||
|
|
||||||
def _get_headers(parsed_url: dict, body: str) -> CaseInsensitiveDict:
|
|
||||||
headers = CaseInsensitiveDict(
|
|
||||||
{unquote_plus(k).title(): unquote_plus(v) for k, v in parsed_url["qsd+"].items()}
|
|
||||||
)
|
|
||||||
|
|
||||||
# If Content-Type is not specified, guess if the body is a valid JSON
|
|
||||||
if headers.get("Content-Type") is None:
|
|
||||||
try:
|
|
||||||
json.loads(body)
|
|
||||||
headers["Content-Type"] = "application/json; charset=utf-8"
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return headers
|
|
||||||
|
|
||||||
|
|
||||||
def _get_params(parsed_url: dict) -> CaseInsensitiveDict:
|
|
||||||
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
|
|
||||||
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
|
|
||||||
# but here we are making straight requests, so we need todo convert this against apprise's logic
|
|
||||||
params = CaseInsensitiveDict(
|
|
||||||
{
|
|
||||||
unquote_plus(k): unquote_plus(v)
|
|
||||||
for k, v in parsed_url["qsd"].items()
|
|
||||||
if k.strip("-") not in parsed_url["qsd-"]
|
|
||||||
and k.strip("+") not in parsed_url["qsd+"]
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
return params
|
|
||||||
|
|
||||||
|
|
||||||
@notify_supported_methods
|
|
||||||
def apprise_http_custom_handler(
|
|
||||||
body: str,
|
|
||||||
title: str,
|
|
||||||
notify_type: str,
|
|
||||||
meta: dict,
|
|
||||||
*args,
|
|
||||||
**kwargs,
|
|
||||||
) -> bool:
|
|
||||||
url: str = meta.get("url")
|
|
||||||
schema: str = meta.get("schema")
|
|
||||||
method: str = re.sub(r"s$", "", schema).upper()
|
|
||||||
|
|
||||||
# Convert /foobar?+some-header=hello to proper header dictionary
|
|
||||||
parsed_url: dict[str, str | dict | None] | None = apprise_parse_url(url)
|
|
||||||
if parsed_url is None:
|
|
||||||
return False
|
|
||||||
|
|
||||||
auth = _get_auth(parsed_url=parsed_url)
|
|
||||||
headers = _get_headers(parsed_url=parsed_url, body=body)
|
|
||||||
params = _get_params(parsed_url=parsed_url)
|
|
||||||
|
|
||||||
url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))
|
|
||||||
|
|
||||||
try:
|
|
||||||
response = requests.request(
|
|
||||||
method=method,
|
|
||||||
url=url,
|
|
||||||
auth=auth,
|
|
||||||
headers=headers,
|
|
||||||
params=params,
|
|
||||||
data=body.encode("utf-8") if isinstance(body, str) else body,
|
|
||||||
)
|
|
||||||
|
|
||||||
response.raise_for_status()
|
|
||||||
|
|
||||||
logger.info(f"Successfully sent custom notification to {url}")
|
|
||||||
return True
|
|
||||||
|
|
||||||
except requests.RequestException as e:
|
|
||||||
logger.error(f"Remote host error while sending custom notification to {url}: {e}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}")
|
|
||||||
return False
|
|
||||||
@@ -1,246 +0,0 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
"""
|
|
||||||
Notification Service Module
|
|
||||||
Extracted from update_worker.py to provide standalone notification functionality
|
|
||||||
for both sync and async workers
|
|
||||||
"""
|
|
||||||
|
|
||||||
import time
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
|
|
||||||
class NotificationService:
|
|
||||||
"""
|
|
||||||
Standalone notification service that handles all notification functionality
|
|
||||||
previously embedded in the update_worker class
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, datastore, notification_q):
|
|
||||||
self.datastore = datastore
|
|
||||||
self.notification_q = notification_q
|
|
||||||
|
|
||||||
def queue_notification_for_watch(self, n_object, watch):
|
|
||||||
"""
|
|
||||||
Queue a notification for a watch with full diff rendering and template variables
|
|
||||||
"""
|
|
||||||
from changedetectionio import diff
|
|
||||||
from changedetectionio.notification import default_notification_format_for_watch
|
|
||||||
|
|
||||||
dates = []
|
|
||||||
trigger_text = ''
|
|
||||||
|
|
||||||
now = time.time()
|
|
||||||
|
|
||||||
if watch:
|
|
||||||
watch_history = watch.history
|
|
||||||
dates = list(watch_history.keys())
|
|
||||||
trigger_text = watch.get('trigger_text', [])
|
|
||||||
|
|
||||||
# Add text that was triggered
|
|
||||||
if len(dates):
|
|
||||||
snapshot_contents = watch.get_history_snapshot(dates[-1])
|
|
||||||
else:
|
|
||||||
snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
|
|
||||||
|
|
||||||
# If we ended up here with "System default"
|
|
||||||
if n_object.get('notification_format') == default_notification_format_for_watch:
|
|
||||||
n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')
|
|
||||||
|
|
||||||
html_colour_enable = False
|
|
||||||
# HTML needs linebreak, but MarkDown and Text can use a linefeed
|
|
||||||
if n_object.get('notification_format') == 'HTML':
|
|
||||||
line_feed_sep = "<br>"
|
|
||||||
# Snapshot will be plaintext on the disk, convert to some kind of HTML
|
|
||||||
snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
|
|
||||||
elif n_object.get('notification_format') == 'HTML Color':
|
|
||||||
line_feed_sep = "<br>"
|
|
||||||
# Snapshot will be plaintext on the disk, convert to some kind of HTML
|
|
||||||
snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
|
|
||||||
html_colour_enable = True
|
|
||||||
else:
|
|
||||||
line_feed_sep = "\n"
|
|
||||||
|
|
||||||
triggered_text = ''
|
|
||||||
if len(trigger_text):
|
|
||||||
from . import html_tools
|
|
||||||
triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
|
|
||||||
if triggered_text:
|
|
||||||
triggered_text = line_feed_sep.join(triggered_text)
|
|
||||||
|
|
||||||
# Could be called as a 'test notification' with only 1 snapshot available
|
|
||||||
prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
|
|
||||||
current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
|
|
||||||
|
|
||||||
if len(dates) > 1:
|
|
||||||
prev_snapshot = watch.get_history_snapshot(dates[-2])
|
|
||||||
current_snapshot = watch.get_history_snapshot(dates[-1])
|
|
||||||
|
|
||||||
n_object.update({
|
|
||||||
'current_snapshot': snapshot_contents,
|
|
||||||
'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
|
|
||||||
'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep),
|
|
||||||
'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
|
|
||||||
'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True),
|
|
||||||
'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep),
|
|
||||||
'notification_timestamp': now,
|
|
||||||
'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
|
|
||||||
'triggered_text': triggered_text,
|
|
||||||
'uuid': watch.get('uuid') if watch else None,
|
|
||||||
'watch_url': watch.get('url') if watch else None,
|
|
||||||
})
|
|
||||||
|
|
||||||
if watch:
|
|
||||||
n_object.update(watch.extra_notification_token_values())
|
|
||||||
|
|
||||||
logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s")
|
|
||||||
logger.debug("Queued notification for sending")
|
|
||||||
self.notification_q.put(n_object)
|
|
||||||
|
|
||||||
def _check_cascading_vars(self, var_name, watch):
|
|
||||||
"""
|
|
||||||
Check notification variables in cascading priority:
|
|
||||||
Individual watch settings > Tag settings > Global settings
|
|
||||||
"""
|
|
||||||
from changedetectionio.notification import (
|
|
||||||
default_notification_format_for_watch,
|
|
||||||
default_notification_body,
|
|
||||||
default_notification_title
|
|
||||||
)
|
|
||||||
|
|
||||||
# Would be better if this was some kind of Object where Watch can reference the parent datastore etc
|
|
||||||
v = watch.get(var_name)
|
|
||||||
if v and not watch.get('notification_muted'):
|
|
||||||
if var_name == 'notification_format' and v == default_notification_format_for_watch:
|
|
||||||
return self.datastore.data['settings']['application'].get('notification_format')
|
|
||||||
|
|
||||||
return v
|
|
||||||
|
|
||||||
tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
|
|
||||||
if tags:
|
|
||||||
for tag_uuid, tag in tags.items():
|
|
||||||
v = tag.get(var_name)
|
|
||||||
if v and not tag.get('notification_muted'):
|
|
||||||
return v
|
|
||||||
|
|
||||||
if self.datastore.data['settings']['application'].get(var_name):
|
|
||||||
return self.datastore.data['settings']['application'].get(var_name)
|
|
||||||
|
|
||||||
# Otherwise could be defaults
|
|
||||||
if var_name == 'notification_format':
|
|
||||||
return default_notification_format_for_watch
|
|
||||||
if var_name == 'notification_body':
|
|
||||||
return default_notification_body
|
|
||||||
if var_name == 'notification_title':
|
|
||||||
return default_notification_title
|
|
||||||
|
|
||||||
return None
|
|
||||||
|
|
||||||
def send_content_changed_notification(self, watch_uuid):
|
|
||||||
"""
|
|
||||||
Send notification when content changes are detected
|
|
||||||
"""
|
|
||||||
n_object = {}
|
|
||||||
watch = self.datastore.data['watching'].get(watch_uuid)
|
|
||||||
if not watch:
|
|
||||||
return
|
|
||||||
|
|
||||||
watch_history = watch.history
|
|
||||||
dates = list(watch_history.keys())
|
|
||||||
# Theoretically it's possible that this could be just 1 long,
|
|
||||||
# - In the case that the timestamp key was not unique
|
|
||||||
if len(dates) == 1:
|
|
||||||
raise ValueError(
|
|
||||||
"History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Should be a better parent getter in the model object
|
|
||||||
|
|
||||||
# Prefer - Individual watch settings > Tag settings > Global settings (in that order)
|
|
||||||
n_object['notification_urls'] = self._check_cascading_vars('notification_urls', watch)
|
|
||||||
n_object['notification_title'] = self._check_cascading_vars('notification_title', watch)
|
|
||||||
n_object['notification_body'] = self._check_cascading_vars('notification_body', watch)
|
|
||||||
n_object['notification_format'] = self._check_cascading_vars('notification_format', watch)
|
|
||||||
|
|
||||||
# (Individual watch) Only prepare to notify if the rules above matched
|
|
||||||
queued = False
|
|
||||||
if n_object and n_object.get('notification_urls'):
|
|
||||||
queued = True
|
|
||||||
|
|
||||||
count = watch.get('notification_alert_count', 0) + 1
|
|
||||||
self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})
|
|
||||||
|
|
||||||
self.queue_notification_for_watch(n_object=n_object, watch=watch)
|
|
||||||
|
|
||||||
return queued
|
|
||||||
|
|
||||||
def send_filter_failure_notification(self, watch_uuid):
|
|
||||||
"""
|
|
||||||
Send notification when CSS/XPath filters fail consecutively
|
|
||||||
"""
|
|
||||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
|
|
||||||
watch = self.datastore.data['watching'].get(watch_uuid)
|
|
||||||
if not watch:
|
|
||||||
return
|
|
||||||
|
|
||||||
n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
|
|
||||||
'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
|
|
||||||
", ".join(watch['include_filters']),
|
|
||||||
threshold),
|
|
||||||
'notification_format': 'text'}
|
|
||||||
|
|
||||||
if len(watch['notification_urls']):
|
|
||||||
n_object['notification_urls'] = watch['notification_urls']
|
|
||||||
|
|
||||||
elif len(self.datastore.data['settings']['application']['notification_urls']):
|
|
||||||
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
|
|
||||||
|
|
||||||
# Only prepare to notify if the rules above matched
|
|
||||||
if 'notification_urls' in n_object:
|
|
||||||
n_object.update({
|
|
||||||
'watch_url': watch['url'],
|
|
||||||
'uuid': watch_uuid,
|
|
||||||
'screenshot': None
|
|
||||||
})
|
|
||||||
self.notification_q.put(n_object)
|
|
||||||
logger.debug(f"Sent filter not found notification for {watch_uuid}")
|
|
||||||
else:
|
|
||||||
logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")
|
|
||||||
|
|
||||||
def send_step_failure_notification(self, watch_uuid, step_n):
|
|
||||||
"""
|
|
||||||
Send notification when browser steps fail consecutively
|
|
||||||
"""
|
|
||||||
watch = self.datastore.data['watching'].get(watch_uuid, False)
|
|
||||||
if not watch:
|
|
||||||
return
|
|
||||||
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
|
|
||||||
n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1),
|
|
||||||
'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} "
|
|
||||||
"did not appear on the page after {} attempts, did the page change layout? "
|
|
||||||
"Does it need a delay added?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\n"
|
|
||||||
"Thanks - Your omniscient changedetection.io installation :)\n".format(step_n+1, threshold),
|
|
||||||
'notification_format': 'text'}
|
|
||||||
|
|
||||||
if len(watch['notification_urls']):
|
|
||||||
n_object['notification_urls'] = watch['notification_urls']
|
|
||||||
|
|
||||||
elif len(self.datastore.data['settings']['application']['notification_urls']):
|
|
||||||
n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
|
|
||||||
|
|
||||||
# Only prepare to notify if the rules above matched
|
|
||||||
if 'notification_urls' in n_object:
|
|
||||||
n_object.update({
|
|
||||||
'watch_url': watch['url'],
|
|
||||||
'uuid': watch_uuid
|
|
||||||
})
|
|
||||||
self.notification_q.put(n_object)
|
|
||||||
logger.error(f"Sent step not found notification for {watch_uuid}")
|
|
||||||
|
|
||||||
|
|
||||||
# Convenience functions for creating notification service instances
|
|
||||||
def create_notification_service(datastore, notification_q):
|
|
||||||
"""
|
|
||||||
Factory function to create a NotificationService instance
|
|
||||||
"""
|
|
||||||
return NotificationService(datastore, notification_q)
|
|
||||||
@@ -1,82 +0,0 @@
|
|||||||
import pluggy
|
|
||||||
import os
|
|
||||||
import importlib
|
|
||||||
import sys
|
|
||||||
|
|
||||||
# Global plugin namespace for changedetection.io
|
|
||||||
PLUGIN_NAMESPACE = "changedetectionio"
|
|
||||||
|
|
||||||
hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE)
|
|
||||||
hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE)
|
|
||||||
|
|
||||||
|
|
||||||
class ChangeDetectionSpec:
|
|
||||||
"""Hook specifications for extending changedetection.io functionality."""
|
|
||||||
|
|
||||||
@hookspec
|
|
||||||
def ui_edit_stats_extras(watch):
|
|
||||||
"""Return HTML content to add to the stats tab in the edit view.
|
|
||||||
|
|
||||||
Args:
|
|
||||||
watch: The watch object being edited
|
|
||||||
|
|
||||||
Returns:
|
|
||||||
str: HTML content to be inserted in the stats tab
|
|
||||||
"""
|
|
||||||
pass
|
|
||||||
|
|
||||||
|
|
||||||
# Set up Plugin Manager
|
|
||||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
|
||||||
|
|
||||||
# Register hookspecs
|
|
||||||
plugin_manager.add_hookspecs(ChangeDetectionSpec)
|
|
||||||
|
|
||||||
# Load plugins from subdirectories
|
|
||||||
def load_plugins_from_directories():
|
|
||||||
# Dictionary of directories to scan for plugins
|
|
||||||
plugin_dirs = {
|
|
||||||
'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
|
|
||||||
# Add more plugin directories here as needed
|
|
||||||
}
|
|
||||||
|
|
||||||
# Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory
|
|
||||||
|
|
||||||
for dir_name, dir_path in plugin_dirs.items():
|
|
||||||
if not os.path.exists(dir_path):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# Get all Python files (excluding __init__.py)
|
|
||||||
for filename in os.listdir(dir_path):
|
|
||||||
if filename.endswith(".py") and filename != "__init__.py":
|
|
||||||
module_name = filename[:-3] # Remove .py extension
|
|
||||||
module_path = f"changedetectionio.{dir_name}.plugins.{module_name}"
|
|
||||||
|
|
||||||
try:
|
|
||||||
module = importlib.import_module(module_path)
|
|
||||||
# Register the plugin with pluggy
|
|
||||||
plugin_manager.register(module, module_name)
|
|
||||||
except (ImportError, AttributeError) as e:
|
|
||||||
print(f"Error loading plugin {module_name}: {e}")
|
|
||||||
|
|
||||||
# Load plugins
|
|
||||||
load_plugins_from_directories()
|
|
||||||
|
|
||||||
# Discover installed plugins from external packages (if any)
|
|
||||||
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
|
|
||||||
|
|
||||||
# Helper function to collect UI stats extras from all plugins
|
|
||||||
def collect_ui_edit_stats_extras(watch):
|
|
||||||
"""Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
|
|
||||||
extras_content = []
|
|
||||||
|
|
||||||
# Get all plugins that implement the ui_edit_stats_extras hook
|
|
||||||
results = plugin_manager.hook.ui_edit_stats_extras(watch=watch)
|
|
||||||
|
|
||||||
# If we have results, add them to our content
|
|
||||||
if results:
|
|
||||||
for result in results:
|
|
||||||
if result: # Skip empty results
|
|
||||||
extras_content.append(result)
|
|
||||||
|
|
||||||
return "\n".join(extras_content) if extras_content else ""
|
|
||||||
@@ -27,7 +27,7 @@ class difference_detection_processor():
|
|||||||
# Generic fetcher that should be extended (requests, playwright etc)
|
# Generic fetcher that should be extended (requests, playwright etc)
|
||||||
self.fetcher = Fetcher()
|
self.fetcher = Fetcher()
|
||||||
|
|
||||||
async def call_browser(self, preferred_proxy_id=None):
|
def call_browser(self, preferred_proxy_id=None):
|
||||||
|
|
||||||
from requests.structures import CaseInsensitiveDict
|
from requests.structures import CaseInsensitiveDict
|
||||||
|
|
||||||
@@ -89,7 +89,7 @@ class difference_detection_processor():
|
|||||||
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
|
||||||
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
|
logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
|
||||||
else:
|
else:
|
||||||
logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
|
logger.debug(f"Skipping adding proxy data when custom Browser endpoint is specified. ")
|
||||||
|
|
||||||
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
# Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
|
||||||
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
|
# When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
|
||||||
@@ -147,20 +147,19 @@ class difference_detection_processor():
|
|||||||
# And here we go! call the right browser with browser-specific settings
|
# And here we go! call the right browser with browser-specific settings
|
||||||
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||||
|
|
||||||
# All fetchers are now async
|
self.fetcher.run(url=url,
|
||||||
await self.fetcher.run(url=url,
|
timeout=timeout,
|
||||||
timeout=timeout,
|
request_headers=request_headers,
|
||||||
request_headers=request_headers,
|
request_body=request_body,
|
||||||
request_body=request_body,
|
request_method=request_method,
|
||||||
request_method=request_method,
|
ignore_status_codes=ignore_status_codes,
|
||||||
ignore_status_codes=ignore_status_codes,
|
current_include_filters=self.watch.get('include_filters'),
|
||||||
current_include_filters=self.watch.get('include_filters'),
|
is_binary=is_binary,
|
||||||
is_binary=is_binary,
|
empty_pages_are_a_change=empty_pages_are_a_change
|
||||||
empty_pages_are_a_change=empty_pages_are_a_change
|
)
|
||||||
)
|
|
||||||
|
|
||||||
#@todo .quit here could go on close object, so we can run JS if change-detected
|
#@todo .quit here could go on close object, so we can run JS if change-detected
|
||||||
self.fetcher.quit(watch=self.watch)
|
self.fetcher.quit()
|
||||||
|
|
||||||
# After init, call run_changedetection() which will do the actual change-detection
|
# After init, call run_changedetection() which will do the actual change-detection
|
||||||
|
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import urllib3
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
name = 'Re-stock & Price detection for pages with a SINGLE product'
|
name = 'Re-stock & Price detection for single product pages'
|
||||||
description = 'Detects if the product goes back to in-stock'
|
description = 'Detects if the product goes back to in-stock'
|
||||||
|
|
||||||
class UnableToExtractRestockData(Exception):
|
class UnableToExtractRestockData(Exception):
|
||||||
@@ -79,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock:
|
|||||||
# First phase, dead simple scanning of anything that looks useful
|
# First phase, dead simple scanning of anything that looks useful
|
||||||
value = Restock()
|
value = Restock()
|
||||||
if data:
|
if data:
|
||||||
logger.debug("Using jsonpath to find price/availability/etc")
|
logger.debug(f"Using jsonpath to find price/availability/etc")
|
||||||
price_parse = parse('$..(price|Price)')
|
price_parse = parse('$..(price|Price)')
|
||||||
pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
|
pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
|
||||||
availability_parse = parse('$..(availability|Availability)')
|
availability_parse = parse('$..(availability|Availability)')
|
||||||
@@ -110,7 +110,7 @@ def get_itemprop_availability(html_content) -> Restock:
|
|||||||
|
|
||||||
# Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:)
|
# Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:)
|
||||||
if not value.get('price') or value.get('availability'):
|
if not value.get('price') or value.get('availability'):
|
||||||
logger.debug("Alternatively digging through OpenGraph properties for restock/price info..")
|
logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..")
|
||||||
jsonpath_expr = parse('$..properties')
|
jsonpath_expr = parse('$..properties')
|
||||||
|
|
||||||
for match in jsonpath_expr.find(data):
|
for match in jsonpath_expr.find(data):
|
||||||
|
|||||||
@@ -15,7 +15,7 @@ def _task(watch, update_handler):
|
|||||||
except FilterNotFoundInResponse as e:
|
except FilterNotFoundInResponse as e:
|
||||||
text_after_filter = f"Filter not found in HTML: {str(e)}"
|
text_after_filter = f"Filter not found in HTML: {str(e)}"
|
||||||
except ReplyWithContentButNoText as e:
|
except ReplyWithContentButNoText as e:
|
||||||
text_after_filter = "Filter found but no text (empty result)"
|
text_after_filter = f"Filter found but no text (empty result)"
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
text_after_filter = f"Error: {str(e)}"
|
text_after_filter = f"Error: {str(e)}"
|
||||||
|
|
||||||
|
|||||||
@@ -252,7 +252,6 @@ class perform_site_check(difference_detection_processor):
|
|||||||
|
|
||||||
# 615 Extract text by regex
|
# 615 Extract text by regex
|
||||||
extract_text = watch.get('extract_text', [])
|
extract_text = watch.get('extract_text', [])
|
||||||
extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text')
|
|
||||||
if len(extract_text) > 0:
|
if len(extract_text) > 0:
|
||||||
regex_matched_output = []
|
regex_matched_output = []
|
||||||
for s_re in extract_text:
|
for s_re in extract_text:
|
||||||
@@ -297,8 +296,6 @@ class perform_site_check(difference_detection_processor):
|
|||||||
### CALCULATE MD5
|
### CALCULATE MD5
|
||||||
# If there's text to ignore
|
# If there's text to ignore
|
||||||
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
|
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
|
||||||
text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text')
|
|
||||||
|
|
||||||
text_for_checksuming = stripped_text_from_html
|
text_for_checksuming = stripped_text_from_html
|
||||||
if text_to_ignore:
|
if text_to_ignore:
|
||||||
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
|
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
|
||||||
@@ -311,8 +308,8 @@ class perform_site_check(difference_detection_processor):
|
|||||||
|
|
||||||
############ Blocking rules, after checksum #################
|
############ Blocking rules, after checksum #################
|
||||||
blocked = False
|
blocked = False
|
||||||
|
|
||||||
trigger_text = watch.get('trigger_text', [])
|
trigger_text = watch.get('trigger_text', [])
|
||||||
trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text')
|
|
||||||
if len(trigger_text):
|
if len(trigger_text):
|
||||||
# Assume blocked
|
# Assume blocked
|
||||||
blocked = True
|
blocked = True
|
||||||
@@ -327,7 +324,6 @@ class perform_site_check(difference_detection_processor):
|
|||||||
blocked = False
|
blocked = False
|
||||||
|
|
||||||
text_should_not_be_present = watch.get('text_should_not_be_present', [])
|
text_should_not_be_present = watch.get('text_should_not_be_present', [])
|
||||||
text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present')
|
|
||||||
if len(text_should_not_be_present):
|
if len(text_should_not_be_present):
|
||||||
# If anything matched, then we should block a change from happening
|
# If anything matched, then we should block a change from happening
|
||||||
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
||||||
@@ -338,14 +334,12 @@ class perform_site_check(difference_detection_processor):
|
|||||||
|
|
||||||
# And check if 'conditions' will let this pass through
|
# And check if 'conditions' will let this pass through
|
||||||
if watch.get('conditions') and watch.get('conditions_match_logic'):
|
if watch.get('conditions') and watch.get('conditions_match_logic'):
|
||||||
conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
|
if not execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
|
||||||
application_datastruct=self.datastore.data,
|
application_datastruct=self.datastore.data,
|
||||||
ephemeral_data={
|
ephemeral_data={
|
||||||
'text': stripped_text_from_html
|
'text': stripped_text_from_html
|
||||||
}
|
}
|
||||||
)
|
):
|
||||||
|
|
||||||
if not conditions_result.get('result'):
|
|
||||||
# Conditions say "Condition not met" so we block it.
|
# Conditions say "Condition not met" so we block it.
|
||||||
blocked = True
|
blocked = True
|
||||||
|
|
||||||
|
|||||||
@@ -1,435 +0,0 @@
|
|||||||
from blinker import signal
|
|
||||||
from loguru import logger
|
|
||||||
from typing import Dict, List, Any, Optional
|
|
||||||
import heapq
|
|
||||||
import queue
|
|
||||||
import threading
|
|
||||||
|
|
||||||
try:
|
|
||||||
import janus
|
|
||||||
except ImportError:
|
|
||||||
logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus")
|
|
||||||
raise
|
|
||||||
|
|
||||||
|
|
||||||
class RecheckPriorityQueue:
|
|
||||||
"""
|
|
||||||
Ultra-reliable priority queue using janus for async/sync bridging.
|
|
||||||
|
|
||||||
CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
|
|
||||||
- sync_q: Used by Flask routes, ticker threads, and other synchronous code
|
|
||||||
- async_q: Used by async workers (the actual fetchers/processors) and coroutines
|
|
||||||
|
|
||||||
DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
|
|
||||||
- Synchronous code (Flask, threads) cannot use async methods without blocking
|
|
||||||
- Async code cannot use sync methods without blocking the event loop
|
|
||||||
- janus provides the only safe bridge between these two worlds
|
|
||||||
|
|
||||||
Attempting to unify to async-only would require:
|
|
||||||
- Converting all Flask routes to async (major breaking change)
|
|
||||||
- Using asyncio.run() in sync contexts (causes deadlocks)
|
|
||||||
- Thread-pool wrapping (adds complexity and overhead)
|
|
||||||
|
|
||||||
Minimal implementation focused on reliability:
|
|
||||||
- Pure janus for sync/async bridge
|
|
||||||
- Thread-safe priority ordering
|
|
||||||
- Bulletproof error handling with critical logging
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, maxsize: int = 0):
|
|
||||||
try:
|
|
||||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
|
||||||
# BOTH interfaces required - see class docstring for why
|
|
||||||
self.sync_q = self._janus_queue.sync_q # Flask routes, ticker thread
|
|
||||||
self.async_q = self._janus_queue.async_q # Async workers
|
|
||||||
|
|
||||||
# Priority storage - thread-safe
|
|
||||||
self._priority_items = []
|
|
||||||
self._lock = threading.RLock()
|
|
||||||
|
|
||||||
# Signals for UI updates
|
|
||||||
self.queue_length_signal = signal('queue_length')
|
|
||||||
|
|
||||||
logger.debug("RecheckPriorityQueue initialized successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
# SYNC INTERFACE (for ticker thread)
|
|
||||||
def put(self, item, block: bool = True, timeout: Optional[float] = None):
|
|
||||||
"""Thread-safe sync put with priority ordering"""
|
|
||||||
try:
|
|
||||||
# Add to priority storage
|
|
||||||
with self._lock:
|
|
||||||
heapq.heappush(self._priority_items, item)
|
|
||||||
|
|
||||||
# Notify via janus sync queue
|
|
||||||
self.sync_q.put(True, block=block, timeout=timeout)
|
|
||||||
|
|
||||||
# Emit signals
|
|
||||||
self._emit_put_signals(item)
|
|
||||||
|
|
||||||
logger.debug(f"Successfully queued item: {self._get_item_uuid(item)}")
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}")
|
|
||||||
# Remove from priority storage if janus put failed
|
|
||||||
try:
|
|
||||||
with self._lock:
|
|
||||||
if item in self._priority_items:
|
|
||||||
self._priority_items.remove(item)
|
|
||||||
heapq.heapify(self._priority_items)
|
|
||||||
except Exception as cleanup_e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to cleanup after put failure: {str(e)}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
|
||||||
"""Thread-safe sync get with priority ordering"""
|
|
||||||
try:
|
|
||||||
# Wait for notification
|
|
||||||
self.sync_q.get(block=block, timeout=timeout)
|
|
||||||
|
|
||||||
# Get highest priority item
|
|
||||||
with self._lock:
|
|
||||||
if not self._priority_items:
|
|
||||||
logger.critical(f"CRITICAL: Queue notification received but no priority items available")
|
|
||||||
raise Exception("Priority queue inconsistency")
|
|
||||||
item = heapq.heappop(self._priority_items)
|
|
||||||
|
|
||||||
# Emit signals
|
|
||||||
self._emit_get_signals()
|
|
||||||
|
|
||||||
logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
|
|
||||||
return item
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to get item from queue: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
# ASYNC INTERFACE (for workers)
|
|
||||||
async def async_put(self, item):
|
|
||||||
"""Pure async put with priority ordering"""
|
|
||||||
try:
|
|
||||||
# Add to priority storage
|
|
||||||
with self._lock:
|
|
||||||
heapq.heappush(self._priority_items, item)
|
|
||||||
|
|
||||||
# Notify via janus async queue
|
|
||||||
await self.async_q.put(True)
|
|
||||||
|
|
||||||
# Emit signals
|
|
||||||
self._emit_put_signals(item)
|
|
||||||
|
|
||||||
logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
|
|
||||||
return True
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
|
|
||||||
# Remove from priority storage if janus put failed
|
|
||||||
try:
|
|
||||||
with self._lock:
|
|
||||||
if item in self._priority_items:
|
|
||||||
self._priority_items.remove(item)
|
|
||||||
heapq.heapify(self._priority_items)
|
|
||||||
except Exception as cleanup_e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to cleanup after async put failure: {str(e)}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def async_get(self):
|
|
||||||
"""Pure async get with priority ordering"""
|
|
||||||
try:
|
|
||||||
# Wait for notification
|
|
||||||
await self.async_q.get()
|
|
||||||
|
|
||||||
# Get highest priority item
|
|
||||||
with self._lock:
|
|
||||||
if not self._priority_items:
|
|
||||||
logger.critical(f"CRITICAL: Async queue notification received but no priority items available")
|
|
||||||
raise Exception("Priority queue inconsistency")
|
|
||||||
item = heapq.heappop(self._priority_items)
|
|
||||||
|
|
||||||
# Emit signals
|
|
||||||
self._emit_get_signals()
|
|
||||||
|
|
||||||
logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
|
|
||||||
return item
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
# UTILITY METHODS
|
|
||||||
def qsize(self) -> int:
|
|
||||||
"""Get current queue size"""
|
|
||||||
try:
|
|
||||||
with self._lock:
|
|
||||||
return len(self._priority_items)
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to get queue size: {str(e)}")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
def empty(self) -> bool:
|
|
||||||
"""Check if queue is empty"""
|
|
||||||
return self.qsize() == 0
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
"""Close the janus queue"""
|
|
||||||
try:
|
|
||||||
self._janus_queue.close()
|
|
||||||
logger.debug("RecheckPriorityQueue closed successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")
|
|
||||||
|
|
||||||
# COMPATIBILITY METHODS (from original implementation)
|
|
||||||
@property
|
|
||||||
def queue(self):
|
|
||||||
"""Provide compatibility with original queue access"""
|
|
||||||
try:
|
|
||||||
with self._lock:
|
|
||||||
return list(self._priority_items)
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to get queue list: {str(e)}")
|
|
||||||
return []
|
|
||||||
|
|
||||||
def get_uuid_position(self, target_uuid: str) -> Dict[str, Any]:
|
|
||||||
"""Find position of UUID in queue"""
|
|
||||||
try:
|
|
||||||
with self._lock:
|
|
||||||
queue_list = list(self._priority_items)
|
|
||||||
total_items = len(queue_list)
|
|
||||||
|
|
||||||
if total_items == 0:
|
|
||||||
return {'position': None, 'total_items': 0, 'priority': None, 'found': False}
|
|
||||||
|
|
||||||
# Find target item
|
|
||||||
for item in queue_list:
|
|
||||||
if (hasattr(item, 'item') and isinstance(item.item, dict) and
|
|
||||||
item.item.get('uuid') == target_uuid):
|
|
||||||
|
|
||||||
# Count items with higher priority
|
|
||||||
position = sum(1 for other in queue_list if other.priority < item.priority)
|
|
||||||
return {
|
|
||||||
'position': position,
|
|
||||||
'total_items': total_items,
|
|
||||||
'priority': item.priority,
|
|
||||||
'found': True
|
|
||||||
}
|
|
||||||
|
|
||||||
return {'position': None, 'total_items': total_items, 'priority': None, 'found': False}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to get UUID position for {target_uuid}: {str(e)}")
|
|
||||||
return {'position': None, 'total_items': 0, 'priority': None, 'found': False}
|
|
||||||
|
|
||||||
def get_all_queued_uuids(self, limit: Optional[int] = None, offset: int = 0) -> Dict[str, Any]:
|
|
||||||
"""Get all queued UUIDs with pagination"""
|
|
||||||
try:
|
|
||||||
with self._lock:
|
|
||||||
queue_list = sorted(self._priority_items) # Sort by priority
|
|
||||||
total_items = len(queue_list)
|
|
||||||
|
|
||||||
if total_items == 0:
|
|
||||||
return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}
|
|
||||||
|
|
||||||
# Apply pagination
|
|
||||||
end_idx = min(offset + limit, total_items) if limit else total_items
|
|
||||||
items_to_process = queue_list[offset:end_idx]
|
|
||||||
|
|
||||||
result = []
|
|
||||||
for position, item in enumerate(items_to_process, start=offset):
|
|
||||||
if (hasattr(item, 'item') and isinstance(item.item, dict) and
|
|
||||||
'uuid' in item.item):
|
|
||||||
result.append({
|
|
||||||
'uuid': item.item['uuid'],
|
|
||||||
'position': position,
|
|
||||||
'priority': item.priority
|
|
||||||
})
|
|
||||||
|
|
||||||
return {
|
|
||||||
'items': result,
|
|
||||||
'total_items': total_items,
|
|
||||||
'returned_items': len(result),
|
|
||||||
'has_more': (offset + len(result)) < total_items
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to get all queued UUIDs: {str(e)}")
|
|
||||||
return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}
|
|
||||||
|
|
||||||
def get_queue_summary(self) -> Dict[str, Any]:
|
|
||||||
"""Get queue summary statistics"""
|
|
||||||
try:
|
|
||||||
with self._lock:
|
|
||||||
queue_list = list(self._priority_items)
|
|
||||||
total_items = len(queue_list)
|
|
||||||
|
|
||||||
if total_items == 0:
|
|
||||||
return {
|
|
||||||
'total_items': 0, 'priority_breakdown': {},
|
|
||||||
'immediate_items': 0, 'clone_items': 0, 'scheduled_items': 0
|
|
||||||
}
|
|
||||||
|
|
||||||
immediate_items = clone_items = scheduled_items = 0
|
|
||||||
priority_counts = {}
|
|
||||||
|
|
||||||
for item in queue_list:
|
|
||||||
priority = item.priority
|
|
||||||
priority_counts[priority] = priority_counts.get(priority, 0) + 1
|
|
||||||
|
|
||||||
if priority == 1:
|
|
||||||
immediate_items += 1
|
|
||||||
elif priority == 5:
|
|
||||||
clone_items += 1
|
|
||||||
elif priority > 100:
|
|
||||||
scheduled_items += 1
|
|
||||||
|
|
||||||
return {
|
|
||||||
'total_items': total_items,
|
|
||||||
'priority_breakdown': priority_counts,
|
|
||||||
'immediate_items': immediate_items,
|
|
||||||
'clone_items': clone_items,
|
|
||||||
'scheduled_items': scheduled_items,
|
|
||||||
'min_priority': min(priority_counts.keys()) if priority_counts else None,
|
|
||||||
'max_priority': max(priority_counts.keys()) if priority_counts else None
|
|
||||||
}
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to get queue summary: {str(e)}")
|
|
||||||
return {'total_items': 0, 'priority_breakdown': {}, 'immediate_items': 0,
|
|
||||||
'clone_items': 0, 'scheduled_items': 0}
|
|
||||||
|
|
||||||
# PRIVATE METHODS
|
|
||||||
def _get_item_uuid(self, item) -> str:
|
|
||||||
"""Safely extract UUID from item for logging"""
|
|
||||||
try:
|
|
||||||
if hasattr(item, 'item') and isinstance(item.item, dict):
|
|
||||||
return item.item.get('uuid', 'unknown')
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return 'unknown'
|
|
||||||
|
|
||||||
def _emit_put_signals(self, item):
|
|
||||||
"""Emit signals when item is added"""
|
|
||||||
try:
|
|
||||||
# Watch update signal
|
|
||||||
if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
|
|
||||||
watch_check_update = signal('watch_check_update')
|
|
||||||
if watch_check_update:
|
|
||||||
watch_check_update.send(watch_uuid=item.item['uuid'])
|
|
||||||
|
|
||||||
# Queue length signal
|
|
||||||
if self.queue_length_signal:
|
|
||||||
self.queue_length_signal.send(length=self.qsize())
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}")
|
|
||||||
|
|
||||||
def _emit_get_signals(self):
|
|
||||||
"""Emit signals when item is removed"""
|
|
||||||
try:
|
|
||||||
if self.queue_length_signal:
|
|
||||||
self.queue_length_signal.send(length=self.qsize())
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to emit get signals: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
class NotificationQueue:
|
|
||||||
"""
|
|
||||||
Ultra-reliable notification queue using pure janus.
|
|
||||||
|
|
||||||
CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
|
|
||||||
- sync_q: Used by Flask routes, ticker threads, and other synchronous code
|
|
||||||
- async_q: Used by async workers and coroutines
|
|
||||||
|
|
||||||
DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts.
|
|
||||||
See RecheckPriorityQueue docstring above for detailed explanation.
|
|
||||||
|
|
||||||
Simple wrapper around janus with bulletproof error handling.
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, maxsize: int = 0):
|
|
||||||
try:
|
|
||||||
self._janus_queue = janus.Queue(maxsize=maxsize)
|
|
||||||
# BOTH interfaces required - see class docstring for why
|
|
||||||
self.sync_q = self._janus_queue.sync_q # Flask routes, threads
|
|
||||||
self.async_q = self._janus_queue.async_q # Async workers
|
|
||||||
self.notification_event_signal = signal('notification_event')
|
|
||||||
logger.debug("NotificationQueue initialized successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}")
|
|
||||||
raise
|
|
||||||
|
|
||||||
def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
|
|
||||||
"""Thread-safe sync put with signal emission"""
|
|
||||||
try:
|
|
||||||
self.sync_q.put(item, block=block, timeout=timeout)
|
|
||||||
self._emit_notification_signal(item)
|
|
||||||
logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
async def async_put(self, item: Dict[str, Any]):
|
|
||||||
"""Pure async put with signal emission"""
|
|
||||||
try:
|
|
||||||
await self.async_q.put(item)
|
|
||||||
self._emit_notification_signal(item)
|
|
||||||
logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
|
|
||||||
return True
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}")
|
|
||||||
return False
|
|
||||||
|
|
||||||
def get(self, block: bool = True, timeout: Optional[float] = None):
|
|
||||||
"""Thread-safe sync get"""
|
|
||||||
try:
|
|
||||||
return self.sync_q.get(block=block, timeout=timeout)
|
|
||||||
except queue.Empty as e:
|
|
||||||
raise e
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to get notification: {str(e)}")
|
|
||||||
raise e
|
|
||||||
|
|
||||||
async def async_get(self):
|
|
||||||
"""Pure async get"""
|
|
||||||
try:
|
|
||||||
return await self.async_q.get()
|
|
||||||
except queue.Empty as e:
|
|
||||||
raise e
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to async get notification: {str(e)}")
|
|
||||||
raise e
|
|
||||||
|
|
||||||
def qsize(self) -> int:
|
|
||||||
"""Get current queue size"""
|
|
||||||
try:
|
|
||||||
return self.sync_q.qsize()
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}")
|
|
||||||
return 0
|
|
||||||
|
|
||||||
def empty(self) -> bool:
|
|
||||||
"""Check if queue is empty"""
|
|
||||||
return self.qsize() == 0
|
|
||||||
|
|
||||||
def close(self):
|
|
||||||
"""Close the janus queue"""
|
|
||||||
try:
|
|
||||||
self._janus_queue.close()
|
|
||||||
logger.debug("NotificationQueue closed successfully")
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}")
|
|
||||||
|
|
||||||
def _emit_notification_signal(self, item: Dict[str, Any]):
|
|
||||||
"""Emit notification signal"""
|
|
||||||
try:
|
|
||||||
if self.notification_event_signal and isinstance(item, dict):
|
|
||||||
watch_uuid = item.get('uuid')
|
|
||||||
if watch_uuid:
|
|
||||||
self.notification_event_signal.send(watch_uuid=watch_uuid)
|
|
||||||
else:
|
|
||||||
self.notification_event_signal.send()
|
|
||||||
except Exception as e:
|
|
||||||
logger.critical(f"CRITICAL: Failed to emit notification signal: {str(e)}")
|
|
||||||
@@ -1,124 +0,0 @@
|
|||||||
# Real-time Socket.IO Implementation
|
|
||||||
|
|
||||||
This directory contains the Socket.IO implementation for changedetection.io's real-time updates.
|
|
||||||
|
|
||||||
## Architecture Overview
|
|
||||||
|
|
||||||
The real-time system provides live updates to the web interface for:
|
|
||||||
- Watch status changes (checking, completed, errors)
|
|
||||||
- Queue length updates
|
|
||||||
- General statistics updates
|
|
||||||
|
|
||||||
## Current Implementation
|
|
||||||
|
|
||||||
### Socket.IO Configuration
|
|
||||||
- **Async Mode**: `threading` (default) or `gevent` (optional via SOCKETIO_MODE env var)
|
|
||||||
- **Server**: Flask-SocketIO with threading support
|
|
||||||
- **Background Tasks**: Python threading with daemon threads
|
|
||||||
|
|
||||||
### Async Worker Integration
|
|
||||||
- **Workers**: Async workers using asyncio for watch processing
|
|
||||||
- **Queue**: AsyncSignalPriorityQueue for job distribution
|
|
||||||
- **Signals**: Blinker signals for real-time updates between workers and Socket.IO
|
|
||||||
|
|
||||||
### Environment Variables
|
|
||||||
- `SOCKETIO_MODE=threading` (default, recommended)
|
|
||||||
- `SOCKETIO_MODE=gevent` (optional, has cross-platform limitations)
|
|
||||||
|
|
||||||
## Architecture Decision: Why Threading Mode?
|
|
||||||
|
|
||||||
### Previous Issues with Eventlet
|
|
||||||
**Eventlet was completely removed** due to fundamental compatibility issues:
|
|
||||||
|
|
||||||
1. **Monkey Patching Conflicts**: `eventlet.monkey_patch()` globally replaced Python's threading/socket modules, causing conflicts with:
|
|
||||||
- Playwright's synchronous browser automation
|
|
||||||
- Async worker event loops
|
|
||||||
- Various Python libraries expecting real threading
|
|
||||||
|
|
||||||
2. **Python 3.12+ Compatibility**: Eventlet had issues with newer Python versions and asyncio integration
|
|
||||||
|
|
||||||
3. **CVE-2023-29483**: Security vulnerability in eventlet's dnspython dependency
|
|
||||||
|
|
||||||
### Current Solution Benefits
|
|
||||||
✅ **Threading Mode Advantages**:
|
|
||||||
- Full compatibility with async workers and Playwright
|
|
||||||
- No monkey patching - uses standard Python threading
|
|
||||||
- Better Python 3.12+ support
|
|
||||||
- Cross-platform compatibility (Windows, macOS, Linux)
|
|
||||||
- No external async library dependencies
|
|
||||||
- Fast shutdown capabilities
|
|
||||||
|
|
||||||
✅ **Optional Gevent Support**:
|
|
||||||
- Available via `SOCKETIO_MODE=gevent` for high-concurrency scenarios
|
|
||||||
- Cross-platform limitations documented in requirements.txt
|
|
||||||
- Not recommended as default due to Windows socket limits and macOS ARM build issues
|
|
||||||
|
|
||||||
## Socket.IO Mode Configuration
|
|
||||||
|
|
||||||
### Threading Mode (Default)
|
|
||||||
```python
|
|
||||||
# Enabled automatically
|
|
||||||
async_mode = 'threading'
|
|
||||||
socketio = SocketIO(app, async_mode='threading')
|
|
||||||
```
|
|
||||||
|
|
||||||
### Gevent Mode (Optional)
|
|
||||||
```bash
|
|
||||||
# Set environment variable
|
|
||||||
export SOCKETIO_MODE=gevent
|
|
||||||
```
|
|
||||||
|
|
||||||
## Background Tasks
|
|
||||||
|
|
||||||
### Queue Polling
|
|
||||||
- **Threading Mode**: `threading.Thread` with `threading.Event` for shutdown
|
|
||||||
- **Signal Handling**: Blinker signals for watch state changes
|
|
||||||
- **Real-time Updates**: Direct Socket.IO `emit()` calls to connected clients
|
|
||||||
|
|
||||||
### Worker Integration
|
|
||||||
- **Async Workers**: Run in separate asyncio event loop thread
|
|
||||||
- **Communication**: AsyncSignalPriorityQueue bridges async workers and Socket.IO
|
|
||||||
- **Updates**: Real-time updates sent when workers complete tasks
|
|
||||||
|
|
||||||
## Files in This Directory
|
|
||||||
|
|
||||||
- `socket_server.py`: Main Socket.IO initialization and event handling
|
|
||||||
- `events.py`: Watch operation event handlers
|
|
||||||
- `__init__.py`: Module initialization
|
|
||||||
|
|
||||||
## Production Deployment
|
|
||||||
|
|
||||||
### Recommended WSGI Servers
|
|
||||||
For production with Socket.IO threading mode:
|
|
||||||
- **Gunicorn**: `gunicorn --worker-class eventlet changedetection:app` (if using gevent mode)
|
|
||||||
- **uWSGI**: With threading support
|
|
||||||
- **Docker**: Built-in Flask server works well for containerized deployments
|
|
||||||
|
|
||||||
### Performance Considerations
|
|
||||||
- Threading mode: Better memory usage, standard Python threading
|
|
||||||
- Gevent mode: Higher concurrency but platform limitations
|
|
||||||
- Async workers: Separate from Socket.IO, provides scalability
|
|
||||||
|
|
||||||
## Environment Variables
|
|
||||||
|
|
||||||
| Variable | Default | Description |
|
|
||||||
|----------|---------|-------------|
|
|
||||||
| `SOCKETIO_MODE` | `threading` | Socket.IO async mode (`threading` or `gevent`) |
|
|
||||||
| `FETCH_WORKERS` | `10` | Number of async workers for watch processing |
|
|
||||||
| `CHANGEDETECTION_HOST` | `0.0.0.0` | Server bind address |
|
|
||||||
| `CHANGEDETECTION_PORT` | `5000` | Server port |
|
|
||||||
|
|
||||||
## Debugging Tips
|
|
||||||
|
|
||||||
1. **Socket.IO Issues**: Check browser dev tools for WebSocket connection errors
|
|
||||||
2. **Threading Issues**: Monitor with `ps -T` to check thread count
|
|
||||||
3. **Worker Issues**: Use `/worker-health` endpoint to check async worker status
|
|
||||||
4. **Queue Issues**: Use `/queue-status` endpoint to monitor job queue
|
|
||||||
5. **Performance**: Use `/gc-cleanup` endpoint to trigger memory cleanup
|
|
||||||
|
|
||||||
## Migration Notes
|
|
||||||
|
|
||||||
If upgrading from eventlet-based versions:
|
|
||||||
- Remove any `EVENTLET_*` environment variables
|
|
||||||
- No code changes needed - Socket.IO mode is automatically configured
|
|
||||||
- Optional: Set `SOCKETIO_MODE=gevent` if high concurrency is required and platform supports it
|
|
||||||
@@ -1,3 +0,0 @@
|
|||||||
"""
|
|
||||||
Socket.IO realtime updates module for changedetection.io
|
|
||||||
"""
|
|
||||||
@@ -1,58 +0,0 @@
|
|||||||
from flask_socketio import emit
|
|
||||||
from loguru import logger
|
|
||||||
from blinker import signal
|
|
||||||
|
|
||||||
|
|
||||||
def register_watch_operation_handlers(socketio, datastore):
|
|
||||||
"""Register Socket.IO event handlers for watch operations"""
|
|
||||||
|
|
||||||
@socketio.on('watch_operation')
|
|
||||||
def handle_watch_operation(data):
|
|
||||||
"""Handle watch operations like pause, mute, recheck via Socket.IO"""
|
|
||||||
try:
|
|
||||||
op = data.get('op')
|
|
||||||
uuid = data.get('uuid')
|
|
||||||
|
|
||||||
logger.debug(f"Socket.IO: Received watch operation '{op}' for UUID {uuid}")
|
|
||||||
|
|
||||||
if not op or not uuid:
|
|
||||||
emit('operation_result', {'success': False, 'error': 'Missing operation or UUID'})
|
|
||||||
return
|
|
||||||
|
|
||||||
# Check if watch exists
|
|
||||||
if not datastore.data['watching'].get(uuid):
|
|
||||||
emit('operation_result', {'success': False, 'error': 'Watch not found'})
|
|
||||||
return
|
|
||||||
|
|
||||||
watch = datastore.data['watching'][uuid]
|
|
||||||
|
|
||||||
# Perform the operation
|
|
||||||
if op == 'pause':
|
|
||||||
watch.toggle_pause()
|
|
||||||
logger.info(f"Socket.IO: Toggled pause for watch {uuid}")
|
|
||||||
elif op == 'mute':
|
|
||||||
watch.toggle_mute()
|
|
||||||
logger.info(f"Socket.IO: Toggled mute for watch {uuid}")
|
|
||||||
elif op == 'recheck':
|
|
||||||
# Import here to avoid circular imports
|
|
||||||
from changedetectionio.flask_app import update_q
|
|
||||||
from changedetectionio import queuedWatchMetaData
|
|
||||||
from changedetectionio import worker_handler
|
|
||||||
|
|
||||||
worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
|
||||||
logger.info(f"Socket.IO: Queued recheck for watch {uuid}")
|
|
||||||
else:
|
|
||||||
emit('operation_result', {'success': False, 'error': f'Unknown operation: {op}'})
|
|
||||||
return
|
|
||||||
|
|
||||||
# Send signal to update UI
|
|
||||||
watch_check_update = signal('watch_check_update')
|
|
||||||
if watch_check_update:
|
|
||||||
watch_check_update.send(watch_uuid=uuid)
|
|
||||||
|
|
||||||
# Send success response to client
|
|
||||||
emit('operation_result', {'success': True, 'operation': op, 'uuid': uuid})
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Socket.IO error in handle_watch_operation: {str(e)}")
|
|
||||||
emit('operation_result', {'success': False, 'error': str(e)})
|
|
||||||
@@ -1,407 +0,0 @@
|
|||||||
import timeago
|
|
||||||
from flask_socketio import SocketIO
|
|
||||||
|
|
||||||
import time
|
|
||||||
import os
|
|
||||||
from loguru import logger
|
|
||||||
from blinker import signal
|
|
||||||
|
|
||||||
from changedetectionio import strtobool
|
|
||||||
|
|
||||||
|
|
||||||
class SignalHandler:
|
|
||||||
"""A standalone class to receive signals"""
|
|
||||||
|
|
||||||
def __init__(self, socketio_instance, datastore):
|
|
||||||
self.socketio_instance = socketio_instance
|
|
||||||
self.datastore = datastore
|
|
||||||
|
|
||||||
# Connect to the watch_check_update signal
|
|
||||||
from changedetectionio.flask_app import watch_check_update as wcc
|
|
||||||
wcc.connect(self.handle_signal, weak=False)
|
|
||||||
# logger.info("SignalHandler: Connected to signal from direct import")
|
|
||||||
|
|
||||||
# Connect to the queue_length signal
|
|
||||||
queue_length_signal = signal('queue_length')
|
|
||||||
queue_length_signal.connect(self.handle_queue_length, weak=False)
|
|
||||||
# logger.info("SignalHandler: Connected to queue_length signal")
|
|
||||||
|
|
||||||
watch_delete_signal = signal('watch_deleted')
|
|
||||||
watch_delete_signal.connect(self.handle_deleted_signal, weak=False)
|
|
||||||
|
|
||||||
watch_favicon_bumped_signal = signal('watch_favicon_bump')
|
|
||||||
watch_favicon_bumped_signal.connect(self.handle_watch_bumped_favicon_signal, weak=False)
|
|
||||||
|
|
||||||
# Connect to the notification_event signal
|
|
||||||
notification_event_signal = signal('notification_event')
|
|
||||||
notification_event_signal.connect(self.handle_notification_event, weak=False)
|
|
||||||
logger.info("SignalHandler: Connected to notification_event signal")
|
|
||||||
|
|
||||||
# Create and start the queue update thread using standard threading
|
|
||||||
import threading
|
|
||||||
self.polling_emitter_thread = threading.Thread(
|
|
||||||
target=self.polling_emit_running_or_queued_watches_threaded,
|
|
||||||
daemon=True
|
|
||||||
)
|
|
||||||
self.polling_emitter_thread.start()
|
|
||||||
logger.info("Started polling thread using threading (eventlet-free)")
|
|
||||||
|
|
||||||
# Store the thread reference in socketio for clean shutdown
|
|
||||||
self.socketio_instance.polling_emitter_thread = self.polling_emitter_thread
|
|
||||||
|
|
||||||
def handle_signal(self, *args, **kwargs):
|
|
||||||
logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs")
|
|
||||||
# Safely extract the watch UUID from kwargs
|
|
||||||
watch_uuid = kwargs.get('watch_uuid')
|
|
||||||
app_context = kwargs.get('app_context')
|
|
||||||
|
|
||||||
if watch_uuid:
|
|
||||||
# Get the watch object from the datastore
|
|
||||||
watch = self.datastore.data['watching'].get(watch_uuid)
|
|
||||||
if watch:
|
|
||||||
if app_context:
|
|
||||||
# note
|
|
||||||
with app_context.app_context():
|
|
||||||
with app_context.test_request_context():
|
|
||||||
# Forward to handle_watch_update with the watch parameter
|
|
||||||
handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore)
|
|
||||||
else:
|
|
||||||
handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore)
|
|
||||||
|
|
||||||
logger.trace(f"Signal handler processed watch UUID {watch_uuid}")
|
|
||||||
else:
|
|
||||||
logger.warning(f"Watch UUID {watch_uuid} not found in datastore")
|
|
||||||
|
|
||||||
def handle_watch_bumped_favicon_signal(self, *args, **kwargs):
|
|
||||||
watch_uuid = kwargs.get('watch_uuid')
|
|
||||||
if watch_uuid:
|
|
||||||
# Emit the queue size to all connected clients
|
|
||||||
self.socketio_instance.emit("watch_bumped_favicon", {
|
|
||||||
"uuid": watch_uuid,
|
|
||||||
"event_timestamp": time.time()
|
|
||||||
})
|
|
||||||
logger.debug(f"Watch UUID {watch_uuid} got its favicon updated")
|
|
||||||
|
|
||||||
def handle_deleted_signal(self, *args, **kwargs):
|
|
||||||
watch_uuid = kwargs.get('watch_uuid')
|
|
||||||
if watch_uuid:
|
|
||||||
# Emit the queue size to all connected clients
|
|
||||||
self.socketio_instance.emit("watch_deleted", {
|
|
||||||
"uuid": watch_uuid,
|
|
||||||
"event_timestamp": time.time()
|
|
||||||
})
|
|
||||||
logger.debug(f"Watch UUID {watch_uuid} was deleted")
|
|
||||||
|
|
||||||
def handle_queue_length(self, *args, **kwargs):
|
|
||||||
"""Handle queue_length signal and emit to all clients"""
|
|
||||||
try:
|
|
||||||
queue_length = kwargs.get('length', 0)
|
|
||||||
logger.debug(f"SignalHandler: Queue length update received: {queue_length}")
|
|
||||||
|
|
||||||
# Emit the queue size to all connected clients
|
|
||||||
self.socketio_instance.emit("queue_size", {
|
|
||||||
"q_length": queue_length,
|
|
||||||
"event_timestamp": time.time()
|
|
||||||
})
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Socket.IO error in handle_queue_length: {str(e)}")
|
|
||||||
|
|
||||||
def handle_notification_event(self, *args, **kwargs):
|
|
||||||
"""Handle notification_event signal and emit to all clients"""
|
|
||||||
try:
|
|
||||||
watch_uuid = kwargs.get('watch_uuid')
|
|
||||||
logger.debug(f"SignalHandler: Notification event received for watch UUID: {watch_uuid}")
|
|
||||||
|
|
||||||
# Emit the notification event to all connected clients
|
|
||||||
self.socketio_instance.emit("notification_event", {
|
|
||||||
"watch_uuid": watch_uuid,
|
|
||||||
"event_timestamp": time.time()
|
|
||||||
})
|
|
||||||
|
|
||||||
logger.trace(f"Socket.IO: Emitted notification_event for watch UUID {watch_uuid}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Socket.IO error in handle_notification_event: {str(e)}")
|
|
||||||
|
|
||||||
def polling_emit_running_or_queued_watches_threaded(self):
|
|
||||||
"""Threading version of polling for Windows compatibility"""
|
|
||||||
import time
|
|
||||||
import threading
|
|
||||||
logger.info("Queue update thread started (threading mode)")
|
|
||||||
|
|
||||||
# Import here to avoid circular imports
|
|
||||||
from changedetectionio.flask_app import app
|
|
||||||
from changedetectionio import worker_handler
|
|
||||||
watch_check_update = signal('watch_check_update')
|
|
||||||
|
|
||||||
# Track previous state to avoid unnecessary emissions
|
|
||||||
previous_running_uuids = set()
|
|
||||||
|
|
||||||
# Run until app shutdown - check exit flag more frequently for fast shutdown
|
|
||||||
exit_event = getattr(app.config, 'exit', threading.Event())
|
|
||||||
|
|
||||||
while not exit_event.is_set():
|
|
||||||
try:
|
|
||||||
# Get current running UUIDs from async workers
|
|
||||||
running_uuids = set(worker_handler.get_running_uuids())
|
|
||||||
|
|
||||||
# Only send updates for UUIDs that changed state
|
|
||||||
newly_running = running_uuids - previous_running_uuids
|
|
||||||
no_longer_running = previous_running_uuids - running_uuids
|
|
||||||
|
|
||||||
# Send updates for newly running UUIDs (but exit fast if shutdown requested)
|
|
||||||
for uuid in newly_running:
|
|
||||||
if exit_event.is_set():
|
|
||||||
break
|
|
||||||
logger.trace(f"Threading polling: UUID {uuid} started processing")
|
|
||||||
with app.app_context():
|
|
||||||
watch_check_update.send(app_context=app, watch_uuid=uuid)
|
|
||||||
time.sleep(0.01) # Small yield
|
|
||||||
|
|
||||||
# Send updates for UUIDs that finished processing (but exit fast if shutdown requested)
|
|
||||||
if not exit_event.is_set():
|
|
||||||
for uuid in no_longer_running:
|
|
||||||
if exit_event.is_set():
|
|
||||||
break
|
|
||||||
logger.trace(f"Threading polling: UUID {uuid} finished processing")
|
|
||||||
with app.app_context():
|
|
||||||
watch_check_update.send(app_context=app, watch_uuid=uuid)
|
|
||||||
time.sleep(0.01) # Small yield
|
|
||||||
|
|
||||||
# Update tracking for next iteration
|
|
||||||
previous_running_uuids = running_uuids
|
|
||||||
|
|
||||||
# Sleep between polling cycles, but check exit flag every 0.5 seconds for fast shutdown
|
|
||||||
for _ in range(20): # 20 * 0.5 = 10 seconds total
|
|
||||||
if exit_event.is_set():
|
|
||||||
break
|
|
||||||
time.sleep(0.5)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Error in threading polling: {str(e)}")
|
|
||||||
# Even during error recovery, check for exit quickly
|
|
||||||
for _ in range(1): # 1 * 0.5 = 0.5 seconds
|
|
||||||
if exit_event.is_set():
|
|
||||||
break
|
|
||||||
time.sleep(0.5)
|
|
||||||
|
|
||||||
# Check if we're in pytest environment - if so, be more gentle with logging
|
|
||||||
import sys
|
|
||||||
in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
|
|
||||||
|
|
||||||
if not in_pytest:
|
|
||||||
logger.info("Queue update thread stopped (threading mode)")
|
|
||||||
|
|
||||||
|
|
||||||
def handle_watch_update(socketio, **kwargs):
|
|
||||||
"""Handle watch update signal from blinker"""
|
|
||||||
try:
|
|
||||||
watch = kwargs.get('watch')
|
|
||||||
datastore = kwargs.get('datastore')
|
|
||||||
|
|
||||||
# Emit the watch update to all connected clients
|
|
||||||
from changedetectionio.flask_app import update_q
|
|
||||||
from changedetectionio.flask_app import _jinja2_filter_datetime
|
|
||||||
from changedetectionio import worker_handler
|
|
||||||
|
|
||||||
# Get list of watches that are currently running
|
|
||||||
running_uuids = worker_handler.get_running_uuids()
|
|
||||||
|
|
||||||
# Get list of watches in the queue
|
|
||||||
queue_list = []
|
|
||||||
for q_item in update_q.queue:
|
|
||||||
if hasattr(q_item, 'item') and 'uuid' in q_item.item:
|
|
||||||
queue_list.append(q_item.item['uuid'])
|
|
||||||
|
|
||||||
# Get the error texts from the watch
|
|
||||||
error_texts = watch.compile_error_texts()
|
|
||||||
# Create a simplified watch data object to send to clients
|
|
||||||
|
|
||||||
watch_data = {
|
|
||||||
'checking_now': True if watch.get('uuid') in running_uuids else False,
|
|
||||||
'error_text': error_texts,
|
|
||||||
'event_timestamp': time.time(),
|
|
||||||
'fetch_time': watch.get('fetch_time'),
|
|
||||||
'has_error': True if error_texts else False,
|
|
||||||
'has_favicon': True if watch.get_favicon_filename() else False,
|
|
||||||
'history_n': watch.history_n,
|
|
||||||
'last_changed_text': timeago.format(int(watch.last_changed), time.time()) if watch.history_n >= 2 and int(watch.last_changed) > 0 else 'Not yet',
|
|
||||||
'last_checked': watch.get('last_checked'),
|
|
||||||
'last_checked_text': _jinja2_filter_datetime(watch),
|
|
||||||
'notification_muted': True if watch.get('notification_muted') else False,
|
|
||||||
'paused': True if watch.get('paused') else False,
|
|
||||||
'queued': True if watch.get('uuid') in queue_list else False,
|
|
||||||
'unviewed': watch.has_unviewed,
|
|
||||||
'uuid': watch.get('uuid'),
|
|
||||||
}
|
|
||||||
|
|
||||||
errored_count = 0
|
|
||||||
for watch_uuid_iter, watch_iter in datastore.data['watching'].items():
|
|
||||||
if watch_iter.get('last_error'):
|
|
||||||
errored_count += 1
|
|
||||||
|
|
||||||
general_stats = {
|
|
||||||
'count_errors': errored_count,
|
|
||||||
'has_unviewed': datastore.has_unviewed
|
|
||||||
}
|
|
||||||
|
|
||||||
# Debug what's being emitted
|
|
||||||
# logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}")
|
|
||||||
|
|
||||||
# Emit to all clients (no 'broadcast' parameter needed - it's the default behavior)
|
|
||||||
socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats})
|
|
||||||
|
|
||||||
# Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues
|
|
||||||
logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|
|
||||||
|
|
||||||
|
|
||||||
def init_socketio(app, datastore):
|
|
||||||
"""Initialize SocketIO with the main Flask app"""
|
|
||||||
import platform
|
|
||||||
import sys
|
|
||||||
|
|
||||||
# Platform-specific async_mode selection for better stability
|
|
||||||
system = platform.system().lower()
|
|
||||||
python_version = sys.version_info
|
|
||||||
|
|
||||||
# Check for SocketIO mode configuration via environment variable
|
|
||||||
# Default is 'threading' for best cross-platform compatibility
|
|
||||||
socketio_mode = os.getenv('SOCKETIO_MODE', 'threading').lower()
|
|
||||||
|
|
||||||
if socketio_mode == 'gevent':
|
|
||||||
# Use gevent mode (higher concurrency but platform limitations)
|
|
||||||
try:
|
|
||||||
import gevent
|
|
||||||
async_mode = 'gevent'
|
|
||||||
logger.info(f"SOCKETIO_MODE=gevent: Using {async_mode} mode for Socket.IO")
|
|
||||||
except ImportError:
|
|
||||||
async_mode = 'threading'
|
|
||||||
logger.warning(f"SOCKETIO_MODE=gevent but gevent not available, falling back to {async_mode} mode")
|
|
||||||
elif socketio_mode == 'threading':
|
|
||||||
# Use threading mode (default - best compatibility)
|
|
||||||
async_mode = 'threading'
|
|
||||||
logger.info(f"SOCKETIO_MODE=threading: Using {async_mode} mode for Socket.IO")
|
|
||||||
else:
|
|
||||||
# Invalid mode specified, use default
|
|
||||||
async_mode = 'threading'
|
|
||||||
logger.warning(f"Invalid SOCKETIO_MODE='{socketio_mode}', using default {async_mode} mode for Socket.IO")
|
|
||||||
|
|
||||||
# Log platform info for debugging
|
|
||||||
logger.info(f"Platform: {system}, Python: {python_version.major}.{python_version.minor}, Socket.IO mode: {async_mode}")
|
|
||||||
|
|
||||||
# Restrict SocketIO CORS to same origin by default, can be overridden with env var
|
|
||||||
cors_origins = os.environ.get('SOCKETIO_CORS_ORIGINS', None)
|
|
||||||
|
|
||||||
socketio = SocketIO(app,
|
|
||||||
async_mode=async_mode,
|
|
||||||
cors_allowed_origins=cors_origins, # None means same-origin only
|
|
||||||
logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')),
|
|
||||||
engineio_logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')))
|
|
||||||
|
|
||||||
# Set up event handlers
|
|
||||||
logger.info("Socket.IO: Registering connect event handler")
|
|
||||||
|
|
||||||
@socketio.on('checkbox-operation')
|
|
||||||
def event_checkbox_operations(data):
|
|
||||||
from changedetectionio.blueprint.ui import _handle_operations
|
|
||||||
from changedetectionio import queuedWatchMetaData
|
|
||||||
from changedetectionio import worker_handler
|
|
||||||
from changedetectionio.flask_app import update_q, watch_check_update
|
|
||||||
logger.trace(f"Got checkbox operations event: {data}")
|
|
||||||
|
|
||||||
datastore = socketio.datastore
|
|
||||||
|
|
||||||
_handle_operations(
|
|
||||||
op=data.get('op'),
|
|
||||||
uuids=data.get('uuids'),
|
|
||||||
datastore=datastore,
|
|
||||||
extra_data=data.get('extra_data'),
|
|
||||||
worker_handler=worker_handler,
|
|
||||||
update_q=update_q,
|
|
||||||
queuedWatchMetaData=queuedWatchMetaData,
|
|
||||||
watch_check_update=watch_check_update,
|
|
||||||
emit_flash=False
|
|
||||||
)
|
|
||||||
|
|
||||||
@socketio.on('connect')
|
|
||||||
def handle_connect():
|
|
||||||
"""Handle client connection"""
|
|
||||||
# logger.info("Socket.IO: CONNECT HANDLER CALLED - Starting connection process")
|
|
||||||
from flask import request
|
|
||||||
from flask_login import current_user
|
|
||||||
from changedetectionio.flask_app import update_q
|
|
||||||
|
|
||||||
# Access datastore from socketio
|
|
||||||
datastore = socketio.datastore
|
|
||||||
# logger.info(f"Socket.IO: Current user authenticated: {current_user.is_authenticated if hasattr(current_user, 'is_authenticated') else 'No current_user'}")
|
|
||||||
|
|
||||||
# Check if authentication is required and user is not authenticated
|
|
||||||
has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
|
|
||||||
# logger.info(f"Socket.IO: Password enabled: {has_password_enabled}")
|
|
||||||
if has_password_enabled and not current_user.is_authenticated:
|
|
||||||
logger.warning("Socket.IO: Rejecting unauthenticated connection")
|
|
||||||
return False # Reject the connection
|
|
||||||
|
|
||||||
# Send the current queue size to the newly connected client
|
|
||||||
try:
|
|
||||||
queue_size = update_q.qsize()
|
|
||||||
socketio.emit("queue_size", {
|
|
||||||
"q_length": queue_size,
|
|
||||||
"event_timestamp": time.time()
|
|
||||||
}, room=request.sid) # Send only to this client
|
|
||||||
logger.debug(f"Socket.IO: Sent initial queue size {queue_size} to new client")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Socket.IO error sending initial queue size: {str(e)}")
|
|
||||||
|
|
||||||
logger.info("Socket.IO: Client connected")
|
|
||||||
|
|
||||||
# logger.info("Socket.IO: Registering disconnect event handler")
|
|
||||||
@socketio.on('disconnect')
|
|
||||||
def handle_disconnect():
|
|
||||||
"""Handle client disconnection"""
|
|
||||||
logger.info("Socket.IO: Client disconnected")
|
|
||||||
|
|
||||||
# Create a dedicated signal handler that will receive signals and emit them to clients
|
|
||||||
signal_handler = SignalHandler(socketio, datastore)
|
|
||||||
|
|
||||||
# Register watch operation event handlers
|
|
||||||
from .events import register_watch_operation_handlers
|
|
||||||
register_watch_operation_handlers(socketio, datastore)
|
|
||||||
|
|
||||||
# Store the datastore reference on the socketio object for later use
|
|
||||||
socketio.datastore = datastore
|
|
||||||
|
|
||||||
# No stop event needed for threading mode - threads check app.config.exit directly
|
|
||||||
|
|
||||||
# Add a shutdown method to the socketio object
|
|
||||||
def shutdown():
|
|
||||||
"""Shutdown the SocketIO server fast and aggressively"""
|
|
||||||
try:
|
|
||||||
logger.info("Socket.IO: Fast shutdown initiated...")
|
|
||||||
|
|
||||||
# For threading mode, give the thread a very short time to exit gracefully
|
|
||||||
if hasattr(socketio, 'polling_emitter_thread'):
|
|
||||||
if socketio.polling_emitter_thread.is_alive():
|
|
||||||
logger.info("Socket.IO: Waiting 1 second for polling thread to stop...")
|
|
||||||
socketio.polling_emitter_thread.join(timeout=1.0) # Only 1 second timeout
|
|
||||||
if socketio.polling_emitter_thread.is_alive():
|
|
||||||
logger.info("Socket.IO: Polling thread still running after timeout - continuing with shutdown")
|
|
||||||
else:
|
|
||||||
logger.info("Socket.IO: Polling thread stopped quickly")
|
|
||||||
else:
|
|
||||||
logger.info("Socket.IO: Polling thread already stopped")
|
|
||||||
|
|
||||||
logger.info("Socket.IO: Fast shutdown complete")
|
|
||||||
except Exception as e:
|
|
||||||
logger.error(f"Socket.IO error during shutdown: {str(e)}")
|
|
||||||
|
|
||||||
# Attach the shutdown method to the socketio object
|
|
||||||
socketio.shutdown = shutdown
|
|
||||||
|
|
||||||
logger.info("Socket.IO initialized and attached to main Flask app")
|
|
||||||
logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
|
|
||||||
return socketio
|
|
||||||
@@ -14,8 +14,7 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
|||||||
find tests/test_*py -type f|while read test_name
|
find tests/test_*py -type f|while read test_name
|
||||||
do
|
do
|
||||||
echo "TEST RUNNING $test_name"
|
echo "TEST RUNNING $test_name"
|
||||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
|
pytest $test_name
|
||||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name
|
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "RUNNING WITH BASE_URL SET"
|
echo "RUNNING WITH BASE_URL SET"
|
||||||
@@ -23,7 +22,7 @@ echo "RUNNING WITH BASE_URL SET"
|
|||||||
# Now re-run some tests with BASE_URL enabled
|
# Now re-run some tests with BASE_URL enabled
|
||||||
# Re #65 - Ability to include a link back to the installation, in the notification.
|
# Re #65 - Ability to include a link back to the installation, in the notification.
|
||||||
export BASE_URL="https://really-unique-domain.io"
|
export BASE_URL="https://really-unique-domain.io"
|
||||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
|
pytest tests/test_notification.py
|
||||||
|
|
||||||
|
|
||||||
# Re-run with HIDE_REFERER set - could affect login
|
# Re-run with HIDE_REFERER set - could affect login
|
||||||
@@ -33,14 +32,11 @@ pytest tests/test_access_control.py
|
|||||||
# Re-run a few tests that will trigger brotli based storage
|
# Re-run a few tests that will trigger brotli based storage
|
||||||
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
|
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
|
||||||
pytest tests/test_access_control.py
|
pytest tests/test_access_control.py
|
||||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
|
pytest tests/test_notification.py
|
||||||
pytest tests/test_backend.py
|
pytest tests/test_backend.py
|
||||||
pytest tests/test_rss.py
|
pytest tests/test_rss.py
|
||||||
pytest tests/test_unique_lines.py
|
pytest tests/test_unique_lines.py
|
||||||
|
|
||||||
# Try high concurrency
|
|
||||||
FETCH_WORKERS=130 pytest tests/test_history_consistency.py -v -l
|
|
||||||
|
|
||||||
# Check file:// will pickup a file when enabled
|
# Check file:// will pickup a file when enabled
|
||||||
echo "Hello world" > /tmp/test-file.txt
|
echo "Hello world" > /tmp/test-file.txt
|
||||||
ALLOW_FILE_URI=yes pytest tests/test_security.py
|
ALLOW_FILE_URI=yes pytest tests/test_security.py
|
||||||
|
|||||||
@@ -82,25 +82,3 @@ done
|
|||||||
|
|
||||||
|
|
||||||
docker kill squid-one squid-two squid-custom
|
docker kill squid-one squid-two squid-custom
|
||||||
|
|
||||||
# Test that the UI is returning the correct error message when a proxy is not available
|
|
||||||
|
|
||||||
# Requests
|
|
||||||
docker run --network changedet-network \
|
|
||||||
test-changedetectionio \
|
|
||||||
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py'
|
|
||||||
|
|
||||||
# Playwright
|
|
||||||
docker run --network changedet-network \
|
|
||||||
test-changedetectionio \
|
|
||||||
bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
|
|
||||||
|
|
||||||
# Puppeteer fast
|
|
||||||
docker run --network changedet-network \
|
|
||||||
test-changedetectionio \
|
|
||||||
bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
|
|
||||||
|
|
||||||
# Selenium
|
|
||||||
docker run --network changedet-network \
|
|
||||||
test-changedetectionio \
|
|
||||||
bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py'
|
|
||||||
|
|||||||
@@ -10,15 +10,9 @@ import os
|
|||||||
|
|
||||||
JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))
|
JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))
|
||||||
|
|
||||||
# This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available.
|
|
||||||
# (Which also limits available functions that could be called)
|
|
||||||
def render(template_str, **args: t.Any) -> str:
|
def render(template_str, **args: t.Any) -> str:
|
||||||
jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension'])
|
jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension'])
|
||||||
output = jinja2_env.from_string(template_str).render(args)
|
output = jinja2_env.from_string(template_str).render(args)
|
||||||
return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|
return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|
||||||
|
|
||||||
def render_fully_escaped(content):
|
|
||||||
env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
|
|
||||||
template = env.from_string("{{ some_html|e }}")
|
|
||||||
return template.render(some_html=content)
|
|
||||||
|
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 569 B After Width: | Height: | Size: 569 B |
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 6.2 KiB After Width: | Height: | Size: 6.2 KiB |
@@ -211,14 +211,7 @@ $(document).ready(function () {
|
|||||||
$('input[type=text]', first_available).first().val(x['xpath']);
|
$('input[type=text]', first_available).first().val(x['xpath']);
|
||||||
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
|
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
|
||||||
found_something = true;
|
found_something = true;
|
||||||
}
|
} else {
|
||||||
else if (x['tagName'] === 'select') {
|
|
||||||
$('select', first_available).val('<select> by option text').change();
|
|
||||||
$('input[type=text]', first_available).first().val(x['xpath']);
|
|
||||||
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
|
|
||||||
found_something = true;
|
|
||||||
}
|
|
||||||
else {
|
|
||||||
// There's no good way (that I know) to find if this
|
// There's no good way (that I know) to find if this
|
||||||
// see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging
|
// see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging
|
||||||
// https://codepen.io/azaslavsky/pen/DEJVWv
|
// https://codepen.io/azaslavsky/pen/DEJVWv
|
||||||
@@ -258,10 +251,6 @@ $(document).ready(function () {
|
|||||||
400: function () {
|
400: function () {
|
||||||
// More than likely the CSRF token was lost when the server restarted
|
// More than likely the CSRF token was lost when the server restarted
|
||||||
alert("There was a problem processing the request, please reload the page.");
|
alert("There was a problem processing the request, please reload the page.");
|
||||||
},
|
|
||||||
401: function (err) {
|
|
||||||
// This will be a custom error
|
|
||||||
alert(err.responseText);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}).done(function (data) {
|
}).done(function (data) {
|
||||||
|
|||||||
@@ -8,7 +8,7 @@ $(document).ready(function () {
|
|||||||
$(".addRuleRow").on("click", function(e) {
|
$(".addRuleRow").on("click", function(e) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
|
|
||||||
let currentRow = $(this).closest(".fieldlist-row");
|
let currentRow = $(this).closest("tr");
|
||||||
|
|
||||||
// Clone without events
|
// Clone without events
|
||||||
let newRow = currentRow.clone(false);
|
let newRow = currentRow.clone(false);
|
||||||
@@ -29,8 +29,8 @@ $(document).ready(function () {
|
|||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
|
|
||||||
// Only remove if there's more than one row
|
// Only remove if there's more than one row
|
||||||
if ($("#rulesTable .fieldlist-row").length > 1) {
|
if ($("#rulesTable tbody tr").length > 1) {
|
||||||
$(this).closest(".fieldlist-row").remove();
|
$(this).closest("tr").remove();
|
||||||
reindexRules();
|
reindexRules();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
@@ -39,7 +39,7 @@ $(document).ready(function () {
|
|||||||
$(".verifyRuleRow").on("click", function(e) {
|
$(".verifyRuleRow").on("click", function(e) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
|
|
||||||
let row = $(this).closest(".fieldlist-row");
|
let row = $(this).closest("tr");
|
||||||
let field = row.find("select[name$='field']").val();
|
let field = row.find("select[name$='field']").val();
|
||||||
let operator = row.find("select[name$='operator']").val();
|
let operator = row.find("select[name$='operator']").val();
|
||||||
let value = row.find("input[name$='value']").val();
|
let value = row.find("input[name$='value']").val();
|
||||||
@@ -52,7 +52,7 @@ $(document).ready(function () {
|
|||||||
|
|
||||||
|
|
||||||
// Create a rule object
|
// Create a rule object
|
||||||
let rule = {
|
const rule = {
|
||||||
field: field,
|
field: field,
|
||||||
operator: operator,
|
operator: operator,
|
||||||
value: value
|
value: value
|
||||||
@@ -96,10 +96,6 @@ $(document).ready(function () {
|
|||||||
contentType: false, // Let the browser set the correct content type
|
contentType: false, // Let the browser set the correct content type
|
||||||
success: function (response) {
|
success: function (response) {
|
||||||
if (response.status === "success") {
|
if (response.status === "success") {
|
||||||
if(rule['field'] !== "page_filtered_text") {
|
|
||||||
// A little debug helper for the user
|
|
||||||
$('#verify-state-text').text(`${rule['field']} was value "${response.data[rule['field']]}"`)
|
|
||||||
}
|
|
||||||
if (response.result) {
|
if (response.result) {
|
||||||
alert("✅ Condition PASSES verification against current snapshot!");
|
alert("✅ Condition PASSES verification against current snapshot!");
|
||||||
} else {
|
} else {
|
||||||
@@ -128,7 +124,7 @@ $(document).ready(function () {
|
|||||||
$(".addRuleRow, .removeRuleRow, .verifyRuleRow").off("click");
|
$(".addRuleRow, .removeRuleRow, .verifyRuleRow").off("click");
|
||||||
|
|
||||||
// Reindex all form elements
|
// Reindex all form elements
|
||||||
$("#rulesTable .fieldlist-row").each(function(index) {
|
$("#rulesTable tbody tr").each(function(index) {
|
||||||
$(this).find("select, input").each(function() {
|
$(this).find("select, input").each(function() {
|
||||||
let oldName = $(this).attr("name");
|
let oldName = $(this).attr("name");
|
||||||
let oldId = $(this).attr("id");
|
let oldId = $(this).attr("id");
|
||||||
|
|||||||
13
changedetectionio/static/js/feather-icons.min.js
vendored
13
changedetectionio/static/js/feather-icons.min.js
vendored
File diff suppressed because one or more lines are too long
@@ -159,7 +159,6 @@
|
|||||||
// Return the current request in case it's needed
|
// Return the current request in case it's needed
|
||||||
return requests[namespace];
|
return requests[namespace];
|
||||||
};
|
};
|
||||||
|
|
||||||
})(jQuery);
|
})(jQuery);
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,167 +0,0 @@
|
|||||||
// Socket.IO client-side integration for changedetection.io
|
|
||||||
|
|
||||||
$(document).ready(function () {
|
|
||||||
|
|
||||||
function bindSocketHandlerButtonsEvents(socket) {
|
|
||||||
$('.ajax-op').on('click.socketHandlerNamespace', function (e) {
|
|
||||||
e.preventDefault();
|
|
||||||
const op = $(this).data('op');
|
|
||||||
const uuid = $(this).closest('tr').data('watch-uuid');
|
|
||||||
|
|
||||||
console.log(`Socket.IO: Sending watch operation '${op}' for UUID ${uuid}`);
|
|
||||||
|
|
||||||
// Emit the operation via Socket.IO
|
|
||||||
socket.emit('watch_operation', {
|
|
||||||
'op': op,
|
|
||||||
'uuid': uuid
|
|
||||||
});
|
|
||||||
|
|
||||||
return false;
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
$('#checkbox-operations button').on('click.socketHandlerNamespace', function (e) {
|
|
||||||
e.preventDefault();
|
|
||||||
const op = $(this).val();
|
|
||||||
const checkedUuids = $('input[name="uuids"]:checked').map(function () {
|
|
||||||
return this.value.trim();
|
|
||||||
}).get();
|
|
||||||
console.log(`Socket.IO: Sending watch operation '${op}' for UUIDs:`, checkedUuids);
|
|
||||||
socket.emit('checkbox-operation', {
|
|
||||||
op: op,
|
|
||||||
uuids: checkedUuids,
|
|
||||||
extra_data: $('#op_extradata').val() // Set by the alert() handler
|
|
||||||
});
|
|
||||||
$('input[name="uuids"]:checked').prop('checked', false);
|
|
||||||
$('#check-all:checked').prop('checked', false);
|
|
||||||
return false;
|
|
||||||
});
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// Only try to connect if authentication isn't required or user is authenticated
|
|
||||||
// The 'is_authenticated' variable will be set in the template
|
|
||||||
if (typeof is_authenticated !== 'undefined' ? is_authenticated : true) {
|
|
||||||
// Try to create the socket connection to the SocketIO server - if it fails, the site will still work normally
|
|
||||||
try {
|
|
||||||
// Connect to Socket.IO on the same host/port, with path from template
|
|
||||||
const socket = io({
|
|
||||||
path: socketio_url, // This will be the path prefix like "/app/socket.io" from the template
|
|
||||||
transports: ['websocket', 'polling'],
|
|
||||||
reconnectionDelay: 3000,
|
|
||||||
reconnectionAttempts: 25
|
|
||||||
});
|
|
||||||
|
|
||||||
// Connection status logging
|
|
||||||
socket.on('connect', function () {
|
|
||||||
$('#realtime-conn-error').hide();
|
|
||||||
console.log('Socket.IO connected with path:', socketio_url);
|
|
||||||
console.log('Socket transport:', socket.io.engine.transport.name);
|
|
||||||
bindSocketHandlerButtonsEvents(socket);
|
|
||||||
});
|
|
||||||
|
|
||||||
socket.on('connect_error', function(error) {
|
|
||||||
console.error('Socket.IO connection error:', error);
|
|
||||||
});
|
|
||||||
|
|
||||||
socket.on('connect_timeout', function() {
|
|
||||||
console.error('Socket.IO connection timeout');
|
|
||||||
});
|
|
||||||
|
|
||||||
socket.on('error', function(error) {
|
|
||||||
console.error('Socket.IO error:', error);
|
|
||||||
});
|
|
||||||
|
|
||||||
socket.on('disconnect', function (reason) {
|
|
||||||
console.log('Socket.IO disconnected, reason:', reason);
|
|
||||||
$('.ajax-op').off('.socketHandlerNamespace');
|
|
||||||
$('#realtime-conn-error').show();
|
|
||||||
});
|
|
||||||
|
|
||||||
socket.on('queue_size', function (data) {
|
|
||||||
console.log(`${data.event_timestamp} - Queue size update: ${data.q_length}`);
|
|
||||||
// Update queue size display if implemented in the UI
|
|
||||||
})
|
|
||||||
|
|
||||||
// Listen for operation results
|
|
||||||
socket.on('operation_result', function (data) {
|
|
||||||
if (data.success) {
|
|
||||||
console.log(`Socket.IO: Operation '${data.operation}' completed successfully for UUID ${data.uuid}`);
|
|
||||||
} else {
|
|
||||||
console.error(`Socket.IO: Operation failed: ${data.error}`);
|
|
||||||
alert("There was a problem processing the request: " + data.error);
|
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
socket.on('notification_event', function (data) {
|
|
||||||
console.log(`Stub handler for notification_event ${data.watch_uuid}`)
|
|
||||||
});
|
|
||||||
|
|
||||||
socket.on('watch_deleted', function (data) {
|
|
||||||
$('tr[data-watch-uuid="' + data.uuid + '"] td').fadeOut(500, function () {
|
|
||||||
$(this).closest('tr').remove();
|
|
||||||
});
|
|
||||||
});
|
|
||||||
|
|
||||||
// So that the favicon is only updated when the server has written the scraped favicon to disk.
|
|
||||||
socket.on('watch_bumped_favicon', function (watch) {
|
|
||||||
const $watchRow = $(`tr[data-watch-uuid="${watch.uuid}"]`);
|
|
||||||
if ($watchRow.length) {
|
|
||||||
$watchRow.addClass('has-favicon');
|
|
||||||
// Because the event could be emitted from a process that is outside the app context, url_for() might not work.
|
|
||||||
// Lets use url_for at template generation time to give us a PLACEHOLDER instead
|
|
||||||
let favicon_url = favicon_baseURL.replace('/PLACEHOLDER', `/${watch.uuid}?cache=${watch.event_timestamp}`);
|
|
||||||
console.log(`Setting favicon for UUID - ${watch.uuid} - ${favicon_url}`);
|
|
||||||
$('img.favicon', $watchRow).attr('src', favicon_url);
|
|
||||||
}
|
|
||||||
})
|
|
||||||
|
|
||||||
socket.on('watch_update', function (data) {
|
|
||||||
const watch = data.watch;
|
|
||||||
const general_stats = data.general_stats;
|
|
||||||
|
|
||||||
// Log the entire watch object for debugging
|
|
||||||
console.log('!!! WATCH UPDATE EVENT RECEIVED !!!');
|
|
||||||
console.log(`${watch.event_timestamp} - Watch update ${watch.uuid} - Checking now - ${watch.checking_now} - UUID in URL ${window.location.href.includes(watch.uuid)}`);
|
|
||||||
console.log('Watch data:', watch);
|
|
||||||
console.log('General stats:', general_stats);
|
|
||||||
|
|
||||||
// Updating watch table rows
|
|
||||||
const $watchRow = $('tr[data-watch-uuid="' + watch.uuid + '"]');
|
|
||||||
console.log('Found watch row elements:', $watchRow.length);
|
|
||||||
|
|
||||||
if ($watchRow.length) {
|
|
||||||
$($watchRow).toggleClass('checking-now', watch.checking_now);
|
|
||||||
$($watchRow).toggleClass('queued', watch.queued);
|
|
||||||
$($watchRow).toggleClass('unviewed', watch.unviewed);
|
|
||||||
$($watchRow).toggleClass('has-error', watch.has_error);
|
|
||||||
$($watchRow).toggleClass('has-favicon', watch.has_favicon);
|
|
||||||
$($watchRow).toggleClass('notification_muted', watch.notification_muted);
|
|
||||||
$($watchRow).toggleClass('paused', watch.paused);
|
|
||||||
$($watchRow).toggleClass('single-history', watch.history_n === 1);
|
|
||||||
$($watchRow).toggleClass('multiple-history', watch.history_n >= 2);
|
|
||||||
|
|
||||||
$('td.title-col .error-text', $watchRow).html(watch.error_text)
|
|
||||||
$('td.last-changed', $watchRow).text(watch.last_changed_text)
|
|
||||||
$('td.last-checked .innertext', $watchRow).text(watch.last_checked_text)
|
|
||||||
$('td.last-checked', $watchRow).data('timestamp', watch.last_checked).data('fetchduration', watch.fetch_time);
|
|
||||||
$('td.last-checked', $watchRow).data('eta_complete', watch.last_checked + watch.fetch_time);
|
|
||||||
|
|
||||||
console.log('Updated UI for watch:', watch.uuid);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Tabs at bottom of list
|
|
||||||
$('#post-list-mark-views').toggleClass("has-unviewed", general_stats.has_unviewed);
|
|
||||||
$('#post-list-with-errors').toggleClass("has-error", general_stats.count_errors !== 0)
|
|
||||||
$('#post-list-with-errors a').text(`With errors (${ general_stats.count_errors })`);
|
|
||||||
|
|
||||||
$('body').toggleClass('checking-now', watch.checking_now && window.location.href.includes(watch.uuid));
|
|
||||||
});
|
|
||||||
|
|
||||||
} catch (e) {
|
|
||||||
// If Socket.IO fails to initialize, just log it and continue
|
|
||||||
console.log('Socket.IO initialization error:', e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user