Compare commits
134 Commits
sent-test-
...
selenium-p
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
78f3f2b26a | ||
|
|
f57bc10973 | ||
|
|
535ee97ef7 | ||
|
|
b2923b8c3a | ||
|
|
d2e8f822d6 | ||
|
|
5fd8200fd9 | ||
|
|
d0da8c9825 | ||
|
|
fd7574d21b | ||
|
|
c70706a27b | ||
|
|
968c364999 | ||
|
|
031cb76b7d | ||
|
|
af568d064c | ||
|
|
a75f57de43 | ||
|
|
72a1c3dda1 | ||
|
|
ffde79ecac | ||
|
|
66ad43b2df | ||
|
|
6b0e56ca80 | ||
|
|
5a2d84d8b4 | ||
|
|
a941156f26 | ||
|
|
a1fdeeaa29 | ||
|
|
40ea2604a7 | ||
|
|
ceda526093 | ||
|
|
4197254c53 | ||
|
|
a0b7efb436 | ||
|
|
5f5e8ede6c | ||
|
|
52ca855a29 | ||
|
|
079efd0a85 | ||
|
|
3a583a4e5d | ||
|
|
cfb4decf67 | ||
|
|
8067d5170b | ||
|
|
5551acf67d | ||
|
|
45a030bac6 | ||
|
|
96dc49e229 | ||
|
|
5f43d988a3 | ||
|
|
4269079c54 | ||
|
|
cdfb3f206c | ||
|
|
9f326783e5 | ||
|
|
4e6e680d79 | ||
|
|
1378b5b2ff | ||
|
|
456c6e3f58 | ||
|
|
61be7f68db | ||
|
|
0e38a3c881 | ||
|
|
2c630e9853 | ||
|
|
786e0d1fab | ||
|
|
78b7aee512 | ||
|
|
9d9d01863a | ||
|
|
108cdf84a5 | ||
|
|
8c6f6f1578 | ||
|
|
df4ffaaff8 | ||
|
|
d522c65e50 | ||
|
|
c3b2a8b019 | ||
|
|
28d3151090 | ||
|
|
2a1c832f8d | ||
|
|
0170adb171 | ||
|
|
cb62404b8c | ||
|
|
8f9c46bd3f | ||
|
|
97291ce6d0 | ||
|
|
f689e5418e | ||
|
|
f751f0b0ef | ||
|
|
ea9ba3bb2e | ||
|
|
c7ffebce2a | ||
|
|
54b7c070f7 | ||
|
|
6c1b687cd1 | ||
|
|
e850540a91 | ||
|
|
d4bc9dfc50 | ||
|
|
f26ea55e9c | ||
|
|
b53e1985ac | ||
|
|
302ef80d95 | ||
|
|
5b97c29714 | ||
|
|
64075c87ee | ||
|
|
d58a71cffc | ||
|
|
036b006226 | ||
|
|
f29f89d078 | ||
|
|
289f118581 | ||
|
|
10b2bbea83 | ||
|
|
32d110b92f | ||
|
|
860a5f5c1a | ||
|
|
70a18ee4b5 | ||
|
|
73189672c3 | ||
|
|
7e7d5dc383 | ||
|
|
1c2cfc37aa | ||
|
|
0634fe021d | ||
|
|
04934b6b3b | ||
|
|
ff00417bc5 | ||
|
|
849c5b2293 | ||
|
|
4bf560256b | ||
|
|
7903b03a0c | ||
|
|
5e7c0880c1 | ||
|
|
957aef4ff3 | ||
|
|
8e9a83d8f4 | ||
|
|
5961838143 | ||
|
|
8cf4a8128b | ||
|
|
24c3bfe5ad | ||
|
|
bdd9760f3c | ||
|
|
e37467f649 | ||
|
|
d42fdf0257 | ||
|
|
939fa86582 | ||
|
|
b87c92b9e0 | ||
|
|
4d5535d72c | ||
|
|
ad08219d03 | ||
|
|
82211eef82 | ||
|
|
5d9380609c | ||
|
|
a8b3918fca | ||
|
|
e83fb37fb6 | ||
|
|
6b99afe0f7 | ||
|
|
09ebc6ec63 | ||
|
|
6b1065502e | ||
|
|
d4c470984a | ||
|
|
55da48f719 | ||
|
|
dbd4adf23a | ||
|
|
b1e700b3ff | ||
|
|
1c61b5a623 | ||
|
|
e799a1cdcb | ||
|
|
938065db6f | ||
|
|
4f2d38ff49 | ||
|
|
8960f401b7 | ||
|
|
1c1f1c6f6b | ||
|
|
a2a98811a5 | ||
|
|
5a0ef8fc01 | ||
|
|
d90de0851d | ||
|
|
360b4f0d8b | ||
|
|
6fc04d7f1c | ||
|
|
66fb05527b | ||
|
|
202e47d728 | ||
|
|
d67d396b88 | ||
|
|
05f54f0ce6 | ||
|
|
6adf10597e | ||
|
|
4419bc0e61 | ||
|
|
f7e9846c9b | ||
|
|
5dea5e1def | ||
|
|
0fade0a473 | ||
|
|
121e9c20e0 | ||
|
|
12cec2d541 | ||
|
|
d52e6e8e11 |
23
.github/test/Dockerfile-alpine
vendored
@@ -2,32 +2,33 @@
|
|||||||
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
|
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
|
||||||
# Some packages wont install via pypi because they dont have a wheel available under this architecture.
|
# Some packages wont install via pypi because they dont have a wheel available under this architecture.
|
||||||
|
|
||||||
FROM ghcr.io/linuxserver/baseimage-alpine:3.18
|
FROM ghcr.io/linuxserver/baseimage-alpine:3.21
|
||||||
ENV PYTHONUNBUFFERED=1
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
COPY requirements.txt /requirements.txt
|
COPY requirements.txt /requirements.txt
|
||||||
|
|
||||||
RUN \
|
RUN \
|
||||||
apk add --update --no-cache --virtual=build-dependencies \
|
apk add --update --no-cache --virtual=build-dependencies \
|
||||||
|
build-base \
|
||||||
cargo \
|
cargo \
|
||||||
g++ \
|
git \
|
||||||
gcc \
|
|
||||||
jpeg-dev \
|
jpeg-dev \
|
||||||
libc-dev \
|
libc-dev \
|
||||||
libffi-dev \
|
libffi-dev \
|
||||||
libjpeg \
|
|
||||||
libxslt-dev \
|
libxslt-dev \
|
||||||
make \
|
|
||||||
openssl-dev \
|
openssl-dev \
|
||||||
py3-wheel \
|
|
||||||
python3-dev \
|
python3-dev \
|
||||||
|
zip \
|
||||||
zlib-dev && \
|
zlib-dev && \
|
||||||
apk add --update --no-cache \
|
apk add --update --no-cache \
|
||||||
|
libjpeg \
|
||||||
libxslt \
|
libxslt \
|
||||||
python3 \
|
nodejs \
|
||||||
py3-pip && \
|
poppler-utils \
|
||||||
|
python3 && \
|
||||||
echo "**** pip3 install test of changedetection.io ****" && \
|
echo "**** pip3 install test of changedetection.io ****" && \
|
||||||
pip3 install -U pip wheel setuptools && \
|
python3 -m venv /lsiopy && \
|
||||||
pip3 install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.18/ -r /requirements.txt && \
|
pip install -U pip wheel setuptools && \
|
||||||
|
pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \
|
||||||
apk del --purge \
|
apk del --purge \
|
||||||
build-dependencies
|
build-dependencies
|
||||||
|
|||||||
19
.github/workflows/containers.yml
vendored
@@ -103,6 +103,19 @@ jobs:
|
|||||||
# provenance: false
|
# provenance: false
|
||||||
|
|
||||||
# A new tagged release is required, which builds :tag and :latest
|
# A new tagged release is required, which builds :tag and :latest
|
||||||
|
- name: Docker meta :tag
|
||||||
|
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||||
|
uses: docker/metadata-action@v5
|
||||||
|
id: meta
|
||||||
|
with:
|
||||||
|
images: |
|
||||||
|
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
|
||||||
|
ghcr.io/dgtlmoon/changedetection.io
|
||||||
|
tags: |
|
||||||
|
type=semver,pattern={{version}}
|
||||||
|
type=semver,pattern={{major}}.{{minor}}
|
||||||
|
type=semver,pattern={{major}}
|
||||||
|
|
||||||
- name: Build and push :tag
|
- name: Build and push :tag
|
||||||
id: docker_build_tag_release
|
id: docker_build_tag_release
|
||||||
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
|
||||||
@@ -111,11 +124,7 @@ jobs:
|
|||||||
context: ./
|
context: ./
|
||||||
file: ./Dockerfile
|
file: ./Dockerfile
|
||||||
push: true
|
push: true
|
||||||
tags: |
|
tags: ${{ steps.meta.outputs.tags }}
|
||||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }}
|
|
||||||
ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
|
|
||||||
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest
|
|
||||||
ghcr.io/dgtlmoon/changedetection.io:latest
|
|
||||||
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
|
platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
|
||||||
cache-from: type=gha
|
cache-from: type=gha
|
||||||
cache-to: type=gha,mode=max
|
cache-to: type=gha,mode=max
|
||||||
|
|||||||
7
.github/workflows/pypi-release.yml
vendored
@@ -45,9 +45,12 @@ jobs:
|
|||||||
- name: Test that the basic pip built package runs without error
|
- name: Test that the basic pip built package runs without error
|
||||||
run: |
|
run: |
|
||||||
set -ex
|
set -ex
|
||||||
sudo pip3 install --upgrade pip
|
ls -alR
|
||||||
pip3 install dist/changedetection.io*.whl
|
|
||||||
|
# Find and install the first .whl file
|
||||||
|
find dist -type f -name "*.whl" -exec pip3 install {} \; -quit
|
||||||
changedetection.io -d /tmp -p 10000 &
|
changedetection.io -d /tmp -p 10000 &
|
||||||
|
|
||||||
sleep 3
|
sleep 3
|
||||||
curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
|
curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
|
||||||
curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null
|
curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null
|
||||||
|
|||||||
16
.github/workflows/test-only.yml
vendored
@@ -8,13 +8,13 @@ jobs:
|
|||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
- uses: actions/checkout@v4
|
- uses: actions/checkout@v4
|
||||||
- name: Lint with flake8
|
- name: Lint with Ruff
|
||||||
run: |
|
run: |
|
||||||
pip3 install flake8
|
pip install ruff
|
||||||
# stop the build if there are Python syntax errors or undefined names
|
# Check for syntax errors and undefined names
|
||||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
ruff check . --select E9,F63,F7,F82
|
||||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
# Complete check with errors treated as warnings
|
||||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
ruff check . --exit-zero
|
||||||
|
|
||||||
test-application-3-10:
|
test-application-3-10:
|
||||||
needs: lint-code
|
needs: lint-code
|
||||||
@@ -28,7 +28,6 @@ jobs:
|
|||||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||||
with:
|
with:
|
||||||
python-version: '3.11'
|
python-version: '3.11'
|
||||||
skip-pypuppeteer: true
|
|
||||||
|
|
||||||
test-application-3-12:
|
test-application-3-12:
|
||||||
needs: lint-code
|
needs: lint-code
|
||||||
@@ -42,5 +41,4 @@ jobs:
|
|||||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||||
with:
|
with:
|
||||||
python-version: '3.13'
|
python-version: '3.13'
|
||||||
skip-pypuppeteer: true
|
skip-pypuppeteer: true
|
||||||
|
|
||||||
@@ -7,7 +7,7 @@ on:
|
|||||||
description: 'Python version to use'
|
description: 'Python version to use'
|
||||||
required: true
|
required: true
|
||||||
type: string
|
type: string
|
||||||
default: '3.10'
|
default: '3.11'
|
||||||
skip-pypuppeteer:
|
skip-pypuppeteer:
|
||||||
description: 'Skip PyPuppeteer (not supported in 3.11/3.12)'
|
description: 'Skip PyPuppeteer (not supported in 3.11/3.12)'
|
||||||
required: false
|
required: false
|
||||||
@@ -64,14 +64,16 @@ jobs:
|
|||||||
echo "Running processes in docker..."
|
echo "Running processes in docker..."
|
||||||
docker ps
|
docker ps
|
||||||
|
|
||||||
- name: Test built container with Pytest (generally as requests/plaintext fetching)
|
- name: Run Unit Tests
|
||||||
run: |
|
run: |
|
||||||
# Unit tests
|
# Unit tests
|
||||||
echo "run test with unittest"
|
|
||||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
|
||||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
|
||||||
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
|
||||||
|
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
|
||||||
|
|
||||||
|
- name: Test built container with Pytest (generally as requests/plaintext fetching)
|
||||||
|
run: |
|
||||||
# All tests
|
# All tests
|
||||||
echo "run test with pytest"
|
echo "run test with pytest"
|
||||||
# The default pytest logger_level is TRACE
|
# The default pytest logger_level is TRACE
|
||||||
@@ -170,8 +172,8 @@ jobs:
|
|||||||
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
|
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
|
||||||
|
|
||||||
# Check whether TRACE log is enabled.
|
# Check whether TRACE log is enabled.
|
||||||
# Also, check whether TRACE is came from STDERR
|
# Also, check whether TRACE came from STDOUT
|
||||||
docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1
|
docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
|
||||||
# Check whether DEBUG is came from STDOUT
|
# Check whether DEBUG is came from STDOUT
|
||||||
docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
|
docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
|
||||||
|
|
||||||
|
|||||||
1
.gitignore
vendored
@@ -16,6 +16,7 @@ dist/
|
|||||||
.env
|
.env
|
||||||
.venv/
|
.venv/
|
||||||
venv/
|
venv/
|
||||||
|
.python-version
|
||||||
|
|
||||||
# IDEs
|
# IDEs
|
||||||
.idea
|
.idea
|
||||||
|
|||||||
9
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
repos:
|
||||||
|
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||||
|
rev: v0.11.2
|
||||||
|
hooks:
|
||||||
|
# Lint (and apply safe fixes)
|
||||||
|
- id: ruff
|
||||||
|
args: [--fix]
|
||||||
|
# Fomrat
|
||||||
|
- id: ruff-format
|
||||||
48
.ruff.toml
Normal file
@@ -0,0 +1,48 @@
|
|||||||
|
# Minimum supported version
|
||||||
|
target-version = "py310"
|
||||||
|
|
||||||
|
# Formatting options
|
||||||
|
line-length = 100
|
||||||
|
indent-width = 4
|
||||||
|
|
||||||
|
exclude = [
|
||||||
|
"__pycache__",
|
||||||
|
".eggs",
|
||||||
|
".git",
|
||||||
|
".tox",
|
||||||
|
".venv",
|
||||||
|
"*.egg-info",
|
||||||
|
"*.pyc",
|
||||||
|
]
|
||||||
|
|
||||||
|
[lint]
|
||||||
|
# https://docs.astral.sh/ruff/rules/
|
||||||
|
select = [
|
||||||
|
"B", # flake8-bugbear
|
||||||
|
"B9",
|
||||||
|
"C",
|
||||||
|
"E", # pycodestyle
|
||||||
|
"F", # Pyflakes
|
||||||
|
"I", # isort
|
||||||
|
"N", # pep8-naming
|
||||||
|
"UP", # pyupgrade
|
||||||
|
"W", # pycodestyle
|
||||||
|
]
|
||||||
|
ignore = [
|
||||||
|
"B007", # unused-loop-control-variable
|
||||||
|
"B909", # loop-iterator-mutation
|
||||||
|
"E203", # whitespace-before-punctuation
|
||||||
|
"E266", # multiple-leading-hashes-for-block-comment
|
||||||
|
"E501", # redundant-backslash
|
||||||
|
"F403", # undefined-local-with-import-star
|
||||||
|
"N802", # invalid-function-name
|
||||||
|
"N806", # non-lowercase-variable-in-function
|
||||||
|
"N815", # mixed-case-variable-in-class-scope
|
||||||
|
]
|
||||||
|
|
||||||
|
[lint.mccabe]
|
||||||
|
max-complexity = 12
|
||||||
|
|
||||||
|
[format]
|
||||||
|
indent-style = "space"
|
||||||
|
quote-style = "preserve"
|
||||||
@@ -1,8 +1,5 @@
|
|||||||
# pip dependencies install stage
|
# pip dependencies install stage
|
||||||
|
|
||||||
# @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py
|
|
||||||
# If you know how to fix it, please do! and test it for both 3.10 and 3.11
|
|
||||||
|
|
||||||
ARG PYTHON_VERSION=3.11
|
ARG PYTHON_VERSION=3.11
|
||||||
|
|
||||||
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
|
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
|
||||||
@@ -71,7 +68,7 @@ COPY changedetection.py /app/changedetection.py
|
|||||||
# Github Action test purpose(test-only.yml).
|
# Github Action test purpose(test-only.yml).
|
||||||
# On production, it is effectively LOGGER_LEVEL=''.
|
# On production, it is effectively LOGGER_LEVEL=''.
|
||||||
ARG LOGGER_LEVEL=''
|
ARG LOGGER_LEVEL=''
|
||||||
ENV LOGGER_LEVEL "$LOGGER_LEVEL"
|
ENV LOGGER_LEVEL="$LOGGER_LEVEL"
|
||||||
|
|
||||||
WORKDIR /app
|
WORKDIR /app
|
||||||
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
CMD ["python", "./changedetection.py", "-d", "/datastore"]
|
||||||
|
|||||||
@@ -1,8 +1,9 @@
|
|||||||
recursive-include changedetectionio/api *
|
recursive-include changedetectionio/api *
|
||||||
recursive-include changedetectionio/apprise_plugin *
|
|
||||||
recursive-include changedetectionio/blueprint *
|
recursive-include changedetectionio/blueprint *
|
||||||
recursive-include changedetectionio/content_fetchers *
|
recursive-include changedetectionio/content_fetchers *
|
||||||
|
recursive-include changedetectionio/conditions *
|
||||||
recursive-include changedetectionio/model *
|
recursive-include changedetectionio/model *
|
||||||
|
recursive-include changedetectionio/notification *
|
||||||
recursive-include changedetectionio/processors *
|
recursive-include changedetectionio/processors *
|
||||||
recursive-include changedetectionio/static *
|
recursive-include changedetectionio/static *
|
||||||
recursive-include changedetectionio/templates *
|
recursive-include changedetectionio/templates *
|
||||||
|
|||||||
10
README.md
@@ -89,7 +89,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
|||||||
#### Key Features
|
#### Key Features
|
||||||
|
|
||||||
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
||||||
- Target elements with xPath(1.0) and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
|
- Target elements with xPath 1 and xPath 2, CSS Selectors, Easily monitor complex JSON with JSONPath or jq
|
||||||
- Switch between fast non-JS and Chrome JS based "fetchers"
|
- Switch between fast non-JS and Chrome JS based "fetchers"
|
||||||
- Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums)
|
- Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums)
|
||||||
- Easily specify how often a site should be checked
|
- Easily specify how often a site should be checked
|
||||||
@@ -105,6 +105,12 @@ We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) glob
|
|||||||
|
|
||||||
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
|
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
|
||||||
|
|
||||||
|
### Conditional web page changes
|
||||||
|
|
||||||
|
Easily [configure conditional actions](https://changedetection.io/tutorial/conditional-actions-web-page-changes), for example, only trigger when a price is above or below a preset amount, or [when a web page includes (or does not include) a keyword](https://changedetection.io/tutorial/how-monitor-keywords-any-website)
|
||||||
|
|
||||||
|
<img src="./docs/web-page-change-conditions.png" style="max-width:80%;" alt="Conditional web page changes" title="Conditional web page changes" />
|
||||||
|
|
||||||
### Schedule web page watches in any timezone, limit by day of week and time.
|
### Schedule web page watches in any timezone, limit by day of week and time.
|
||||||
|
|
||||||
Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours.
|
Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours.
|
||||||
@@ -120,7 +126,7 @@ Easily add the current web page to your changedetection.io tool, simply install
|
|||||||
|
|
||||||
[<img src="./docs/chrome-extension-screenshot.png" style="max-width:80%;" alt="Chrome Extension to easily add the current web-page to detect a change." title="Chrome Extension to easily add the current web-page to detect a change." />](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop)
|
[<img src="./docs/chrome-extension-screenshot.png" style="max-width:80%;" alt="Chrome Extension to easily add the current web-page to detect a change." title="Chrome Extension to easily add the current web-page to detect a change." />](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop)
|
||||||
|
|
||||||
[Goto the Chrome Webstore to download the extension.](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop)
|
[Goto the Chrome Webstore to download the extension.](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop) ( Or check out the [GitHub repo](https://github.com/dgtlmoon/changedetection.io-browser-extension) )
|
||||||
|
|
||||||
## Installation
|
## Installation
|
||||||
|
|
||||||
|
|||||||
@@ -3,4 +3,6 @@
|
|||||||
# Only exists for direct CLI usage
|
# Only exists for direct CLI usage
|
||||||
|
|
||||||
import changedetectionio
|
import changedetectionio
|
||||||
changedetectionio.main()
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
changedetectionio.main()
|
||||||
|
|||||||
98
changedetectionio/PLUGIN_README.md
Normal file
@@ -0,0 +1,98 @@
|
|||||||
|
# Creating Plugins for changedetection.io
|
||||||
|
|
||||||
|
This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways.
|
||||||
|
|
||||||
|
## Plugin Types
|
||||||
|
|
||||||
|
### UI Stats Tab Plugins
|
||||||
|
|
||||||
|
These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch.
|
||||||
|
|
||||||
|
#### Creating a UI Stats Tab Plugin
|
||||||
|
|
||||||
|
1. Create a Python file in a directory that will be loaded by the plugin system.
|
||||||
|
|
||||||
|
2. Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import pluggy
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
|
||||||
|
|
||||||
|
@global_hookimpl
|
||||||
|
def ui_edit_stats_extras(watch):
|
||||||
|
"""Add custom content to the stats tab"""
|
||||||
|
# Calculate or retrieve your stats
|
||||||
|
my_stat = calculate_something(watch)
|
||||||
|
|
||||||
|
# Return HTML content as a string
|
||||||
|
html = f"""
|
||||||
|
<div class="my-plugin-stats">
|
||||||
|
<h4>My Plugin Statistics</h4>
|
||||||
|
<p>My statistic: {my_stat}</p>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
return html
|
||||||
|
```
|
||||||
|
|
||||||
|
3. The HTML you return will be included in the Stats tab.
|
||||||
|
|
||||||
|
## Plugin Loading
|
||||||
|
|
||||||
|
Plugins can be loaded from:
|
||||||
|
|
||||||
|
1. Built-in plugin directories in the codebase
|
||||||
|
2. External packages using setuptools entry points
|
||||||
|
|
||||||
|
To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`.
|
||||||
|
|
||||||
|
## Example Plugin
|
||||||
|
|
||||||
|
Here's a simple example of a plugin that adds a word count statistic to the Stats tab:
|
||||||
|
|
||||||
|
```python
|
||||||
|
import pluggy
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
|
||||||
|
|
||||||
|
def count_words_in_history(watch):
|
||||||
|
"""Count words in the latest snapshot"""
|
||||||
|
try:
|
||||||
|
if not watch.history.keys():
|
||||||
|
return 0
|
||||||
|
|
||||||
|
latest_key = list(watch.history.keys())[-1]
|
||||||
|
latest_content = watch.get_history_snapshot(latest_key)
|
||||||
|
return len(latest_content.split())
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error counting words: {str(e)}")
|
||||||
|
return 0
|
||||||
|
|
||||||
|
@global_hookimpl
|
||||||
|
def ui_edit_stats_extras(watch):
|
||||||
|
"""Add word count to the Stats tab"""
|
||||||
|
word_count = count_words_in_history(watch)
|
||||||
|
|
||||||
|
html = f"""
|
||||||
|
<div class="word-count-stats">
|
||||||
|
<h4>Content Analysis</h4>
|
||||||
|
<table class="pure-table">
|
||||||
|
<tbody>
|
||||||
|
<tr>
|
||||||
|
<td>Word count (latest snapshot)</td>
|
||||||
|
<td>{word_count}</td>
|
||||||
|
</tr>
|
||||||
|
</tbody>
|
||||||
|
</table>
|
||||||
|
</div>
|
||||||
|
"""
|
||||||
|
return html
|
||||||
|
```
|
||||||
|
|
||||||
|
## Testing Your Plugin
|
||||||
|
|
||||||
|
1. Place your plugin in one of the directories scanned by the plugin system
|
||||||
|
2. Restart changedetection.io
|
||||||
|
3. Go to the Edit page of a watch and check the Stats tab to see your content
|
||||||
@@ -2,7 +2,7 @@
|
|||||||
|
|
||||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||||
|
|
||||||
__version__ = '0.48.01'
|
__version__ = '0.49.15'
|
||||||
|
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
from json.decoder import JSONDecodeError
|
from json.decoder import JSONDecodeError
|
||||||
@@ -11,6 +11,7 @@ os.environ['EVENTLET_NO_GREENDNS'] = 'yes'
|
|||||||
import eventlet
|
import eventlet
|
||||||
import eventlet.wsgi
|
import eventlet.wsgi
|
||||||
import getopt
|
import getopt
|
||||||
|
import platform
|
||||||
import signal
|
import signal
|
||||||
import socket
|
import socket
|
||||||
import sys
|
import sys
|
||||||
@@ -19,15 +20,15 @@ from changedetectionio import store
|
|||||||
from changedetectionio.flask_app import changedetection_app
|
from changedetectionio.flask_app import changedetection_app
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
|
||||||
# Only global so we can access it in the signal handler
|
# Only global so we can access it in the signal handler
|
||||||
app = None
|
app = None
|
||||||
datastore = None
|
datastore = None
|
||||||
|
|
||||||
|
def get_version():
|
||||||
|
return __version__
|
||||||
|
|
||||||
# Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
|
# Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
|
||||||
def sigshutdown_handler(_signo, _stack_frame):
|
def sigshutdown_handler(_signo, _stack_frame):
|
||||||
global app
|
|
||||||
global datastore
|
|
||||||
name = signal.Signals(_signo).name
|
name = signal.Signals(_signo).name
|
||||||
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown')
|
logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown')
|
||||||
datastore.sync_to_json()
|
datastore.sync_to_json()
|
||||||
@@ -105,7 +106,7 @@ def main():
|
|||||||
# Without this, a logger will be duplicated
|
# Without this, a logger will be duplicated
|
||||||
logger.remove()
|
logger.remove()
|
||||||
try:
|
try:
|
||||||
log_level_for_stdout = { 'DEBUG', 'SUCCESS' }
|
log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' }
|
||||||
logger.configure(handlers=[
|
logger.configure(handlers=[
|
||||||
{"sink": sys.stdout, "level": logger_level,
|
{"sink": sys.stdout, "level": logger_level,
|
||||||
"filter" : lambda record: record['level'].name in log_level_for_stdout},
|
"filter" : lambda record: record['level'].name in log_level_for_stdout},
|
||||||
@@ -144,6 +145,19 @@ def main():
|
|||||||
|
|
||||||
signal.signal(signal.SIGTERM, sigshutdown_handler)
|
signal.signal(signal.SIGTERM, sigshutdown_handler)
|
||||||
signal.signal(signal.SIGINT, sigshutdown_handler)
|
signal.signal(signal.SIGINT, sigshutdown_handler)
|
||||||
|
|
||||||
|
# Custom signal handler for memory cleanup
|
||||||
|
def sigusr_clean_handler(_signo, _stack_frame):
|
||||||
|
from changedetectionio.gc_cleanup import memory_cleanup
|
||||||
|
logger.info('SIGUSR1 received: Running memory cleanup')
|
||||||
|
return memory_cleanup(app)
|
||||||
|
|
||||||
|
# Register the SIGUSR1 signal handler
|
||||||
|
# Only register the signal handler if running on Linux
|
||||||
|
if platform.system() == "Linux":
|
||||||
|
signal.signal(signal.SIGUSR1, sigusr_clean_handler)
|
||||||
|
else:
|
||||||
|
logger.info("SIGUSR1 handler only registered on Linux, skipped.")
|
||||||
|
|
||||||
# Go into cleanup mode
|
# Go into cleanup mode
|
||||||
if do_cleanup:
|
if do_cleanup:
|
||||||
|
|||||||
62
changedetectionio/api/Import.py
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
import os
|
||||||
|
from changedetectionio.strtobool import strtobool
|
||||||
|
from flask_restful import abort, Resource
|
||||||
|
from flask import request
|
||||||
|
import validators
|
||||||
|
from . import auth
|
||||||
|
|
||||||
|
|
||||||
|
class Import(Resource):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
# datastore is a black box dependency
|
||||||
|
self.datastore = kwargs['datastore']
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
def post(self):
|
||||||
|
"""
|
||||||
|
@api {post} /api/v1/import Import a list of watched URLs
|
||||||
|
@apiDescription Accepts a line-feed separated list of URLs to import, additionally with ?tag_uuids=(tag id), ?tag=(name), ?proxy={key}, ?dedupe=true (default true) one URL per line.
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl http://localhost:5000/api/v1/import --data-binary @list-of-sites.txt -H"x-api-key:8a111a21bc2f8f1dd9b9353bbd46049a"
|
||||||
|
@apiName Import
|
||||||
|
@apiGroup Watch
|
||||||
|
@apiSuccess (200) {List} OK List of watch UUIDs added
|
||||||
|
@apiSuccess (500) {String} ERR Some other error
|
||||||
|
"""
|
||||||
|
|
||||||
|
extras = {}
|
||||||
|
|
||||||
|
if request.args.get('proxy'):
|
||||||
|
plist = self.datastore.proxy_list
|
||||||
|
if not request.args.get('proxy') in plist:
|
||||||
|
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
||||||
|
else:
|
||||||
|
extras['proxy'] = request.args.get('proxy')
|
||||||
|
|
||||||
|
dedupe = strtobool(request.args.get('dedupe', 'true'))
|
||||||
|
|
||||||
|
tags = request.args.get('tag')
|
||||||
|
tag_uuids = request.args.get('tag_uuids')
|
||||||
|
|
||||||
|
if tag_uuids:
|
||||||
|
tag_uuids = tag_uuids.split(',')
|
||||||
|
|
||||||
|
urls = request.get_data().decode('utf8').splitlines()
|
||||||
|
added = []
|
||||||
|
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
|
||||||
|
for url in urls:
|
||||||
|
url = url.strip()
|
||||||
|
if not len(url):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
||||||
|
if not validators.url(url, simple_host=allow_simplehost):
|
||||||
|
return f"Invalid or unsupported URL - {url}", 400
|
||||||
|
|
||||||
|
if dedupe and self.datastore.url_exists(url):
|
||||||
|
continue
|
||||||
|
|
||||||
|
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
||||||
|
added.append(new_uuid)
|
||||||
|
|
||||||
|
return added
|
||||||
145
changedetectionio/api/Notifications.py
Normal file
@@ -0,0 +1,145 @@
|
|||||||
|
from flask_expects_json import expects_json
|
||||||
|
from flask_restful import Resource
|
||||||
|
from . import auth
|
||||||
|
from flask_restful import abort, Resource
|
||||||
|
from flask import request
|
||||||
|
from . import auth
|
||||||
|
from . import schema_create_notification_urls, schema_delete_notification_urls
|
||||||
|
|
||||||
|
class Notifications(Resource):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
# datastore is a black box dependency
|
||||||
|
self.datastore = kwargs['datastore']
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
def get(self):
|
||||||
|
"""
|
||||||
|
@api {get} /api/v1/notifications Return Notification URL List
|
||||||
|
@apiDescription Return the Notification URL List from the configuration
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||||
|
HTTP/1.0 200
|
||||||
|
{
|
||||||
|
'notification_urls': ["notification-urls-list"]
|
||||||
|
}
|
||||||
|
@apiName Get
|
||||||
|
@apiGroup Notifications
|
||||||
|
"""
|
||||||
|
|
||||||
|
notification_urls = self.datastore.data.get('settings', {}).get('application', {}).get('notification_urls', [])
|
||||||
|
|
||||||
|
return {
|
||||||
|
'notification_urls': notification_urls,
|
||||||
|
}, 200
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
@expects_json(schema_create_notification_urls)
|
||||||
|
def post(self):
|
||||||
|
"""
|
||||||
|
@api {post} /api/v1/notifications Create Notification URLs
|
||||||
|
@apiDescription Add one or more notification URLs from the configuration
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl http://localhost:5000/api/v1/notifications/batch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
|
||||||
|
@apiName CreateBatch
|
||||||
|
@apiGroup Notifications
|
||||||
|
@apiSuccess (201) {Object[]} notification_urls List of added notification URLs
|
||||||
|
@apiError (400) {String} Invalid input
|
||||||
|
"""
|
||||||
|
|
||||||
|
json_data = request.get_json()
|
||||||
|
notification_urls = json_data.get("notification_urls", [])
|
||||||
|
|
||||||
|
from wtforms import ValidationError
|
||||||
|
try:
|
||||||
|
validate_notification_urls(notification_urls)
|
||||||
|
except ValidationError as e:
|
||||||
|
return str(e), 400
|
||||||
|
|
||||||
|
added_urls = []
|
||||||
|
|
||||||
|
for url in notification_urls:
|
||||||
|
clean_url = url.strip()
|
||||||
|
added_url = self.datastore.add_notification_url(clean_url)
|
||||||
|
if added_url:
|
||||||
|
added_urls.append(added_url)
|
||||||
|
|
||||||
|
if not added_urls:
|
||||||
|
return "No valid notification URLs were added", 400
|
||||||
|
|
||||||
|
return {'notification_urls': added_urls}, 201
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
@expects_json(schema_create_notification_urls)
|
||||||
|
def put(self):
|
||||||
|
"""
|
||||||
|
@api {put} /api/v1/notifications Replace Notification URLs
|
||||||
|
@apiDescription Replace all notification URLs with the provided list (can be empty)
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl -X PUT http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
|
||||||
|
@apiName Replace
|
||||||
|
@apiGroup Notifications
|
||||||
|
@apiSuccess (200) {Object[]} notification_urls List of current notification URLs
|
||||||
|
@apiError (400) {String} Invalid input
|
||||||
|
"""
|
||||||
|
json_data = request.get_json()
|
||||||
|
notification_urls = json_data.get("notification_urls", [])
|
||||||
|
|
||||||
|
from wtforms import ValidationError
|
||||||
|
try:
|
||||||
|
validate_notification_urls(notification_urls)
|
||||||
|
except ValidationError as e:
|
||||||
|
return str(e), 400
|
||||||
|
|
||||||
|
if not isinstance(notification_urls, list):
|
||||||
|
return "Invalid input format", 400
|
||||||
|
|
||||||
|
clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)]
|
||||||
|
self.datastore.data['settings']['application']['notification_urls'] = clean_urls
|
||||||
|
self.datastore.needs_write = True
|
||||||
|
|
||||||
|
return {'notification_urls': clean_urls}, 200
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
@expects_json(schema_delete_notification_urls)
|
||||||
|
def delete(self):
|
||||||
|
"""
|
||||||
|
@api {delete} /api/v1/notifications Delete Notification URLs
|
||||||
|
@apiDescription Deletes one or more notification URLs from the configuration
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl http://localhost:5000/api/v1/notifications -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
|
||||||
|
@apiParam {String[]} notification_urls The notification URLs to delete.
|
||||||
|
@apiName Delete
|
||||||
|
@apiGroup Notifications
|
||||||
|
@apiSuccess (204) {String} OK Deleted
|
||||||
|
@apiError (400) {String} No matching notification URLs found.
|
||||||
|
"""
|
||||||
|
|
||||||
|
json_data = request.get_json()
|
||||||
|
urls_to_delete = json_data.get("notification_urls", [])
|
||||||
|
if not isinstance(urls_to_delete, list):
|
||||||
|
abort(400, message="Expected a list of notification URLs.")
|
||||||
|
|
||||||
|
notification_urls = self.datastore.data['settings']['application'].get('notification_urls', [])
|
||||||
|
deleted = []
|
||||||
|
|
||||||
|
for url in urls_to_delete:
|
||||||
|
clean_url = url.strip()
|
||||||
|
if clean_url in notification_urls:
|
||||||
|
notification_urls.remove(clean_url)
|
||||||
|
deleted.append(clean_url)
|
||||||
|
|
||||||
|
if not deleted:
|
||||||
|
abort(400, message="No matching notification URLs found.")
|
||||||
|
|
||||||
|
self.datastore.data['settings']['application']['notification_urls'] = notification_urls
|
||||||
|
self.datastore.needs_write = True
|
||||||
|
|
||||||
|
return 'OK', 204
|
||||||
|
|
||||||
|
def validate_notification_urls(notification_urls):
|
||||||
|
from changedetectionio.forms import ValidateAppRiseServers
|
||||||
|
validator = ValidateAppRiseServers()
|
||||||
|
class DummyForm: pass
|
||||||
|
dummy_form = DummyForm()
|
||||||
|
field = type("Field", (object,), {"data": notification_urls, "gettext": lambda self, x: x})()
|
||||||
|
validator(dummy_form, field)
|
||||||
51
changedetectionio/api/Search.py
Normal file
@@ -0,0 +1,51 @@
|
|||||||
|
from flask_restful import Resource, abort
|
||||||
|
from flask import request
|
||||||
|
from . import auth
|
||||||
|
|
||||||
|
class Search(Resource):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
# datastore is a black box dependency
|
||||||
|
self.datastore = kwargs['datastore']
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
def get(self):
|
||||||
|
"""
|
||||||
|
@api {get} /api/v1/search Search for watches
|
||||||
|
@apiDescription Search watches by URL or title text
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl "http://localhost:5000/api/v1/search?q=https://example.com/page1" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||||
|
curl "http://localhost:5000/api/v1/search?q=https://example.com/page1?tag=Favourites" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||||
|
curl "http://localhost:5000/api/v1/search?q=https://example.com?partial=true" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||||
|
@apiName Search
|
||||||
|
@apiGroup Watch Management
|
||||||
|
@apiQuery {String} q Search query to match against watch URLs and titles
|
||||||
|
@apiQuery {String} [tag] Optional name of tag to limit results (name not UUID)
|
||||||
|
@apiQuery {String} [partial] Allow partial matching of URL query
|
||||||
|
@apiSuccess (200) {Object} JSON Object containing matched watches
|
||||||
|
"""
|
||||||
|
query = request.args.get('q', '').strip()
|
||||||
|
tag_limit = request.args.get('tag', '').strip()
|
||||||
|
from changedetectionio.strtobool import strtobool
|
||||||
|
partial = bool(strtobool(request.args.get('partial', '0'))) if 'partial' in request.args else False
|
||||||
|
|
||||||
|
# Require a search query
|
||||||
|
if not query:
|
||||||
|
abort(400, message="Search query 'q' parameter is required")
|
||||||
|
|
||||||
|
# Use the search function from the datastore
|
||||||
|
matching_uuids = self.datastore.search_watches_for_url(query=query, tag_limit=tag_limit, partial=partial)
|
||||||
|
|
||||||
|
# Build the response with watch details
|
||||||
|
results = {}
|
||||||
|
for uuid in matching_uuids:
|
||||||
|
watch = self.datastore.data['watching'].get(uuid)
|
||||||
|
results[uuid] = {
|
||||||
|
'last_changed': watch.last_changed,
|
||||||
|
'last_checked': watch['last_checked'],
|
||||||
|
'last_error': watch['last_error'],
|
||||||
|
'title': watch['title'],
|
||||||
|
'url': watch['url'],
|
||||||
|
'viewed': watch.viewed
|
||||||
|
}
|
||||||
|
|
||||||
|
return results, 200
|
||||||
54
changedetectionio/api/SystemInfo.py
Normal file
@@ -0,0 +1,54 @@
|
|||||||
|
from flask_restful import Resource
|
||||||
|
from . import auth
|
||||||
|
|
||||||
|
|
||||||
|
class SystemInfo(Resource):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
# datastore is a black box dependency
|
||||||
|
self.datastore = kwargs['datastore']
|
||||||
|
self.update_q = kwargs['update_q']
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
def get(self):
|
||||||
|
"""
|
||||||
|
@api {get} /api/v1/systeminfo Return system info
|
||||||
|
@apiDescription Return some info about the current system state
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl http://localhost:5000/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||||
|
HTTP/1.0 200
|
||||||
|
{
|
||||||
|
'queue_size': 10 ,
|
||||||
|
'overdue_watches': ["watch-uuid-list"],
|
||||||
|
'uptime': 38344.55,
|
||||||
|
'watch_count': 800,
|
||||||
|
'version': "0.40.1"
|
||||||
|
}
|
||||||
|
@apiName Get Info
|
||||||
|
@apiGroup System Information
|
||||||
|
"""
|
||||||
|
import time
|
||||||
|
overdue_watches = []
|
||||||
|
|
||||||
|
# Check all watches and report which have not been checked but should have been
|
||||||
|
|
||||||
|
for uuid, watch in self.datastore.data.get('watching', {}).items():
|
||||||
|
# see if now - last_checked is greater than the time that should have been
|
||||||
|
# this is not super accurate (maybe they just edited it) but better than nothing
|
||||||
|
t = watch.threshold_seconds()
|
||||||
|
if not t:
|
||||||
|
# Use the system wide default
|
||||||
|
t = self.datastore.threshold_seconds
|
||||||
|
|
||||||
|
time_since_check = time.time() - watch.get('last_checked')
|
||||||
|
|
||||||
|
# Allow 5 minutes of grace time before we decide it's overdue
|
||||||
|
if time_since_check - (5 * 60) > t:
|
||||||
|
overdue_watches.append(uuid)
|
||||||
|
from changedetectionio import __version__ as main_version
|
||||||
|
return {
|
||||||
|
'queue_size': self.update_q.qsize(),
|
||||||
|
'overdue_watches': overdue_watches,
|
||||||
|
'uptime': round(time.time() - self.datastore.start_time, 2),
|
||||||
|
'watch_count': len(self.datastore.data.get('watching', {})),
|
||||||
|
'version': main_version
|
||||||
|
}, 200
|
||||||
156
changedetectionio/api/Tags.py
Normal file
@@ -0,0 +1,156 @@
|
|||||||
|
from flask_expects_json import expects_json
|
||||||
|
from flask_restful import abort, Resource
|
||||||
|
from flask import request
|
||||||
|
from . import auth
|
||||||
|
|
||||||
|
# Import schemas from __init__.py
|
||||||
|
from . import schema_tag, schema_create_tag, schema_update_tag
|
||||||
|
|
||||||
|
|
||||||
|
class Tag(Resource):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
# datastore is a black box dependency
|
||||||
|
self.datastore = kwargs['datastore']
|
||||||
|
|
||||||
|
# Get information about a single tag
|
||||||
|
# curl http://localhost:5000/api/v1/tag/<string:uuid>
|
||||||
|
@auth.check_token
|
||||||
|
def get(self, uuid):
|
||||||
|
"""
|
||||||
|
@api {get} /api/v1/tag/:uuid Single tag - get data or toggle notification muting.
|
||||||
|
@apiDescription Retrieve tag information and set notification_muted status
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||||
|
curl "http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=muted" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||||
|
@apiName Tag
|
||||||
|
@apiGroup Tag
|
||||||
|
@apiParam {uuid} uuid Tag unique ID.
|
||||||
|
@apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state
|
||||||
|
@apiSuccess (200) {String} OK When muted operation OR full JSON object of the tag
|
||||||
|
@apiSuccess (200) {JSON} TagJSON JSON Full JSON object of the tag
|
||||||
|
"""
|
||||||
|
from copy import deepcopy
|
||||||
|
tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
|
||||||
|
if not tag:
|
||||||
|
abort(404, message=f'No tag exists with the UUID of {uuid}')
|
||||||
|
|
||||||
|
if request.args.get('muted', '') == 'muted':
|
||||||
|
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
|
||||||
|
return "OK", 200
|
||||||
|
elif request.args.get('muted', '') == 'unmuted':
|
||||||
|
self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = False
|
||||||
|
return "OK", 200
|
||||||
|
|
||||||
|
return tag
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
def delete(self, uuid):
|
||||||
|
"""
|
||||||
|
@api {delete} /api/v1/tag/:uuid Delete a tag and remove it from all watches
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||||
|
@apiParam {uuid} uuid Tag unique ID.
|
||||||
|
@apiName DeleteTag
|
||||||
|
@apiGroup Tag
|
||||||
|
@apiSuccess (200) {String} OK Was deleted
|
||||||
|
"""
|
||||||
|
if not self.datastore.data['settings']['application']['tags'].get(uuid):
|
||||||
|
abort(400, message='No tag exists with the UUID of {}'.format(uuid))
|
||||||
|
|
||||||
|
# Delete the tag, and any tag reference
|
||||||
|
del self.datastore.data['settings']['application']['tags'][uuid]
|
||||||
|
|
||||||
|
# Remove tag from all watches
|
||||||
|
for watch_uuid, watch in self.datastore.data['watching'].items():
|
||||||
|
if watch.get('tags') and uuid in watch['tags']:
|
||||||
|
watch['tags'].remove(uuid)
|
||||||
|
|
||||||
|
return 'OK', 204
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
@expects_json(schema_update_tag)
|
||||||
|
def put(self, uuid):
|
||||||
|
"""
|
||||||
|
@api {put} /api/v1/tag/:uuid Update tag information
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
Update (PUT)
|
||||||
|
curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"title": "New Tag Title"}'
|
||||||
|
|
||||||
|
@apiDescription Updates an existing tag using JSON
|
||||||
|
@apiParam {uuid} uuid Tag unique ID.
|
||||||
|
@apiName UpdateTag
|
||||||
|
@apiGroup Tag
|
||||||
|
@apiSuccess (200) {String} OK Was updated
|
||||||
|
@apiSuccess (500) {String} ERR Some other error
|
||||||
|
"""
|
||||||
|
tag = self.datastore.data['settings']['application']['tags'].get(uuid)
|
||||||
|
if not tag:
|
||||||
|
abort(404, message='No tag exists with the UUID of {}'.format(uuid))
|
||||||
|
|
||||||
|
tag.update(request.json)
|
||||||
|
self.datastore.needs_write_urgent = True
|
||||||
|
|
||||||
|
return "OK", 200
|
||||||
|
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
# Only cares for {'title': 'xxxx'}
|
||||||
|
def post(self):
|
||||||
|
"""
|
||||||
|
@api {post} /api/v1/watch Create a single tag
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"name": "Work related"}'
|
||||||
|
@apiName Create
|
||||||
|
@apiGroup Tag
|
||||||
|
@apiSuccess (200) {String} OK Was created
|
||||||
|
@apiSuccess (500) {String} ERR Some other error
|
||||||
|
"""
|
||||||
|
|
||||||
|
json_data = request.get_json()
|
||||||
|
title = json_data.get("title",'').strip()
|
||||||
|
|
||||||
|
|
||||||
|
new_uuid = self.datastore.add_tag(title=title)
|
||||||
|
if new_uuid:
|
||||||
|
return {'uuid': new_uuid}, 201
|
||||||
|
else:
|
||||||
|
return "Invalid or unsupported tag", 400
|
||||||
|
|
||||||
|
class Tags(Resource):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
# datastore is a black box dependency
|
||||||
|
self.datastore = kwargs['datastore']
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
def get(self):
|
||||||
|
"""
|
||||||
|
@api {get} /api/v1/tags List tags
|
||||||
|
@apiDescription Return list of available tags
|
||||||
|
@apiExample {curl} Example usage:
|
||||||
|
curl http://localhost:5000/api/v1/tags -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
||||||
|
{
|
||||||
|
"cc0cfffa-f449-477b-83ea-0caafd1dc091": {
|
||||||
|
"title": "Tech News",
|
||||||
|
"notification_muted": false,
|
||||||
|
"date_created": 1677103794
|
||||||
|
},
|
||||||
|
"e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
|
||||||
|
"title": "Shopping",
|
||||||
|
"notification_muted": true,
|
||||||
|
"date_created": 1676662819
|
||||||
|
}
|
||||||
|
}
|
||||||
|
@apiName ListTags
|
||||||
|
@apiGroup Tag Management
|
||||||
|
@apiSuccess (200) {String} OK JSON dict
|
||||||
|
"""
|
||||||
|
result = {}
|
||||||
|
for uuid, tag in self.datastore.data['settings']['application']['tags'].items():
|
||||||
|
result[uuid] = {
|
||||||
|
'date_created': tag.get('date_created', 0),
|
||||||
|
'notification_muted': tag.get('notification_muted', False),
|
||||||
|
'title': tag.get('title', ''),
|
||||||
|
'uuid': tag.get('uuid')
|
||||||
|
}
|
||||||
|
|
||||||
|
return result, 200
|
||||||
@@ -9,20 +9,9 @@ import validators
|
|||||||
from . import auth
|
from . import auth
|
||||||
import copy
|
import copy
|
||||||
|
|
||||||
# See docs/README.md for rebuilding the docs/apidoc information
|
# Import schemas from __init__.py
|
||||||
|
from . import schema, schema_create_watch, schema_update_watch
|
||||||
|
|
||||||
from . import api_schema
|
|
||||||
from ..model import watch_base
|
|
||||||
|
|
||||||
# Build a JSON Schema atleast partially based on our Watch model
|
|
||||||
watch_base_config = watch_base()
|
|
||||||
schema = api_schema.build_watch_json_schema(watch_base_config)
|
|
||||||
|
|
||||||
schema_create_watch = copy.deepcopy(schema)
|
|
||||||
schema_create_watch['required'] = ['url']
|
|
||||||
|
|
||||||
schema_update_watch = copy.deepcopy(schema)
|
|
||||||
schema_update_watch['additionalProperties'] = False
|
|
||||||
|
|
||||||
class Watch(Resource):
|
class Watch(Resource):
|
||||||
def __init__(self, **kwargs):
|
def __init__(self, **kwargs):
|
||||||
@@ -76,6 +65,7 @@ class Watch(Resource):
|
|||||||
# Return without history, get that via another API call
|
# Return without history, get that via another API call
|
||||||
# Properties are not returned as a JSON, so add the required props manually
|
# Properties are not returned as a JSON, so add the required props manually
|
||||||
watch['history_n'] = watch.history_n
|
watch['history_n'] = watch.history_n
|
||||||
|
# attr .last_changed will check for the last written text snapshot on change
|
||||||
watch['last_changed'] = watch.last_changed
|
watch['last_changed'] = watch.last_changed
|
||||||
watch['viewed'] = watch.viewed
|
watch['viewed'] = watch.viewed
|
||||||
return watch
|
return watch
|
||||||
@@ -284,8 +274,6 @@ class CreateWatch(Resource):
|
|||||||
list = {}
|
list = {}
|
||||||
|
|
||||||
tag_limit = request.args.get('tag', '').lower()
|
tag_limit = request.args.get('tag', '').lower()
|
||||||
|
|
||||||
|
|
||||||
for uuid, watch in self.datastore.data['watching'].items():
|
for uuid, watch in self.datastore.data['watching'].items():
|
||||||
# Watch tags by name (replace the other calls?)
|
# Watch tags by name (replace the other calls?)
|
||||||
tags = self.datastore.get_all_tags_for_watch(uuid=uuid)
|
tags = self.datastore.get_all_tags_for_watch(uuid=uuid)
|
||||||
@@ -306,110 +294,4 @@ class CreateWatch(Resource):
|
|||||||
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
return {'status': "OK"}, 200
|
return {'status': "OK"}, 200
|
||||||
|
|
||||||
return list, 200
|
return list, 200
|
||||||
|
|
||||||
class Import(Resource):
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
# datastore is a black box dependency
|
|
||||||
self.datastore = kwargs['datastore']
|
|
||||||
|
|
||||||
@auth.check_token
|
|
||||||
def post(self):
|
|
||||||
"""
|
|
||||||
@api {post} /api/v1/import Import a list of watched URLs
|
|
||||||
@apiDescription Accepts a line-feed separated list of URLs to import, additionally with ?tag_uuids=(tag id), ?tag=(name), ?proxy={key}, ?dedupe=true (default true) one URL per line.
|
|
||||||
@apiExample {curl} Example usage:
|
|
||||||
curl http://localhost:5000/api/v1/import --data-binary @list-of-sites.txt -H"x-api-key:8a111a21bc2f8f1dd9b9353bbd46049a"
|
|
||||||
@apiName Import
|
|
||||||
@apiGroup Watch
|
|
||||||
@apiSuccess (200) {List} OK List of watch UUIDs added
|
|
||||||
@apiSuccess (500) {String} ERR Some other error
|
|
||||||
"""
|
|
||||||
|
|
||||||
extras = {}
|
|
||||||
|
|
||||||
if request.args.get('proxy'):
|
|
||||||
plist = self.datastore.proxy_list
|
|
||||||
if not request.args.get('proxy') in plist:
|
|
||||||
return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
|
|
||||||
else:
|
|
||||||
extras['proxy'] = request.args.get('proxy')
|
|
||||||
|
|
||||||
dedupe = strtobool(request.args.get('dedupe', 'true'))
|
|
||||||
|
|
||||||
tags = request.args.get('tag')
|
|
||||||
tag_uuids = request.args.get('tag_uuids')
|
|
||||||
|
|
||||||
if tag_uuids:
|
|
||||||
tag_uuids = tag_uuids.split(',')
|
|
||||||
|
|
||||||
urls = request.get_data().decode('utf8').splitlines()
|
|
||||||
added = []
|
|
||||||
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
|
|
||||||
for url in urls:
|
|
||||||
url = url.strip()
|
|
||||||
if not len(url):
|
|
||||||
continue
|
|
||||||
|
|
||||||
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
|
|
||||||
if not validators.url(url, simple_host=allow_simplehost):
|
|
||||||
return f"Invalid or unsupported URL - {url}", 400
|
|
||||||
|
|
||||||
if dedupe and self.datastore.url_exists(url):
|
|
||||||
continue
|
|
||||||
|
|
||||||
new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
|
|
||||||
added.append(new_uuid)
|
|
||||||
|
|
||||||
return added
|
|
||||||
|
|
||||||
class SystemInfo(Resource):
|
|
||||||
def __init__(self, **kwargs):
|
|
||||||
# datastore is a black box dependency
|
|
||||||
self.datastore = kwargs['datastore']
|
|
||||||
self.update_q = kwargs['update_q']
|
|
||||||
|
|
||||||
@auth.check_token
|
|
||||||
def get(self):
|
|
||||||
"""
|
|
||||||
@api {get} /api/v1/systeminfo Return system info
|
|
||||||
@apiDescription Return some info about the current system state
|
|
||||||
@apiExample {curl} Example usage:
|
|
||||||
curl http://localhost:5000/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45"
|
|
||||||
HTTP/1.0 200
|
|
||||||
{
|
|
||||||
'queue_size': 10 ,
|
|
||||||
'overdue_watches': ["watch-uuid-list"],
|
|
||||||
'uptime': 38344.55,
|
|
||||||
'watch_count': 800,
|
|
||||||
'version': "0.40.1"
|
|
||||||
}
|
|
||||||
@apiName Get Info
|
|
||||||
@apiGroup System Information
|
|
||||||
"""
|
|
||||||
import time
|
|
||||||
overdue_watches = []
|
|
||||||
|
|
||||||
# Check all watches and report which have not been checked but should have been
|
|
||||||
|
|
||||||
for uuid, watch in self.datastore.data.get('watching', {}).items():
|
|
||||||
# see if now - last_checked is greater than the time that should have been
|
|
||||||
# this is not super accurate (maybe they just edited it) but better than nothing
|
|
||||||
t = watch.threshold_seconds()
|
|
||||||
if not t:
|
|
||||||
# Use the system wide default
|
|
||||||
t = self.datastore.threshold_seconds
|
|
||||||
|
|
||||||
time_since_check = time.time() - watch.get('last_checked')
|
|
||||||
|
|
||||||
# Allow 5 minutes of grace time before we decide it's overdue
|
|
||||||
if time_since_check - (5 * 60) > t:
|
|
||||||
overdue_watches.append(uuid)
|
|
||||||
from changedetectionio import __version__ as main_version
|
|
||||||
return {
|
|
||||||
'queue_size': self.update_q.qsize(),
|
|
||||||
'overdue_watches': overdue_watches,
|
|
||||||
'uptime': round(time.time() - self.datastore.start_time, 2),
|
|
||||||
'watch_count': len(self.datastore.data.get('watching', {})),
|
|
||||||
'version': main_version
|
|
||||||
}, 200
|
|
||||||
@@ -0,0 +1,33 @@
|
|||||||
|
import copy
|
||||||
|
from . import api_schema
|
||||||
|
from ..model import watch_base
|
||||||
|
|
||||||
|
# Build a JSON Schema atleast partially based on our Watch model
|
||||||
|
watch_base_config = watch_base()
|
||||||
|
schema = api_schema.build_watch_json_schema(watch_base_config)
|
||||||
|
|
||||||
|
schema_create_watch = copy.deepcopy(schema)
|
||||||
|
schema_create_watch['required'] = ['url']
|
||||||
|
|
||||||
|
schema_update_watch = copy.deepcopy(schema)
|
||||||
|
schema_update_watch['additionalProperties'] = False
|
||||||
|
|
||||||
|
# Tag schema is also based on watch_base since Tag inherits from it
|
||||||
|
schema_tag = copy.deepcopy(schema)
|
||||||
|
schema_create_tag = copy.deepcopy(schema_tag)
|
||||||
|
schema_create_tag['required'] = ['title']
|
||||||
|
schema_update_tag = copy.deepcopy(schema_tag)
|
||||||
|
schema_update_tag['additionalProperties'] = False
|
||||||
|
|
||||||
|
schema_notification_urls = copy.deepcopy(schema)
|
||||||
|
schema_create_notification_urls = copy.deepcopy(schema_notification_urls)
|
||||||
|
schema_create_notification_urls['required'] = ['notification_urls']
|
||||||
|
schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
|
||||||
|
schema_delete_notification_urls['required'] = ['notification_urls']
|
||||||
|
|
||||||
|
# Import all API resources
|
||||||
|
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch
|
||||||
|
from .Tags import Tags, Tag
|
||||||
|
from .Import import Import
|
||||||
|
from .SystemInfo import SystemInfo
|
||||||
|
from .Notifications import Notifications
|
||||||
|
|||||||
@@ -1,5 +1,7 @@
|
|||||||
# Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API
|
# Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API
|
||||||
# Probably other ways to solve this when the backend switches to some ORM
|
# Probably other ways to solve this when the backend switches to some ORM
|
||||||
|
from changedetectionio.notification import valid_notification_formats
|
||||||
|
|
||||||
|
|
||||||
def build_time_between_check_json_schema():
|
def build_time_between_check_json_schema():
|
||||||
# Setup time between check schema
|
# Setup time between check schema
|
||||||
@@ -98,8 +100,6 @@ def build_watch_json_schema(d):
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
from changedetectionio.notification import valid_notification_formats
|
|
||||||
|
|
||||||
schema['properties']['notification_format'] = {'type': 'string',
|
schema['properties']['notification_format'] = {'type': 'string',
|
||||||
'enum': list(valid_notification_formats.keys())
|
'enum': list(valid_notification_formats.keys())
|
||||||
}
|
}
|
||||||
@@ -112,6 +112,35 @@ def build_watch_json_schema(d):
|
|||||||
|
|
||||||
schema['properties']['time_between_check'] = build_time_between_check_json_schema()
|
schema['properties']['time_between_check'] = build_time_between_check_json_schema()
|
||||||
|
|
||||||
|
schema['properties']['browser_steps'] = {
|
||||||
|
"anyOf": [
|
||||||
|
{
|
||||||
|
"type": "array",
|
||||||
|
"items": {
|
||||||
|
"type": "object",
|
||||||
|
"properties": {
|
||||||
|
"operation": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"maxLength": 5000 # Allows null and any string up to 5000 chars (including "")
|
||||||
|
},
|
||||||
|
"selector": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"maxLength": 5000
|
||||||
|
},
|
||||||
|
"optional_value": {
|
||||||
|
"type": ["string", "null"],
|
||||||
|
"maxLength": 5000
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"required": ["operation", "selector", "optional_value"],
|
||||||
|
"additionalProperties": False # No extra keys allowed
|
||||||
|
}
|
||||||
|
},
|
||||||
|
{"type": "null"}, # Allows null for `browser_steps`
|
||||||
|
{"type": "array", "maxItems": 0} # Allows empty array []
|
||||||
|
]
|
||||||
|
}
|
||||||
|
|
||||||
# headers ?
|
# headers ?
|
||||||
return schema
|
return schema
|
||||||
|
|
||||||
|
|||||||
@@ -11,22 +11,14 @@ def check_token(f):
|
|||||||
datastore = args[0].datastore
|
datastore = args[0].datastore
|
||||||
|
|
||||||
config_api_token_enabled = datastore.data['settings']['application'].get('api_access_token_enabled')
|
config_api_token_enabled = datastore.data['settings']['application'].get('api_access_token_enabled')
|
||||||
if not config_api_token_enabled:
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
|
||||||
api_key_header = request.headers['x-api-key']
|
|
||||||
except KeyError:
|
|
||||||
return make_response(
|
|
||||||
jsonify("No authorization x-api-key header."), 403
|
|
||||||
)
|
|
||||||
|
|
||||||
config_api_token = datastore.data['settings']['application'].get('api_access_token')
|
config_api_token = datastore.data['settings']['application'].get('api_access_token')
|
||||||
|
|
||||||
if api_key_header != config_api_token:
|
# config_api_token_enabled - a UI option in settings if access should obey the key or not
|
||||||
return make_response(
|
if config_api_token_enabled:
|
||||||
jsonify("Invalid access - API key invalid."), 403
|
if request.headers.get('x-api-key') != config_api_token:
|
||||||
)
|
return make_response(
|
||||||
|
jsonify("Invalid access - API key invalid."), 403
|
||||||
|
)
|
||||||
|
|
||||||
return f(*args, **kwargs)
|
return f(*args, **kwargs)
|
||||||
|
|
||||||
|
|||||||
@@ -1,11 +0,0 @@
|
|||||||
import apprise
|
|
||||||
|
|
||||||
# Create our AppriseAsset and populate it with some of our new values:
|
|
||||||
# https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object
|
|
||||||
asset = apprise.AppriseAsset(
|
|
||||||
image_url_logo='https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
|
|
||||||
)
|
|
||||||
|
|
||||||
asset.app_id = "changedetection.io"
|
|
||||||
asset.app_desc = "ChangeDetection.io best and simplest website monitoring and change detection"
|
|
||||||
asset.app_url = "https://changedetection.io"
|
|
||||||
@@ -1,82 +0,0 @@
|
|||||||
# include the decorator
|
|
||||||
from apprise.decorators import notify
|
|
||||||
from loguru import logger
|
|
||||||
|
|
||||||
@notify(on="delete")
|
|
||||||
@notify(on="deletes")
|
|
||||||
@notify(on="get")
|
|
||||||
@notify(on="gets")
|
|
||||||
@notify(on="post")
|
|
||||||
@notify(on="posts")
|
|
||||||
@notify(on="put")
|
|
||||||
@notify(on="puts")
|
|
||||||
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
|
|
||||||
import requests
|
|
||||||
import json
|
|
||||||
from urllib.parse import unquote_plus
|
|
||||||
from apprise.utils import parse_url as apprise_parse_url
|
|
||||||
from apprise import URLBase
|
|
||||||
|
|
||||||
url = kwargs['meta'].get('url')
|
|
||||||
|
|
||||||
if url.startswith('post'):
|
|
||||||
r = requests.post
|
|
||||||
elif url.startswith('get'):
|
|
||||||
r = requests.get
|
|
||||||
elif url.startswith('put'):
|
|
||||||
r = requests.put
|
|
||||||
elif url.startswith('delete'):
|
|
||||||
r = requests.delete
|
|
||||||
|
|
||||||
url = url.replace('post://', 'http://')
|
|
||||||
url = url.replace('posts://', 'https://')
|
|
||||||
url = url.replace('put://', 'http://')
|
|
||||||
url = url.replace('puts://', 'https://')
|
|
||||||
url = url.replace('get://', 'http://')
|
|
||||||
url = url.replace('gets://', 'https://')
|
|
||||||
url = url.replace('put://', 'http://')
|
|
||||||
url = url.replace('puts://', 'https://')
|
|
||||||
url = url.replace('delete://', 'http://')
|
|
||||||
url = url.replace('deletes://', 'https://')
|
|
||||||
|
|
||||||
headers = {}
|
|
||||||
params = {}
|
|
||||||
auth = None
|
|
||||||
|
|
||||||
# Convert /foobar?+some-header=hello to proper header dictionary
|
|
||||||
results = apprise_parse_url(url)
|
|
||||||
if results:
|
|
||||||
# Add our headers that the user can potentially over-ride if they wish
|
|
||||||
# to to our returned result set and tidy entries by unquoting them
|
|
||||||
headers = {unquote_plus(x): unquote_plus(y)
|
|
||||||
for x, y in results['qsd+'].items()}
|
|
||||||
|
|
||||||
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
|
|
||||||
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
|
|
||||||
# but here we are making straight requests, so we need todo convert this against apprise's logic
|
|
||||||
for k, v in results['qsd'].items():
|
|
||||||
if not k.strip('+-') in results['qsd+'].keys():
|
|
||||||
params[unquote_plus(k)] = unquote_plus(v)
|
|
||||||
|
|
||||||
# Determine Authentication
|
|
||||||
auth = ''
|
|
||||||
if results.get('user') and results.get('password'):
|
|
||||||
auth = (unquote_plus(results.get('user')), unquote_plus(results.get('user')))
|
|
||||||
elif results.get('user'):
|
|
||||||
auth = (unquote_plus(results.get('user')))
|
|
||||||
|
|
||||||
# Try to auto-guess if it's JSON
|
|
||||||
h = 'application/json; charset=utf-8'
|
|
||||||
try:
|
|
||||||
json.loads(body)
|
|
||||||
headers['Content-Type'] = h
|
|
||||||
except ValueError as e:
|
|
||||||
logger.warning(f"Could not automatically add '{h}' header to the {kwargs['meta'].get('schema')}:// notification because the document failed to parse as JSON: {e}")
|
|
||||||
pass
|
|
||||||
|
|
||||||
r(results.get('url'),
|
|
||||||
auth=auth,
|
|
||||||
data=body.encode('utf-8') if type(body) is str else body,
|
|
||||||
headers=headers,
|
|
||||||
params=params
|
|
||||||
)
|
|
||||||
33
changedetectionio/auth_decorator.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
import os
|
||||||
|
from functools import wraps
|
||||||
|
from flask import current_app, redirect, request
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
def login_optionally_required(func):
|
||||||
|
"""
|
||||||
|
If password authentication is enabled, verify the user is logged in.
|
||||||
|
To be used as a decorator for routes that should optionally require login.
|
||||||
|
This version is blueprint-friendly as it uses current_app instead of directly accessing app.
|
||||||
|
"""
|
||||||
|
@wraps(func)
|
||||||
|
def decorated_view(*args, **kwargs):
|
||||||
|
from flask import current_app
|
||||||
|
import flask_login
|
||||||
|
from flask_login import current_user
|
||||||
|
|
||||||
|
# Access datastore through the app config
|
||||||
|
datastore = current_app.config['DATASTORE']
|
||||||
|
has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
|
||||||
|
|
||||||
|
# Permitted
|
||||||
|
if request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'):
|
||||||
|
return func(*args, **kwargs)
|
||||||
|
elif request.method in flask_login.config.EXEMPT_METHODS:
|
||||||
|
return func(*args, **kwargs)
|
||||||
|
elif current_app.config.get('LOGIN_DISABLED'):
|
||||||
|
return func(*args, **kwargs)
|
||||||
|
elif has_password_enabled and not current_user.is_authenticated:
|
||||||
|
return current_app.login_manager.unauthorized()
|
||||||
|
|
||||||
|
return func(*args, **kwargs)
|
||||||
|
return decorated_view
|
||||||
@@ -138,7 +138,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True)
|
||||||
|
|
||||||
@login_optionally_required
|
@login_optionally_required
|
||||||
@backups_blueprint.route("/", methods=['GET'])
|
@backups_blueprint.route("", methods=['GET'])
|
||||||
def index():
|
def index():
|
||||||
backups = find_backups()
|
backups = find_backups()
|
||||||
output = render_template("overview.html",
|
output = render_template("overview.html",
|
||||||
|
|||||||
@@ -22,7 +22,9 @@ from loguru import logger
|
|||||||
|
|
||||||
browsersteps_sessions = {}
|
browsersteps_sessions = {}
|
||||||
io_interface_context = None
|
io_interface_context = None
|
||||||
|
import json
|
||||||
|
import hashlib
|
||||||
|
from flask import Response
|
||||||
|
|
||||||
def construct_blueprint(datastore: ChangeDetectionStore):
|
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||||
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
|
||||||
@@ -31,10 +33,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
from . import nonContext
|
from . import nonContext
|
||||||
from . import browser_steps
|
from . import browser_steps
|
||||||
import time
|
import time
|
||||||
global browsersteps_sessions
|
|
||||||
global io_interface_context
|
global io_interface_context
|
||||||
|
|
||||||
|
|
||||||
# We keep the playwright session open for many minutes
|
# We keep the playwright session open for many minutes
|
||||||
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
|
||||||
|
|
||||||
@@ -53,14 +53,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
a = "?" if not '?' in base_url else '&'
|
a = "?" if not '?' in base_url else '&'
|
||||||
base_url += a + f"timeout={keepalive_ms}"
|
base_url += a + f"timeout={keepalive_ms}"
|
||||||
|
|
||||||
try:
|
browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
|
||||||
browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
|
|
||||||
except Exception as e:
|
|
||||||
if 'ECONNREFUSED' in str(e):
|
|
||||||
return make_response('Unable to start the Playwright Browser session, is it running?', 401)
|
|
||||||
else:
|
|
||||||
# Other errors, bad URL syntax, bad reply etc
|
|
||||||
return make_response(str(e), 401)
|
|
||||||
|
|
||||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||||
proxy = None
|
proxy = None
|
||||||
@@ -85,7 +78,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
|
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
|
||||||
playwright_browser=browsersteps_start_session['browser'],
|
playwright_browser=browsersteps_start_session['browser'],
|
||||||
proxy=proxy,
|
proxy=proxy,
|
||||||
start_url=datastore.data['watching'][watch_uuid].get('url'),
|
start_url=datastore.data['watching'][watch_uuid].link,
|
||||||
headers=datastore.data['watching'][watch_uuid].get('headers')
|
headers=datastore.data['watching'][watch_uuid].get('headers')
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -101,8 +94,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
# A new session was requested, return sessionID
|
# A new session was requested, return sessionID
|
||||||
|
|
||||||
import uuid
|
import uuid
|
||||||
global browsersteps_sessions
|
|
||||||
|
|
||||||
browsersteps_session_id = str(uuid.uuid4())
|
browsersteps_session_id = str(uuid.uuid4())
|
||||||
watch_uuid = request.args.get('uuid')
|
watch_uuid = request.args.get('uuid')
|
||||||
|
|
||||||
@@ -111,7 +102,16 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
logger.debug("Starting connection with playwright")
|
logger.debug("Starting connection with playwright")
|
||||||
logger.debug("browser_steps.py connecting")
|
logger.debug("browser_steps.py connecting")
|
||||||
browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
|
|
||||||
|
try:
|
||||||
|
browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
|
||||||
|
except Exception as e:
|
||||||
|
if 'ECONNREFUSED' in str(e):
|
||||||
|
return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
|
||||||
|
else:
|
||||||
|
# Other errors, bad URL syntax, bad reply etc
|
||||||
|
return make_response(str(e), 401)
|
||||||
|
|
||||||
logger.debug("Starting connection with playwright - done")
|
logger.debug("Starting connection with playwright - done")
|
||||||
return {'browsersteps_session_id': browsersteps_session_id}
|
return {'browsersteps_session_id': browsersteps_session_id}
|
||||||
|
|
||||||
@@ -146,7 +146,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
def browsersteps_ui_update():
|
def browsersteps_ui_update():
|
||||||
import base64
|
import base64
|
||||||
import playwright._impl._errors
|
import playwright._impl._errors
|
||||||
global browsersteps_sessions
|
|
||||||
from changedetectionio.blueprint.browser_steps import browser_steps
|
from changedetectionio.blueprint.browser_steps import browser_steps
|
||||||
|
|
||||||
remaining =0
|
remaining =0
|
||||||
@@ -160,19 +159,16 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
if not browsersteps_sessions.get(browsersteps_session_id):
|
if not browsersteps_sessions.get(browsersteps_session_id):
|
||||||
return make_response('No session exists under that ID', 500)
|
return make_response('No session exists under that ID', 500)
|
||||||
|
|
||||||
|
is_last_step = False
|
||||||
# Actions - step/apply/etc, do the thing and return state
|
# Actions - step/apply/etc, do the thing and return state
|
||||||
if request.method == 'POST':
|
if request.method == 'POST':
|
||||||
# @todo - should always be an existing session
|
# @todo - should always be an existing session
|
||||||
step_operation = request.form.get('operation')
|
step_operation = request.form.get('operation')
|
||||||
step_selector = request.form.get('selector')
|
step_selector = request.form.get('selector')
|
||||||
step_optional_value = request.form.get('optional_value')
|
step_optional_value = request.form.get('optional_value')
|
||||||
step_n = int(request.form.get('step_n'))
|
|
||||||
is_last_step = strtobool(request.form.get('is_last_step'))
|
is_last_step = strtobool(request.form.get('is_last_step'))
|
||||||
|
|
||||||
# @todo try.. accept.. nice errors not popups..
|
|
||||||
try:
|
try:
|
||||||
|
|
||||||
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
|
browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
|
||||||
selector=step_selector,
|
selector=step_selector,
|
||||||
optional_value=step_optional_value)
|
optional_value=step_optional_value)
|
||||||
@@ -182,16 +178,6 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
# Try to find something of value to give back to the user
|
# Try to find something of value to give back to the user
|
||||||
return make_response(str(e).splitlines()[0], 401)
|
return make_response(str(e).splitlines()[0], 401)
|
||||||
|
|
||||||
# Get visual selector ready/update its data (also use the current filter info from the page?)
|
|
||||||
# When the last 'apply' button was pressed
|
|
||||||
# @todo this adds overhead because the xpath selection is happening twice
|
|
||||||
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
|
||||||
if is_last_step and u:
|
|
||||||
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data()
|
|
||||||
watch = datastore.data['watching'].get(uuid)
|
|
||||||
if watch:
|
|
||||||
watch.save_screenshot(screenshot=screenshot)
|
|
||||||
watch.save_xpath_data(data=xpath_data)
|
|
||||||
|
|
||||||
# if not this_session.page:
|
# if not this_session.page:
|
||||||
# cleanup_playwright_session()
|
# cleanup_playwright_session()
|
||||||
@@ -199,31 +185,35 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
|
|
||||||
# Screenshots and other info only needed on requesting a step (POST)
|
# Screenshots and other info only needed on requesting a step (POST)
|
||||||
try:
|
try:
|
||||||
state = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
(screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
|
||||||
|
if is_last_step:
|
||||||
|
watch = datastore.data['watching'].get(uuid)
|
||||||
|
u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
|
||||||
|
if watch and u:
|
||||||
|
watch.save_screenshot(screenshot=screenshot)
|
||||||
|
watch.save_xpath_data(data=xpath_data)
|
||||||
|
|
||||||
except playwright._impl._api_types.Error as e:
|
except playwright._impl._api_types.Error as e:
|
||||||
return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
|
return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
|
||||||
|
except Exception as e:
|
||||||
|
return make_response("Error fetching screenshot and element data - " + str(e), 401)
|
||||||
|
|
||||||
# Use send_file() which is way faster than read/write loop on bytes
|
# SEND THIS BACK TO THE BROWSER
|
||||||
import json
|
|
||||||
from tempfile import mkstemp
|
|
||||||
from flask import send_file
|
|
||||||
tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-")
|
|
||||||
|
|
||||||
output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format(
|
output = {
|
||||||
base64.b64encode(state[0]).decode('ascii')),
|
"screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
|
||||||
'xpath_data': state[1],
|
"xpath_data": xpath_data,
|
||||||
'session_age_start': browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
|
"session_age_start": browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start,
|
||||||
'browser_time_remaining': round(remaining)
|
"browser_time_remaining": round(remaining)
|
||||||
})
|
}
|
||||||
|
json_data = json.dumps(output)
|
||||||
|
|
||||||
with os.fdopen(tmp_fd, 'w') as f:
|
# Generate an ETag (hash of the response body)
|
||||||
f.write(output)
|
etag_hash = hashlib.md5(json_data.encode('utf-8')).hexdigest()
|
||||||
|
|
||||||
response = make_response(send_file(path_or_file=tmp_file,
|
# Create the response with ETag
|
||||||
mimetype='application/json; charset=UTF-8',
|
response = Response(json_data, mimetype="application/json; charset=UTF-8")
|
||||||
etag=True))
|
response.set_etag(etag_hash)
|
||||||
# No longer needed
|
|
||||||
os.unlink(tmp_file)
|
|
||||||
|
|
||||||
return response
|
return response
|
||||||
|
|
||||||
|
|||||||
@@ -1,14 +1,17 @@
|
|||||||
#!/usr/bin/env python3
|
|
||||||
|
|
||||||
import os
|
import os
|
||||||
import time
|
import time
|
||||||
import re
|
import re
|
||||||
|
import sys
|
||||||
|
import traceback
|
||||||
from random import randint
|
from random import randint
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
|
||||||
from changedetectionio.content_fetchers.base import manage_user_agent
|
from changedetectionio.content_fetchers.base import manage_user_agent
|
||||||
from changedetectionio.safe_jinja import render as jinja_render
|
from changedetectionio.safe_jinja import render as jinja_render
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
|
# Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end
|
||||||
# 0- off, 1- on
|
# 0- off, 1- on
|
||||||
browser_step_ui_config = {'Choose one': '0 0',
|
browser_step_ui_config = {'Choose one': '0 0',
|
||||||
@@ -31,13 +34,16 @@ browser_step_ui_config = {'Choose one': '0 0',
|
|||||||
# 'Extract text and use as filter': '1 0',
|
# 'Extract text and use as filter': '1 0',
|
||||||
'Goto site': '0 0',
|
'Goto site': '0 0',
|
||||||
'Goto URL': '0 1',
|
'Goto URL': '0 1',
|
||||||
|
'Make all child elements visible': '1 0',
|
||||||
'Press Enter': '0 0',
|
'Press Enter': '0 0',
|
||||||
'Select by label': '1 1',
|
'Select by label': '1 1',
|
||||||
|
'<select> by option text': '1 1',
|
||||||
'Scroll down': '0 0',
|
'Scroll down': '0 0',
|
||||||
'Uncheck checkbox': '1 0',
|
'Uncheck checkbox': '1 0',
|
||||||
'Wait for seconds': '0 1',
|
'Wait for seconds': '0 1',
|
||||||
'Wait for text': '0 1',
|
'Wait for text': '0 1',
|
||||||
'Wait for text in element': '1 1',
|
'Wait for text in element': '1 1',
|
||||||
|
'Remove elements': '1 0',
|
||||||
# 'Press Page Down': '0 0',
|
# 'Press Page Down': '0 0',
|
||||||
# 'Press Page Up': '0 0',
|
# 'Press Page Up': '0 0',
|
||||||
# weird bug, come back to it later
|
# weird bug, come back to it later
|
||||||
@@ -51,12 +57,17 @@ browser_step_ui_config = {'Choose one': '0 0',
|
|||||||
class steppable_browser_interface():
|
class steppable_browser_interface():
|
||||||
page = None
|
page = None
|
||||||
start_url = None
|
start_url = None
|
||||||
|
action_timeout = 10 * 1000
|
||||||
|
|
||||||
def __init__(self, start_url):
|
def __init__(self, start_url):
|
||||||
self.start_url = start_url
|
self.start_url = start_url
|
||||||
|
|
||||||
# Convert and perform "Click Button" for example
|
# Convert and perform "Click Button" for example
|
||||||
def call_action(self, action_name, selector=None, optional_value=None):
|
def call_action(self, action_name, selector=None, optional_value=None):
|
||||||
|
if self.page is None:
|
||||||
|
logger.warning("Cannot call action on None page object")
|
||||||
|
return
|
||||||
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower())
|
call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower())
|
||||||
if call_action_name == 'choose_one':
|
if call_action_name == 'choose_one':
|
||||||
@@ -67,28 +78,33 @@ class steppable_browser_interface():
|
|||||||
if selector and selector.startswith('/') and not selector.startswith('//'):
|
if selector and selector.startswith('/') and not selector.startswith('//'):
|
||||||
selector = "xpath=" + selector
|
selector = "xpath=" + selector
|
||||||
|
|
||||||
|
# Check if action handler exists
|
||||||
|
if not hasattr(self, "action_" + call_action_name):
|
||||||
|
logger.warning(f"Action handler for '{call_action_name}' not found")
|
||||||
|
return
|
||||||
|
|
||||||
action_handler = getattr(self, "action_" + call_action_name)
|
action_handler = getattr(self, "action_" + call_action_name)
|
||||||
|
|
||||||
# Support for Jinja2 variables in the value and selector
|
# Support for Jinja2 variables in the value and selector
|
||||||
|
|
||||||
if selector and ('{%' in selector or '{{' in selector):
|
if selector and ('{%' in selector or '{{' in selector):
|
||||||
selector = jinja_render(template_str=selector)
|
selector = jinja_render(template_str=selector)
|
||||||
|
|
||||||
if optional_value and ('{%' in optional_value or '{{' in optional_value):
|
if optional_value and ('{%' in optional_value or '{{' in optional_value):
|
||||||
optional_value = jinja_render(template_str=optional_value)
|
optional_value = jinja_render(template_str=optional_value)
|
||||||
|
|
||||||
|
|
||||||
action_handler(selector, optional_value)
|
action_handler(selector, optional_value)
|
||||||
|
# Safely wait for timeout
|
||||||
self.page.wait_for_timeout(1.5 * 1000)
|
self.page.wait_for_timeout(1.5 * 1000)
|
||||||
logger.debug(f"Call action done in {time.time()-now:.2f}s")
|
logger.debug(f"Call action done in {time.time()-now:.2f}s")
|
||||||
|
|
||||||
def action_goto_url(self, selector=None, value=None):
|
def action_goto_url(self, selector=None, value=None):
|
||||||
# self.page.set_viewport_size({"width": 1280, "height": 5000})
|
if not value:
|
||||||
|
logger.warning("No URL provided for goto_url action")
|
||||||
|
return None
|
||||||
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
response = self.page.goto(value, timeout=0, wait_until='load')
|
response = self.page.goto(value, timeout=0, wait_until='load')
|
||||||
# Should be the same as the puppeteer_fetch.js methods, means, load with no timeout set (skip timeout)
|
|
||||||
#and also wait for seconds ?
|
|
||||||
#await page.waitForTimeout(1000);
|
|
||||||
#await page.waitForTimeout(extra_wait_ms);
|
|
||||||
logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
|
logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
|
||||||
return response
|
return response
|
||||||
|
|
||||||
@@ -98,61 +114,79 @@ class steppable_browser_interface():
|
|||||||
|
|
||||||
def action_click_element_containing_text(self, selector=None, value=''):
|
def action_click_element_containing_text(self, selector=None, value=''):
|
||||||
logger.debug("Clicking element containing text")
|
logger.debug("Clicking element containing text")
|
||||||
if not len(value.strip()):
|
if not value or not len(value.strip()):
|
||||||
return
|
return
|
||||||
|
|
||||||
elem = self.page.get_by_text(value)
|
elem = self.page.get_by_text(value)
|
||||||
if elem.count():
|
if elem.count():
|
||||||
elem.first.click(delay=randint(200, 500), timeout=3000)
|
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||||
|
|
||||||
|
|
||||||
def action_click_element_containing_text_if_exists(self, selector=None, value=''):
|
def action_click_element_containing_text_if_exists(self, selector=None, value=''):
|
||||||
logger.debug("Clicking element containing text if exists")
|
logger.debug("Clicking element containing text if exists")
|
||||||
if not len(value.strip()):
|
if not value or not len(value.strip()):
|
||||||
return
|
return
|
||||||
|
|
||||||
elem = self.page.get_by_text(value)
|
elem = self.page.get_by_text(value)
|
||||||
logger.debug(f"Clicking element containing text - {elem.count()} elements found")
|
logger.debug(f"Clicking element containing text - {elem.count()} elements found")
|
||||||
if elem.count():
|
if elem.count():
|
||||||
elem.first.click(delay=randint(200, 500), timeout=3000)
|
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||||
else:
|
|
||||||
return
|
|
||||||
|
|
||||||
def action_enter_text_in_field(self, selector, value):
|
def action_enter_text_in_field(self, selector, value):
|
||||||
if not len(selector.strip()):
|
if not selector or not len(selector.strip()):
|
||||||
return
|
return
|
||||||
|
|
||||||
self.page.fill(selector, value, timeout=10 * 1000)
|
self.page.fill(selector, value, timeout=self.action_timeout)
|
||||||
|
|
||||||
def action_execute_js(self, selector, value):
|
def action_execute_js(self, selector, value):
|
||||||
response = self.page.evaluate(value)
|
if not value:
|
||||||
return response
|
return None
|
||||||
|
|
||||||
|
return self.page.evaluate(value)
|
||||||
|
|
||||||
def action_click_element(self, selector, value):
|
def action_click_element(self, selector, value):
|
||||||
logger.debug("Clicking element")
|
logger.debug("Clicking element")
|
||||||
if not len(selector.strip()):
|
if not selector or not len(selector.strip()):
|
||||||
return
|
return
|
||||||
|
|
||||||
self.page.click(selector=selector, timeout=30 * 1000, delay=randint(200, 500))
|
self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
|
||||||
|
|
||||||
def action_click_element_if_exists(self, selector, value):
|
def action_click_element_if_exists(self, selector, value):
|
||||||
import playwright._impl._errors as _api_types
|
import playwright._impl._errors as _api_types
|
||||||
logger.debug("Clicking element if exists")
|
logger.debug("Clicking element if exists")
|
||||||
if not len(selector.strip()):
|
if not selector or not len(selector.strip()):
|
||||||
return
|
return
|
||||||
|
|
||||||
try:
|
try:
|
||||||
self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
|
self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
|
||||||
except _api_types.TimeoutError as e:
|
except _api_types.TimeoutError:
|
||||||
return
|
return
|
||||||
except _api_types.Error as e:
|
except _api_types.Error:
|
||||||
# Element was there, but page redrew and now its long long gone
|
# Element was there, but page redrew and now its long long gone
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
def action_click_x_y(self, selector, value):
|
def action_click_x_y(self, selector, value):
|
||||||
if not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
|
if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
|
||||||
raise Exception("'Click X,Y' step should be in the format of '100 , 90'")
|
logger.warning("'Click X,Y' step should be in the format of '100 , 90'")
|
||||||
|
return
|
||||||
|
|
||||||
x, y = value.strip().split(',')
|
try:
|
||||||
x = int(float(x.strip()))
|
x, y = value.strip().split(',')
|
||||||
y = int(float(y.strip()))
|
x = int(float(x.strip()))
|
||||||
self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
|
y = int(float(y.strip()))
|
||||||
|
|
||||||
|
self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error parsing x,y coordinates: {str(e)}")
|
||||||
|
|
||||||
|
def action__select_by_option_text(self, selector, value):
|
||||||
|
if not selector or not len(selector.strip()):
|
||||||
|
return
|
||||||
|
|
||||||
|
self.page.select_option(selector, label=value, timeout=self.action_timeout)
|
||||||
|
|
||||||
def action_scroll_down(self, selector, value):
|
def action_scroll_down(self, selector, value):
|
||||||
# Some sites this doesnt work on for some reason
|
# Some sites this doesnt work on for some reason
|
||||||
@@ -160,23 +194,42 @@ class steppable_browser_interface():
|
|||||||
self.page.wait_for_timeout(1000)
|
self.page.wait_for_timeout(1000)
|
||||||
|
|
||||||
def action_wait_for_seconds(self, selector, value):
|
def action_wait_for_seconds(self, selector, value):
|
||||||
self.page.wait_for_timeout(float(value.strip()) * 1000)
|
try:
|
||||||
|
seconds = float(value.strip()) if value else 1.0
|
||||||
|
self.page.wait_for_timeout(seconds * 1000)
|
||||||
|
except (ValueError, TypeError) as e:
|
||||||
|
logger.error(f"Invalid value for wait_for_seconds: {str(e)}")
|
||||||
|
|
||||||
def action_wait_for_text(self, selector, value):
|
def action_wait_for_text(self, selector, value):
|
||||||
|
if not value:
|
||||||
|
return
|
||||||
|
|
||||||
import json
|
import json
|
||||||
v = json.dumps(value)
|
v = json.dumps(value)
|
||||||
self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000)
|
self.page.wait_for_function(
|
||||||
|
f'document.querySelector("body").innerText.includes({v});',
|
||||||
|
timeout=30000
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
def action_wait_for_text_in_element(self, selector, value):
|
def action_wait_for_text_in_element(self, selector, value):
|
||||||
|
if not selector or not value:
|
||||||
|
return
|
||||||
|
|
||||||
import json
|
import json
|
||||||
s = json.dumps(selector)
|
s = json.dumps(selector)
|
||||||
v = json.dumps(value)
|
v = json.dumps(value)
|
||||||
self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000)
|
|
||||||
|
self.page.wait_for_function(
|
||||||
|
f'document.querySelector({s}).innerText.includes({v});',
|
||||||
|
timeout=30000
|
||||||
|
)
|
||||||
|
|
||||||
# @todo - in the future make some popout interface to capture what needs to be set
|
# @todo - in the future make some popout interface to capture what needs to be set
|
||||||
# https://playwright.dev/python/docs/api/class-keyboard
|
# https://playwright.dev/python/docs/api/class-keyboard
|
||||||
def action_press_enter(self, selector, value):
|
def action_press_enter(self, selector, value):
|
||||||
self.page.keyboard.press("Enter", delay=randint(200, 500))
|
self.page.keyboard.press("Enter", delay=randint(200, 500))
|
||||||
|
|
||||||
|
|
||||||
def action_press_page_up(self, selector, value):
|
def action_press_page_up(self, selector, value):
|
||||||
self.page.keyboard.press("PageUp", delay=randint(200, 500))
|
self.page.keyboard.press("PageUp", delay=randint(200, 500))
|
||||||
@@ -185,11 +238,42 @@ class steppable_browser_interface():
|
|||||||
self.page.keyboard.press("PageDown", delay=randint(200, 500))
|
self.page.keyboard.press("PageDown", delay=randint(200, 500))
|
||||||
|
|
||||||
def action_check_checkbox(self, selector, value):
|
def action_check_checkbox(self, selector, value):
|
||||||
self.page.locator(selector).check(timeout=1000)
|
if not selector:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.page.locator(selector).check(timeout=self.action_timeout)
|
||||||
|
|
||||||
def action_uncheck_checkbox(self, selector, value):
|
def action_uncheck_checkbox(self, selector, value):
|
||||||
self.page.locator(selector, timeout=1000).uncheck(timeout=1000)
|
if not selector:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.page.locator(selector).uncheck(timeout=self.action_timeout)
|
||||||
|
|
||||||
|
|
||||||
|
def action_remove_elements(self, selector, value):
|
||||||
|
"""Removes all elements matching the given selector from the DOM."""
|
||||||
|
if not selector:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
|
||||||
|
|
||||||
|
def action_make_all_child_elements_visible(self, selector, value):
|
||||||
|
"""Recursively makes all child elements inside the given selector fully visible."""
|
||||||
|
if not selector:
|
||||||
|
return
|
||||||
|
|
||||||
|
self.page.locator(selector).locator("*").evaluate_all("""
|
||||||
|
els => els.forEach(el => {
|
||||||
|
el.style.display = 'block'; // Forces it to be displayed
|
||||||
|
el.style.visibility = 'visible'; // Ensures it's not hidden
|
||||||
|
el.style.opacity = '1'; // Fully opaque
|
||||||
|
el.style.position = 'relative'; // Avoids 'absolute' hiding
|
||||||
|
el.style.height = 'auto'; // Expands collapsed elements
|
||||||
|
el.style.width = 'auto'; // Ensures full visibility
|
||||||
|
el.removeAttribute('hidden'); // Removes hidden attribute
|
||||||
|
el.classList.remove('hidden', 'd-none'); // Removes common CSS hidden classes
|
||||||
|
})
|
||||||
|
""")
|
||||||
|
|
||||||
# Responsible for maintaining a live 'context' with the chrome CDP
|
# Responsible for maintaining a live 'context' with the chrome CDP
|
||||||
# @todo - how long do contexts live for anyway?
|
# @todo - how long do contexts live for anyway?
|
||||||
@@ -201,7 +285,9 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
# bump and kill this if idle after X sec
|
# bump and kill this if idle after X sec
|
||||||
age_start = 0
|
age_start = 0
|
||||||
headers = {}
|
headers = {}
|
||||||
|
# Track if resources are properly cleaned up
|
||||||
|
_is_cleaned_up = False
|
||||||
|
|
||||||
# use a special driver, maybe locally etc
|
# use a special driver, maybe locally etc
|
||||||
command_executor = os.getenv(
|
command_executor = os.getenv(
|
||||||
"PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL"
|
"PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL"
|
||||||
@@ -220,9 +306,14 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
self.age_start = time.time()
|
self.age_start = time.time()
|
||||||
self.playwright_browser = playwright_browser
|
self.playwright_browser = playwright_browser
|
||||||
self.start_url = start_url
|
self.start_url = start_url
|
||||||
|
self._is_cleaned_up = False
|
||||||
if self.context is None:
|
if self.context is None:
|
||||||
self.connect(proxy=proxy)
|
self.connect(proxy=proxy)
|
||||||
|
|
||||||
|
def __del__(self):
|
||||||
|
# Ensure cleanup happens if object is garbage collected
|
||||||
|
self.cleanup()
|
||||||
|
|
||||||
# Connect and setup a new context
|
# Connect and setup a new context
|
||||||
def connect(self, proxy=None):
|
def connect(self, proxy=None):
|
||||||
# Should only get called once - test that
|
# Should only get called once - test that
|
||||||
@@ -241,16 +332,12 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
user_agent=manage_user_agent(headers=self.headers),
|
user_agent=manage_user_agent(headers=self.headers),
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
self.page = self.context.new_page()
|
self.page = self.context.new_page()
|
||||||
|
|
||||||
# self.page.set_default_navigation_timeout(keep_open)
|
# self.page.set_default_navigation_timeout(keep_open)
|
||||||
self.page.set_default_timeout(keep_open)
|
self.page.set_default_timeout(keep_open)
|
||||||
# @todo probably this doesnt work
|
# Set event handlers
|
||||||
self.page.on(
|
self.page.on("close", self.mark_as_closed)
|
||||||
"close",
|
|
||||||
self.mark_as_closed,
|
|
||||||
)
|
|
||||||
# Listen for all console events and handle errors
|
# Listen for all console events and handle errors
|
||||||
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
|
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
|
||||||
|
|
||||||
@@ -259,54 +346,117 @@ class browsersteps_live_ui(steppable_browser_interface):
|
|||||||
|
|
||||||
def mark_as_closed(self):
|
def mark_as_closed(self):
|
||||||
logger.debug("Page closed, cleaning up..")
|
logger.debug("Page closed, cleaning up..")
|
||||||
|
self.cleanup()
|
||||||
|
|
||||||
|
def cleanup(self):
|
||||||
|
"""Properly clean up all resources to prevent memory leaks"""
|
||||||
|
if self._is_cleaned_up:
|
||||||
|
return
|
||||||
|
|
||||||
|
logger.debug("Cleaning up browser steps resources")
|
||||||
|
|
||||||
|
# Clean up page
|
||||||
|
if hasattr(self, 'page') and self.page is not None:
|
||||||
|
try:
|
||||||
|
# Force garbage collection before closing
|
||||||
|
self.page.request_gc()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Error during page garbage collection: {str(e)}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Remove event listeners before closing
|
||||||
|
self.page.remove_listener("close", self.mark_as_closed)
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Error removing event listeners: {str(e)}")
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.page.close()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Error closing page: {str(e)}")
|
||||||
|
|
||||||
|
self.page = None
|
||||||
|
|
||||||
|
# Clean up context
|
||||||
|
if hasattr(self, 'context') and self.context is not None:
|
||||||
|
try:
|
||||||
|
self.context.close()
|
||||||
|
except Exception as e:
|
||||||
|
logger.debug(f"Error closing context: {str(e)}")
|
||||||
|
|
||||||
|
self.context = None
|
||||||
|
|
||||||
|
self._is_cleaned_up = True
|
||||||
|
logger.debug("Browser steps resources cleanup complete")
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def has_expired(self):
|
def has_expired(self):
|
||||||
if not self.page:
|
if not self.page or self._is_cleaned_up:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
# Check if session has expired based on age
|
||||||
|
max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10)) # Default 10 minutes
|
||||||
|
if (time.time() - self.age_start) > max_age_seconds:
|
||||||
|
logger.debug(f"Browser steps session expired after {max_age_seconds} seconds")
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
def get_current_state(self):
|
def get_current_state(self):
|
||||||
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
|
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
|
||||||
import importlib.resources
|
import importlib.resources
|
||||||
|
import json
|
||||||
|
# because we for now only run browser steps in playwright mode (not puppeteer mode)
|
||||||
|
from changedetectionio.content_fetchers.playwright import capture_full_page
|
||||||
|
|
||||||
|
# Safety check - don't proceed if resources are cleaned up
|
||||||
|
if self._is_cleaned_up or self.page is None:
|
||||||
|
logger.warning("Attempted to get current state after cleanup")
|
||||||
|
return (None, None)
|
||||||
|
|
||||||
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
||||||
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
self.page.wait_for_timeout(1 * 1000)
|
self.page.wait_for_timeout(1 * 1000)
|
||||||
|
|
||||||
# The actual screenshot
|
screenshot = None
|
||||||
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
|
xpath_data = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Get screenshot first
|
||||||
|
screenshot = capture_full_page(page=self.page)
|
||||||
|
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
|
||||||
|
|
||||||
self.page.evaluate("var include_filters=''")
|
# Then get interactive elements
|
||||||
# Go find the interactive elements
|
now = time.time()
|
||||||
# @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
|
self.page.evaluate("var include_filters=''")
|
||||||
elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
|
self.page.request_gc()
|
||||||
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements)
|
|
||||||
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
|
scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
|
||||||
# So the JS will find the smallest one first
|
|
||||||
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
|
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||||
logger.debug(f"Time to complete get_current_state of browser {time.time()-now:.2f}s")
|
xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
|
||||||
# except
|
"visualselector_xpath_selectors": scan_elements,
|
||||||
# playwright._impl._api_types.Error: Browser closed.
|
"max_height": MAX_TOTAL_HEIGHT
|
||||||
# @todo show some countdown timer?
|
}))
|
||||||
|
self.page.request_gc()
|
||||||
|
|
||||||
|
# Sort elements by size
|
||||||
|
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
|
||||||
|
logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error getting current state: {str(e)}")
|
||||||
|
# Attempt recovery - force garbage collection
|
||||||
|
try:
|
||||||
|
self.page.request_gc()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Request garbage collection one final time
|
||||||
|
try:
|
||||||
|
self.page.request_gc()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
return (screenshot, xpath_data)
|
return (screenshot, xpath_data)
|
||||||
|
|
||||||
def request_visualselector_data(self):
|
|
||||||
"""
|
|
||||||
Does the same that the playwright operation in content_fetcher does
|
|
||||||
This is used to just bump the VisualSelector data so it' ready to go if they click on the tab
|
|
||||||
@todo refactor and remove duplicate code, add include_filters
|
|
||||||
:param xpath_data:
|
|
||||||
:param screenshot:
|
|
||||||
:param current_include_filters:
|
|
||||||
:return:
|
|
||||||
"""
|
|
||||||
import importlib.resources
|
|
||||||
self.page.evaluate("var include_filters=''")
|
|
||||||
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
|
||||||
from changedetectionio.content_fetchers import visualselector_xpath_selectors
|
|
||||||
xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors)
|
|
||||||
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
|
|
||||||
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
|
||||||
|
|
||||||
return (screenshot, xpath_data)
|
|
||||||
|
|||||||
74
changedetectionio/blueprint/imports/__init__.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
from flask import Blueprint, request, redirect, url_for, flash, render_template
|
||||||
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
from changedetectionio.blueprint.imports.importer import (
|
||||||
|
import_url_list,
|
||||||
|
import_distill_io_json,
|
||||||
|
import_xlsx_wachete,
|
||||||
|
import_xlsx_custom
|
||||||
|
)
|
||||||
|
|
||||||
|
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||||
|
import_blueprint = Blueprint('imports', __name__, template_folder="templates")
|
||||||
|
|
||||||
|
@import_blueprint.route("/import", methods=['GET', 'POST'])
|
||||||
|
@login_optionally_required
|
||||||
|
def import_page():
|
||||||
|
remaining_urls = []
|
||||||
|
from changedetectionio import forms
|
||||||
|
|
||||||
|
if request.method == 'POST':
|
||||||
|
# URL List import
|
||||||
|
if request.values.get('urls') and len(request.values.get('urls').strip()):
|
||||||
|
# Import and push into the queue for immediate update check
|
||||||
|
importer_handler = import_url_list()
|
||||||
|
importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
|
||||||
|
for uuid in importer_handler.new_uuids:
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
|
if len(importer_handler.remaining_data) == 0:
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
else:
|
||||||
|
remaining_urls = importer_handler.remaining_data
|
||||||
|
|
||||||
|
# Distill.io import
|
||||||
|
if request.values.get('distill-io') and len(request.values.get('distill-io').strip()):
|
||||||
|
# Import and push into the queue for immediate update check
|
||||||
|
d_importer = import_distill_io_json()
|
||||||
|
d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
|
||||||
|
for uuid in d_importer.new_uuids:
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
|
# XLSX importer
|
||||||
|
if request.files and request.files.get('xlsx_file'):
|
||||||
|
file = request.files['xlsx_file']
|
||||||
|
|
||||||
|
if request.values.get('file_mapping') == 'wachete':
|
||||||
|
w_importer = import_xlsx_wachete()
|
||||||
|
w_importer.run(data=file, flash=flash, datastore=datastore)
|
||||||
|
else:
|
||||||
|
w_importer = import_xlsx_custom()
|
||||||
|
# Building mapping of col # to col # type
|
||||||
|
map = {}
|
||||||
|
for i in range(10):
|
||||||
|
c = request.values.get(f"custom_xlsx[col_{i}]")
|
||||||
|
v = request.values.get(f"custom_xlsx[col_type_{i}]")
|
||||||
|
if c and v:
|
||||||
|
map[int(c)] = v
|
||||||
|
|
||||||
|
w_importer.import_profile = map
|
||||||
|
w_importer.run(data=file, flash=flash, datastore=datastore)
|
||||||
|
|
||||||
|
for uuid in w_importer.new_uuids:
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
|
# Could be some remaining, or we could be on GET
|
||||||
|
form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
|
||||||
|
output = render_template("import.html",
|
||||||
|
form=form,
|
||||||
|
import_url_list_remaining="\n".join(remaining_urls),
|
||||||
|
original_distill_json=''
|
||||||
|
)
|
||||||
|
return output
|
||||||
|
|
||||||
|
return import_blueprint
|
||||||
@@ -1,6 +1,5 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import abstractmethod
|
||||||
import time
|
import time
|
||||||
import validators
|
|
||||||
from wtforms import ValidationError
|
from wtforms import ValidationError
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
@@ -241,7 +240,7 @@ class import_xlsx_custom(Importer):
|
|||||||
return
|
return
|
||||||
|
|
||||||
# @todo cehck atleast 2 rows, same in other method
|
# @todo cehck atleast 2 rows, same in other method
|
||||||
from .forms import validate_url
|
from changedetectionio.forms import validate_url
|
||||||
row_i = 1
|
row_i = 1
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -300,4 +299,4 @@ class import_xlsx_custom(Importer):
|
|||||||
row_i += 1
|
row_i += 1
|
||||||
|
|
||||||
flash(
|
flash(
|
||||||
"{} imported from custom .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))
|
"{} imported from custom .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now))
|
||||||
@@ -13,29 +13,27 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="box-wrap inner">
|
<div class="box-wrap inner">
|
||||||
<form class="pure-form" action="{{url_for('import_page')}}" method="POST" enctype="multipart/form-data">
|
<form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data">
|
||||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}">
|
||||||
<div class="tab-pane-inner" id="url-list">
|
<div class="tab-pane-inner" id="url-list">
|
||||||
<legend>
|
<div class="pure-control-group">
|
||||||
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
|
Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma
|
||||||
(,):
|
(,):
|
||||||
<br>
|
<br>
|
||||||
<code>https://example.com tag1, tag2, last tag</code>
|
<p><strong>Example: </strong><code>https://example.com tag1, tag2, last tag</code></p>
|
||||||
<br>
|
|
||||||
URLs which do not pass validation will stay in the textarea.
|
URLs which do not pass validation will stay in the textarea.
|
||||||
</legend>
|
</div>
|
||||||
{{ render_field(form.processor, class="processor") }}
|
{{ render_field(form.processor, class="processor") }}
|
||||||
|
|
||||||
|
<div class="pure-control-group">
|
||||||
<textarea name="urls" class="pure-input-1-2" placeholder="https://"
|
<textarea name="urls" class="pure-input-1-2" placeholder="https://"
|
||||||
style="width: 100%;
|
style="width: 100%;
|
||||||
font-family:monospace;
|
font-family:monospace;
|
||||||
white-space: pre;
|
white-space: pre;
|
||||||
overflow-wrap: normal;
|
overflow-wrap: normal;
|
||||||
overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea>
|
overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea>
|
||||||
|
</div>
|
||||||
<div id="quick-watch-processor-type">
|
<div id="quick-watch-processor-type"></div>
|
||||||
|
|
||||||
</div>
|
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
@@ -43,7 +41,7 @@
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
<legend>
|
<div class="pure-control-group">
|
||||||
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
|
Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br>
|
||||||
This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
|
This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored.
|
||||||
<br>
|
<br>
|
||||||
@@ -51,7 +49,7 @@
|
|||||||
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
|
How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br>
|
||||||
Be sure to set your default fetcher to Chrome if required.<br>
|
Be sure to set your default fetcher to Chrome if required.<br>
|
||||||
</p>
|
</p>
|
||||||
</legend>
|
</div>
|
||||||
|
|
||||||
|
|
||||||
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
|
<textarea name="distill-io" class="pure-input-1-2" style="width: 100%;
|
||||||
@@ -122,4 +120,4 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
@@ -20,13 +20,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
|
|||||||
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
datastore.data['watching'][uuid]['processor'] = 'restock_diff'
|
||||||
datastore.data['watching'][uuid].clear_watch()
|
datastore.data['watching'][uuid].clear_watch()
|
||||||
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
return redirect(url_for("index"))
|
return redirect(url_for("watchlist.index"))
|
||||||
|
|
||||||
@login_required
|
@login_required
|
||||||
@price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
|
@price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET'])
|
||||||
def reject(uuid):
|
def reject(uuid):
|
||||||
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
|
datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT
|
||||||
return redirect(url_for("index"))
|
return redirect(url_for("watchlist.index"))
|
||||||
|
|
||||||
|
|
||||||
return price_data_follower_blueprint
|
return price_data_follower_blueprint
|
||||||
|
|||||||
1
changedetectionio/blueprint/rss/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')]
|
||||||
147
changedetectionio/blueprint/rss/blueprint.py
Normal file
@@ -0,0 +1,147 @@
|
|||||||
|
|
||||||
|
from changedetectionio.safe_jinja import render as jinja_render
|
||||||
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
|
from feedgen.feed import FeedGenerator
|
||||||
|
from flask import Blueprint, make_response, request, url_for, redirect
|
||||||
|
from loguru import logger
|
||||||
|
import datetime
|
||||||
|
import pytz
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
|
BAD_CHARS_REGEX=r'[\x00-\x08\x0B\x0C\x0E-\x1F]'
|
||||||
|
|
||||||
|
# Anything that is not text/UTF-8 should be stripped before it breaks feedgen (such as binary data etc)
|
||||||
|
def scan_invalid_chars_in_rss(content):
|
||||||
|
for match in re.finditer(BAD_CHARS_REGEX, content):
|
||||||
|
i = match.start()
|
||||||
|
bad_char = content[i]
|
||||||
|
hex_value = f"0x{ord(bad_char):02x}"
|
||||||
|
# Grab context
|
||||||
|
start = max(0, i - 20)
|
||||||
|
end = min(len(content), i + 21)
|
||||||
|
context = content[start:end].replace('\n', '\\n').replace('\r', '\\r')
|
||||||
|
logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...")
|
||||||
|
# First match is enough
|
||||||
|
return True
|
||||||
|
|
||||||
|
return False
|
||||||
|
|
||||||
|
|
||||||
|
def clean_entry_content(content):
|
||||||
|
cleaned = re.sub(BAD_CHARS_REGEX, '', content)
|
||||||
|
return cleaned
|
||||||
|
|
||||||
|
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||||
|
rss_blueprint = Blueprint('rss', __name__)
|
||||||
|
|
||||||
|
# Some RSS reader situations ended up with rss/ (forward slash after RSS) due
|
||||||
|
# to some earlier blueprint rerouting work, it should goto feed.
|
||||||
|
@rss_blueprint.route("/", methods=['GET'])
|
||||||
|
def extraslash():
|
||||||
|
return redirect(url_for('rss.feed'))
|
||||||
|
|
||||||
|
# Import the login decorator if needed
|
||||||
|
# from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
@rss_blueprint.route("", methods=['GET'])
|
||||||
|
def feed():
|
||||||
|
now = time.time()
|
||||||
|
# Always requires token set
|
||||||
|
app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
|
||||||
|
rss_url_token = request.args.get('token')
|
||||||
|
if rss_url_token != app_rss_token:
|
||||||
|
return "Access denied, bad token", 403
|
||||||
|
|
||||||
|
from changedetectionio import diff
|
||||||
|
limit_tag = request.args.get('tag', '').lower().strip()
|
||||||
|
# Be sure limit_tag is a uuid
|
||||||
|
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
|
||||||
|
if limit_tag == tag.get('title', '').lower().strip():
|
||||||
|
limit_tag = uuid
|
||||||
|
|
||||||
|
# Sort by last_changed and add the uuid which is usually the key..
|
||||||
|
sorted_watches = []
|
||||||
|
|
||||||
|
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
|
||||||
|
for uuid, watch in datastore.data['watching'].items():
|
||||||
|
# @todo tag notification_muted skip also (improve Watch model)
|
||||||
|
if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
|
||||||
|
continue
|
||||||
|
if limit_tag and not limit_tag in watch['tags']:
|
||||||
|
continue
|
||||||
|
watch['uuid'] = uuid
|
||||||
|
sorted_watches.append(watch)
|
||||||
|
|
||||||
|
sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)
|
||||||
|
|
||||||
|
fg = FeedGenerator()
|
||||||
|
fg.title('changedetection.io')
|
||||||
|
fg.description('Feed description')
|
||||||
|
fg.link(href='https://changedetection.io')
|
||||||
|
|
||||||
|
html_colour_enable = False
|
||||||
|
if datastore.data['settings']['application'].get('rss_content_format') == 'html':
|
||||||
|
html_colour_enable = True
|
||||||
|
|
||||||
|
for watch in sorted_watches:
|
||||||
|
|
||||||
|
dates = list(watch.history.keys())
|
||||||
|
# Re #521 - Don't bother processing this one if theres less than 2 snapshots, means we never had a change detected.
|
||||||
|
if len(dates) < 2:
|
||||||
|
continue
|
||||||
|
|
||||||
|
if not watch.viewed:
|
||||||
|
# Re #239 - GUID needs to be individual for each event
|
||||||
|
# @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
|
||||||
|
guid = "{}/{}".format(watch['uuid'], watch.last_changed)
|
||||||
|
fe = fg.add_entry()
|
||||||
|
|
||||||
|
# Include a link to the diff page, they will have to login here to see if password protection is enabled.
|
||||||
|
# Description is the page you watch, link takes you to the diff JS UI page
|
||||||
|
# Dict val base_url will get overriden with the env var if it is set.
|
||||||
|
ext_base_url = datastore.data['settings']['application'].get('active_base_url')
|
||||||
|
# @todo fix
|
||||||
|
|
||||||
|
# Because we are called via whatever web server, flask should figure out the right path (
|
||||||
|
diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}
|
||||||
|
|
||||||
|
fe.link(link=diff_link)
|
||||||
|
|
||||||
|
# @todo watch should be a getter - watch.get('title') (internally if URL else..)
|
||||||
|
|
||||||
|
watch_title = watch.get('title') if watch.get('title') else watch.get('url')
|
||||||
|
fe.title(title=watch_title)
|
||||||
|
try:
|
||||||
|
|
||||||
|
html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
|
||||||
|
newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
|
||||||
|
include_equal=False,
|
||||||
|
line_feed_sep="<br>",
|
||||||
|
html_colour=html_colour_enable
|
||||||
|
)
|
||||||
|
except FileNotFoundError as e:
|
||||||
|
html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."
|
||||||
|
|
||||||
|
# @todo Make this configurable and also consider html-colored markup
|
||||||
|
# @todo User could decide if <link> goes to the diff page, or to the watch link
|
||||||
|
rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
|
||||||
|
|
||||||
|
content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link)
|
||||||
|
|
||||||
|
# Out of range chars could also break feedgen
|
||||||
|
if scan_invalid_chars_in_rss(content):
|
||||||
|
content = clean_entry_content(content)
|
||||||
|
|
||||||
|
fe.content(content=content, type='CDATA')
|
||||||
|
fe.guid(guid, permalink=False)
|
||||||
|
dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
|
||||||
|
dt = dt.replace(tzinfo=pytz.UTC)
|
||||||
|
fe.pubDate(dt)
|
||||||
|
|
||||||
|
response = make_response(fg.rss_str())
|
||||||
|
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
|
||||||
|
logger.trace(f"RSS generated in {time.time() - now:.3f}s")
|
||||||
|
return response
|
||||||
|
|
||||||
|
return rss_blueprint
|
||||||
120
changedetectionio/blueprint/settings/__init__.py
Normal file
@@ -0,0 +1,120 @@
|
|||||||
|
import os
|
||||||
|
from copy import deepcopy
|
||||||
|
from datetime import datetime
|
||||||
|
from zoneinfo import ZoneInfo, available_timezones
|
||||||
|
import secrets
|
||||||
|
import flask_login
|
||||||
|
from flask import Blueprint, render_template, request, redirect, url_for, flash
|
||||||
|
|
||||||
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
|
||||||
|
|
||||||
|
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||||
|
settings_blueprint = Blueprint('settings', __name__, template_folder="templates")
|
||||||
|
|
||||||
|
@settings_blueprint.route("", methods=['GET', "POST"])
|
||||||
|
@login_optionally_required
|
||||||
|
def settings_page():
|
||||||
|
from changedetectionio import forms
|
||||||
|
|
||||||
|
default = deepcopy(datastore.data['settings'])
|
||||||
|
if datastore.proxy_list is not None:
|
||||||
|
available_proxies = list(datastore.proxy_list.keys())
|
||||||
|
# When enabled
|
||||||
|
system_proxy = datastore.data['settings']['requests']['proxy']
|
||||||
|
# In the case it doesnt exist anymore
|
||||||
|
if not system_proxy in available_proxies:
|
||||||
|
system_proxy = None
|
||||||
|
|
||||||
|
default['requests']['proxy'] = system_proxy if system_proxy is not None else available_proxies[0]
|
||||||
|
# Used by the form handler to keep or remove the proxy settings
|
||||||
|
default['proxy_list'] = available_proxies[0]
|
||||||
|
|
||||||
|
# Don't use form.data on POST so that it doesnt overrid the checkbox status from the POST status
|
||||||
|
form = forms.globalSettingsForm(formdata=request.form if request.method == 'POST' else None,
|
||||||
|
data=default,
|
||||||
|
extra_notification_tokens=datastore.get_unique_notification_tokens_available()
|
||||||
|
)
|
||||||
|
|
||||||
|
# Remove the last option 'System default'
|
||||||
|
form.application.form.notification_format.choices.pop()
|
||||||
|
|
||||||
|
if datastore.proxy_list is None:
|
||||||
|
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
||||||
|
del form.requests.form.proxy
|
||||||
|
else:
|
||||||
|
form.requests.form.proxy.choices = []
|
||||||
|
for p in datastore.proxy_list:
|
||||||
|
form.requests.form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
|
||||||
|
|
||||||
|
if request.method == 'POST':
|
||||||
|
# Password unset is a GET, but we can lock the session to a salted env password to always need the password
|
||||||
|
if form.application.form.data.get('removepassword_button', False):
|
||||||
|
# SALTED_PASS means the password is "locked" to what we set in the Env var
|
||||||
|
if not os.getenv("SALTED_PASS", False):
|
||||||
|
datastore.remove_password()
|
||||||
|
flash("Password protection removed.", 'notice')
|
||||||
|
flask_login.logout_user()
|
||||||
|
return redirect(url_for('settings.settings_page'))
|
||||||
|
|
||||||
|
if form.validate():
|
||||||
|
# Don't set password to False when a password is set - should be only removed with the `removepassword` button
|
||||||
|
app_update = dict(deepcopy(form.data['application']))
|
||||||
|
|
||||||
|
# Never update password with '' or False (Added by wtforms when not in submission)
|
||||||
|
if 'password' in app_update and not app_update['password']:
|
||||||
|
del (app_update['password'])
|
||||||
|
|
||||||
|
datastore.data['settings']['application'].update(app_update)
|
||||||
|
datastore.data['settings']['requests'].update(form.data['requests'])
|
||||||
|
|
||||||
|
if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
|
||||||
|
datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
|
||||||
|
datastore.needs_write_urgent = True
|
||||||
|
flash("Password protection enabled.", 'notice')
|
||||||
|
flask_login.logout_user()
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
datastore.needs_write_urgent = True
|
||||||
|
flash("Settings updated.")
|
||||||
|
|
||||||
|
else:
|
||||||
|
flash("An error occurred, please see below.", "error")
|
||||||
|
|
||||||
|
# Convert to ISO 8601 format, all date/time relative events stored as UTC time
|
||||||
|
utc_time = datetime.now(ZoneInfo("UTC")).isoformat()
|
||||||
|
|
||||||
|
output = render_template("settings.html",
|
||||||
|
api_key=datastore.data['settings']['application'].get('api_access_token'),
|
||||||
|
available_timezones=sorted(available_timezones()),
|
||||||
|
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||||
|
extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
|
||||||
|
form=form,
|
||||||
|
hide_remove_pass=os.getenv("SALTED_PASS", False),
|
||||||
|
min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
|
||||||
|
settings_application=datastore.data['settings']['application'],
|
||||||
|
timezone_default_config=datastore.data['settings']['application'].get('timezone'),
|
||||||
|
utc_time=utc_time,
|
||||||
|
)
|
||||||
|
|
||||||
|
return output
|
||||||
|
|
||||||
|
@settings_blueprint.route("/reset-api-key", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def settings_reset_api_key():
|
||||||
|
secret = secrets.token_hex(16)
|
||||||
|
datastore.data['settings']['application']['api_access_token'] = secret
|
||||||
|
datastore.needs_write_urgent = True
|
||||||
|
flash("API Key was regenerated.")
|
||||||
|
return redirect(url_for('settings.settings_page')+'#api')
|
||||||
|
|
||||||
|
@settings_blueprint.route("/notification-logs", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def notification_logs():
|
||||||
|
from changedetectionio.flask_app import notification_debug_log
|
||||||
|
output = render_template("notification-log.html",
|
||||||
|
logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])
|
||||||
|
return output
|
||||||
|
|
||||||
|
return settings_blueprint
|
||||||
@@ -4,7 +4,7 @@
|
|||||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %}
|
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %}
|
||||||
{% from '_common_fields.html' import render_common_settings_form %}
|
{% from '_common_fields.html' import render_common_settings_form %}
|
||||||
<script>
|
<script>
|
||||||
const notification_base_url="{{url_for('ajax_callback_send_notification_test', mode="global-settings")}}";
|
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
|
||||||
{% if emailprefix %}
|
{% if emailprefix %}
|
||||||
const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}');
|
const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}');
|
||||||
{% endif %}
|
{% endif %}
|
||||||
@@ -22,13 +22,14 @@
|
|||||||
<li class="tab"><a href="#notifications">Notifications</a></li>
|
<li class="tab"><a href="#notifications">Notifications</a></li>
|
||||||
<li class="tab"><a href="#fetching">Fetching</a></li>
|
<li class="tab"><a href="#fetching">Fetching</a></li>
|
||||||
<li class="tab"><a href="#filters">Global Filters</a></li>
|
<li class="tab"><a href="#filters">Global Filters</a></li>
|
||||||
|
<li class="tab"><a href="#ui-options">UI Options</a></li>
|
||||||
<li class="tab"><a href="#api">API</a></li>
|
<li class="tab"><a href="#api">API</a></li>
|
||||||
<li class="tab"><a href="#timedate">Time & Date</a></li>
|
<li class="tab"><a href="#timedate">Time & Date</a></li>
|
||||||
<li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li>
|
<li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li>
|
||||||
</ul>
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
<div class="box-wrap inner">
|
<div class="box-wrap inner">
|
||||||
<form class="pure-form pure-form-stacked settings" action="{{url_for('settings_page')}}" method="POST">
|
<form class="pure-form pure-form-stacked settings" action="{{url_for('settings.settings_page')}}" method="POST">
|
||||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||||
<div class="tab-pane-inner" id="general">
|
<div class="tab-pane-inner" id="general">
|
||||||
<fieldset>
|
<fieldset>
|
||||||
@@ -78,7 +79,10 @@
|
|||||||
{{ render_field(form.application.form.pager_size) }}
|
{{ render_field(form.application.form.pager_size) }}
|
||||||
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
<span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_field(form.application.form.rss_content_format) }}
|
||||||
|
<span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
|
||||||
|
</div>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
|
{{ render_checkbox_field(form.application.form.extract_title_as_title) }}
|
||||||
<span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
|
<span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
|
||||||
@@ -203,7 +207,7 @@ nav
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
<a href="{{url_for('settings_reset_api_key')}}" class="pure-button button-small button-cancel">Regenerate API key</a>
|
<a href="{{url_for('settings.settings_reset_api_key')}}" class="pure-button button-small button-cancel">Regenerate API key</a>
|
||||||
</div>
|
</div>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
<h4>Chrome Extension</h4>
|
<h4>Chrome Extension</h4>
|
||||||
@@ -214,7 +218,7 @@ nav
|
|||||||
<a id="chrome-extension-link"
|
<a id="chrome-extension-link"
|
||||||
title="Try our new Chrome Extension!"
|
title="Try our new Chrome Extension!"
|
||||||
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
|
href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop">
|
||||||
<img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome">
|
<img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" alt="Chrome">
|
||||||
Chrome Webstore
|
Chrome Webstore
|
||||||
</a>
|
</a>
|
||||||
</p>
|
</p>
|
||||||
@@ -237,6 +241,12 @@ nav
|
|||||||
</p>
|
</p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
<div class="tab-pane-inner" id="ui-options">
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
|
||||||
|
<span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
<div class="tab-pane-inner" id="proxies">
|
<div class="tab-pane-inner" id="proxies">
|
||||||
<div id="recommended-proxy">
|
<div id="recommended-proxy">
|
||||||
<div>
|
<div>
|
||||||
@@ -280,9 +290,7 @@ nav
|
|||||||
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
<p>
|
|
||||||
Your proxy provider may need to whitelist our IP of <code>204.15.192.195</code>
|
|
||||||
</p>
|
|
||||||
<p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.
|
<p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.
|
||||||
|
|
||||||
<div class="pure-control-group" id="extra-proxies-setting">
|
<div class="pure-control-group" id="extra-proxies-setting">
|
||||||
@@ -301,8 +309,8 @@ nav
|
|||||||
<div id="actions">
|
<div id="actions">
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ render_button(form.save_button) }}
|
{{ render_button(form.save_button) }}
|
||||||
<a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
|
<a href="{{url_for('watchlist.index')}}" class="pure-button button-small button-cancel">Back</a>
|
||||||
<a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a>
|
<a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</form>
|
</form>
|
||||||
@@ -104,6 +104,9 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
|||||||
uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()
|
uuid = list(datastore.data['settings']['application']['tags'].keys()).pop()
|
||||||
|
|
||||||
default = datastore.data['settings']['application']['tags'].get(uuid)
|
default = datastore.data['settings']['application']['tags'].get(uuid)
|
||||||
|
if not default:
|
||||||
|
flash("Tag not found", "error")
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
form = group_restock_settings_form(
|
form = group_restock_settings_form(
|
||||||
formdata=request.form if request.method == 'POST' else None,
|
formdata=request.form if request.method == 'POST' else None,
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
|
||||||
{% from '_common_fields.html' import render_common_settings_form %}
|
{% from '_common_fields.html' import render_common_settings_form %}
|
||||||
<script>
|
<script>
|
||||||
const notification_base_url="{{url_for('ajax_callback_send_notification_test', mode="group-settings")}}";
|
const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="group-settings")}}";
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
|
||||||
@@ -13,6 +13,7 @@
|
|||||||
/*const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');*/
|
/*const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');*/
|
||||||
/*{% endif %}*/
|
/*{% endif %}*/
|
||||||
|
|
||||||
|
{% set has_tag_filters_extra='' %}
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
|
||||||
@@ -46,59 +47,12 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="tab-pane-inner" id="filters-and-triggers">
|
<div class="tab-pane-inner" id="filters-and-triggers">
|
||||||
<div class="pure-control-group">
|
<p>These settings are <strong><i>added</i></strong> to any existing watch configurations.</p>
|
||||||
{% set field = render_field(form.include_filters,
|
{% include "edit/include_subtract.html" %}
|
||||||
rows=5,
|
<div class="text-filtering border-fieldset">
|
||||||
placeholder="#example
|
<h3>Text filtering</h3>
|
||||||
xpath://body/div/span[contains(@class, 'example-class')]",
|
{% include "edit/text-options.html" %}
|
||||||
class="m-d")
|
</div>
|
||||||
%}
|
|
||||||
{{ field }}
|
|
||||||
{% if '/text()' in field %}
|
|
||||||
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br>
|
|
||||||
{% endif %}
|
|
||||||
<span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br>
|
|
||||||
<div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div>
|
|
||||||
<ul id="advanced-help-selectors">
|
|
||||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
|
||||||
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
|
|
||||||
<ul>
|
|
||||||
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
|
|
||||||
{% if jq_support %}
|
|
||||||
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li>
|
|
||||||
{% else %}
|
|
||||||
<li>jq support not installed</li>
|
|
||||||
{% endif %}
|
|
||||||
</ul>
|
|
||||||
</li>
|
|
||||||
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code>
|
|
||||||
<ul>
|
|
||||||
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a
|
|
||||||
href="http://xpather.com/" target="new">test your XPath here</a></li>
|
|
||||||
<li>Example: Get all titles from an RSS feed <code>//title/text()</code></li>
|
|
||||||
<li>To use XPath1.0: Prefix with <code>xpath1:</code></li>
|
|
||||||
</ul>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
|
|
||||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
|
|
||||||
</span>
|
|
||||||
</div>
|
|
||||||
<fieldset class="pure-control-group">
|
|
||||||
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
|
|
||||||
footer
|
|
||||||
nav
|
|
||||||
.stockticker
|
|
||||||
//*[contains(text(), 'Advertisement')]") }}
|
|
||||||
<span class="pure-form-message-inline">
|
|
||||||
<ul>
|
|
||||||
<li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li>
|
|
||||||
<li> Don't paste HTML here, use only CSS and XPath selectors </li>
|
|
||||||
<li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li>
|
|
||||||
</ul>
|
|
||||||
</span>
|
|
||||||
</fieldset>
|
|
||||||
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
{# rendered sub Template #}
|
{# rendered sub Template #}
|
||||||
@@ -112,7 +66,7 @@ nav
|
|||||||
<div class="pure-control-group inline-radio">
|
<div class="pure-control-group inline-radio">
|
||||||
{{ render_checkbox_field(form.notification_muted) }}
|
{{ render_checkbox_field(form.notification_muted) }}
|
||||||
</div>
|
</div>
|
||||||
{% if is_html_webdriver %}
|
{% if 1 %}
|
||||||
<div class="pure-control-group inline-radio">
|
<div class="pure-control-group inline-radio">
|
||||||
{{ render_checkbox_field(form.notification_screenshot) }}
|
{{ render_checkbox_field(form.notification_screenshot) }}
|
||||||
<span class="pure-form-message-inline">
|
<span class="pure-form-message-inline">
|
||||||
@@ -124,7 +78,7 @@ nav
|
|||||||
{% if has_default_notification_urls %}
|
{% if has_default_notification_urls %}
|
||||||
<div class="inline-warning">
|
<div class="inline-warning">
|
||||||
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="Look out!" title="Lookout!" >
|
<img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="Look out!" title="Lookout!" >
|
||||||
There are <a href="{{ url_for('settings_page')}}#notifications">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only ‐ an empty Notification URL list here will still send notifications.
|
There are <a href="{{ url_for('settings.settings_page')}}#notifications">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only ‐ an empty Notification URL list here will still send notifications.
|
||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">Use system defaults</a>
|
<a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">Use system defaults</a>
|
||||||
|
|||||||
@@ -47,7 +47,7 @@
|
|||||||
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
|
<a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
|
||||||
</td>
|
</td>
|
||||||
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
|
<td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td>
|
||||||
<td class="title-col inline"> <a href="{{url_for('index', tag=uuid) }}">{{ tag.title }}</a></td>
|
<td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td>
|
||||||
<td>
|
<td>
|
||||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">Edit</a>
|
<a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">Edit</a>
|
||||||
<a class="pure-button pure-button-primary" href="{{ url_for('tags.delete', uuid=uuid) }}" title="Deletes and removes tag">Delete</a>
|
<a class="pure-button pure-button-primary" href="{{ url_for('tags.delete', uuid=uuid) }}" title="Deletes and removes tag">Delete</a>
|
||||||
|
|||||||
305
changedetectionio/blueprint/ui/__init__.py
Normal file
@@ -0,0 +1,305 @@
|
|||||||
|
import time
|
||||||
|
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
|
||||||
|
from loguru import logger
|
||||||
|
from functools import wraps
|
||||||
|
|
||||||
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
|
from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint
|
||||||
|
from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint
|
||||||
|
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
|
||||||
|
|
||||||
|
def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_update_threads, queuedWatchMetaData):
|
||||||
|
ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
|
||||||
|
|
||||||
|
# Register the edit blueprint
|
||||||
|
edit_blueprint = construct_edit_blueprint(datastore, update_q, queuedWatchMetaData)
|
||||||
|
ui_blueprint.register_blueprint(edit_blueprint)
|
||||||
|
|
||||||
|
# Register the notification blueprint
|
||||||
|
notification_blueprint = construct_notification_blueprint(datastore)
|
||||||
|
ui_blueprint.register_blueprint(notification_blueprint)
|
||||||
|
|
||||||
|
# Register the views blueprint
|
||||||
|
views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData)
|
||||||
|
ui_blueprint.register_blueprint(views_blueprint)
|
||||||
|
|
||||||
|
# Import the login decorator
|
||||||
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
|
||||||
|
@ui_blueprint.route("/clear_history/<string:uuid>", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def clear_watch_history(uuid):
|
||||||
|
try:
|
||||||
|
datastore.clear_watch_history(uuid)
|
||||||
|
except KeyError:
|
||||||
|
flash('Watch not found', 'error')
|
||||||
|
else:
|
||||||
|
flash("Cleared snapshot history for watch {}".format(uuid))
|
||||||
|
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
@ui_blueprint.route("/clear_history", methods=['GET', 'POST'])
|
||||||
|
@login_optionally_required
|
||||||
|
def clear_all_history():
|
||||||
|
if request.method == 'POST':
|
||||||
|
confirmtext = request.form.get('confirmtext')
|
||||||
|
|
||||||
|
if confirmtext == 'clear':
|
||||||
|
for uuid in datastore.data['watching'].keys():
|
||||||
|
datastore.clear_watch_history(uuid)
|
||||||
|
|
||||||
|
flash("Cleared snapshot history for all watches")
|
||||||
|
else:
|
||||||
|
flash('Incorrect confirmation text.', 'error')
|
||||||
|
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
output = render_template("clear_all_history.html")
|
||||||
|
return output
|
||||||
|
|
||||||
|
# Clear all statuses, so we do not see the 'unviewed' class
|
||||||
|
@ui_blueprint.route("/form/mark-all-viewed", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def mark_all_viewed():
|
||||||
|
# Save the current newest history as the most recently viewed
|
||||||
|
with_errors = request.args.get('with_errors') == "1"
|
||||||
|
for watch_uuid, watch in datastore.data['watching'].items():
|
||||||
|
if with_errors and not watch.get('last_error'):
|
||||||
|
continue
|
||||||
|
datastore.set_last_viewed(watch_uuid, int(time.time()))
|
||||||
|
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
@ui_blueprint.route("/delete", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def form_delete():
|
||||||
|
uuid = request.args.get('uuid')
|
||||||
|
|
||||||
|
if uuid != 'all' and not uuid in datastore.data['watching'].keys():
|
||||||
|
flash('The watch by UUID {} does not exist.'.format(uuid), 'error')
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
# More for testing, possible to return the first/only
|
||||||
|
if uuid == 'first':
|
||||||
|
uuid = list(datastore.data['watching'].keys()).pop()
|
||||||
|
datastore.delete(uuid)
|
||||||
|
flash('Deleted.')
|
||||||
|
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
@ui_blueprint.route("/clone", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def form_clone():
|
||||||
|
uuid = request.args.get('uuid')
|
||||||
|
# More for testing, possible to return the first/only
|
||||||
|
if uuid == 'first':
|
||||||
|
uuid = list(datastore.data['watching'].keys()).pop()
|
||||||
|
|
||||||
|
new_uuid = datastore.clone(uuid)
|
||||||
|
|
||||||
|
if not datastore.data['watching'].get(uuid).get('paused'):
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
|
||||||
|
|
||||||
|
flash('Cloned, you are editing the new watch.')
|
||||||
|
|
||||||
|
return redirect(url_for("ui.ui_edit.edit_page", uuid=new_uuid))
|
||||||
|
|
||||||
|
@ui_blueprint.route("/checknow", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def form_watch_checknow():
|
||||||
|
# Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True})))
|
||||||
|
tag = request.args.get('tag')
|
||||||
|
uuid = request.args.get('uuid')
|
||||||
|
with_errors = request.args.get('with_errors') == "1"
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
|
||||||
|
running_uuids = []
|
||||||
|
for t in running_update_threads:
|
||||||
|
running_uuids.append(t.current_uuid)
|
||||||
|
|
||||||
|
if uuid:
|
||||||
|
if uuid not in running_uuids:
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
else:
|
||||||
|
# Recheck all, including muted
|
||||||
|
# Get most overdue first
|
||||||
|
for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
|
||||||
|
watch_uuid = k[0]
|
||||||
|
watch = k[1]
|
||||||
|
if not watch['paused']:
|
||||||
|
if watch_uuid not in running_uuids:
|
||||||
|
if with_errors and not watch.get('last_error'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if tag != None and tag not in watch['tags']:
|
||||||
|
continue
|
||||||
|
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
if i == 1:
|
||||||
|
flash("Queued 1 watch for rechecking.")
|
||||||
|
if i > 1:
|
||||||
|
flash(f"Queued {i} watches for rechecking.")
|
||||||
|
if i == 0:
|
||||||
|
flash("No watches available to recheck.")
|
||||||
|
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
@ui_blueprint.route("/form/checkbox-operations", methods=['POST'])
|
||||||
|
@login_optionally_required
|
||||||
|
def form_watch_list_checkbox_operations():
|
||||||
|
op = request.form['op']
|
||||||
|
uuids = request.form.getlist('uuids')
|
||||||
|
|
||||||
|
if (op == 'delete'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.delete(uuid.strip())
|
||||||
|
flash("{} watches deleted".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'pause'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid.strip()]['paused'] = True
|
||||||
|
flash("{} watches paused".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'unpause'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid.strip()]['paused'] = False
|
||||||
|
flash("{} watches unpaused".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'mark-viewed'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.set_last_viewed(uuid, int(time.time()))
|
||||||
|
flash("{} watches updated".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'mute'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_muted'] = True
|
||||||
|
flash("{} watches muted".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'unmute'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_muted'] = False
|
||||||
|
flash("{} watches un-muted".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'recheck'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
# Recheck and require a full reprocessing
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
flash("{} watches queued for rechecking".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'clear-errors'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid]["last_error"] = False
|
||||||
|
flash(f"{len(uuids)} watches errors cleared")
|
||||||
|
|
||||||
|
elif (op == 'clear-history'):
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.clear_watch_history(uuid)
|
||||||
|
flash("{} watches cleared/reset.".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'notification-default'):
|
||||||
|
from changedetectionio.notification import (
|
||||||
|
default_notification_format_for_watch
|
||||||
|
)
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_title'] = None
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_body'] = None
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_urls'] = []
|
||||||
|
datastore.data['watching'][uuid.strip()]['notification_format'] = default_notification_format_for_watch
|
||||||
|
flash("{} watches set to use default notification settings".format(len(uuids)))
|
||||||
|
|
||||||
|
elif (op == 'assign-tag'):
|
||||||
|
op_extradata = request.form.get('op_extradata', '').strip()
|
||||||
|
if op_extradata:
|
||||||
|
tag_uuid = datastore.add_tag(title=op_extradata)
|
||||||
|
if op_extradata and tag_uuid:
|
||||||
|
for uuid in uuids:
|
||||||
|
uuid = uuid.strip()
|
||||||
|
if datastore.data['watching'].get(uuid):
|
||||||
|
# Bug in old versions caused by bad edit page/tag handler
|
||||||
|
if isinstance(datastore.data['watching'][uuid]['tags'], str):
|
||||||
|
datastore.data['watching'][uuid]['tags'] = []
|
||||||
|
|
||||||
|
datastore.data['watching'][uuid]['tags'].append(tag_uuid)
|
||||||
|
|
||||||
|
flash(f"{len(uuids)} watches were tagged")
|
||||||
|
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
|
||||||
|
@ui_blueprint.route("/share-url/<string:uuid>", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def form_share_put_watch(uuid):
|
||||||
|
"""Given a watch UUID, upload the info and return a share-link
|
||||||
|
the share-link can be imported/added"""
|
||||||
|
import requests
|
||||||
|
import json
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
# more for testing
|
||||||
|
if uuid == 'first':
|
||||||
|
uuid = list(datastore.data['watching'].keys()).pop()
|
||||||
|
|
||||||
|
# copy it to memory as trim off what we dont need (history)
|
||||||
|
watch = deepcopy(datastore.data['watching'].get(uuid))
|
||||||
|
# For older versions that are not a @property
|
||||||
|
if (watch.get('history')):
|
||||||
|
del (watch['history'])
|
||||||
|
|
||||||
|
# for safety/privacy
|
||||||
|
for k in list(watch.keys()):
|
||||||
|
if k.startswith('notification_'):
|
||||||
|
del watch[k]
|
||||||
|
|
||||||
|
for r in['uuid', 'last_checked', 'last_changed']:
|
||||||
|
if watch.get(r):
|
||||||
|
del (watch[r])
|
||||||
|
|
||||||
|
# Add the global stuff which may have an impact
|
||||||
|
watch['ignore_text'] += datastore.data['settings']['application']['global_ignore_text']
|
||||||
|
watch['subtractive_selectors'] += datastore.data['settings']['application']['global_subtractive_selectors']
|
||||||
|
|
||||||
|
watch_json = json.dumps(watch)
|
||||||
|
|
||||||
|
try:
|
||||||
|
r = requests.request(method="POST",
|
||||||
|
data={'watch': watch_json},
|
||||||
|
url="https://changedetection.io/share/share",
|
||||||
|
headers={'App-Guid': datastore.data['app_guid']})
|
||||||
|
res = r.json()
|
||||||
|
|
||||||
|
# Add to the flask session
|
||||||
|
session['share-link'] = f"https://changedetection.io/share/{res['share_key']}"
|
||||||
|
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Error sharing -{str(e)}")
|
||||||
|
flash(f"Could not share, something went wrong while communicating with the share server - {str(e)}", 'error')
|
||||||
|
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
return ui_blueprint
|
||||||
338
changedetectionio/blueprint/ui/edit.py
Normal file
@@ -0,0 +1,338 @@
|
|||||||
|
import time
|
||||||
|
from copy import deepcopy
|
||||||
|
import os
|
||||||
|
import importlib.resources
|
||||||
|
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
|
||||||
|
from loguru import logger
|
||||||
|
from jinja2 import Environment, FileSystemLoader
|
||||||
|
|
||||||
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
from changedetectionio.time_handler import is_within_schedule
|
||||||
|
|
||||||
|
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||||
|
edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
|
||||||
|
|
||||||
|
def _watch_has_tag_options_set(watch):
|
||||||
|
"""This should be fixed better so that Tag is some proper Model, a tag is just a Watch also"""
|
||||||
|
for tag_uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
|
||||||
|
if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')):
|
||||||
|
return True
|
||||||
|
|
||||||
|
@edit_blueprint.route("/edit/<string:uuid>", methods=['GET', 'POST'])
|
||||||
|
@login_optionally_required
|
||||||
|
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
|
||||||
|
# https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ?
|
||||||
|
def edit_page(uuid):
|
||||||
|
from changedetectionio import forms
|
||||||
|
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
|
||||||
|
from changedetectionio import processors
|
||||||
|
import importlib
|
||||||
|
|
||||||
|
# More for testing, possible to return the first/only
|
||||||
|
if not datastore.data['watching'].keys():
|
||||||
|
flash("No watches to edit", "error")
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
if uuid == 'first':
|
||||||
|
uuid = list(datastore.data['watching'].keys()).pop()
|
||||||
|
|
||||||
|
if not uuid in datastore.data['watching']:
|
||||||
|
flash("No watch with the UUID %s found." % (uuid), "error")
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
switch_processor = request.args.get('switch_processor')
|
||||||
|
if switch_processor:
|
||||||
|
for p in processors.available_processors():
|
||||||
|
if p[0] == switch_processor:
|
||||||
|
datastore.data['watching'][uuid]['processor'] = switch_processor
|
||||||
|
flash(f"Switched to mode - {p[1]}.")
|
||||||
|
datastore.clear_watch_history(uuid)
|
||||||
|
redirect(url_for('ui_edit.edit_page', uuid=uuid))
|
||||||
|
|
||||||
|
# be sure we update with a copy instead of accidently editing the live object by reference
|
||||||
|
default = deepcopy(datastore.data['watching'][uuid])
|
||||||
|
|
||||||
|
# Defaults for proxy choice
|
||||||
|
if datastore.proxy_list is not None: # When enabled
|
||||||
|
# @todo
|
||||||
|
# Radio needs '' not None, or incase that the chosen one no longer exists
|
||||||
|
if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list):
|
||||||
|
default['proxy'] = ''
|
||||||
|
# proxy_override set to the json/text list of the items
|
||||||
|
|
||||||
|
# Does it use some custom form? does one exist?
|
||||||
|
processor_name = datastore.data['watching'][uuid].get('processor', '')
|
||||||
|
processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None)
|
||||||
|
if not processor_classes:
|
||||||
|
flash(f"Cannot load the edit form for processor/plugin '{processor_classes[1]}', plugin missing?", 'error')
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
parent_module = processors.get_parent_module(processor_classes[0])
|
||||||
|
|
||||||
|
try:
|
||||||
|
# Get the parent of the "processor.py" go up one, get the form (kinda spaghetti but its reusing existing code)
|
||||||
|
forms_module = importlib.import_module(f"{parent_module.__name__}.forms")
|
||||||
|
# Access the 'processor_settings_form' class from the 'forms' module
|
||||||
|
form_class = getattr(forms_module, 'processor_settings_form')
|
||||||
|
except ModuleNotFoundError as e:
|
||||||
|
# .forms didnt exist
|
||||||
|
form_class = forms.processor_text_json_diff_form
|
||||||
|
except AttributeError as e:
|
||||||
|
# .forms exists but no useful form
|
||||||
|
form_class = forms.processor_text_json_diff_form
|
||||||
|
|
||||||
|
form = form_class(formdata=request.form if request.method == 'POST' else None,
|
||||||
|
data=default,
|
||||||
|
extra_notification_tokens=default.extra_notification_token_values(),
|
||||||
|
default_system_settings=datastore.data['settings']
|
||||||
|
)
|
||||||
|
|
||||||
|
# For the form widget tag UUID back to "string name" for the field
|
||||||
|
form.tags.datastore = datastore
|
||||||
|
|
||||||
|
# Used by some forms that need to dig deeper
|
||||||
|
form.datastore = datastore
|
||||||
|
form.watch = default
|
||||||
|
|
||||||
|
for p in datastore.extra_browsers:
|
||||||
|
form.fetch_backend.choices.append(p)
|
||||||
|
|
||||||
|
form.fetch_backend.choices.append(("system", 'System settings default'))
|
||||||
|
|
||||||
|
# form.browser_steps[0] can be assumed that we 'goto url' first
|
||||||
|
|
||||||
|
if datastore.proxy_list is None:
|
||||||
|
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
||||||
|
del form.proxy
|
||||||
|
else:
|
||||||
|
form.proxy.choices = [('', 'Default')]
|
||||||
|
for p in datastore.proxy_list:
|
||||||
|
form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
|
||||||
|
|
||||||
|
|
||||||
|
if request.method == 'POST' and form.validate():
|
||||||
|
|
||||||
|
# If they changed processor, it makes sense to reset it.
|
||||||
|
if datastore.data['watching'][uuid].get('processor') != form.data.get('processor'):
|
||||||
|
datastore.data['watching'][uuid].clear_watch()
|
||||||
|
flash("Reset watch history due to change of processor")
|
||||||
|
|
||||||
|
extra_update_obj = {
|
||||||
|
'consecutive_filter_failures': 0,
|
||||||
|
'last_error' : False
|
||||||
|
}
|
||||||
|
|
||||||
|
if request.args.get('unpause_on_save'):
|
||||||
|
extra_update_obj['paused'] = False
|
||||||
|
|
||||||
|
extra_update_obj['time_between_check'] = form.time_between_check.data
|
||||||
|
|
||||||
|
# Ignore text
|
||||||
|
form_ignore_text = form.ignore_text.data
|
||||||
|
datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text
|
||||||
|
|
||||||
|
# Be sure proxy value is None
|
||||||
|
if datastore.proxy_list is not None and form.data['proxy'] == '':
|
||||||
|
extra_update_obj['proxy'] = None
|
||||||
|
|
||||||
|
# Unsetting all filter_text methods should make it go back to default
|
||||||
|
# This particularly affects tests running
|
||||||
|
if 'filter_text_added' in form.data and not form.data.get('filter_text_added') \
|
||||||
|
and 'filter_text_replaced' in form.data and not form.data.get('filter_text_replaced') \
|
||||||
|
and 'filter_text_removed' in form.data and not form.data.get('filter_text_removed'):
|
||||||
|
extra_update_obj['filter_text_added'] = True
|
||||||
|
extra_update_obj['filter_text_replaced'] = True
|
||||||
|
extra_update_obj['filter_text_removed'] = True
|
||||||
|
|
||||||
|
# Because wtforms doesn't support accessing other data in process_ , but we convert the CSV list of tags back to a list of UUIDs
|
||||||
|
tag_uuids = []
|
||||||
|
if form.data.get('tags'):
|
||||||
|
# Sometimes in testing this can be list, dont know why
|
||||||
|
if type(form.data.get('tags')) == list:
|
||||||
|
extra_update_obj['tags'] = form.data.get('tags')
|
||||||
|
else:
|
||||||
|
for t in form.data.get('tags').split(','):
|
||||||
|
tag_uuids.append(datastore.add_tag(title=t))
|
||||||
|
extra_update_obj['tags'] = tag_uuids
|
||||||
|
|
||||||
|
datastore.data['watching'][uuid].update(form.data)
|
||||||
|
datastore.data['watching'][uuid].update(extra_update_obj)
|
||||||
|
|
||||||
|
if not datastore.data['watching'][uuid].get('tags'):
|
||||||
|
# Force it to be a list, because form.data['tags'] will be string if nothing found
|
||||||
|
# And del(form.data['tags'] ) wont work either for some reason
|
||||||
|
datastore.data['watching'][uuid]['tags'] = []
|
||||||
|
|
||||||
|
# Recast it if need be to right data Watch handler
|
||||||
|
watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor'))
|
||||||
|
datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, default=datastore.data['watching'][uuid])
|
||||||
|
flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.")
|
||||||
|
|
||||||
|
# Re #286 - We wait for syncing new data to disk in another thread every 60 seconds
|
||||||
|
# But in the case something is added we should save straight away
|
||||||
|
datastore.needs_write_urgent = True
|
||||||
|
|
||||||
|
# Do not queue on edit if its not within the time range
|
||||||
|
|
||||||
|
# @todo maybe it should never queue anyway on edit...
|
||||||
|
is_in_schedule = True
|
||||||
|
watch = datastore.data['watching'].get(uuid)
|
||||||
|
|
||||||
|
if watch.get('time_between_check_use_default'):
|
||||||
|
time_schedule_limit = datastore.data['settings']['requests'].get('time_schedule_limit', {})
|
||||||
|
else:
|
||||||
|
time_schedule_limit = watch.get('time_schedule_limit')
|
||||||
|
|
||||||
|
tz_name = time_schedule_limit.get('timezone')
|
||||||
|
if not tz_name:
|
||||||
|
tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')
|
||||||
|
|
||||||
|
if time_schedule_limit and time_schedule_limit.get('enabled'):
|
||||||
|
try:
|
||||||
|
is_in_schedule = is_within_schedule(time_schedule_limit=time_schedule_limit,
|
||||||
|
default_tz=tz_name
|
||||||
|
)
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(
|
||||||
|
f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
#############################
|
||||||
|
if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
|
||||||
|
# Queue the watch for immediate recheck, with a higher priority
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
|
||||||
|
|
||||||
|
# Diff page [edit] link should go back to diff page
|
||||||
|
if request.args.get("next") and request.args.get("next") == 'diff':
|
||||||
|
return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid))
|
||||||
|
|
||||||
|
return redirect(url_for('watchlist.index', tag=request.args.get("tag",'')))
|
||||||
|
|
||||||
|
else:
|
||||||
|
if request.method == 'POST' and not form.validate():
|
||||||
|
flash("An error occurred, please see below.", "error")
|
||||||
|
|
||||||
|
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
|
||||||
|
jq_support = True
|
||||||
|
try:
|
||||||
|
import jq
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
jq_support = False
|
||||||
|
|
||||||
|
watch = datastore.data['watching'].get(uuid)
|
||||||
|
|
||||||
|
# if system or watch is configured to need a chrome type browser
|
||||||
|
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||||
|
watch_needs_selenium_or_playwright = False
|
||||||
|
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||||
|
watch_needs_selenium_or_playwright = True
|
||||||
|
|
||||||
|
|
||||||
|
from zoneinfo import available_timezones
|
||||||
|
|
||||||
|
# Only works reliably with Playwright
|
||||||
|
|
||||||
|
# Import the global plugin system
|
||||||
|
from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras
|
||||||
|
|
||||||
|
template_args = {
|
||||||
|
'available_processors': processors.available_processors(),
|
||||||
|
'available_timezones': sorted(available_timezones()),
|
||||||
|
'browser_steps_config': browser_step_ui_config,
|
||||||
|
'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||||
|
'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
|
||||||
|
'extra_processor_config': form.extra_tab_content(),
|
||||||
|
'extra_title': f" - Edit - {watch.label}",
|
||||||
|
'form': form,
|
||||||
|
'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||||
|
'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0,
|
||||||
|
'has_special_tag_options': _watch_has_tag_options_set(watch=watch),
|
||||||
|
'jq_support': jq_support,
|
||||||
|
'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||||
|
'settings_application': datastore.data['settings']['application'],
|
||||||
|
'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
|
||||||
|
'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
|
||||||
|
'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
|
||||||
|
'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
|
||||||
|
'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
|
||||||
|
'using_global_webdriver_wait': not default['webdriver_delay'],
|
||||||
|
'uuid': uuid,
|
||||||
|
'watch': watch,
|
||||||
|
'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright,
|
||||||
|
}
|
||||||
|
|
||||||
|
included_content = None
|
||||||
|
if form.extra_form_content():
|
||||||
|
# So that the extra panels can access _helpers.html etc, we set the environment to load from templates/
|
||||||
|
# And then render the code from the module
|
||||||
|
templates_dir = str(importlib.resources.files("changedetectionio").joinpath('templates'))
|
||||||
|
env = Environment(loader=FileSystemLoader(templates_dir))
|
||||||
|
template = env.from_string(form.extra_form_content())
|
||||||
|
included_content = template.render(**template_args)
|
||||||
|
|
||||||
|
output = render_template("edit.html",
|
||||||
|
extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None,
|
||||||
|
extra_form_content=included_content,
|
||||||
|
**template_args
|
||||||
|
)
|
||||||
|
|
||||||
|
return output
|
||||||
|
|
||||||
|
@edit_blueprint.route("/edit/<string:uuid>/get-html", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def watch_get_latest_html(uuid):
|
||||||
|
from io import BytesIO
|
||||||
|
from flask import send_file
|
||||||
|
import brotli
|
||||||
|
|
||||||
|
watch = datastore.data['watching'].get(uuid)
|
||||||
|
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
|
||||||
|
latest_filename = list(watch.history.keys())[-1]
|
||||||
|
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
|
||||||
|
with open(html_fname, 'rb') as f:
|
||||||
|
if html_fname.endswith('.br'):
|
||||||
|
# Read and decompress the Brotli file
|
||||||
|
decompressed_data = brotli.decompress(f.read())
|
||||||
|
else:
|
||||||
|
decompressed_data = f.read()
|
||||||
|
|
||||||
|
buffer = BytesIO(decompressed_data)
|
||||||
|
|
||||||
|
return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
|
||||||
|
|
||||||
|
# Return a 500 error
|
||||||
|
abort(500)
|
||||||
|
|
||||||
|
# Ajax callback
|
||||||
|
@edit_blueprint.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])
|
||||||
|
@login_optionally_required
|
||||||
|
def watch_get_preview_rendered(uuid):
|
||||||
|
'''For when viewing the "preview" of the rendered text from inside of Edit'''
|
||||||
|
from flask import jsonify
|
||||||
|
from changedetectionio.processors.text_json_diff import prepare_filter_prevew
|
||||||
|
result = prepare_filter_prevew(watch_uuid=uuid, form_data=request.form, datastore=datastore)
|
||||||
|
return jsonify(result)
|
||||||
|
|
||||||
|
@edit_blueprint.route("/highlight_submit_ignore_url", methods=['POST'])
|
||||||
|
@login_optionally_required
|
||||||
|
def highlight_submit_ignore_url():
|
||||||
|
import re
|
||||||
|
mode = request.form.get('mode')
|
||||||
|
selection = request.form.get('selection')
|
||||||
|
|
||||||
|
uuid = request.args.get('uuid','')
|
||||||
|
if datastore.data["watching"].get(uuid):
|
||||||
|
if mode == 'exact':
|
||||||
|
for l in selection.splitlines():
|
||||||
|
datastore.data["watching"][uuid]['ignore_text'].append(l.strip())
|
||||||
|
elif mode == 'digit-regex':
|
||||||
|
for l in selection.splitlines():
|
||||||
|
# Replace any series of numbers with a regex
|
||||||
|
s = re.escape(l.strip())
|
||||||
|
s = re.sub(r'[0-9]+', r'\\d+', s)
|
||||||
|
datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/')
|
||||||
|
|
||||||
|
return f"<a href={url_for('ui.ui_views.preview_page', uuid=uuid)}>Click to preview</a>"
|
||||||
|
|
||||||
|
return edit_blueprint
|
||||||
108
changedetectionio/blueprint/ui/notification.py
Normal file
@@ -0,0 +1,108 @@
|
|||||||
|
from flask import Blueprint, request, make_response
|
||||||
|
import random
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
|
||||||
|
def construct_blueprint(datastore: ChangeDetectionStore):
|
||||||
|
notification_blueprint = Blueprint('ui_notification', __name__, template_folder="../ui/templates")
|
||||||
|
|
||||||
|
# AJAX endpoint for sending a test
|
||||||
|
@notification_blueprint.route("/notification/send-test/<string:watch_uuid>", methods=['POST'])
|
||||||
|
@notification_blueprint.route("/notification/send-test", methods=['POST'])
|
||||||
|
@notification_blueprint.route("/notification/send-test/", methods=['POST'])
|
||||||
|
@login_optionally_required
|
||||||
|
def ajax_callback_send_notification_test(watch_uuid=None):
|
||||||
|
|
||||||
|
# Watch_uuid could be unset in the case it`s used in tag editor, global settings
|
||||||
|
import apprise
|
||||||
|
from changedetectionio.notification.handler import process_notification
|
||||||
|
from changedetectionio.notification.apprise_plugin.assets import apprise_asset
|
||||||
|
|
||||||
|
from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler
|
||||||
|
|
||||||
|
apobj = apprise.Apprise(asset=apprise_asset)
|
||||||
|
|
||||||
|
is_global_settings_form = request.args.get('mode', '') == 'global-settings'
|
||||||
|
is_group_settings_form = request.args.get('mode', '') == 'group-settings'
|
||||||
|
|
||||||
|
# Use an existing random one on the global/main settings form
|
||||||
|
if not watch_uuid and (is_global_settings_form or is_group_settings_form) \
|
||||||
|
and datastore.data.get('watching'):
|
||||||
|
logger.debug(f"Send test notification - Choosing random Watch {watch_uuid}")
|
||||||
|
watch_uuid = random.choice(list(datastore.data['watching'].keys()))
|
||||||
|
|
||||||
|
if not watch_uuid:
|
||||||
|
return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400)
|
||||||
|
|
||||||
|
watch = datastore.data['watching'].get(watch_uuid)
|
||||||
|
|
||||||
|
notification_urls = None
|
||||||
|
|
||||||
|
if request.form.get('notification_urls'):
|
||||||
|
notification_urls = request.form['notification_urls'].strip().splitlines()
|
||||||
|
|
||||||
|
if not notification_urls:
|
||||||
|
logger.debug("Test notification - Trying by group/tag in the edit form if available")
|
||||||
|
# On an edit page, we should also fire off to the tags if they have notifications
|
||||||
|
if request.form.get('tags') and request.form['tags'].strip():
|
||||||
|
for k in request.form['tags'].split(','):
|
||||||
|
tag = datastore.tag_exists_by_name(k.strip())
|
||||||
|
notification_urls = tag.get('notifications_urls') if tag and tag.get('notifications_urls') else None
|
||||||
|
|
||||||
|
if not notification_urls and not is_global_settings_form and not is_group_settings_form:
|
||||||
|
# In the global settings, use only what is typed currently in the text box
|
||||||
|
logger.debug("Test notification - Trying by global system settings notifications")
|
||||||
|
if datastore.data['settings']['application'].get('notification_urls'):
|
||||||
|
notification_urls = datastore.data['settings']['application']['notification_urls']
|
||||||
|
|
||||||
|
if not notification_urls:
|
||||||
|
return 'Error: No Notification URLs set/found'
|
||||||
|
|
||||||
|
for n_url in notification_urls:
|
||||||
|
if len(n_url.strip()):
|
||||||
|
if not apobj.add(n_url):
|
||||||
|
return f'Error: {n_url} is not a valid AppRise URL.'
|
||||||
|
|
||||||
|
try:
|
||||||
|
# use the same as when it is triggered, but then override it with the form test values
|
||||||
|
n_object = {
|
||||||
|
'watch_url': request.form.get('window_url', "https://changedetection.io"),
|
||||||
|
'notification_urls': notification_urls
|
||||||
|
}
|
||||||
|
|
||||||
|
# Only use if present, if not set in n_object it should use the default system value
|
||||||
|
if 'notification_format' in request.form and request.form['notification_format'].strip():
|
||||||
|
n_object['notification_format'] = request.form.get('notification_format', '').strip()
|
||||||
|
|
||||||
|
if 'notification_title' in request.form and request.form['notification_title'].strip():
|
||||||
|
n_object['notification_title'] = request.form.get('notification_title', '').strip()
|
||||||
|
elif datastore.data['settings']['application'].get('notification_title'):
|
||||||
|
n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
|
||||||
|
else:
|
||||||
|
n_object['notification_title'] = "Test title"
|
||||||
|
|
||||||
|
if 'notification_body' in request.form and request.form['notification_body'].strip():
|
||||||
|
n_object['notification_body'] = request.form.get('notification_body', '').strip()
|
||||||
|
elif datastore.data['settings']['application'].get('notification_body'):
|
||||||
|
n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
|
||||||
|
else:
|
||||||
|
n_object['notification_body'] = "Test body"
|
||||||
|
|
||||||
|
n_object['as_async'] = False
|
||||||
|
n_object.update(watch.extra_notification_token_values())
|
||||||
|
sent_obj = process_notification(n_object, datastore)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
e_str = str(e)
|
||||||
|
# Remove this text which is not important and floods the container
|
||||||
|
e_str = e_str.replace(
|
||||||
|
"DEBUG - <class 'apprise.decorators.base.CustomNotifyPlugin.instantiate_plugin.<locals>.CustomNotifyPluginWrapper'>",
|
||||||
|
'')
|
||||||
|
|
||||||
|
return make_response(e_str, 400)
|
||||||
|
|
||||||
|
return 'OK - Sent test notifications'
|
||||||
|
|
||||||
|
return notification_blueprint
|
||||||
@@ -3,7 +3,7 @@
|
|||||||
<div class="box-wrap inner">
|
<div class="box-wrap inner">
|
||||||
<form
|
<form
|
||||||
class="pure-form pure-form-stacked"
|
class="pure-form pure-form-stacked"
|
||||||
action="{{url_for('clear_all_history')}}"
|
action="{{url_for('ui.clear_all_history')}}"
|
||||||
method="POST"
|
method="POST"
|
||||||
>
|
>
|
||||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||||
@@ -37,7 +37,7 @@
|
|||||||
</div>
|
</div>
|
||||||
<br />
|
<br />
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
<a href="{{url_for('index')}}" class="pure-button button-cancel"
|
<a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel"
|
||||||
>Cancel</a
|
>Cancel</a
|
||||||
>
|
>
|
||||||
</div>
|
</div>
|
||||||
220
changedetectionio/blueprint/ui/views.py
Normal file
@@ -0,0 +1,220 @@
|
|||||||
|
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
|
||||||
|
from flask_login import current_user
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from copy import deepcopy
|
||||||
|
|
||||||
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
from changedetectionio import html_tools
|
||||||
|
|
||||||
|
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||||
|
views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates")
|
||||||
|
|
||||||
|
@views_blueprint.route("/preview/<string:uuid>", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def preview_page(uuid):
|
||||||
|
content = []
|
||||||
|
versions = []
|
||||||
|
timestamp = None
|
||||||
|
|
||||||
|
# More for testing, possible to return the first/only
|
||||||
|
if uuid == 'first':
|
||||||
|
uuid = list(datastore.data['watching'].keys()).pop()
|
||||||
|
|
||||||
|
try:
|
||||||
|
watch = datastore.data['watching'][uuid]
|
||||||
|
except KeyError:
|
||||||
|
flash("No history found for the specified link, bad link?", "error")
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||||
|
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||||
|
|
||||||
|
is_html_webdriver = False
|
||||||
|
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||||
|
is_html_webdriver = True
|
||||||
|
triggered_line_numbers = []
|
||||||
|
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
|
||||||
|
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
|
||||||
|
else:
|
||||||
|
# So prepare the latest preview or not
|
||||||
|
preferred_version = request.args.get('version')
|
||||||
|
versions = list(watch.history.keys())
|
||||||
|
timestamp = versions[-1]
|
||||||
|
if preferred_version and preferred_version in versions:
|
||||||
|
timestamp = preferred_version
|
||||||
|
|
||||||
|
try:
|
||||||
|
versions = list(watch.history.keys())
|
||||||
|
content = watch.get_history_snapshot(timestamp)
|
||||||
|
|
||||||
|
triggered_line_numbers = html_tools.strip_ignore_text(content=content,
|
||||||
|
wordlist=watch['trigger_text'],
|
||||||
|
mode='line numbers'
|
||||||
|
)
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})
|
||||||
|
|
||||||
|
output = render_template("preview.html",
|
||||||
|
content=content,
|
||||||
|
current_version=timestamp,
|
||||||
|
history_n=watch.history_n,
|
||||||
|
extra_stylesheets=extra_stylesheets,
|
||||||
|
extra_title=f" - Diff - {watch.label} @ {timestamp}",
|
||||||
|
triggered_line_numbers=triggered_line_numbers,
|
||||||
|
current_diff_url=watch['url'],
|
||||||
|
screenshot=watch.get_screenshot(),
|
||||||
|
watch=watch,
|
||||||
|
uuid=uuid,
|
||||||
|
is_html_webdriver=is_html_webdriver,
|
||||||
|
last_error=watch['last_error'],
|
||||||
|
last_error_text=watch.get_error_text(),
|
||||||
|
last_error_screenshot=watch.get_error_snapshot(),
|
||||||
|
versions=versions
|
||||||
|
)
|
||||||
|
|
||||||
|
return output
|
||||||
|
|
||||||
|
@views_blueprint.route("/diff/<string:uuid>", methods=['GET', 'POST'])
|
||||||
|
@login_optionally_required
|
||||||
|
def diff_history_page(uuid):
|
||||||
|
from changedetectionio import forms
|
||||||
|
|
||||||
|
# More for testing, possible to return the first/only
|
||||||
|
if uuid == 'first':
|
||||||
|
uuid = list(datastore.data['watching'].keys()).pop()
|
||||||
|
|
||||||
|
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
|
||||||
|
try:
|
||||||
|
watch = datastore.data['watching'][uuid]
|
||||||
|
except KeyError:
|
||||||
|
flash("No history found for the specified link, bad link?", "error")
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
# For submission of requesting an extract
|
||||||
|
extract_form = forms.extractDataForm(request.form)
|
||||||
|
if request.method == 'POST':
|
||||||
|
if not extract_form.validate():
|
||||||
|
flash("An error occurred, please see below.", "error")
|
||||||
|
|
||||||
|
else:
|
||||||
|
extract_regex = request.form.get('extract_regex').strip()
|
||||||
|
output = watch.extract_regex_from_all_history(extract_regex)
|
||||||
|
if output:
|
||||||
|
watch_dir = os.path.join(datastore.datastore_path, uuid)
|
||||||
|
response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
|
||||||
|
response.headers['Content-type'] = 'text/csv'
|
||||||
|
response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
|
||||||
|
response.headers['Pragma'] = 'no-cache'
|
||||||
|
response.headers['Expires'] = 0
|
||||||
|
return response
|
||||||
|
|
||||||
|
flash('Nothing matches that RegEx', 'error')
|
||||||
|
redirect(url_for('ui_views.diff_history_page', uuid=uuid)+'#extract')
|
||||||
|
|
||||||
|
history = watch.history
|
||||||
|
dates = list(history.keys())
|
||||||
|
|
||||||
|
if len(dates) < 2:
|
||||||
|
flash("Not enough saved change detection snapshots to produce a report.", "error")
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
# Save the current newest history as the most recently viewed
|
||||||
|
datastore.set_last_viewed(uuid, time.time())
|
||||||
|
|
||||||
|
# Read as binary and force decode as UTF-8
|
||||||
|
# Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
|
||||||
|
from_version = request.args.get('from_version')
|
||||||
|
from_version_index = -2 # second newest
|
||||||
|
if from_version and from_version in dates:
|
||||||
|
from_version_index = dates.index(from_version)
|
||||||
|
else:
|
||||||
|
from_version = dates[from_version_index]
|
||||||
|
|
||||||
|
try:
|
||||||
|
from_version_file_contents = watch.get_history_snapshot(dates[from_version_index])
|
||||||
|
except Exception as e:
|
||||||
|
from_version_file_contents = f"Unable to read to-version at index {dates[from_version_index]}.\n"
|
||||||
|
|
||||||
|
to_version = request.args.get('to_version')
|
||||||
|
to_version_index = -1
|
||||||
|
if to_version and to_version in dates:
|
||||||
|
to_version_index = dates.index(to_version)
|
||||||
|
else:
|
||||||
|
to_version = dates[to_version_index]
|
||||||
|
|
||||||
|
try:
|
||||||
|
to_version_file_contents = watch.get_history_snapshot(dates[to_version_index])
|
||||||
|
except Exception as e:
|
||||||
|
to_version_file_contents = "Unable to read to-version at index{}.\n".format(dates[to_version_index])
|
||||||
|
|
||||||
|
screenshot_url = watch.get_screenshot()
|
||||||
|
|
||||||
|
system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
|
||||||
|
|
||||||
|
is_html_webdriver = False
|
||||||
|
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
|
||||||
|
is_html_webdriver = True
|
||||||
|
|
||||||
|
password_enabled_and_share_is_off = False
|
||||||
|
if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
|
||||||
|
password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
|
||||||
|
|
||||||
|
output = render_template("diff.html",
|
||||||
|
current_diff_url=watch['url'],
|
||||||
|
from_version=str(from_version),
|
||||||
|
to_version=str(to_version),
|
||||||
|
extra_stylesheets=extra_stylesheets,
|
||||||
|
extra_title=f" - Diff - {watch.label}",
|
||||||
|
extract_form=extract_form,
|
||||||
|
is_html_webdriver=is_html_webdriver,
|
||||||
|
last_error=watch['last_error'],
|
||||||
|
last_error_screenshot=watch.get_error_snapshot(),
|
||||||
|
last_error_text=watch.get_error_text(),
|
||||||
|
left_sticky=True,
|
||||||
|
newest=to_version_file_contents,
|
||||||
|
newest_version_timestamp=dates[-1],
|
||||||
|
password_enabled_and_share_is_off=password_enabled_and_share_is_off,
|
||||||
|
from_version_file_contents=from_version_file_contents,
|
||||||
|
to_version_file_contents=to_version_file_contents,
|
||||||
|
screenshot=screenshot_url,
|
||||||
|
uuid=uuid,
|
||||||
|
versions=dates, # All except current/last
|
||||||
|
watch_a=watch
|
||||||
|
)
|
||||||
|
|
||||||
|
return output
|
||||||
|
|
||||||
|
@views_blueprint.route("/form/add/quickwatch", methods=['POST'])
|
||||||
|
@login_optionally_required
|
||||||
|
def form_quick_watch_add():
|
||||||
|
from changedetectionio import forms
|
||||||
|
form = forms.quickWatchForm(request.form)
|
||||||
|
|
||||||
|
if not form.validate():
|
||||||
|
for widget, l in form.errors.items():
|
||||||
|
flash(','.join(l), 'error')
|
||||||
|
return redirect(url_for('watchlist.index'))
|
||||||
|
|
||||||
|
url = request.form.get('url').strip()
|
||||||
|
if datastore.url_exists(url):
|
||||||
|
flash(f'Warning, URL {url} already exists', "notice")
|
||||||
|
|
||||||
|
add_paused = request.form.get('edit_and_watch_submit_button') != None
|
||||||
|
processor = request.form.get('processor', 'text_json_diff')
|
||||||
|
new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor})
|
||||||
|
|
||||||
|
if new_uuid:
|
||||||
|
if add_paused:
|
||||||
|
flash('Watch added in Paused state, saving will unpause.')
|
||||||
|
return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
|
||||||
|
else:
|
||||||
|
# Straight into the queue.
|
||||||
|
update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
|
||||||
|
flash("Watch added.")
|
||||||
|
|
||||||
|
return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
|
||||||
|
|
||||||
|
return views_blueprint
|
||||||
111
changedetectionio/blueprint/watchlist/__init__.py
Normal file
@@ -0,0 +1,111 @@
|
|||||||
|
import os
|
||||||
|
import time
|
||||||
|
|
||||||
|
from flask import Blueprint, request, make_response, render_template, redirect, url_for, flash, session
|
||||||
|
from flask_login import current_user
|
||||||
|
from flask_paginate import Pagination, get_page_parameter
|
||||||
|
|
||||||
|
from changedetectionio import forms
|
||||||
|
from changedetectionio.store import ChangeDetectionStore
|
||||||
|
from changedetectionio.auth_decorator import login_optionally_required
|
||||||
|
|
||||||
|
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
|
||||||
|
watchlist_blueprint = Blueprint('watchlist', __name__, template_folder="templates")
|
||||||
|
|
||||||
|
@watchlist_blueprint.route("/", methods=['GET'])
|
||||||
|
@login_optionally_required
|
||||||
|
def index():
|
||||||
|
active_tag_req = request.args.get('tag', '').lower().strip()
|
||||||
|
active_tag_uuid = active_tag = None
|
||||||
|
|
||||||
|
# Be sure limit_tag is a uuid
|
||||||
|
if active_tag_req:
|
||||||
|
for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
|
||||||
|
if active_tag_req == tag.get('title', '').lower().strip() or active_tag_req == uuid:
|
||||||
|
active_tag = tag
|
||||||
|
active_tag_uuid = uuid
|
||||||
|
break
|
||||||
|
|
||||||
|
# Redirect for the old rss path which used the /?rss=true
|
||||||
|
if request.args.get('rss'):
|
||||||
|
return redirect(url_for('rss.feed', tag=active_tag_uuid))
|
||||||
|
|
||||||
|
op = request.args.get('op')
|
||||||
|
if op:
|
||||||
|
uuid = request.args.get('uuid')
|
||||||
|
if op == 'pause':
|
||||||
|
datastore.data['watching'][uuid].toggle_pause()
|
||||||
|
elif op == 'mute':
|
||||||
|
datastore.data['watching'][uuid].toggle_mute()
|
||||||
|
|
||||||
|
datastore.needs_write = True
|
||||||
|
return redirect(url_for('watchlist.index', tag = active_tag_uuid))
|
||||||
|
|
||||||
|
# Sort by last_changed and add the uuid which is usually the key..
|
||||||
|
sorted_watches = []
|
||||||
|
with_errors = request.args.get('with_errors') == "1"
|
||||||
|
errored_count = 0
|
||||||
|
search_q = request.args.get('q').strip().lower() if request.args.get('q') else False
|
||||||
|
for uuid, watch in datastore.data['watching'].items():
|
||||||
|
if with_errors and not watch.get('last_error'):
|
||||||
|
continue
|
||||||
|
|
||||||
|
if active_tag_uuid and not active_tag_uuid in watch['tags']:
|
||||||
|
continue
|
||||||
|
if watch.get('last_error'):
|
||||||
|
errored_count += 1
|
||||||
|
|
||||||
|
if search_q:
|
||||||
|
if (watch.get('title') and search_q in watch.get('title').lower()) or search_q in watch.get('url', '').lower():
|
||||||
|
sorted_watches.append(watch)
|
||||||
|
elif watch.get('last_error') and search_q in watch.get('last_error').lower():
|
||||||
|
sorted_watches.append(watch)
|
||||||
|
else:
|
||||||
|
sorted_watches.append(watch)
|
||||||
|
|
||||||
|
form = forms.quickWatchForm(request.form)
|
||||||
|
page = request.args.get(get_page_parameter(), type=int, default=1)
|
||||||
|
total_count = len(sorted_watches)
|
||||||
|
|
||||||
|
pagination = Pagination(page=page,
|
||||||
|
total=total_count,
|
||||||
|
per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")
|
||||||
|
|
||||||
|
sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
|
||||||
|
output = render_template(
|
||||||
|
"watch-overview.html",
|
||||||
|
active_tag=active_tag,
|
||||||
|
active_tag_uuid=active_tag_uuid,
|
||||||
|
app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
|
||||||
|
datastore=datastore,
|
||||||
|
errored_count=errored_count,
|
||||||
|
form=form,
|
||||||
|
guid=datastore.data['app_guid'],
|
||||||
|
has_proxies=datastore.proxy_list,
|
||||||
|
has_unviewed=datastore.has_unviewed,
|
||||||
|
hosted_sticky=os.getenv("SALTED_PASS", False) == False,
|
||||||
|
now_time_server=time.time(),
|
||||||
|
pagination=pagination,
|
||||||
|
queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
|
||||||
|
search_q=request.args.get('q', '').strip(),
|
||||||
|
sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
|
||||||
|
sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
|
||||||
|
system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
|
||||||
|
tags=sorted_tags,
|
||||||
|
watches=sorted_watches
|
||||||
|
)
|
||||||
|
|
||||||
|
if session.get('share-link'):
|
||||||
|
del(session['share-link'])
|
||||||
|
|
||||||
|
resp = make_response(output)
|
||||||
|
|
||||||
|
# The template can run on cookie or url query info
|
||||||
|
if request.args.get('sort'):
|
||||||
|
resp.set_cookie('sort', request.args.get('sort'))
|
||||||
|
if request.args.get('order'):
|
||||||
|
resp.set_cookie('order', request.args.get('order'))
|
||||||
|
|
||||||
|
return resp
|
||||||
|
|
||||||
|
return watchlist_blueprint
|
||||||
@@ -3,10 +3,19 @@
|
|||||||
{% from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title %}
|
{% from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title %}
|
||||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
|
||||||
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
|
||||||
|
<script>let nowtimeserver={{ now_time_server }};</script>
|
||||||
|
|
||||||
|
<style>
|
||||||
|
.checking-now .last-checked {
|
||||||
|
background-image: linear-gradient(to bottom, transparent 0%, rgba(0,0,0,0.05) 40%, rgba(0,0,0,0.1) 100%);
|
||||||
|
background-size: 0 100%;
|
||||||
|
background-repeat: no-repeat;
|
||||||
|
transition: background-size 0.9s ease
|
||||||
|
}
|
||||||
|
</style>
|
||||||
<div class="box">
|
<div class="box">
|
||||||
|
|
||||||
<form class="pure-form" action="{{ url_for('form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form">
|
<form class="pure-form" action="{{ url_for('ui.ui_views.form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form">
|
||||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||||
<fieldset>
|
<fieldset>
|
||||||
<legend>Add a new change detection watch</legend>
|
<legend>Add a new change detection watch</legend>
|
||||||
@@ -25,7 +34,7 @@
|
|||||||
<span style="color:#eee; font-size: 80%;"><img alt="Create a shareable link" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></span>
|
<span style="color:#eee; font-size: 80%;"><img alt="Create a shareable link" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></span>
|
||||||
</form>
|
</form>
|
||||||
|
|
||||||
<form class="pure-form" action="{{ url_for('form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
|
<form class="pure-form" action="{{ url_for('ui.form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
|
||||||
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
|
||||||
<input type="hidden" id="op_extradata" name="op_extradata" value="" >
|
<input type="hidden" id="op_extradata" name="op_extradata" value="" >
|
||||||
<div id="checkbox-operations">
|
<div id="checkbox-operations">
|
||||||
@@ -46,12 +55,12 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
{% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %}
|
{% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %}
|
||||||
<div>
|
<div>
|
||||||
<a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a>
|
<a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a>
|
||||||
|
|
||||||
<!-- tag list -->
|
<!-- tag list -->
|
||||||
{% for uuid, tag in tags %}
|
{% for uuid, tag in tags %}
|
||||||
{% if tag != "" %}
|
{% if tag != "" %}
|
||||||
<a href="{{url_for('index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
|
<a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endfor %}
|
{% endfor %}
|
||||||
</div>
|
</div>
|
||||||
@@ -72,27 +81,27 @@
|
|||||||
<tr>
|
<tr>
|
||||||
{% set link_order = "desc" if sort_order == 'asc' else "asc" %}
|
{% set link_order = "desc" if sort_order == 'asc' else "asc" %}
|
||||||
{% set arrow_span = "" %}
|
{% set arrow_span = "" %}
|
||||||
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
|
<th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}" href="{{url_for('watchlist.index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
|
||||||
<th class="empty-cell"></th>
|
<th class="empty-cell"></th>
|
||||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
|
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
|
||||||
{% if any_has_restock_price_processor %}
|
{% if any_has_restock_price_processor %}
|
||||||
<th>Restock & Price</th>
|
<th>Restock & Price</th>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th>
|
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th>
|
||||||
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th>
|
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th>
|
||||||
<th class="empty-cell"></th>
|
<th class="empty-cell"></th>
|
||||||
</tr>
|
</tr>
|
||||||
</thead>
|
</thead>
|
||||||
<tbody>
|
<tbody>
|
||||||
{% if not watches|length %}
|
{% if not watches|length %}
|
||||||
<tr>
|
<tr>
|
||||||
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('import_page')}}" >import a list</a>.</td>
|
<td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td>
|
||||||
</tr>
|
</tr>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
|
{% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
|
||||||
|
|
||||||
{% set is_unviewed = watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}
|
{% set is_unviewed = watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}
|
||||||
|
{% set checking_now = is_checking_now(watch) %}
|
||||||
<tr id="{{ watch.uuid }}"
|
<tr id="{{ watch.uuid }}"
|
||||||
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
|
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
|
||||||
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
|
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
|
||||||
@@ -100,25 +109,28 @@
|
|||||||
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
|
{% if watch.paused is defined and watch.paused != False %}paused{% endif %}
|
||||||
{% if is_unviewed %}unviewed{% endif %}
|
{% if is_unviewed %}unviewed{% endif %}
|
||||||
{% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %}
|
{% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %}
|
||||||
{% if watch.uuid in queued_uuids %}queued{% endif %}">
|
{% if watch.uuid in queued_uuids %}queued{% endif %}
|
||||||
|
{% if checking_now %}checking-now{% endif %}
|
||||||
|
">
|
||||||
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
|
<td class="inline checkbox-uuid" ><input name="uuids" type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
|
||||||
<td class="inline watch-controls">
|
<td class="inline watch-controls">
|
||||||
{% if not watch.paused %}
|
{% if not watch.paused %}
|
||||||
<a class="state-off" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
|
<a class="state-off" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
|
||||||
{% else %}
|
{% else %}
|
||||||
<a class="state-on" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
|
<a class="state-on" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a>
|
{% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %}
|
||||||
|
<a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a>
|
||||||
</td>
|
</td>
|
||||||
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
|
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
|
||||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
|
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
|
||||||
<a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
||||||
|
|
||||||
{% if watch.get_fetch_backend == "html_webdriver"
|
{% if watch.get_fetch_backend == "html_webdriver"
|
||||||
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
|
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
|
||||||
or "extra_browser_" in watch.get_fetch_backend
|
or "extra_browser_" in watch.get_fetch_backend
|
||||||
%}
|
%}
|
||||||
<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a Chrome browser" >
|
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
|
{%if watch.is_pdf %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
|
||||||
@@ -128,9 +140,9 @@
|
|||||||
|
|
||||||
{% if '403' in watch.last_error %}
|
{% if '403' in watch.last_error %}
|
||||||
{% if has_proxies %}
|
{% if has_proxies %}
|
||||||
<a href="{{ url_for('settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>
|
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<a href="{{ url_for('settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
|
<a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
|
||||||
|
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if 'empty result or contain only an image' in watch.last_error %}
|
{% if 'empty result or contain only an image' in watch.last_error %}
|
||||||
@@ -139,7 +151,7 @@
|
|||||||
</div>
|
</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
|
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
|
||||||
<div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div>
|
<div class="fetch-error notification-error"><a href="{{url_for('settings.notification_logs')}}">{{ watch.last_notification_error }}</a></div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{% if watch['processor'] == 'text_json_diff' %}
|
{% if watch['processor'] == 'text_json_diff' %}
|
||||||
@@ -177,7 +189,14 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
</td>
|
</td>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<td class="last-checked" data-timestamp="{{ watch.last_checked }}">{{watch|format_last_checked_time|safe}}</td>
|
{#last_checked becomes fetch-start-time#}
|
||||||
|
<td class="last-checked" data-timestamp="{{ watch.last_checked }}" {% if checking_now %} data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" {% endif %} >
|
||||||
|
{% if checking_now %}
|
||||||
|
<span class="spinner"></span><span> Checking now</span>
|
||||||
|
{% else %}
|
||||||
|
{{watch|format_last_checked_time|safe}}</td>
|
||||||
|
{% endif %}
|
||||||
|
|
||||||
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %}
|
<td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %}
|
||||||
{{watch.last_changed|format_timestamp_timeago}}
|
{{watch.last_changed|format_timestamp_timeago}}
|
||||||
{% else %}
|
{% else %}
|
||||||
@@ -185,20 +204,23 @@
|
|||||||
{% endif %}
|
{% endif %}
|
||||||
</td>
|
</td>
|
||||||
<td>
|
<td>
|
||||||
<a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
|
<a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
|
||||||
class="recheck pure-button pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
|
class="recheck pure-button pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
|
||||||
<a href="{{ url_for('edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
|
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
|
||||||
{% if watch.history_n >= 2 %}
|
{% if watch.history_n >= 2 %}
|
||||||
|
|
||||||
|
{% set open_diff_in_new_tab = datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') %}
|
||||||
|
{% set target_attr = ' target="' ~ watch.uuid ~ '"' if open_diff_in_new_tab else '' %}
|
||||||
|
|
||||||
{% if is_unviewed %}
|
{% if is_unviewed %}
|
||||||
<a href="{{ url_for('diff_history_page', uuid=watch.uuid, from_version=watch.get_next_snapshot_key_to_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
|
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
|
||||||
{% else %}
|
{% else %}
|
||||||
<a href="{{ url_for('diff_history_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
|
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
|
|
||||||
{% else %}
|
{% else %}
|
||||||
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
|
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
|
||||||
<a href="{{ url_for('preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary">Preview</a>
|
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary">Preview</a>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% endif %}
|
{% endif %}
|
||||||
</td>
|
</td>
|
||||||
@@ -209,20 +231,20 @@
|
|||||||
<ul id="post-list-buttons">
|
<ul id="post-list-buttons">
|
||||||
{% if errored_count %}
|
{% if errored_count %}
|
||||||
<li>
|
<li>
|
||||||
<a href="{{url_for('index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error ">With errors ({{ errored_count }})</a>
|
<a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error ">With errors ({{ errored_count }})</a>
|
||||||
</li>
|
</li>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if has_unviewed %}
|
{% if has_unviewed %}
|
||||||
<li>
|
<li>
|
||||||
<a href="{{url_for('mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Mark all viewed</a>
|
<a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Mark all viewed</a>
|
||||||
</li>
|
</li>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<li>
|
<li>
|
||||||
<a href="{{ url_for('form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Recheck
|
<a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Recheck
|
||||||
all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a>
|
all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a>
|
||||||
</li>
|
</li>
|
||||||
<li>
|
<li>
|
||||||
<a href="{{ url_for('rss', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='Generic_Feed-icon.svg')}}" height="15"></a>
|
<a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
{{ pagination.links }}
|
{{ pagination.links }}
|
||||||
170
changedetectionio/conditions/__init__.py
Normal file
@@ -0,0 +1,170 @@
|
|||||||
|
from flask import Blueprint
|
||||||
|
|
||||||
|
from json_logic.builtins import BUILTINS
|
||||||
|
|
||||||
|
from .exceptions import EmptyConditionRuleRowNotUsable
|
||||||
|
from .pluggy_interface import plugin_manager # Import the pluggy plugin manager
|
||||||
|
from . import default_plugin
|
||||||
|
from loguru import logger
|
||||||
|
# List of all supported JSON Logic operators
|
||||||
|
operator_choices = [
|
||||||
|
(None, "Choose one - Operator"),
|
||||||
|
(">", "Greater Than"),
|
||||||
|
("<", "Less Than"),
|
||||||
|
(">=", "Greater Than or Equal To"),
|
||||||
|
("<=", "Less Than or Equal To"),
|
||||||
|
("==", "Equals"),
|
||||||
|
("!=", "Not Equals"),
|
||||||
|
("in", "Contains"),
|
||||||
|
("!in", "Does Not Contain"),
|
||||||
|
]
|
||||||
|
|
||||||
|
# Fields available in the rules
|
||||||
|
field_choices = [
|
||||||
|
(None, "Choose one - Field"),
|
||||||
|
]
|
||||||
|
|
||||||
|
# The data we will feed the JSON Rules to see if it passes the test/conditions or not
|
||||||
|
EXECUTE_DATA = {}
|
||||||
|
|
||||||
|
|
||||||
|
# Define the extended operations dictionary
|
||||||
|
CUSTOM_OPERATIONS = {
|
||||||
|
**BUILTINS, # Include all standard operators
|
||||||
|
}
|
||||||
|
|
||||||
|
def filter_complete_rules(ruleset):
|
||||||
|
rules = [
|
||||||
|
rule for rule in ruleset
|
||||||
|
if all(value not in ("", False, "None", None) for value in [rule["operator"], rule["field"], rule["value"]])
|
||||||
|
]
|
||||||
|
return rules
|
||||||
|
|
||||||
|
def convert_to_jsonlogic(logic_operator: str, rule_dict: list):
|
||||||
|
"""
|
||||||
|
Convert a structured rule dict into a JSON Logic rule.
|
||||||
|
|
||||||
|
:param rule_dict: Dictionary containing conditions.
|
||||||
|
:return: JSON Logic rule as a dictionary.
|
||||||
|
"""
|
||||||
|
|
||||||
|
|
||||||
|
json_logic_conditions = []
|
||||||
|
|
||||||
|
for condition in rule_dict:
|
||||||
|
operator = condition["operator"]
|
||||||
|
field = condition["field"]
|
||||||
|
value = condition["value"]
|
||||||
|
|
||||||
|
if not operator or operator == 'None' or not value or not field:
|
||||||
|
raise EmptyConditionRuleRowNotUsable()
|
||||||
|
|
||||||
|
# Convert value to int/float if possible
|
||||||
|
try:
|
||||||
|
if isinstance(value, str) and "." in value and str != "None":
|
||||||
|
value = float(value)
|
||||||
|
else:
|
||||||
|
value = int(value)
|
||||||
|
except (ValueError, TypeError):
|
||||||
|
pass # Keep as a string if conversion fails
|
||||||
|
|
||||||
|
# Handle different JSON Logic operators properly
|
||||||
|
if operator == "in":
|
||||||
|
json_logic_conditions.append({"in": [value, {"var": field}]}) # value first
|
||||||
|
elif operator in ("!", "!!", "-"):
|
||||||
|
json_logic_conditions.append({operator: [{"var": field}]}) # Unary operators
|
||||||
|
elif operator in ("min", "max", "cat"):
|
||||||
|
json_logic_conditions.append({operator: value}) # Multi-argument operators
|
||||||
|
else:
|
||||||
|
json_logic_conditions.append({operator: [{"var": field}, value]}) # Standard binary operators
|
||||||
|
|
||||||
|
return {logic_operator: json_logic_conditions} if len(json_logic_conditions) > 1 else json_logic_conditions[0]
|
||||||
|
|
||||||
|
|
||||||
|
def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_datastruct, ephemeral_data=None):
    """
    Build the data dict by calling every registered plugin's add_data hook, then
    evaluate the watch's conditions (converted to a JSON Logic ruleset) against it.

    :param current_watch_uuid: UUID of the watch whose conditions should be evaluated.
    :param application_datastruct: Application data structure containing a 'watching' dict.
    :param ephemeral_data: Optional transient data (e.g. freshly filtered text) made
                           available to plugins; never persisted.
    :return: dict with 'executed_data' (the data the rules ran against) and
             'result' (True when the conditions passed, False otherwise).
    """
    import concurrent.futures

    from json_logic import jsonLogic

    # BUG FIX: a mutable default argument ({}) is shared across calls — use None sentinel
    if ephemeral_data is None:
        ephemeral_data = {}

    EXECUTE_DATA = {}
    result = True

    watch = application_datastruct['watching'].get(current_watch_uuid)

    if watch and watch.get("conditions"):
        logic_operator = "and" if watch.get("conditions_match_logic", "ALL") == "ALL" else "or"
        complete_rules = filter_complete_rules(watch['conditions'])
        if complete_rules:
            # Give all plugins a chance to update the data dict again (that we will test the conditions against)
            for plugin in plugin_manager.get_plugins():
                try:
                    executor = concurrent.futures.ThreadPoolExecutor(max_workers=1)
                    future = executor.submit(
                        plugin.add_data,
                        current_watch_uuid=current_watch_uuid,
                        application_datastruct=application_datastruct,
                        ephemeral_data=ephemeral_data
                    )
                    logger.debug(f"Trying plugin {plugin}....")

                    try:
                        # Give each plugin at most 10 seconds to produce its data
                        new_execute_data = future.result(timeout=10)
                        if new_execute_data and isinstance(new_execute_data, dict):
                            EXECUTE_DATA.update(new_execute_data)
                    except concurrent.futures.TimeoutError:
                        # The plugin took too long, abort processing for this plugin
                        raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.")
                    finally:
                        # BUG FIX: the previous `with ThreadPoolExecutor()` joined on exit,
                        # so a stuck plugin blocked forever despite the timeout — don't wait.
                        executor.shutdown(wait=False)
                except Exception as e:
                    # Log the error but continue with the next plugin (loguru, consistent with the rest of the file)
                    logger.error(f"Error executing plugin {plugin.__class__.__name__}: {str(e)}")
                    continue

            # Create the ruleset
            ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules)

            # Pass the custom operations dictionary to jsonLogic
            if not jsonLogic(logic=ruleset, data=EXECUTE_DATA, operations=CUSTOM_OPERATIONS):
                result = False

    return {'executed_data': EXECUTE_DATA, 'result': result}
|
||||||
|
|
||||||
|
# Pull in operator implementations and UI choices contributed by each loaded plugin
for plugin in plugin_manager.get_plugins():
    registered_ops = plugin.register_operators()
    if isinstance(registered_ops, dict):
        CUSTOM_OPERATIONS.update(registered_ops)

    extra_operator_choices = plugin.register_operator_choices()
    if isinstance(extra_operator_choices, list):
        operator_choices.extend(extra_operator_choices)

    extra_field_choices = plugin.register_field_choices()
    if isinstance(extra_field_choices, list):
        field_choices.extend(extra_field_choices)
|
||||||
|
|
||||||
|
def collect_ui_edit_stats_extras(watch):
    """Gather HTML fragments from every plugin implementing ui_edit_stats_extras and join them."""
    fragments = []

    for plugin in plugin_manager.get_plugins():
        try:
            html_fragment = plugin.ui_edit_stats_extras(watch=watch)
        except Exception:
            # Plugin doesn't implement the hook, or it failed — skip it
            continue
        if html_fragment:
            fragments.append(html_fragment)

    return "\n".join(fragments) if fragments else ""
|
||||||
|
|
||||||
81
changedetectionio/conditions/blueprint.py
Normal file
@@ -0,0 +1,81 @@
|
|||||||
|
# Flask Blueprint Definition
|
||||||
|
import json
|
||||||
|
|
||||||
|
from flask import Blueprint
|
||||||
|
|
||||||
|
from changedetectionio.conditions import execute_ruleset_against_all_plugins
|
||||||
|
|
||||||
|
|
||||||
|
def construct_blueprint(datastore):
    """Build the 'conditions' Flask blueprint (single-rule verification endpoint).

    :param datastore: Application datastore holding all watch configuration.
    :return: Configured Flask Blueprint.
    """
    from changedetectionio.flask_app import login_optionally_required

    conditions_blueprint = Blueprint('conditions', __name__, template_folder="templates")

    @conditions_blueprint.route("/<string:watch_uuid>/verify-condition-single-rule", methods=['POST'])
    @login_optionally_required
    def verify_condition_single_rule(watch_uuid):
        """Verify a single condition rule against the current snapshot.

        The rule arrives JSON-encoded in the 'rule' query argument; the current
        (possibly unsaved) filter settings arrive in the POST form data.
        """
        from changedetectionio.processors.text_json_diff import prepare_filter_prevew
        from flask import request, jsonify
        from copy import deepcopy

        ephemeral_data = {}

        # Get the watch data
        watch = datastore.data['watching'].get(watch_uuid)
        if not watch:
            return jsonify({'status': 'error', 'message': 'Watch not found'}), 404

        try:
            # Apply all current form settings (filters etc) so the condition is
            # evaluated against the final filtered output text, not the raw snapshot.
            result = prepare_filter_prevew(datastore=datastore,
                                           form_data=request.form,
                                           watch_uuid=watch_uuid)

            ephemeral_data['text'] = result.get('after_filter', '')

            # Clone the watch and override its conditions with the single rule under test
            tmp_watch_data = deepcopy(datastore.data['watching'].get(watch_uuid))

            rule_json = request.args.get("rule")
            # Should be key/value of field, operator, value
            rule = json.loads(rule_json) if rule_json else None

            tmp_watch_data['conditions'] = [rule]
            tmp_watch_data['conditions_match_logic'] = "ALL"  # Single rule, so use ALL

            # Temporary application data structure scoped to just this watch
            temp_app_data = {
                'watching': {
                    watch_uuid: tmp_watch_data
                }
            }

            # Execute the rule against the current snapshot with form data
            result = execute_ruleset_against_all_plugins(
                current_watch_uuid=watch_uuid,
                application_datastruct=temp_app_data,
                ephemeral_data=ephemeral_data
            )

            return jsonify({
                'status': 'success',
                'result': result.get('result'),
                'data': result.get('executed_data'),
                # BUG FIX: `result` is a dict (always truthy) — report the actual pass/fail flag
                'message': 'Condition passes' if result.get('result') else 'Condition does not pass'
            })

        except Exception as e:
            return jsonify({
                'status': 'error',
                'message': f'Error verifying condition: {str(e)}'
            }), 500

    return conditions_blueprint
|
||||||
78
changedetectionio/conditions/default_plugin.py
Normal file
@@ -0,0 +1,78 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
|
import pluggy
|
||||||
|
from price_parser import Price
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
|
||||||
|
|
||||||
|
|
||||||
|
@hookimpl
def register_operators():
    """Register the built-in text-comparison operators for the conditions rule engine."""

    def starts_with(_, text, prefix):
        # Case-insensitive, whitespace-trimmed prefix check
        return text.lower().strip().startswith(str(prefix).strip().lower())

    def ends_with(_, text, suffix):
        # Case-insensitive, whitespace-trimmed suffix check
        return text.lower().strip().endswith(str(suffix).strip().lower())

    def length_min(_, text, strlen):
        # Text must contain at least `strlen` characters
        return len(text) >= int(strlen)

    def length_max(_, text, strlen):
        # Text must contain at most `strlen` characters
        return len(text) <= int(strlen)

    def contains_regex(_, text, pattern):
        """True when `text` contains `pattern` (case-insensitive regex search)."""
        return bool(re.search(pattern, str(text), re.IGNORECASE))

    def not_contains_regex(_, text, pattern):
        """True when `text` does NOT contain `pattern` (case-insensitive regex search)."""
        return not bool(re.search(pattern, str(text), re.IGNORECASE))

    operators = {
        "!contains_regex": not_contains_regex,
        "contains_regex": contains_regex,
        "ends_with": ends_with,
        "length_max": length_max,
        "length_min": length_min,
        "starts_with": starts_with,
    }
    return operators
|
||||||
|
|
||||||
|
@hookimpl
def register_operator_choices():
    """Expose (value, label) pairs for the built-in operators in the rule UI."""
    choices = [
        ("starts_with", "Text Starts With"),
        ("ends_with", "Text Ends With"),
        ("length_min", "Length minimum"),
        ("length_max", "Length maximum"),
        ("contains_regex", "Text Matches Regex"),
        ("!contains_regex", "Text Does NOT Match Regex"),
    ]
    return choices
|
||||||
|
|
||||||
|
@hookimpl
def register_field_choices():
    """Expose (value, label) pairs for the built-in condition fields in the rule UI."""
    choices = [
        ("extracted_number", "Extracted number after 'Filters & Triggers'"),
        # ("meta_description", "Meta Description"),
        # ("meta_keywords", "Meta Keywords"),
        ("page_filtered_text", "Page text after 'Filters & Triggers'"),
        #("page_title", "Page <title>"), # actual page title <title>
    ]
    return choices
|
||||||
|
|
||||||
|
@hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    """Contribute the filtered page text and any extracted price/number to the rule data.

    :param current_watch_uuid: UUID of the watch being evaluated (unused here).
    :param application_datastruct: Application data structure (unused here).
    :param ephemeral_data: Transient data dict; 'text' is the post-filter page text.
    :return: dict of data keys made available to condition rules.
    """
    res = {}
    if 'text' in ephemeral_data:
        res['page_filtered_text'] = ephemeral_data['text']

    # Better to not wrap this in try/except so that the UI can see any errors
    price = Price.fromstring(ephemeral_data.get('text'))
    # Idiom fix: compare to None with `is not None`, not `!=`
    if price and price.amount is not None:
        # This is slightly misleading, it's extracting a PRICE not a Number..
        res['extracted_number'] = float(price.amount)
        logger.debug(f"Extracted number result: '{price}' - returning float({res['extracted_number']})")

    return res
|
||||||
6
changedetectionio/conditions/exceptions.py
Normal file
@@ -0,0 +1,6 @@
|
|||||||
|
class EmptyConditionRuleRowNotUsable(Exception):
    """Raised when a conditions rule row is missing its operator, field or value."""

    def __init__(self):
        message = "One of the 'conditions' rulesets is incomplete, cannot run."
        super().__init__(message)

    def __str__(self):
        # The message passed to Exception.__init__ is args[0]
        return self.args[0]
|
||||||
44
changedetectionio/conditions/form.py
Normal file
@@ -0,0 +1,44 @@
|
|||||||
|
# Condition Rule Form (for each rule row)
|
||||||
|
from wtforms import Form, SelectField, StringField, validators
|
||||||
|
from wtforms import validators
|
||||||
|
|
||||||
|
class ConditionFormRow(Form):
    """One row of a condition rule: a field, an operator and a value."""

    # Plugins must be loaded BEFORE the choices are imported
    from changedetectionio.conditions import plugin_manager
    from changedetectionio.conditions import operator_choices, field_choices

    field = SelectField(
        "Field",
        choices=field_choices,
        validators=[validators.Optional()]
    )

    operator = SelectField(
        "Operator",
        choices=operator_choices,
        validators=[validators.Optional()]
    )

    value = StringField("Value", validators=[validators.Optional()], render_kw={"placeholder": "A value"})

    def validate(self, extra_validators=None):
        """Run the default WTForms validation, then enforce all-or-nothing across the row."""
        if not super().validate(extra_validators):
            return False

        # A partially-filled row is invalid: once any of operator/field/value
        # is provided, all three must be provided.
        row_values = (self.operator.data, self.field.data, self.value.data)
        if any(v not in ("", False, "None", None) for v in row_values):
            if not self.operator.data or self.operator.data == 'None':
                self.operator.errors.append("Operator is required.")
                return False

            if not self.field.data or self.field.data == 'None':
                self.field.errors.append("Field is required.")
                return False

            if not self.value.data:
                self.value.errors.append("Value is required.")
                return False

        return True  # Only True when every check above passed
|
||||||
74
changedetectionio/conditions/pluggy_interface.py
Normal file
@@ -0,0 +1,74 @@
|
|||||||
|
import pluggy
|
||||||
|
import os
|
||||||
|
import importlib
|
||||||
|
import sys
|
||||||
|
from . import default_plugin
|
||||||
|
|
||||||
|
# ✅ Ensure that the namespace in HookspecMarker matches PluginManager
|
||||||
|
PLUGIN_NAMESPACE = "changedetectionio_conditions"
|
||||||
|
|
||||||
|
hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE)
|
||||||
|
hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE)
|
||||||
|
|
||||||
|
|
||||||
|
class ConditionsSpec:
    """Pluggy hook specifications for extending JSON Logic conditions."""

    @hookspec
    def register_operators():
        """Return a dict mapping operator names to callables for JSON Logic."""
        pass

    @hookspec
    def register_operator_choices():
        """Return a list of (value, label) operator choices for the UI."""
        pass

    @hookspec
    def register_field_choices():
        """Return a list of (value, label) field choices for the UI."""
        pass

    @hookspec
    def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
        """Return a dict of extra data to merge into the rule-evaluation data."""
        pass

    @hookspec
    def ui_edit_stats_extras(watch):
        """Return HTML content to add to the stats tab in the edit view."""
        pass
|
||||||
|
|
||||||
|
# Pluggy plugin manager for the conditions namespace
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)

# Make the hook specifications known to the manager (ensures hooks are detected)
plugin_manager.add_hookspecs(ConditionsSpec)

# The built-in plugin is always registered
plugin_manager.register(default_plugin, "default_plugin")
|
||||||
|
|
||||||
|
# ✅ Load plugins from the plugins directory
|
||||||
|
def load_plugins_from_directory():
    """Import every module in the local 'plugins' directory and register it with pluggy."""
    plugins_dir = os.path.join(os.path.dirname(__file__), 'plugins')
    if not os.path.exists(plugins_dir):
        return

    # Every *.py file except __init__.py is considered a plugin module
    for filename in os.listdir(plugins_dir):
        if not filename.endswith(".py") or filename == "__init__.py":
            continue

        module_name = filename[:-3]  # strip the .py extension
        module_path = f"changedetectionio.conditions.plugins.{module_name}"

        try:
            plugin_module = importlib.import_module(module_path)
            # Register the plugin with pluggy
            plugin_manager.register(plugin_module, module_name)
        except (ImportError, AttributeError) as e:
            print(f"Error loading plugin {module_name}: {e}")


# Load plugins from the plugins directory
load_plugins_from_directory()

# Discover installed plugins from external packages (if any)
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
|
||||||
1
changedetectionio/conditions/plugins/__init__.py
Normal file
@@ -0,0 +1 @@
|
|||||||
|
# Import plugins package to make them discoverable
|
||||||
107
changedetectionio/conditions/plugins/levenshtein_plugin.py
Normal file
@@ -0,0 +1,107 @@
|
|||||||
|
import pluggy
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
# Support both plugin systems
|
||||||
|
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
|
||||||
|
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
|
||||||
|
|
||||||
|
def levenshtein_ratio_recent_history(watch, incoming_text=None):
    """
    Compare the most recent snapshot against either `incoming_text` (when given)
    or the previous snapshot, using Levenshtein distance/ratio.

    :param watch: Watch object exposing .history and .get_history_snapshot().
    :param incoming_text: Optional text to compare against the latest snapshot.
    :return: dict with 'distance', 'ratio' and 'percent_similar', or '' on failure.
    """
    try:
        from Levenshtein import ratio, distance
        k = list(watch.history.keys())
        a = None
        b = None

        # When called from ui_edit_stats_extras, we don't have incoming_text:
        # compare the two most recent snapshots (IndexError on short history is
        # caught below, preserving the original best-effort behaviour).
        if incoming_text is None:
            a = watch.get_history_snapshot(timestamp=k[-1])  # Latest snapshot
            b = watch.get_history_snapshot(timestamp=k[-2])  # Previous snapshot

        # Needs atleast one snapshot to compare against
        elif len(k) >= 1:
            a = watch.get_history_snapshot(timestamp=k[-1])  # Latest saved snapshot
            # BUG FIX: previously fell back to the raw history KEY (a timestamp
            # string) when incoming_text was empty — fetch the snapshot text instead.
            b = incoming_text if incoming_text else watch.get_history_snapshot(timestamp=k[-2])

        if a and b:
            distance_value = distance(a, b)
            ratio_value = ratio(a, b)
            return {
                'distance': distance_value,
                'ratio': ratio_value,
                'percent_similar': round(ratio_value * 100, 2)
            }
    except Exception as e:
        logger.warning(f"Unable to calc similarity: {str(e)}")

    return ''
|
||||||
|
|
||||||
|
@conditions_hookimpl
def register_operators():
    # No custom operators contributed by this plugin; return an empty dict for
    # consistency with the other plugins (callers isinstance-check the result).
    return {}
|
||||||
|
|
||||||
|
@conditions_hookimpl
def register_operator_choices():
    # No custom operator choices contributed by this plugin; return an empty
    # list for consistency with the other plugins.
    return []
|
||||||
|
|
||||||
|
|
||||||
|
@conditions_hookimpl
def register_field_choices():
    """Expose the Levenshtein metrics as selectable condition fields."""
    choices = [
        ("levenshtein_ratio", "Levenshtein - Text similarity ratio"),
        ("levenshtein_distance", "Levenshtein - Text change distance"),
    ]
    return choices
|
||||||
|
|
||||||
|
@conditions_hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    """Expose Levenshtein similarity metrics as rule-evaluation data."""
    res = {}
    watch = application_datastruct['watching'].get(current_watch_uuid)

    # ephemeral_data['text'] is the current text after filters — the user may
    # have edited filters without saving them yet.
    if watch and 'text' in ephemeral_data:
        metrics = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text', ''))
        if isinstance(metrics, dict):
            res['levenshtein_ratio'] = metrics.get('ratio', 0)
            res['levenshtein_similarity'] = metrics.get('percent_similar', 0)
            res['levenshtein_distance'] = metrics.get('distance', 0)

    return res
|
||||||
|
|
||||||
|
@global_hookimpl
def ui_edit_stats_extras(watch):
    """Generate Levenshtein stats HTML for the edit view's stats tab (global plugin system).

    BUG FIX: previously had two back-to-back docstrings — the second was a dead
    string statement; merged into one.
    """
    if len(watch.history.keys()) < 2:
        return "<p>Not enough history to calculate Levenshtein metrics</p>"

    try:
        lev_data = levenshtein_ratio_recent_history(watch)
        if not lev_data or not isinstance(lev_data, dict):
            return "<p>Unable to calculate Levenshtein metrics</p>"

        html = f"""
        <div class="levenshtein-stats">
            <h4>Levenshtein Text Similarity Details</h4>
            <table class="pure-table">
                <tbody>
                    <tr>
                        <td>Raw distance (edits needed)</td>
                        <td>{lev_data['distance']}</td>
                    </tr>
                    <tr>
                        <td>Similarity ratio</td>
                        <td>{lev_data['ratio']:.4f}</td>
                    </tr>
                    <tr>
                        <td>Percent similar</td>
                        <td>{lev_data['percent_similar']}%</td>
                    </tr>
                </tbody>
            </table>
            <p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
        </div>
        """
        return html
    except Exception as e:
        logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
        return "<p>Error calculating Levenshtein metrics</p>"
|
||||||
|
|
||||||
82
changedetectionio/conditions/plugins/wordcount_plugin.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
import pluggy
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
# Support both plugin systems
|
||||||
|
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
|
||||||
|
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
|
||||||
|
|
||||||
|
def count_words_in_history(watch, incoming_text=None):
    """Return the whitespace-separated word count of `incoming_text`, or of the
    watch's latest snapshot when no text is supplied. Returns 0 on any failure."""
    try:
        if incoming_text is not None:
            # Called from add_data with the freshly filtered text
            return len(incoming_text.split())

        if watch.history.keys():
            # Called from the UI extras: count the most recent snapshot
            newest_key = list(watch.history.keys())[-1]
            snapshot_text = watch.get_history_snapshot(newest_key)
            return len(snapshot_text.split())

        return 0
    except Exception as e:
        logger.error(f"Error counting words: {str(e)}")
        return 0
|
||||||
|
|
||||||
|
# Implement condition plugin hooks
|
||||||
|
@conditions_hookimpl
def register_operators():
    """This plugin contributes no custom JSON Logic operators."""
    return {}
|
||||||
|
|
||||||
|
@conditions_hookimpl
def register_operator_choices():
    """This plugin contributes no custom operator choices."""
    return []
|
||||||
|
|
||||||
|
@conditions_hookimpl
def register_field_choices():
    """Expose the word-count field so it can be used in condition rules."""
    choices = [
        ("word_count", "Word count of content"),
    ]
    return choices
|
||||||
|
|
||||||
|
@conditions_hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    """Contribute the current word count to the rule-evaluation data."""
    result = {}
    watch = application_datastruct['watching'].get(current_watch_uuid)

    if watch and 'text' in ephemeral_data:
        result['word_count'] = count_words_in_history(watch, ephemeral_data['text'])

    return result
|
||||||
|
|
||||||
|
def _generate_stats_html(watch):
    """Build the stats-tab HTML fragment showing the latest snapshot's word count."""
    latest_word_count = count_words_in_history(watch)

    stats_markup = f"""
    <div class="word-count-stats">
        <h4>Content Analysis</h4>
        <table class="pure-table">
            <tbody>
                <tr>
                    <td>Word count (latest snapshot)</td>
                    <td>{latest_word_count}</td>
                </tr>
            </tbody>
        </table>
        <p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p>
    </div>
    """
    return stats_markup
|
||||||
|
|
||||||
|
@conditions_hookimpl
def ui_edit_stats_extras(watch):
    """Expose the word-count stats HTML through the conditions plugin system."""
    return _generate_stats_html(watch)
|
||||||
|
|
||||||
|
# BUG FIX: this second same-named def shadows the earlier @conditions_hookimpl
# one, silently erasing its hookimpl marker — stack both markers here so the
# surviving function is registered under BOTH plugin namespaces (pluggy markers
# only set attributes on the function, so stacking is safe).
@conditions_hookimpl
@global_hookimpl
def ui_edit_stats_extras(watch):
    """Add word count stats to the UI (global and conditions plugin systems)."""
    return _generate_stats_html(watch)
|
||||||
@@ -7,11 +7,29 @@ import os
|
|||||||
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
|
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
|
||||||
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
|
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
|
||||||
|
|
||||||
|
SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000
|
||||||
|
SCREENSHOT_DEFAULT_QUALITY = 40
|
||||||
|
|
||||||
|
# Maximum total height for the final image (When in stitch mode).
|
||||||
|
# We limit this to 16000px due to the huge amount of RAM that was being used
|
||||||
|
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
|
||||||
|
SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||||
|
|
||||||
|
# The size at which we will switch to stitching method, when below this (and
|
||||||
|
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
|
||||||
|
# screenshot method.
|
||||||
|
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000
|
||||||
|
|
||||||
# available_fetchers() will scan this implementation looking for anything starting with html_
|
# available_fetchers() will scan this implementation looking for anything starting with html_
|
||||||
# this information is used in the form selections
|
# this information is used in the form selections
|
||||||
from changedetectionio.content_fetchers.requests import fetcher as html_requests
|
from changedetectionio.content_fetchers.requests import fetcher as html_requests
|
||||||
|
|
||||||
|
|
||||||
|
import importlib.resources
|
||||||
|
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
|
||||||
|
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
|
||||||
|
|
||||||
|
|
||||||
def available_fetchers():
|
def available_fetchers():
|
||||||
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
|
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
|
||||||
import inspect
|
import inspect
|
||||||
|
|||||||
@@ -63,11 +63,6 @@ class Fetcher():
|
|||||||
# Time ONTOP of the system defined env minimum time
|
# Time ONTOP of the system defined env minimum time
|
||||||
render_extract_delay = 0
|
render_extract_delay = 0
|
||||||
|
|
||||||
def __init__(self):
|
|
||||||
import importlib.resources
|
|
||||||
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
|
|
||||||
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_error(self):
|
def get_error(self):
|
||||||
return self.error
|
return self.error
|
||||||
@@ -87,7 +82,7 @@ class Fetcher():
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def quit(self):
|
def quit(self, watch=None):
|
||||||
return
|
return
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
@@ -143,6 +138,7 @@ class Fetcher():
|
|||||||
logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
|
logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
|
||||||
self.screenshot_step("before-" + str(step_n))
|
self.screenshot_step("before-" + str(step_n))
|
||||||
self.save_step_html("before-" + str(step_n))
|
self.save_step_html("before-" + str(step_n))
|
||||||
|
|
||||||
try:
|
try:
|
||||||
optional_value = step['optional_value']
|
optional_value = step['optional_value']
|
||||||
selector = step['selector']
|
selector = step['selector']
|
||||||
|
|||||||
@@ -4,9 +4,76 @@ from urllib.parse import urlparse
|
|||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
||||||
|
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS
|
||||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
|
||||||
|
|
||||||
|
def capture_full_page(page):
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from multiprocessing import Process, Pipe
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
|
||||||
|
page_height = page.evaluate("document.documentElement.scrollHeight")
|
||||||
|
page_width = page.evaluate("document.documentElement.scrollWidth")
|
||||||
|
original_viewport = page.viewport_size
|
||||||
|
|
||||||
|
logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
|
||||||
|
|
||||||
|
# Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
|
||||||
|
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
|
||||||
|
screenshot_chunks = []
|
||||||
|
y = 0
|
||||||
|
|
||||||
|
if page_height > page.viewport_size['height']:
|
||||||
|
if page_height < step_size:
|
||||||
|
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||||
|
logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
|
||||||
|
# Set viewport to a larger size to capture more content at once
|
||||||
|
page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
|
||||||
|
|
||||||
|
# Capture screenshots in chunks up to the max total height
|
||||||
|
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||||
|
page.request_gc()
|
||||||
|
page.evaluate(f"window.scrollTo(0, {y})")
|
||||||
|
page.request_gc()
|
||||||
|
screenshot_chunks.append(page.screenshot(
|
||||||
|
type="jpeg",
|
||||||
|
full_page=False,
|
||||||
|
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
|
||||||
|
))
|
||||||
|
y += step_size
|
||||||
|
page.request_gc()
|
||||||
|
|
||||||
|
# Restore original viewport size
|
||||||
|
page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||||
|
|
||||||
|
# If we have multiple chunks, stitch them together
|
||||||
|
if len(screenshot_chunks) > 1:
|
||||||
|
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||||
|
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||||
|
parent_conn, child_conn = Pipe()
|
||||||
|
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||||
|
p.start()
|
||||||
|
screenshot = parent_conn.recv_bytes()
|
||||||
|
p.join()
|
||||||
|
logger.debug(
|
||||||
|
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||||
|
# Explicit cleanup
|
||||||
|
del screenshot_chunks
|
||||||
|
del p
|
||||||
|
del parent_conn, child_conn
|
||||||
|
screenshot_chunks = None
|
||||||
|
return screenshot
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||||
|
|
||||||
|
return screenshot_chunks[0]
|
||||||
|
|
||||||
|
|
||||||
class fetcher(Fetcher):
|
class fetcher(Fetcher):
|
||||||
fetcher_description = "Playwright {}/Javascript".format(
|
fetcher_description = "Playwright {}/Javascript".format(
|
||||||
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
|
os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
|
||||||
@@ -59,7 +126,8 @@ class fetcher(Fetcher):
|
|||||||
|
|
||||||
def screenshot_step(self, step_n=''):
|
def screenshot_step(self, step_n=''):
|
||||||
super().screenshot_step(step_n=step_n)
|
super().screenshot_step(step_n=step_n)
|
||||||
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
screenshot = capture_full_page(page=self.page)
|
||||||
|
|
||||||
|
|
||||||
if self.browser_steps_screenshot_path is not None:
|
if self.browser_steps_screenshot_path is not None:
|
||||||
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
|
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))
|
||||||
@@ -88,7 +156,7 @@ class fetcher(Fetcher):
|
|||||||
|
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
import playwright._impl._errors
|
import playwright._impl._errors
|
||||||
from changedetectionio.content_fetchers import visualselector_xpath_selectors
|
import time
|
||||||
self.delete_browser_steps_screenshots()
|
self.delete_browser_steps_screenshots()
|
||||||
response = None
|
response = None
|
||||||
|
|
||||||
@@ -126,7 +194,6 @@ class fetcher(Fetcher):
|
|||||||
browsersteps_interface.page = self.page
|
browsersteps_interface.page = self.page
|
||||||
|
|
||||||
response = browsersteps_interface.action_goto_url(value=url)
|
response = browsersteps_interface.action_goto_url(value=url)
|
||||||
self.headers = response.all_headers()
|
|
||||||
|
|
||||||
if response is None:
|
if response is None:
|
||||||
context.close()
|
context.close()
|
||||||
@@ -134,6 +201,8 @@ class fetcher(Fetcher):
|
|||||||
logger.debug("Content Fetcher > Response object from the browser communication was none")
|
logger.debug("Content Fetcher > Response object from the browser communication was none")
|
||||||
raise EmptyReply(url=url, status_code=None)
|
raise EmptyReply(url=url, status_code=None)
|
||||||
|
|
||||||
|
self.headers = response.all_headers()
|
||||||
|
|
||||||
try:
|
try:
|
||||||
if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
|
if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
|
||||||
browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
|
browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
|
||||||
@@ -162,9 +231,7 @@ class fetcher(Fetcher):
|
|||||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||||
|
|
||||||
if self.status_code != 200 and not ignore_status_codes:
|
if self.status_code != 200 and not ignore_status_codes:
|
||||||
screenshot = self.page.screenshot(type='jpeg', full_page=True,
|
screenshot = capture_full_page(self.page)
|
||||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
|
||||||
|
|
||||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||||
|
|
||||||
if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
|
if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
|
||||||
@@ -179,17 +246,30 @@ class fetcher(Fetcher):
|
|||||||
|
|
||||||
self.page.wait_for_timeout(extra_wait * 1000)
|
self.page.wait_for_timeout(extra_wait * 1000)
|
||||||
|
|
||||||
|
now = time.time()
|
||||||
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
||||||
if current_include_filters is not None:
|
if current_include_filters is not None:
|
||||||
self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
||||||
else:
|
else:
|
||||||
self.page.evaluate("var include_filters=''")
|
self.page.evaluate("var include_filters=''")
|
||||||
|
self.page.request_gc()
|
||||||
|
|
||||||
self.xpath_data = self.page.evaluate(
|
# request_gc before and after evaluate to free up memory
|
||||||
"async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
|
# @todo browsersteps etc
|
||||||
self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}")
|
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||||
|
self.xpath_data = self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||||
|
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||||
|
"max_height": MAX_TOTAL_HEIGHT
|
||||||
|
})
|
||||||
|
self.page.request_gc()
|
||||||
|
|
||||||
|
self.instock_data = self.page.evaluate(INSTOCK_DATA_JS)
|
||||||
|
self.page.request_gc()
|
||||||
|
|
||||||
self.content = self.page.content()
|
self.content = self.page.content()
|
||||||
|
self.page.request_gc()
|
||||||
|
logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
|
||||||
|
|
||||||
# Bug 3 in Playwright screenshot handling
|
# Bug 3 in Playwright screenshot handling
|
||||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||||
# JPEG is better here because the screenshots can be very very large
|
# JPEG is better here because the screenshots can be very very large
|
||||||
@@ -199,13 +279,41 @@ class fetcher(Fetcher):
|
|||||||
# acceptable screenshot quality here
|
# acceptable screenshot quality here
|
||||||
try:
|
try:
|
||||||
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
|
||||||
self.screenshot = self.page.screenshot(type='jpeg',
|
self.screenshot = capture_full_page(page=self.page)
|
||||||
full_page=True,
|
|
||||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)),
|
|
||||||
)
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
# It's likely the screenshot was too long/big and something crashed
|
# It's likely the screenshot was too long/big and something crashed
|
||||||
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
raise ScreenshotUnavailable(url=url, status_code=self.status_code)
|
||||||
finally:
|
finally:
|
||||||
context.close()
|
# Request garbage collection one more time before closing
|
||||||
browser.close()
|
try:
|
||||||
|
self.page.request_gc()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Clean up resources properly
|
||||||
|
try:
|
||||||
|
self.page.request_gc()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
|
||||||
|
try:
|
||||||
|
self.page.close()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
self.page = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
context.close()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
context = None
|
||||||
|
|
||||||
|
try:
|
||||||
|
browser.close()
|
||||||
|
except:
|
||||||
|
pass
|
||||||
|
browser = None
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -6,8 +6,77 @@ from urllib.parse import urlparse
|
|||||||
|
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
|
||||||
|
SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \
|
||||||
|
SCREENSHOT_MAX_TOTAL_HEIGHT
|
||||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
|
||||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError
|
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, \
|
||||||
|
BrowserConnectError
|
||||||
|
|
||||||
|
|
||||||
|
# Bug 3 in Playwright screenshot handling
|
||||||
|
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||||
|
|
||||||
|
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||||
|
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||||
|
# acceptable screenshot quality here
|
||||||
|
async def capture_full_page(page):
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
from multiprocessing import Process, Pipe
|
||||||
|
|
||||||
|
start = time.time()
|
||||||
|
|
||||||
|
page_height = await page.evaluate("document.documentElement.scrollHeight")
|
||||||
|
page_width = await page.evaluate("document.documentElement.scrollWidth")
|
||||||
|
original_viewport = page.viewport
|
||||||
|
|
||||||
|
logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")
|
||||||
|
|
||||||
|
# Bug 3 in Playwright screenshot handling
|
||||||
|
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
||||||
|
# JPEG is better here because the screenshots can be very very large
|
||||||
|
|
||||||
|
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
||||||
|
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
||||||
|
# acceptable screenshot quality here
|
||||||
|
|
||||||
|
|
||||||
|
step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Something that will not cause the GPU to overflow when taking the screenshot
|
||||||
|
screenshot_chunks = []
|
||||||
|
y = 0
|
||||||
|
if page_height > page.viewport['height']:
|
||||||
|
if page_height < step_size:
|
||||||
|
step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
|
||||||
|
await page.setViewport({'width': page.viewport['width'], 'height': step_size})
|
||||||
|
|
||||||
|
while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
|
||||||
|
await page.evaluate(f"window.scrollTo(0, {y})")
|
||||||
|
screenshot_chunks.append(await page.screenshot(type_='jpeg',
|
||||||
|
fullPage=False,
|
||||||
|
quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
|
||||||
|
y += step_size
|
||||||
|
|
||||||
|
await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})
|
||||||
|
|
||||||
|
if len(screenshot_chunks) > 1:
|
||||||
|
from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
|
||||||
|
logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
|
||||||
|
parent_conn, child_conn = Pipe()
|
||||||
|
p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
|
||||||
|
p.start()
|
||||||
|
screenshot = parent_conn.recv_bytes()
|
||||||
|
p.join()
|
||||||
|
logger.debug(
|
||||||
|
f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||||
|
|
||||||
|
screenshot_chunks = None
|
||||||
|
return screenshot
|
||||||
|
|
||||||
|
logger.debug(
|
||||||
|
f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
|
||||||
|
return screenshot_chunks[0]
|
||||||
|
|
||||||
|
|
||||||
class fetcher(Fetcher):
|
class fetcher(Fetcher):
|
||||||
fetcher_description = "Puppeteer/direct {}/Javascript".format(
|
fetcher_description = "Puppeteer/direct {}/Javascript".format(
|
||||||
@@ -78,8 +147,7 @@ class fetcher(Fetcher):
|
|||||||
is_binary,
|
is_binary,
|
||||||
empty_pages_are_a_change
|
empty_pages_are_a_change
|
||||||
):
|
):
|
||||||
|
import re
|
||||||
from changedetectionio.content_fetchers import visualselector_xpath_selectors
|
|
||||||
self.delete_browser_steps_screenshots()
|
self.delete_browser_steps_screenshots()
|
||||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||||
|
|
||||||
@@ -104,6 +172,17 @@ class fetcher(Fetcher):
|
|||||||
# headless - ask a new page
|
# headless - ask a new page
|
||||||
self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
|
self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
|
||||||
|
|
||||||
|
if '--window-size' in self.browser_connection_url:
|
||||||
|
# Be sure the viewport is always the window-size, this is often not the same thing
|
||||||
|
match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url)
|
||||||
|
if match:
|
||||||
|
logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}")
|
||||||
|
await self.page.setViewport({
|
||||||
|
"width": int(match.group(1)),
|
||||||
|
"height": int(match.group(2))
|
||||||
|
})
|
||||||
|
logger.debug(f"Puppeteer viewport size {self.page.viewport}")
|
||||||
|
|
||||||
try:
|
try:
|
||||||
from pyppeteerstealth import inject_evasions_into_page
|
from pyppeteerstealth import inject_evasions_into_page
|
||||||
except ImportError:
|
except ImportError:
|
||||||
@@ -150,7 +229,6 @@ class fetcher(Fetcher):
|
|||||||
|
|
||||||
response = await self.page.goto(url, waitUntil="load")
|
response = await self.page.goto(url, waitUntil="load")
|
||||||
|
|
||||||
|
|
||||||
if response is None:
|
if response is None:
|
||||||
await self.page.close()
|
await self.page.close()
|
||||||
await browser.close()
|
await browser.close()
|
||||||
@@ -181,11 +259,10 @@ class fetcher(Fetcher):
|
|||||||
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
raise PageUnloadable(url=url, status_code=None, message=str(e))
|
||||||
|
|
||||||
if self.status_code != 200 and not ignore_status_codes:
|
if self.status_code != 200 and not ignore_status_codes:
|
||||||
screenshot = await self.page.screenshot(type_='jpeg',
|
screenshot = await capture_full_page(page=self.page)
|
||||||
fullPage=True,
|
|
||||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
|
||||||
|
|
||||||
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
|
||||||
|
|
||||||
content = await self.page.content
|
content = await self.page.content
|
||||||
|
|
||||||
if not empty_pages_are_a_change and len(content.strip()) == 0:
|
if not empty_pages_are_a_change and len(content.strip()) == 0:
|
||||||
@@ -203,46 +280,31 @@ class fetcher(Fetcher):
|
|||||||
|
|
||||||
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
# So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
|
||||||
# Setup the xPath/VisualSelector scraper
|
# Setup the xPath/VisualSelector scraper
|
||||||
if current_include_filters is not None:
|
if current_include_filters:
|
||||||
js = json.dumps(current_include_filters)
|
js = json.dumps(current_include_filters)
|
||||||
await self.page.evaluate(f"var include_filters={js}")
|
await self.page.evaluate(f"var include_filters={js}")
|
||||||
else:
|
else:
|
||||||
await self.page.evaluate(f"var include_filters=''")
|
await self.page.evaluate(f"var include_filters=''")
|
||||||
|
|
||||||
self.xpath_data = await self.page.evaluate(
|
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||||
"async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}")
|
self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
|
||||||
self.instock_data = await self.page.evaluate("async () => {" + self.instock_data_js + "}")
|
"visualselector_xpath_selectors": visualselector_xpath_selectors,
|
||||||
|
"max_height": MAX_TOTAL_HEIGHT
|
||||||
|
})
|
||||||
|
if not self.xpath_data:
|
||||||
|
raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)")
|
||||||
|
|
||||||
|
self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
|
||||||
|
|
||||||
self.content = await self.page.content
|
self.content = await self.page.content
|
||||||
# Bug 3 in Playwright screenshot handling
|
|
||||||
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
|
|
||||||
# JPEG is better here because the screenshots can be very very large
|
|
||||||
|
|
||||||
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
|
self.screenshot = await capture_full_page(page=self.page)
|
||||||
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
|
|
||||||
# acceptable screenshot quality here
|
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
|
||||||
try:
|
logger.success(f"Fetching '{url}' complete, closing page")
|
||||||
self.screenshot = await self.page.screenshot(type_='jpeg',
|
await self.page.close()
|
||||||
fullPage=True,
|
logger.success(f"Fetching '{url}' complete, closing browser")
|
||||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
await browser.close()
|
||||||
except Exception as e:
|
|
||||||
logger.error("Error fetching screenshot")
|
|
||||||
# // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
|
|
||||||
# // @ todo after text extract, we can place some overlay text with red background to say 'croppped'
|
|
||||||
logger.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot')
|
|
||||||
try:
|
|
||||||
self.screenshot = await self.page.screenshot(type_='jpeg',
|
|
||||||
fullPage=False,
|
|
||||||
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
|
|
||||||
except Exception as e:
|
|
||||||
logger.error('ERROR: Failed to get viewport-only reduced screenshot :(')
|
|
||||||
pass
|
|
||||||
finally:
|
|
||||||
# It's good to log here in the case that the browser crashes on shutting down but we still get the data we need
|
|
||||||
logger.success(f"Fetching '{url}' complete, closing page")
|
|
||||||
await self.page.close()
|
|
||||||
logger.success(f"Fetching '{url}' complete, closing browser")
|
|
||||||
await browser.close()
|
|
||||||
logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.")
|
logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.")
|
||||||
|
|
||||||
async def main(self, **kwargs):
|
async def main(self, **kwargs):
|
||||||
|
|||||||
@@ -28,6 +28,7 @@ class fetcher(Fetcher):
|
|||||||
|
|
||||||
import chardet
|
import chardet
|
||||||
import requests
|
import requests
|
||||||
|
from requests.exceptions import ProxyError, ConnectionError, RequestException
|
||||||
|
|
||||||
if self.browser_steps_get_valid_steps():
|
if self.browser_steps_get_valid_steps():
|
||||||
raise BrowserStepsInUnsupportedFetcher(url=url)
|
raise BrowserStepsInUnsupportedFetcher(url=url)
|
||||||
@@ -52,14 +53,19 @@ class fetcher(Fetcher):
|
|||||||
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
|
||||||
from requests_file import FileAdapter
|
from requests_file import FileAdapter
|
||||||
session.mount('file://', FileAdapter())
|
session.mount('file://', FileAdapter())
|
||||||
|
try:
|
||||||
r = session.request(method=request_method,
|
r = session.request(method=request_method,
|
||||||
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
|
||||||
url=url,
|
url=url,
|
||||||
headers=request_headers,
|
headers=request_headers,
|
||||||
timeout=timeout,
|
timeout=timeout,
|
||||||
proxies=proxies,
|
proxies=proxies,
|
||||||
verify=False)
|
verify=False)
|
||||||
|
except Exception as e:
|
||||||
|
msg = str(e)
|
||||||
|
if proxies and 'SOCKSHTTPSConnectionPool' in msg:
|
||||||
|
msg = f"Proxy connection failed? {msg}"
|
||||||
|
raise Exception(msg) from e
|
||||||
|
|
||||||
# If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
|
# If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
|
||||||
# For example - some sites don't tell us it's utf-8, but return utf-8 content
|
# For example - some sites don't tell us it's utf-8, but return utf-8 content
|
||||||
@@ -96,3 +102,17 @@ class fetcher(Fetcher):
|
|||||||
|
|
||||||
|
|
||||||
self.raw_content = r.content
|
self.raw_content = r.content
|
||||||
|
|
||||||
|
def quit(self, watch=None):
|
||||||
|
|
||||||
|
# In case they switched to `requests` fetcher from something else
|
||||||
|
# Then the screenshot could be old, in any case, it's not used here.
|
||||||
|
# REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing
|
||||||
|
if strtobool(os.getenv("REMOVE_REQUESTS_OLD_SCREENSHOTS", 'true')):
|
||||||
|
screenshot = watch.get_screenshot()
|
||||||
|
if screenshot:
|
||||||
|
try:
|
||||||
|
os.unlink(screenshot)
|
||||||
|
except Exception as e:
|
||||||
|
logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}")
|
||||||
|
|
||||||
|
|||||||
@@ -1,190 +0,0 @@
|
|||||||
module.exports = async ({page, context}) => {
|
|
||||||
|
|
||||||
var {
|
|
||||||
url,
|
|
||||||
execute_js,
|
|
||||||
user_agent,
|
|
||||||
extra_wait_ms,
|
|
||||||
req_headers,
|
|
||||||
include_filters,
|
|
||||||
xpath_element_js,
|
|
||||||
screenshot_quality,
|
|
||||||
proxy_username,
|
|
||||||
proxy_password,
|
|
||||||
disk_cache_dir,
|
|
||||||
no_cache_list,
|
|
||||||
block_url_list,
|
|
||||||
} = context;
|
|
||||||
|
|
||||||
await page.setBypassCSP(true)
|
|
||||||
await page.setExtraHTTPHeaders(req_headers);
|
|
||||||
|
|
||||||
if (user_agent) {
|
|
||||||
await page.setUserAgent(user_agent);
|
|
||||||
}
|
|
||||||
// https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded
|
|
||||||
|
|
||||||
await page.setDefaultNavigationTimeout(0);
|
|
||||||
|
|
||||||
if (proxy_username) {
|
|
||||||
// Setting Proxy-Authentication header is deprecated, and doing so can trigger header change errors from Puppeteer
|
|
||||||
// https://github.com/puppeteer/puppeteer/issues/676 ?
|
|
||||||
// https://help.brightdata.com/hc/en-us/articles/12632549957649-Proxy-Manager-How-to-Guides#h_01HAKWR4Q0AFS8RZTNYWRDFJC2
|
|
||||||
// https://cri.dev/posts/2020-03-30-How-to-solve-Puppeteer-Chrome-Error-ERR_INVALID_ARGUMENT/
|
|
||||||
await page.authenticate({
|
|
||||||
username: proxy_username,
|
|
||||||
password: proxy_password
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
await page.setViewport({
|
|
||||||
width: 1024,
|
|
||||||
height: 768,
|
|
||||||
deviceScaleFactor: 1,
|
|
||||||
});
|
|
||||||
|
|
||||||
await page.setRequestInterception(true);
|
|
||||||
if (disk_cache_dir) {
|
|
||||||
console.log(">>>>>>>>>>>>>>> LOCAL DISK CACHE ENABLED <<<<<<<<<<<<<<<<<<<<<");
|
|
||||||
}
|
|
||||||
const fs = require('fs');
|
|
||||||
const crypto = require('crypto');
|
|
||||||
|
|
||||||
function file_is_expired(file_path) {
|
|
||||||
if (!fs.existsSync(file_path)) {
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
var stats = fs.statSync(file_path);
|
|
||||||
const now_date = new Date();
|
|
||||||
const expire_seconds = 300;
|
|
||||||
if ((now_date / 1000) - (stats.mtime.getTime() / 1000) > expire_seconds) {
|
|
||||||
console.log("CACHE EXPIRED: " + file_path);
|
|
||||||
return true;
|
|
||||||
}
|
|
||||||
return false;
|
|
||||||
|
|
||||||
}
|
|
||||||
|
|
||||||
page.on('request', async (request) => {
|
|
||||||
// General blocking of requests that waste traffic
|
|
||||||
if (block_url_list.some(substring => request.url().toLowerCase().includes(substring))) return request.abort();
|
|
||||||
|
|
||||||
if (disk_cache_dir) {
|
|
||||||
const url = request.url();
|
|
||||||
const key = crypto.createHash('md5').update(url).digest("hex");
|
|
||||||
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
|
|
||||||
|
|
||||||
// https://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js
|
|
||||||
|
|
||||||
if (fs.existsSync(dir_path + key)) {
|
|
||||||
console.log("* CACHE HIT , using - " + dir_path + key + " - " + url);
|
|
||||||
const cached_data = fs.readFileSync(dir_path + key);
|
|
||||||
// @todo headers can come from dir_path+key+".meta" json file
|
|
||||||
request.respond({
|
|
||||||
status: 200,
|
|
||||||
//contentType: 'text/html', //@todo
|
|
||||||
body: cached_data
|
|
||||||
});
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
request.continue();
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
if (disk_cache_dir) {
|
|
||||||
page.on('response', async (response) => {
|
|
||||||
const url = response.url();
|
|
||||||
// Basic filtering for sane responses
|
|
||||||
if (response.request().method() != 'GET' || response.request().resourceType() == 'xhr' || response.request().resourceType() == 'document' || response.status() != 200) {
|
|
||||||
console.log("Skipping (not useful) - Status:" + response.status() + " Method:" + response.request().method() + " ResourceType:" + response.request().resourceType() + " " + url);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (no_cache_list.some(substring => url.toLowerCase().includes(substring))) {
|
|
||||||
console.log("Skipping (no_cache_list) - " + url);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
if (url.toLowerCase().includes('data:')) {
|
|
||||||
console.log("Skipping (embedded-data) - " + url);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
response.buffer().then(buffer => {
|
|
||||||
if (buffer.length > 100) {
|
|
||||||
console.log("Cache - Saving " + response.request().method() + " - " + url + " - " + response.request().resourceType());
|
|
||||||
|
|
||||||
const key = crypto.createHash('md5').update(url).digest("hex");
|
|
||||||
const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/';
|
|
||||||
|
|
||||||
if (!fs.existsSync(dir_path)) {
|
|
||||||
fs.mkdirSync(dir_path, {recursive: true})
|
|
||||||
}
|
|
||||||
|
|
||||||
if (fs.existsSync(dir_path + key)) {
|
|
||||||
if (file_is_expired(dir_path + key)) {
|
|
||||||
fs.writeFileSync(dir_path + key, buffer);
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
fs.writeFileSync(dir_path + key, buffer);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
const r = await page.goto(url, {
|
|
||||||
waitUntil: 'load'
|
|
||||||
});
|
|
||||||
|
|
||||||
await page.waitForTimeout(1000);
|
|
||||||
await page.waitForTimeout(extra_wait_ms);
|
|
||||||
|
|
||||||
if (execute_js) {
|
|
||||||
await page.evaluate(execute_js);
|
|
||||||
await page.waitForTimeout(200);
|
|
||||||
}
|
|
||||||
|
|
||||||
var xpath_data;
|
|
||||||
var instock_data;
|
|
||||||
try {
|
|
||||||
// Not sure the best way here, in the future this should be a new package added to npm then run in evaluatedCode
|
|
||||||
// (Once the old playwright is removed)
|
|
||||||
xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters);
|
|
||||||
instock_data = await page.evaluate(() => {%instock_scrape_code%});
|
|
||||||
} catch (e) {
|
|
||||||
console.log(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Protocol error (Page.captureScreenshot): Cannot take screenshot with 0 width can come from a proxy auth failure
|
|
||||||
// Wrap it here (for now)
|
|
||||||
|
|
||||||
var b64s = false;
|
|
||||||
try {
|
|
||||||
b64s = await page.screenshot({encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg'});
|
|
||||||
} catch (e) {
|
|
||||||
console.log(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
// May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw'
|
|
||||||
if (!b64s) {
|
|
||||||
// @todo after text extract, we can place some overlay text with red background to say 'croppped'
|
|
||||||
console.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot');
|
|
||||||
try {
|
|
||||||
b64s = await page.screenshot({encoding: "base64", quality: screenshot_quality, type: 'jpeg'});
|
|
||||||
} catch (e) {
|
|
||||||
console.log(e);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
var html = await page.content();
|
|
||||||
return {
|
|
||||||
data: {
|
|
||||||
'content': html,
|
|
||||||
'headers': r.headers(),
|
|
||||||
'instock_data': instock_data,
|
|
||||||
'screenshot': b64s,
|
|
||||||
'status_code': r.status(),
|
|
||||||
'xpath_data': xpath_data
|
|
||||||
},
|
|
||||||
type: 'application/json',
|
|
||||||
};
|
|
||||||
};
|
|
||||||
@@ -1,216 +1,235 @@
|
|||||||
// Restock Detector
|
async () => {
|
||||||
// (c) Leigh Morresi dgtlmoon@gmail.com
|
|
||||||
//
|
|
||||||
// Assumes the product is in stock to begin with, unless the following appears above the fold ;
|
|
||||||
// - outOfStockTexts appears above the fold (out of stock)
|
|
||||||
// - negateOutOfStockRegex (really is in stock)
|
|
||||||
|
|
||||||
function isItemInStock() {
|
function isItemInStock() {
|
||||||
// @todo Pass these in so the same list can be used in non-JS fetchers
|
// @todo Pass these in so the same list can be used in non-JS fetchers
|
||||||
const outOfStockTexts = [
|
const outOfStockTexts = [
|
||||||
' أخبرني عندما يتوفر',
|
' أخبرني عندما يتوفر',
|
||||||
'0 in stock',
|
'0 in stock',
|
||||||
'actuellement indisponible',
|
'actuellement indisponible',
|
||||||
'agotado',
|
'agotado',
|
||||||
'article épuisé',
|
'article épuisé',
|
||||||
'artikel zurzeit vergriffen',
|
'artikel zurzeit vergriffen',
|
||||||
'as soon as stock is available',
|
'as soon as stock is available',
|
||||||
'ausverkauft', // sold out
|
'aucune offre n\'est disponible',
|
||||||
'available for back order',
|
'ausverkauft', // sold out
|
||||||
'awaiting stock',
|
'available for back order',
|
||||||
'back in stock soon',
|
'awaiting stock',
|
||||||
'back-order or out of stock',
|
'back in stock soon',
|
||||||
'backordered',
|
'back-order or out of stock',
|
||||||
'benachrichtigt mich', // notify me
|
'backordered',
|
||||||
'brak na stanie',
|
'benachrichtigt mich', // notify me
|
||||||
'brak w magazynie',
|
'brak na stanie',
|
||||||
'coming soon',
|
'brak w magazynie',
|
||||||
'currently have any tickets for this',
|
'coming soon',
|
||||||
'currently unavailable',
|
'currently have any tickets for this',
|
||||||
'dieser artikel ist bald wieder verfügbar',
|
'currently unavailable',
|
||||||
'dostępne wkrótce',
|
'dieser artikel ist bald wieder verfügbar',
|
||||||
'en rupture de stock',
|
'dostępne wkrótce',
|
||||||
'esgotado',
|
'en rupture',
|
||||||
'indisponível',
|
'esgotado',
|
||||||
'isn\'t in stock right now',
|
'in kürze lieferbar',
|
||||||
'isnt in stock right now',
|
'indisponible',
|
||||||
'isn’t in stock right now',
|
'indisponível',
|
||||||
'item is no longer available',
|
'isn\'t in stock right now',
|
||||||
'let me know when it\'s available',
|
'isnt in stock right now',
|
||||||
'mail me when available',
|
'isn’t in stock right now',
|
||||||
'message if back in stock',
|
'item is no longer available',
|
||||||
'mevcut değil',
|
'let me know when it\'s available',
|
||||||
'nachricht bei',
|
'mail me when available',
|
||||||
'nicht auf lager',
|
'message if back in stock',
|
||||||
'nicht lagernd',
|
'mevcut değil',
|
||||||
'nicht lieferbar',
|
'nachricht bei',
|
||||||
'nicht verfügbar',
|
'nicht auf lager',
|
||||||
'nicht vorrätig',
|
'nicht lagernd',
|
||||||
'nicht zur verfügung',
|
'nicht lieferbar',
|
||||||
'nie znaleziono produktów',
|
'nicht verfügbar',
|
||||||
'niet beschikbaar',
|
'nicht vorrätig',
|
||||||
'niet leverbaar',
|
'nicht zur verfügung',
|
||||||
'niet op voorraad',
|
'nie znaleziono produktów',
|
||||||
'no disponible',
|
'niet beschikbaar',
|
||||||
'no longer in stock',
|
'niet leverbaar',
|
||||||
'no tickets available',
|
'niet op voorraad',
|
||||||
'not available',
|
'no disponible',
|
||||||
'not currently available',
|
'no featured offers available',
|
||||||
'not in stock',
|
'no longer available',
|
||||||
'notify me when available',
|
'no longer in stock',
|
||||||
'notify me',
|
'no tickets available',
|
||||||
'notify when available',
|
'non disponibile',
|
||||||
'não disponível',
|
'non disponible',
|
||||||
'não estamos a aceitar encomendas',
|
'not available',
|
||||||
'out of stock',
|
'not currently available',
|
||||||
'out-of-stock',
|
'not in stock',
|
||||||
'prodotto esaurito',
|
'notify me when available',
|
||||||
'produkt niedostępny',
|
'notify me',
|
||||||
'sold out',
|
'notify when available',
|
||||||
'sold-out',
|
'não disponível',
|
||||||
'stokta yok',
|
'não estamos a aceitar encomendas',
|
||||||
'temporarily out of stock',
|
'out of stock',
|
||||||
'temporarily unavailable',
|
'out-of-stock',
|
||||||
'there were no search results for',
|
'plus disponible',
|
||||||
'this item is currently unavailable',
|
'prodotto esaurito',
|
||||||
'tickets unavailable',
|
'produkt niedostępny',
|
||||||
'tijdelijk uitverkocht',
|
'rupture',
|
||||||
'tükendi',
|
'sold out',
|
||||||
'unavailable nearby',
|
'sold-out',
|
||||||
'unavailable tickets',
|
'stok habis',
|
||||||
'vergriffen',
|
'stok kosong',
|
||||||
'vorbestellen',
|
'stok varian ini habis',
|
||||||
'vorbestellung ist bald möglich',
|
'stokta yok',
|
||||||
'we don\'t currently have any',
|
'temporarily out of stock',
|
||||||
'we couldn\'t find any products that match',
|
'temporarily unavailable',
|
||||||
'we do not currently have an estimate of when this product will be back in stock.',
|
'there were no search results for',
|
||||||
'we don\'t know when or if this item will be back in stock.',
|
'this item is currently unavailable',
|
||||||
'we were not able to find a match',
|
'tickets unavailable',
|
||||||
'when this arrives in stock',
|
'tidak dijual',
|
||||||
'zur zeit nicht an lager',
|
'tidak tersedia',
|
||||||
'品切れ',
|
'tijdelijk uitverkocht',
|
||||||
'已售',
|
'tiket tidak tersedia',
|
||||||
'已售完',
|
'tükendi',
|
||||||
'품절'
|
'unavailable nearby',
|
||||||
];
|
'unavailable tickets',
|
||||||
|
'vergriffen',
|
||||||
|
'vorbestellen',
|
||||||
|
'vorbestellung ist bald möglich',
|
||||||
|
'we couldn\'t find any products that match',
|
||||||
|
'we do not currently have an estimate of when this product will be back in stock.',
|
||||||
|
'we don\'t currently have any',
|
||||||
|
'we don\'t know when or if this item will be back in stock.',
|
||||||
|
'we were not able to find a match',
|
||||||
|
'when this arrives in stock',
|
||||||
|
'when this item is available to order',
|
||||||
|
'zur zeit nicht an lager',
|
||||||
|
'épuisé',
|
||||||
|
'品切れ',
|
||||||
|
'已售',
|
||||||
|
'已售完',
|
||||||
|
'품절'
|
||||||
|
];
|
||||||
|
|
||||||
|
|
||||||
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
|
const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0);
|
||||||
|
|
||||||
function getElementBaseText(element) {
|
function getElementBaseText(element) {
|
||||||
// .textContent can include text from children which may give the wrong results
|
// .textContent can include text from children which may give the wrong results
|
||||||
// scan only immediate TEXT_NODEs, which will be a child of the element
|
// scan only immediate TEXT_NODEs, which will be a child of the element
|
||||||
var text = "";
|
var text = "";
|
||||||
for (var i = 0; i < element.childNodes.length; ++i)
|
for (var i = 0; i < element.childNodes.length; ++i)
|
||||||
if (element.childNodes[i].nodeType === Node.TEXT_NODE)
|
if (element.childNodes[i].nodeType === Node.TEXT_NODE)
|
||||||
text += element.childNodes[i].textContent;
|
text += element.childNodes[i].textContent;
|
||||||
return text.toLowerCase().trim();
|
return text.toLowerCase().trim();
|
||||||
}
|
}
|
||||||
|
|
||||||
const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
|
const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
|
||||||
|
|
||||||
// The out-of-stock or in-stock-text is generally always above-the-fold
|
// The out-of-stock or in-stock-text is generally always above-the-fold
|
||||||
// and often below-the-fold is a list of related products that may or may not contain trigger text
|
// and often below-the-fold is a list of related products that may or may not contain trigger text
|
||||||
// so it's good to filter to just the 'above the fold' elements
|
// so it's good to filter to just the 'above the fold' elements
|
||||||
// and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
|
// and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
|
||||||
|
|
||||||
|
function elementIsInEyeBallRange(element) {
|
||||||
|
// outside the 'fold' or some weird text in the heading area
|
||||||
|
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
|
||||||
|
// Note: theres also an automated test that places the 'out of stock' text fairly low down
|
||||||
|
// Skip text that could be in the header area
|
||||||
|
if (element.getBoundingClientRect().bottom + window.scrollY <= 300 ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
// Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in there
|
||||||
|
if (element.getBoundingClientRect().bottom + window.scrollY >= 1300 ) {
|
||||||
|
return false;
|
||||||
|
}
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
// @todo - if it's SVG or IMG, go into image diff mode
|
// @todo - if it's SVG or IMG, go into image diff mode
|
||||||
// %ELEMENTS% replaced at injection time because different interfaces use it with different settings
|
|
||||||
|
|
||||||
console.log("Scanning %ELEMENTS%");
|
function collectVisibleElements(parent, visibleElements) {
|
||||||
|
if (!parent) return; // Base case: if parent is null or undefined, return
|
||||||
|
|
||||||
function collectVisibleElements(parent, visibleElements) {
|
// Add the parent itself to the visible elements array if it's of the specified types
|
||||||
if (!parent) return; // Base case: if parent is null or undefined, return
|
visibleElements.push(parent);
|
||||||
|
|
||||||
// Add the parent itself to the visible elements array if it's of the specified types
|
// Iterate over the parent's children
|
||||||
visibleElements.push(parent);
|
const children = parent.children;
|
||||||
|
for (let i = 0; i < children.length; i++) {
|
||||||
// Iterate over the parent's children
|
const child = children[i];
|
||||||
const children = parent.children;
|
if (
|
||||||
for (let i = 0; i < children.length; i++) {
|
child.nodeType === Node.ELEMENT_NODE &&
|
||||||
const child = children[i];
|
window.getComputedStyle(child).display !== 'none' &&
|
||||||
if (
|
window.getComputedStyle(child).visibility !== 'hidden' &&
|
||||||
child.nodeType === Node.ELEMENT_NODE &&
|
child.offsetWidth >= 0 &&
|
||||||
window.getComputedStyle(child).display !== 'none' &&
|
child.offsetHeight >= 0 &&
|
||||||
window.getComputedStyle(child).visibility !== 'hidden' &&
|
window.getComputedStyle(child).contentVisibility !== 'hidden'
|
||||||
child.offsetWidth >= 0 &&
|
) {
|
||||||
child.offsetHeight >= 0 &&
|
// If the child is an element and is visible, recursively collect visible elements
|
||||||
window.getComputedStyle(child).contentVisibility !== 'hidden'
|
collectVisibleElements(child, visibleElements);
|
||||||
) {
|
}
|
||||||
// If the child is an element and is visible, recursively collect visible elements
|
|
||||||
collectVisibleElements(child, visibleElements);
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
const elementsToScan = [];
|
const elementsToScan = [];
|
||||||
collectVisibleElements(document.body, elementsToScan);
|
collectVisibleElements(document.body, elementsToScan);
|
||||||
|
|
||||||
var elementText = "";
|
var elementText = "";
|
||||||
|
|
||||||
// REGEXS THAT REALLY MEAN IT'S IN STOCK
|
// REGEXS THAT REALLY MEAN IT'S IN STOCK
|
||||||
for (let i = elementsToScan.length - 1; i >= 0; i--) {
|
for (let i = elementsToScan.length - 1; i >= 0; i--) {
|
||||||
const element = elementsToScan[i];
|
const element = elementsToScan[i];
|
||||||
|
|
||||||
// outside the 'fold' or some weird text in the heading area
|
if (!elementIsInEyeBallRange(element)) {
|
||||||
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
|
continue
|
||||||
if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
|
}
|
||||||
continue
|
|
||||||
|
elementText = "";
|
||||||
|
try {
|
||||||
|
if (element.tagName.toLowerCase() === "input") {
|
||||||
|
elementText = element.value.toLowerCase().trim();
|
||||||
|
} else {
|
||||||
|
elementText = getElementBaseText(element);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
|
||||||
|
}
|
||||||
|
if (elementText.length) {
|
||||||
|
// try which ones could mean its in stock
|
||||||
|
if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
|
||||||
|
console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
|
||||||
|
element.style.border = "2px solid green"; // highlight the element that was detected as in stock
|
||||||
|
return 'Possibly in stock';
|
||||||
|
}
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
elementText = "";
|
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
|
||||||
try {
|
for (let i = elementsToScan.length - 1; i >= 0; i--) {
|
||||||
|
const element = elementsToScan[i];
|
||||||
|
|
||||||
|
if (!elementIsInEyeBallRange(element)) {
|
||||||
|
continue
|
||||||
|
}
|
||||||
|
elementText = "";
|
||||||
if (element.tagName.toLowerCase() === "input") {
|
if (element.tagName.toLowerCase() === "input") {
|
||||||
elementText = element.value.toLowerCase().trim();
|
elementText = element.value.toLowerCase().trim();
|
||||||
} else {
|
} else {
|
||||||
elementText = getElementBaseText(element);
|
elementText = getElementBaseText(element);
|
||||||
}
|
}
|
||||||
} catch (e) {
|
|
||||||
console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (elementText.length) {
|
if (elementText.length) {
|
||||||
// try which ones could mean its in stock
|
// and these mean its out of stock
|
||||||
if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
|
for (const outOfStockText of outOfStockTexts) {
|
||||||
console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
|
if (elementText.includes(outOfStockText)) {
|
||||||
return 'Possibly in stock';
|
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
|
||||||
}
|
element.style.border = "2px solid red"; // highlight the element that was detected as out of stock
|
||||||
}
|
return outOfStockText; // item is out of stock
|
||||||
}
|
}
|
||||||
|
|
||||||
// OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
|
|
||||||
for (let i = elementsToScan.length - 1; i >= 0; i--) {
|
|
||||||
const element = elementsToScan[i];
|
|
||||||
// outside the 'fold' or some weird text in the heading area
|
|
||||||
// .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
|
|
||||||
// Note: theres also an automated test that places the 'out of stock' text fairly low down
|
|
||||||
if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
|
|
||||||
continue
|
|
||||||
}
|
|
||||||
elementText = "";
|
|
||||||
if (element.tagName.toLowerCase() === "input") {
|
|
||||||
elementText = element.value.toLowerCase().trim();
|
|
||||||
} else {
|
|
||||||
elementText = getElementBaseText(element);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (elementText.length) {
|
|
||||||
// and these mean its out of stock
|
|
||||||
for (const outOfStockText of outOfStockTexts) {
|
|
||||||
if (elementText.includes(outOfStockText)) {
|
|
||||||
console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
|
|
||||||
return outOfStockText; // item is out of stock
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`)
|
||||||
|
return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
|
||||||
}
|
}
|
||||||
|
|
||||||
console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`)
|
|
||||||
return 'Possibly in stock'; // possibly in stock, cant decide otherwise.
|
|
||||||
}
|
|
||||||
|
|
||||||
// returns the element text that makes it think it's out of stock
|
// returns the element text that makes it think it's out of stock
|
||||||
return isItemInStock().trim()
|
return isItemInStock().trim()
|
||||||
|
}
|
||||||
|
|
||||||
|
|||||||
@@ -1,282 +1,284 @@
|
|||||||
// Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com)
|
async (options) => {
|
||||||
// All rights reserved.
|
|
||||||
|
|
||||||
// @file Scrape the page looking for elements of concern (%ELEMENTS%)
|
let visualselector_xpath_selectors = options.visualselector_xpath_selectors
|
||||||
// http://matatk.agrip.org.uk/tests/position-and-width/
|
let max_height = options.max_height
|
||||||
// https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate
|
|
||||||
//
|
|
||||||
// Some pages like https://www.londonstockexchange.com/stock/NCCL/ncondezi-energy-limited/analysis
|
|
||||||
// will automatically force a scroll somewhere, so include the position offset
|
|
||||||
// Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing
|
|
||||||
var scroll_y = 0;
|
|
||||||
try {
|
|
||||||
scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
|
|
||||||
} catch (e) {
|
|
||||||
console.log(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
var scroll_y = 0;
|
||||||
// Include the getXpath script directly, easier than fetching
|
|
||||||
function getxpath(e) {
|
|
||||||
var n = e;
|
|
||||||
if (n && n.id) return '//*[@id="' + n.id + '"]';
|
|
||||||
for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
|
|
||||||
for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
|
|
||||||
for (d = n.nextSibling; d;) {
|
|
||||||
if (d.nodeName === n.nodeName) {
|
|
||||||
r = !0;
|
|
||||||
break
|
|
||||||
}
|
|
||||||
d = d.nextSibling
|
|
||||||
}
|
|
||||||
o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
|
|
||||||
}
|
|
||||||
return o.length ? "/" + o.reverse().join("/") : ""
|
|
||||||
}
|
|
||||||
|
|
||||||
const findUpTag = (el) => {
|
|
||||||
let r = el
|
|
||||||
chained_css = [];
|
|
||||||
depth = 0;
|
|
||||||
|
|
||||||
// Strategy 1: If it's an input, with name, and there's only one, prefer that
|
|
||||||
if (el.name !== undefined && el.name.length) {
|
|
||||||
var proposed = el.tagName + "[name=" + el.name + "]";
|
|
||||||
var proposed_element = window.document.querySelectorAll(proposed);
|
|
||||||
if (proposed_element.length) {
|
|
||||||
if (proposed_element.length === 1) {
|
|
||||||
return proposed;
|
|
||||||
} else {
|
|
||||||
// Some sites change ID but name= stays the same, we can hit it if we know the index
|
|
||||||
// Find all the elements that match and work out the input[n]
|
|
||||||
var n = Array.from(proposed_element).indexOf(el);
|
|
||||||
// Return a Playwright selector for nthinput[name=zipcode]
|
|
||||||
return proposed + " >> nth=" + n;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
|
|
||||||
while (r.parentNode) {
|
|
||||||
if (depth === 5) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if ('' !== r.id) {
|
|
||||||
chained_css.unshift("#" + CSS.escape(r.id));
|
|
||||||
final_selector = chained_css.join(' > ');
|
|
||||||
// Be sure theres only one, some sites have multiples of the same ID tag :-(
|
|
||||||
if (window.document.querySelectorAll(final_selector).length === 1) {
|
|
||||||
return final_selector;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
} else {
|
|
||||||
chained_css.unshift(r.tagName.toLowerCase());
|
|
||||||
}
|
|
||||||
r = r.parentNode;
|
|
||||||
depth += 1;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// @todo - if it's SVG or IMG, go into image diff mode
|
|
||||||
// %ELEMENTS% replaced at injection time because different interfaces use it with different settings
|
|
||||||
|
|
||||||
var size_pos = [];
|
|
||||||
// after page fetch, inject this JS
|
|
||||||
// build a map of all elements and their positions (maybe that only include text?)
|
|
||||||
var bbox;
|
|
||||||
console.log("Scanning %ELEMENTS%");
|
|
||||||
|
|
||||||
function collectVisibleElements(parent, visibleElements) {
|
|
||||||
if (!parent) return; // Base case: if parent is null or undefined, return
|
|
||||||
|
|
||||||
|
|
||||||
// Add the parent itself to the visible elements array if it's of the specified types
|
|
||||||
const tagName = parent.tagName.toLowerCase();
|
|
||||||
if ("%ELEMENTS%".split(',').includes(tagName)) {
|
|
||||||
visibleElements.push(parent);
|
|
||||||
}
|
|
||||||
|
|
||||||
// Iterate over the parent's children
|
|
||||||
const children = parent.children;
|
|
||||||
for (let i = 0; i < children.length; i++) {
|
|
||||||
const child = children[i];
|
|
||||||
if (
|
|
||||||
child.nodeType === Node.ELEMENT_NODE &&
|
|
||||||
window.getComputedStyle(child).display !== 'none' &&
|
|
||||||
window.getComputedStyle(child).visibility !== 'hidden' &&
|
|
||||||
child.offsetWidth >= 0 &&
|
|
||||||
child.offsetHeight >= 0 &&
|
|
||||||
window.getComputedStyle(child).contentVisibility !== 'hidden'
|
|
||||||
) {
|
|
||||||
// If the child is an element and is visible, recursively collect visible elements
|
|
||||||
collectVisibleElements(child, visibleElements);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
// Create an array to hold the visible elements
|
|
||||||
const visibleElementsArray = [];
|
|
||||||
|
|
||||||
// Call collectVisibleElements with the starting parent element
|
|
||||||
collectVisibleElements(document.body, visibleElementsArray);
|
|
||||||
|
|
||||||
|
|
||||||
visibleElementsArray.forEach(function (element) {
|
|
||||||
|
|
||||||
bbox = element.getBoundingClientRect();
|
|
||||||
|
|
||||||
// Skip really small ones, and where width or height ==0
|
|
||||||
if (bbox['width'] * bbox['height'] < 10) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// Don't include elements that are offset from canvas
|
|
||||||
if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) {
|
|
||||||
return
|
|
||||||
}
|
|
||||||
|
|
||||||
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
|
|
||||||
// it should not traverse when we know we can anchor off just an ID one level up etc..
|
|
||||||
// maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
|
|
||||||
|
|
||||||
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
|
|
||||||
xpath_result = false;
|
|
||||||
try {
|
try {
|
||||||
var d = findUpTag(element);
|
scroll_y = +document.documentElement.scrollTop || document.body.scrollTop
|
||||||
if (d) {
|
|
||||||
xpath_result = d;
|
|
||||||
}
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log(e);
|
console.log(e);
|
||||||
}
|
}
|
||||||
// You could swap it and default to getXpath and then try the smarter one
|
|
||||||
// default back to the less intelligent one
|
// Include the getXpath script directly, easier than fetching
|
||||||
if (!xpath_result) {
|
function getxpath(e) {
|
||||||
try {
|
var n = e;
|
||||||
// I've seen on FB and eBay that this doesnt work
|
if (n && n.id) return '//*[@id="' + n.id + '"]';
|
||||||
// ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
|
for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
|
||||||
xpath_result = getxpath(element);
|
for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
|
||||||
} catch (e) {
|
for (d = n.nextSibling; d;) {
|
||||||
console.log(e);
|
if (d.nodeName === n.nodeName) {
|
||||||
return
|
r = !0;
|
||||||
|
break
|
||||||
|
}
|
||||||
|
d = d.nextSibling
|
||||||
|
}
|
||||||
|
o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
|
||||||
|
}
|
||||||
|
return o.length ? "/" + o.reverse().join("/") : ""
|
||||||
|
}
|
||||||
|
|
||||||
|
const findUpTag = (el) => {
|
||||||
|
let r = el
|
||||||
|
chained_css = [];
|
||||||
|
depth = 0;
|
||||||
|
|
||||||
|
// Strategy 1: If it's an input, with name, and there's only one, prefer that
|
||||||
|
if (el.name !== undefined && el.name.length) {
|
||||||
|
var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]";
|
||||||
|
var proposed_element = window.document.querySelectorAll(proposed);
|
||||||
|
if (proposed_element.length) {
|
||||||
|
if (proposed_element.length === 1) {
|
||||||
|
return proposed;
|
||||||
|
} else {
|
||||||
|
// Some sites change ID but name= stays the same, we can hit it if we know the index
|
||||||
|
// Find all the elements that match and work out the input[n]
|
||||||
|
var n = Array.from(proposed_element).indexOf(el);
|
||||||
|
// Return a Playwright selector for nthinput[name=zipcode]
|
||||||
|
return proposed + " >> nth=" + n;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Strategy 2: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
|
||||||
|
while (r.parentNode) {
|
||||||
|
if (depth === 5) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if ('' !== r.id) {
|
||||||
|
chained_css.unshift("#" + CSS.escape(r.id));
|
||||||
|
final_selector = chained_css.join(' > ');
|
||||||
|
// Be sure theres only one, some sites have multiples of the same ID tag :-(
|
||||||
|
if (window.document.querySelectorAll(final_selector).length === 1) {
|
||||||
|
return final_selector;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
chained_css.unshift(r.tagName.toLowerCase());
|
||||||
|
}
|
||||||
|
r = r.parentNode;
|
||||||
|
depth += 1;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// @todo - if it's SVG or IMG, go into image diff mode
|
||||||
|
|
||||||
|
var size_pos = [];
|
||||||
|
// after page fetch, inject this JS
|
||||||
|
// build a map of all elements and their positions (maybe that only include text?)
|
||||||
|
var bbox;
|
||||||
|
console.log(`Scanning for "${visualselector_xpath_selectors}"`);
|
||||||
|
|
||||||
|
function collectVisibleElements(parent, visibleElements) {
|
||||||
|
if (!parent) return; // Base case: if parent is null or undefined, return
|
||||||
|
|
||||||
|
|
||||||
|
// Add the parent itself to the visible elements array if it's of the specified types
|
||||||
|
const tagName = parent.tagName.toLowerCase();
|
||||||
|
if (visualselector_xpath_selectors.split(',').includes(tagName)) {
|
||||||
|
visibleElements.push(parent);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Iterate over the parent's children
|
||||||
|
const children = parent.children;
|
||||||
|
for (let i = 0; i < children.length; i++) {
|
||||||
|
const child = children[i];
|
||||||
|
const computedStyle = window.getComputedStyle(child);
|
||||||
|
|
||||||
|
if (
|
||||||
|
child.nodeType === Node.ELEMENT_NODE &&
|
||||||
|
computedStyle.display !== 'none' &&
|
||||||
|
computedStyle.visibility !== 'hidden' &&
|
||||||
|
child.offsetWidth >= 0 &&
|
||||||
|
child.offsetHeight >= 0 &&
|
||||||
|
computedStyle.contentVisibility !== 'hidden'
|
||||||
|
) {
|
||||||
|
// If the child is an element and is visible, recursively collect visible elements
|
||||||
|
collectVisibleElements(child, visibleElements);
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
|
// Create an array to hold the visible elements
|
||||||
|
const visibleElementsArray = [];
|
||||||
|
|
||||||
let text = element.textContent.trim().slice(0, 30).trim();
|
// Call collectVisibleElements with the starting parent element
|
||||||
while (/\n{2,}|\t{2,}/.test(text)) {
|
collectVisibleElements(document.body, visibleElementsArray);
|
||||||
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
|
|
||||||
}
|
|
||||||
|
|
||||||
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
|
|
||||||
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ;
|
|
||||||
|
|
||||||
size_pos.push({
|
visibleElementsArray.forEach(function (element) {
|
||||||
xpath: xpath_result,
|
|
||||||
width: Math.round(bbox['width']),
|
bbox = element.getBoundingClientRect();
|
||||||
height: Math.round(bbox['height']),
|
|
||||||
left: Math.floor(bbox['left']),
|
// Skip really small ones, and where width or height ==0
|
||||||
top: Math.floor(bbox['top']) + scroll_y,
|
if (bbox['width'] * bbox['height'] < 10) {
|
||||||
// tagName used by Browser Steps
|
return
|
||||||
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
|
}
|
||||||
// tagtype used by Browser Steps
|
|
||||||
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
|
// Don't include elements that are offset from canvas
|
||||||
isClickable: window.getComputedStyle(element).cursor === "pointer",
|
if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) {
|
||||||
// Used by the keras trainer
|
return
|
||||||
fontSize: window.getComputedStyle(element).getPropertyValue('font-size'),
|
}
|
||||||
fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'),
|
|
||||||
hasDigitCurrency: hasDigitCurrency,
|
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
|
||||||
label: label,
|
// it should not traverse when we know we can anchor off just an ID one level up etc..
|
||||||
|
// maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
|
||||||
|
|
||||||
|
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
|
||||||
|
xpath_result = false;
|
||||||
|
try {
|
||||||
|
var d = findUpTag(element);
|
||||||
|
if (d) {
|
||||||
|
xpath_result = d;
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
// You could swap it and default to getXpath and then try the smarter one
|
||||||
|
// default back to the less intelligent one
|
||||||
|
if (!xpath_result) {
|
||||||
|
try {
|
||||||
|
// I've seen on FB and eBay that this doesnt work
|
||||||
|
// ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
|
||||||
|
xpath_result = getxpath(element);
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
return
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now
|
||||||
|
|
||||||
|
let text = element.textContent.trim().slice(0, 30).trim();
|
||||||
|
while (/\n{2,}|\t{2,}/.test(text)) {
|
||||||
|
text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t')
|
||||||
|
}
|
||||||
|
|
||||||
|
// Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training.
|
||||||
|
const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6))) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text);
|
||||||
|
const computedStyle = window.getComputedStyle(element);
|
||||||
|
|
||||||
|
if (Math.floor(bbox['top']) + scroll_y > max_height) {
|
||||||
|
return
|
||||||
|
}
|
||||||
|
|
||||||
|
size_pos.push({
|
||||||
|
xpath: xpath_result,
|
||||||
|
width: Math.round(bbox['width']),
|
||||||
|
height: Math.round(bbox['height']),
|
||||||
|
left: Math.floor(bbox['left']),
|
||||||
|
top: Math.floor(bbox['top']) + scroll_y,
|
||||||
|
// tagName used by Browser Steps
|
||||||
|
tagName: (element.tagName) ? element.tagName.toLowerCase() : '',
|
||||||
|
// tagtype used by Browser Steps
|
||||||
|
tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '',
|
||||||
|
isClickable: computedStyle.cursor === "pointer",
|
||||||
|
// Used by the keras trainer
|
||||||
|
fontSize: computedStyle.getPropertyValue('font-size'),
|
||||||
|
fontWeight: computedStyle.getPropertyValue('font-weight'),
|
||||||
|
hasDigitCurrency: hasDigitCurrency,
|
||||||
|
label: label,
|
||||||
|
});
|
||||||
|
|
||||||
});
|
});
|
||||||
|
|
||||||
});
|
|
||||||
|
|
||||||
|
|
||||||
// Inject the current one set in the include_filters, which may be a CSS rule
|
// Inject the current one set in the include_filters, which may be a CSS rule
|
||||||
// used for displaying the current one in VisualSelector, where its not one we generated.
|
// used for displaying the current one in VisualSelector, where its not one we generated.
|
||||||
if (include_filters.length) {
|
if (include_filters.length) {
|
||||||
let results;
|
let results;
|
||||||
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again
|
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again
|
||||||
for (const f of include_filters) {
|
for (const f of include_filters) {
|
||||||
bbox = false;
|
bbox = false;
|
||||||
q = false;
|
|
||||||
|
|
||||||
if (!f.length) {
|
if (!f.length) {
|
||||||
console.log("xpath_element_scraper: Empty filter, skipping");
|
console.log("xpath_element_scraper: Empty filter, skipping");
|
||||||
continue;
|
continue;
|
||||||
}
|
|
||||||
|
|
||||||
try {
|
|
||||||
// is it xpath?
|
|
||||||
if (f.startsWith('/') || f.startsWith('xpath')) {
|
|
||||||
var qry_f = f.replace(/xpath(:|\d:)/, '')
|
|
||||||
console.log("[xpath] Scanning for included filter " + qry_f)
|
|
||||||
let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
|
||||||
results = [];
|
|
||||||
for (let i = 0; i < xpathResult.snapshotLength; i++) {
|
|
||||||
results.push(xpathResult.snapshotItem(i));
|
|
||||||
}
|
|
||||||
} else {
|
|
||||||
console.log("[css] Scanning for included filter " + f)
|
|
||||||
console.log("[css] Scanning for included filter " + f);
|
|
||||||
results = document.querySelectorAll(f);
|
|
||||||
}
|
}
|
||||||
} catch (e) {
|
|
||||||
// Maybe catch DOMException and alert?
|
|
||||||
console.log("xpath_element_scraper: Exception selecting element from filter " + f);
|
|
||||||
console.log(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (results != null && results.length) {
|
try {
|
||||||
|
// is it xpath?
|
||||||
// Iterate over the results
|
if (f.startsWith('/') || f.startsWith('xpath')) {
|
||||||
results.forEach(node => {
|
var qry_f = f.replace(/xpath(:|\d:)/, '')
|
||||||
// Try to resolve //something/text() back to its /something so we can atleast get the bounding box
|
console.log("[xpath] Scanning for included filter " + qry_f)
|
||||||
try {
|
let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null);
|
||||||
if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
|
results = [];
|
||||||
node = node.parentElement
|
for (let i = 0; i < xpathResult.snapshotLength; i++) {
|
||||||
|
results.push(xpathResult.snapshotItem(i));
|
||||||
}
|
}
|
||||||
} catch (e) {
|
|
||||||
console.log(e)
|
|
||||||
console.log("xpath_element_scraper: #text resolver")
|
|
||||||
}
|
|
||||||
|
|
||||||
// #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
|
|
||||||
if (typeof node.getBoundingClientRect == 'function') {
|
|
||||||
bbox = node.getBoundingClientRect();
|
|
||||||
console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
|
|
||||||
} else {
|
} else {
|
||||||
|
console.log("[css] Scanning for included filter " + f)
|
||||||
|
console.log("[css] Scanning for included filter " + f);
|
||||||
|
results = document.querySelectorAll(f);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
// Maybe catch DOMException and alert?
|
||||||
|
console.log("xpath_element_scraper: Exception selecting element from filter " + f);
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (results != null && results.length) {
|
||||||
|
|
||||||
|
// Iterate over the results
|
||||||
|
results.forEach(node => {
|
||||||
|
// Try to resolve //something/text() back to its /something so we can atleast get the bounding box
|
||||||
try {
|
try {
|
||||||
// Try and see we can find its ownerElement
|
if (typeof node.nodeName == 'string' && node.nodeName === '#text') {
|
||||||
bbox = node.ownerElement.getBoundingClientRect();
|
node = node.parentElement
|
||||||
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.log(e)
|
console.log(e)
|
||||||
console.log("xpath_element_scraper: error looking up q.ownerElement")
|
console.log("xpath_element_scraper: #text resolver")
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
|
// #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element.
|
||||||
size_pos.push({
|
if (typeof node.getBoundingClientRect == 'function') {
|
||||||
xpath: f,
|
bbox = node.getBoundingClientRect();
|
||||||
width: parseInt(bbox['width']),
|
console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y)
|
||||||
height: parseInt(bbox['height']),
|
} else {
|
||||||
left: parseInt(bbox['left']),
|
try {
|
||||||
top: parseInt(bbox['top']) + scroll_y,
|
// Try and see we can find its ownerElement
|
||||||
highlight_as_custom_filter: true
|
bbox = node.ownerElement.getBoundingClientRect();
|
||||||
});
|
console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
|
||||||
}
|
} catch (e) {
|
||||||
});
|
console.log(e)
|
||||||
|
console.log("xpath_element_scraper: error looking up node.ownerElement")
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
|
||||||
|
size_pos.push({
|
||||||
|
xpath: f,
|
||||||
|
width: parseInt(bbox['width']),
|
||||||
|
height: parseInt(bbox['height']),
|
||||||
|
left: parseInt(bbox['left']),
|
||||||
|
top: parseInt(bbox['top']) + scroll_y,
|
||||||
|
highlight_as_custom_filter: true
|
||||||
|
});
|
||||||
|
}
|
||||||
|
});
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
// Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area
|
// Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area
|
||||||
// so that we dont select the wrapping element by mistake and be unable to select what we want
|
// so that we dont select the wrapping element by mistake and be unable to select what we want
|
||||||
size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1)
|
size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1)
|
||||||
|
|
||||||
|
// browser_width required for proper scaling in the frontend
|
||||||
|
// Return as a string to save playwright for juggling thousands of objects
|
||||||
|
return JSON.stringify({'size_pos': size_pos, 'browser_width': window.innerWidth});
|
||||||
|
}
|
||||||
|
|
||||||
// Window.width required for proper scaling in the frontend
|
|
||||||
return {'size_pos': size_pos, 'browser_width': window.innerWidth};
|
|
||||||
|
|||||||
73
changedetectionio/content_fetchers/screenshot_handler.py
Normal file
@@ -0,0 +1,73 @@
|
|||||||
|
# Pages with a vertical height longer than this will use the 'stitch together' method.
|
||||||
|
|
||||||
|
# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
|
||||||
|
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
|
||||||
|
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.
|
||||||
|
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY
|
||||||
|
|
||||||
|
|
||||||
|
def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height):
|
||||||
|
import os
|
||||||
|
import io
|
||||||
|
from PIL import Image, ImageDraw, ImageFont
|
||||||
|
|
||||||
|
try:
|
||||||
|
|
||||||
|
# Load images from byte chunks
|
||||||
|
images = [Image.open(io.BytesIO(b)) for b in chunks_bytes]
|
||||||
|
total_height = sum(im.height for im in images)
|
||||||
|
max_width = max(im.width for im in images)
|
||||||
|
|
||||||
|
# Create stitched image
|
||||||
|
stitched = Image.new('RGB', (max_width, total_height))
|
||||||
|
y_offset = 0
|
||||||
|
for im in images:
|
||||||
|
stitched.paste(im, (0, y_offset))
|
||||||
|
y_offset += im.height
|
||||||
|
|
||||||
|
# Draw caption on top (overlaid, not extending canvas)
|
||||||
|
draw = ImageDraw.Draw(stitched)
|
||||||
|
|
||||||
|
if original_page_height > capture_height:
|
||||||
|
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||||
|
padding = 10
|
||||||
|
font_size = 35
|
||||||
|
font_color = (255, 0, 0)
|
||||||
|
background_color = (255, 255, 255)
|
||||||
|
|
||||||
|
|
||||||
|
# Try to load a proper font
|
||||||
|
try:
|
||||||
|
font = ImageFont.truetype("arial.ttf", font_size)
|
||||||
|
except IOError:
|
||||||
|
font = ImageFont.load_default()
|
||||||
|
|
||||||
|
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||||
|
text_width = bbox[2] - bbox[0]
|
||||||
|
text_height = bbox[3] - bbox[1]
|
||||||
|
|
||||||
|
# Draw white rectangle background behind text
|
||||||
|
rect_top = 0
|
||||||
|
rect_bottom = text_height + 2 * padding
|
||||||
|
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
|
||||||
|
|
||||||
|
# Draw text centered horizontally, 10px padding from top of the rectangle
|
||||||
|
text_x = (max_width - text_width) // 2
|
||||||
|
text_y = padding
|
||||||
|
draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
|
||||||
|
|
||||||
|
# Encode and send image
|
||||||
|
output = io.BytesIO()
|
||||||
|
stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY)))
|
||||||
|
pipe_conn.send_bytes(output.getvalue())
|
||||||
|
|
||||||
|
stitched.close()
|
||||||
|
except Exception as e:
|
||||||
|
pipe_conn.send(f"error:{e}")
|
||||||
|
finally:
|
||||||
|
pipe_conn.close()
|
||||||
|
|
||||||
|
|
||||||
@@ -65,8 +65,18 @@ class fetcher(Fetcher):
|
|||||||
# request_body, request_method unused for now, until some magic in the future happens.
|
# request_body, request_method unused for now, until some magic in the future happens.
|
||||||
|
|
||||||
options = ChromeOptions()
|
options = ChromeOptions()
|
||||||
if self.proxy:
|
|
||||||
options.proxy = self.proxy
|
# Load Chrome options from env
|
||||||
|
CHROME_OPTIONS = [
|
||||||
|
line.strip()
|
||||||
|
for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
|
||||||
|
if line.strip()
|
||||||
|
]
|
||||||
|
|
||||||
|
for opt in CHROME_OPTIONS:
|
||||||
|
options.add_argument(opt)
|
||||||
|
|
||||||
|
options.add_argument(f"--proxy-server={self.proxy}")
|
||||||
|
|
||||||
self.driver = webdriver.Remote(
|
self.driver = webdriver.Remote(
|
||||||
command_executor=self.browser_connection_url,
|
command_executor=self.browser_connection_url,
|
||||||
@@ -79,7 +89,9 @@ class fetcher(Fetcher):
|
|||||||
self.quit()
|
self.quit()
|
||||||
raise
|
raise
|
||||||
|
|
||||||
self.driver.set_window_size(1280, 1024)
|
if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
|
||||||
|
self.driver.set_window_size(1280, 1024)
|
||||||
|
|
||||||
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||||
|
|
||||||
if self.webdriver_js_execute_code is not None:
|
if self.webdriver_js_execute_code is not None:
|
||||||
@@ -87,6 +99,7 @@ class fetcher(Fetcher):
|
|||||||
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
||||||
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||||
|
|
||||||
|
|
||||||
# @todo - how to check this? is it possible?
|
# @todo - how to check this? is it possible?
|
||||||
self.status_code = 200
|
self.status_code = 200
|
||||||
# @todo somehow we should try to get this working for WebDriver
|
# @todo somehow we should try to get this working for WebDriver
|
||||||
@@ -112,9 +125,9 @@ class fetcher(Fetcher):
|
|||||||
self.quit()
|
self.quit()
|
||||||
return True
|
return True
|
||||||
|
|
||||||
def quit(self):
|
def quit(self, watch=None):
|
||||||
if self.driver:
|
if self.driver:
|
||||||
try:
|
try:
|
||||||
self.driver.quit()
|
self.driver.quit()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
|
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
|
||||||
|
|||||||
@@ -3,6 +3,8 @@ import re
|
|||||||
from loguru import logger
|
from loguru import logger
|
||||||
from wtforms.widgets.core import TimeInput
|
from wtforms.widgets.core import TimeInput
|
||||||
|
|
||||||
|
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
|
||||||
|
from changedetectionio.conditions.form import ConditionFormRow
|
||||||
from changedetectionio.strtobool import strtobool
|
from changedetectionio.strtobool import strtobool
|
||||||
|
|
||||||
from wtforms import (
|
from wtforms import (
|
||||||
@@ -171,7 +173,7 @@ class validateTimeZoneName(object):
|
|||||||
|
|
||||||
class ScheduleLimitDaySubForm(Form):
|
class ScheduleLimitDaySubForm(Form):
|
||||||
enabled = BooleanField("not set", default=True)
|
enabled = BooleanField("not set", default=True)
|
||||||
start_time = TimeStringField("Start At", default="00:00", render_kw={"placeholder": "HH:MM"}, validators=[validators.Optional()])
|
start_time = TimeStringField("Start At", default="00:00", validators=[validators.Optional()])
|
||||||
duration = FormField(TimeDurationForm, label="Run duration")
|
duration = FormField(TimeDurationForm, label="Run duration")
|
||||||
|
|
||||||
class ScheduleLimitForm(Form):
|
class ScheduleLimitForm(Form):
|
||||||
@@ -304,9 +306,11 @@ class ValidateAppRiseServers(object):
|
|||||||
|
|
||||||
def __call__(self, form, field):
|
def __call__(self, form, field):
|
||||||
import apprise
|
import apprise
|
||||||
apobj = apprise.Apprise()
|
from .notification.apprise_plugin.assets import apprise_asset
|
||||||
# so that the custom endpoints are registered
|
from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler # noqa: F401
|
||||||
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
|
|
||||||
|
apobj = apprise.Apprise(asset=apprise_asset)
|
||||||
|
|
||||||
for server_url in field.data:
|
for server_url in field.data:
|
||||||
url = server_url.strip()
|
url = server_url.strip()
|
||||||
if url.startswith("#"):
|
if url.startswith("#"):
|
||||||
@@ -509,6 +513,7 @@ class quickWatchForm(Form):
|
|||||||
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
|
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
# Common to a single watch and the global settings
|
# Common to a single watch and the global settings
|
||||||
class commonSettingsForm(Form):
|
class commonSettingsForm(Form):
|
||||||
from . import processors
|
from . import processors
|
||||||
@@ -581,7 +586,7 @@ class processor_text_json_diff_form(commonSettingsForm):
|
|||||||
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
|
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
|
||||||
filter_text_removed = BooleanField('Removed lines', default=True)
|
filter_text_removed = BooleanField('Removed lines', default=True)
|
||||||
|
|
||||||
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
trigger_text = StringListField('Keyword triggers - Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
|
||||||
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
|
||||||
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
|
||||||
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
|
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
|
||||||
@@ -596,6 +601,10 @@ class processor_text_json_diff_form(commonSettingsForm):
|
|||||||
notification_muted = BooleanField('Notifications Muted / Off', default=False)
|
notification_muted = BooleanField('Notifications Muted / Off', default=False)
|
||||||
notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
|
notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
|
||||||
|
|
||||||
|
conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL')
|
||||||
|
conditions = FieldList(FormField(ConditionFormRow), min_entries=1) # Add rule logic here
|
||||||
|
|
||||||
|
|
||||||
def extra_tab_content(self):
|
def extra_tab_content(self):
|
||||||
return None
|
return None
|
||||||
|
|
||||||
@@ -712,6 +721,8 @@ class globalSettingsRequestForm(Form):
|
|||||||
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
|
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
class globalSettingsApplicationUIForm(Form):
|
||||||
|
open_diff_in_new_tab = BooleanField('Open diff page in a new tab', default=True, validators=[validators.Optional()])
|
||||||
|
|
||||||
# datastore.data['settings']['application']..
|
# datastore.data['settings']['application']..
|
||||||
class globalSettingsApplicationForm(commonSettingsForm):
|
class globalSettingsApplicationForm(commonSettingsForm):
|
||||||
@@ -731,6 +742,9 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
|||||||
render_kw={"style": "width: 5em;"},
|
render_kw={"style": "width: 5em;"},
|
||||||
validators=[validators.NumberRange(min=0,
|
validators=[validators.NumberRange(min=0,
|
||||||
message="Should be atleast zero (disabled)")])
|
message="Should be atleast zero (disabled)")])
|
||||||
|
|
||||||
|
rss_content_format = SelectField('RSS Content format', choices=RSS_FORMAT_TYPES)
|
||||||
|
|
||||||
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
||||||
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
||||||
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
|
shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
|
||||||
@@ -740,6 +754,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
|||||||
render_kw={"style": "width: 5em;"},
|
render_kw={"style": "width: 5em;"},
|
||||||
validators=[validators.NumberRange(min=0,
|
validators=[validators.NumberRange(min=0,
|
||||||
message="Should contain zero or more attempts")])
|
message="Should contain zero or more attempts")])
|
||||||
|
ui = FormField(globalSettingsApplicationUIForm)
|
||||||
|
|
||||||
|
|
||||||
class globalSettingsForm(Form):
|
class globalSettingsForm(Form):
|
||||||
|
|||||||
162
changedetectionio/gc_cleanup.py
Normal file
@@ -0,0 +1,162 @@
|
|||||||
|
#!/usr/bin/env python3
|
||||||
|
|
||||||
|
import ctypes
|
||||||
|
import gc
|
||||||
|
import re
|
||||||
|
import psutil
|
||||||
|
import sys
|
||||||
|
import threading
|
||||||
|
import importlib
|
||||||
|
from loguru import logger
|
||||||
|
|
||||||
|
def memory_cleanup(app=None):
|
||||||
|
"""
|
||||||
|
Perform comprehensive memory cleanup operations and log memory usage
|
||||||
|
at each step with nicely formatted numbers.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
app: Optional Flask app instance for clearing Flask-specific caches
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: Status message
|
||||||
|
"""
|
||||||
|
# Get current process
|
||||||
|
process = psutil.Process()
|
||||||
|
|
||||||
|
# Log initial memory usage with nicely formatted numbers
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"Memory cleanup started - Current memory usage: {current_memory:,.2f} MB")
|
||||||
|
|
||||||
|
# 1. Standard garbage collection - force full collection on all generations
|
||||||
|
gc.collect(0) # Collect youngest generation
|
||||||
|
gc.collect(1) # Collect middle generation
|
||||||
|
gc.collect(2) # Collect oldest generation
|
||||||
|
|
||||||
|
# Run full collection again to ensure maximum cleanup
|
||||||
|
gc.collect()
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"After full gc.collect() - Memory usage: {current_memory:,.2f} MB")
|
||||||
|
|
||||||
|
|
||||||
|
# 3. Call libc's malloc_trim to release memory back to the OS
|
||||||
|
libc = ctypes.CDLL("libc.so.6")
|
||||||
|
libc.malloc_trim(0)
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"After malloc_trim(0) - Memory usage: {current_memory:,.2f} MB")
|
||||||
|
|
||||||
|
# 4. Clear Python's regex cache
|
||||||
|
re.purge()
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"After re.purge() - Memory usage: {current_memory:,.2f} MB")
|
||||||
|
|
||||||
|
# 5. Reset thread-local storage
|
||||||
|
# Create a new thread local object to encourage cleanup of old ones
|
||||||
|
threading.local()
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"After threading.local() - Memory usage: {current_memory:,.2f} MB")
|
||||||
|
|
||||||
|
# 6. Clear sys.intern cache if Python version supports it
|
||||||
|
try:
|
||||||
|
sys.intern.clear()
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"After sys.intern.clear() - Memory usage: {current_memory:,.2f} MB")
|
||||||
|
except (AttributeError, TypeError):
|
||||||
|
logger.debug("sys.intern.clear() not supported in this Python version")
|
||||||
|
|
||||||
|
# 7. Clear XML/lxml caches if available
|
||||||
|
try:
|
||||||
|
# Check if lxml.etree is in use
|
||||||
|
lxml_etree = sys.modules.get('lxml.etree')
|
||||||
|
if lxml_etree:
|
||||||
|
# Clear module-level caches
|
||||||
|
if hasattr(lxml_etree, 'clear_error_log'):
|
||||||
|
lxml_etree.clear_error_log()
|
||||||
|
|
||||||
|
# Check for _ErrorLog and _RotatingErrorLog objects and clear them
|
||||||
|
for obj in gc.get_objects():
|
||||||
|
if hasattr(obj, '__class__') and hasattr(obj.__class__, '__name__'):
|
||||||
|
class_name = obj.__class__.__name__
|
||||||
|
if class_name in ('_ErrorLog', '_RotatingErrorLog', '_DomainErrorLog') and hasattr(obj, 'clear'):
|
||||||
|
try:
|
||||||
|
obj.clear()
|
||||||
|
except (AttributeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
# Clear Element objects which can hold references to documents
|
||||||
|
elif class_name in ('_Element', 'ElementBase') and hasattr(obj, 'clear'):
|
||||||
|
try:
|
||||||
|
obj.clear()
|
||||||
|
except (AttributeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"After lxml.etree cleanup - Memory usage: {current_memory:,.2f} MB")
|
||||||
|
|
||||||
|
# Check if lxml.html is in use
|
||||||
|
lxml_html = sys.modules.get('lxml.html')
|
||||||
|
if lxml_html:
|
||||||
|
# Clear HTML-specific element types
|
||||||
|
for obj in gc.get_objects():
|
||||||
|
if hasattr(obj, '__class__') and hasattr(obj.__class__, '__name__'):
|
||||||
|
class_name = obj.__class__.__name__
|
||||||
|
if class_name in ('HtmlElement', 'FormElement', 'InputElement',
|
||||||
|
'SelectElement', 'TextareaElement', 'CheckboxGroup',
|
||||||
|
'RadioGroup', 'MultipleSelectOptions', 'FieldsDict') and hasattr(obj, 'clear'):
|
||||||
|
try:
|
||||||
|
obj.clear()
|
||||||
|
except (AttributeError, TypeError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"After lxml.html cleanup - Memory usage: {current_memory:,.2f} MB")
|
||||||
|
except (ImportError, AttributeError):
|
||||||
|
logger.debug("lxml cleanup not applicable")
|
||||||
|
|
||||||
|
# 8. Clear JSON parser caches if applicable
|
||||||
|
try:
|
||||||
|
# Check if json module is being used and try to clear its cache
|
||||||
|
json_module = sys.modules.get('json')
|
||||||
|
if json_module and hasattr(json_module, '_default_encoder'):
|
||||||
|
json_module._default_encoder.markers.clear()
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"After JSON parser cleanup - Memory usage: {current_memory:,.2f} MB")
|
||||||
|
except (AttributeError, KeyError):
|
||||||
|
logger.debug("JSON cleanup not applicable")
|
||||||
|
|
||||||
|
# 9. Force Python's memory allocator to release unused memory
|
||||||
|
try:
|
||||||
|
if hasattr(sys, 'pypy_version_info'):
|
||||||
|
# PyPy has different memory management
|
||||||
|
gc.collect()
|
||||||
|
else:
|
||||||
|
# CPython - try to release unused memory
|
||||||
|
ctypes.pythonapi.PyGC_Collect()
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"After PyGC_Collect - Memory usage: {current_memory:,.2f} MB")
|
||||||
|
except (AttributeError, TypeError):
|
||||||
|
logger.debug("PyGC_Collect not supported")
|
||||||
|
|
||||||
|
# 10. Clear Flask-specific caches if applicable
|
||||||
|
if app:
|
||||||
|
try:
|
||||||
|
# Clear Flask caches if they exist
|
||||||
|
for key in list(app.config.get('_cache', {}).keys()):
|
||||||
|
app.config['_cache'].pop(key, None)
|
||||||
|
|
||||||
|
# Clear Jinja2 template cache if available
|
||||||
|
if hasattr(app, 'jinja_env') and hasattr(app.jinja_env, 'cache'):
|
||||||
|
app.jinja_env.cache.clear()
|
||||||
|
|
||||||
|
current_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.debug(f"After Flask cache clear - Memory usage: {current_memory:,.2f} MB")
|
||||||
|
except (AttributeError, KeyError):
|
||||||
|
logger.debug("No Flask cache to clear")
|
||||||
|
|
||||||
|
# Final garbage collection pass
|
||||||
|
gc.collect()
|
||||||
|
libc.malloc_trim(0)
|
||||||
|
|
||||||
|
# Log final memory usage
|
||||||
|
final_memory = process.memory_info().rss / 1024 / 1024
|
||||||
|
logger.info(f"Memory cleanup completed - Final memory usage: {final_memory:,.2f} MB")
|
||||||
|
return "cleaned"
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
from typing import List
|
from loguru import logger
|
||||||
from lxml import etree
|
from lxml import etree
|
||||||
|
from typing import List
|
||||||
import json
|
import json
|
||||||
import re
|
import re
|
||||||
|
|
||||||
@@ -298,8 +299,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
|||||||
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
|
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
|
||||||
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
|
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
|
||||||
try:
|
try:
|
||||||
stripped_text_from_html = _parse_json(json.loads(content), json_filter)
|
# .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
|
||||||
except json.JSONDecodeError:
|
stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter)
|
||||||
|
except json.JSONDecodeError as e:
|
||||||
|
logger.warning(str(e))
|
||||||
|
|
||||||
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
||||||
# As a last resort, try to parse the whole <body>
|
# As a last resort, try to parse the whole <body>
|
||||||
@@ -363,22 +366,41 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
|
|||||||
# wordlist - list of regex's (str) or words (str)
|
# wordlist - list of regex's (str) or words (str)
|
||||||
# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
|
# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
|
||||||
def strip_ignore_text(content, wordlist, mode="content"):
|
def strip_ignore_text(content, wordlist, mode="content"):
|
||||||
i = 0
|
|
||||||
output = []
|
|
||||||
ignore_text = []
|
ignore_text = []
|
||||||
ignore_regex = []
|
ignore_regex = []
|
||||||
ignored_line_numbers = []
|
ignore_regex_multiline = []
|
||||||
|
ignored_lines = []
|
||||||
|
|
||||||
for k in wordlist:
|
for k in wordlist:
|
||||||
# Is it a regex?
|
# Is it a regex?
|
||||||
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
|
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
|
||||||
if res:
|
if res:
|
||||||
ignore_regex.append(re.compile(perl_style_slash_enclosed_regex_to_options(k)))
|
res = re.compile(perl_style_slash_enclosed_regex_to_options(k))
|
||||||
|
if res.flags & re.DOTALL or res.flags & re.MULTILINE:
|
||||||
|
ignore_regex_multiline.append(res)
|
||||||
|
else:
|
||||||
|
ignore_regex.append(res)
|
||||||
else:
|
else:
|
||||||
ignore_text.append(k.strip())
|
ignore_text.append(k.strip())
|
||||||
|
|
||||||
for line in content.splitlines(keepends=True):
|
for r in ignore_regex_multiline:
|
||||||
i += 1
|
for match in r.finditer(content):
|
||||||
|
content_lines = content[:match.end()].splitlines(keepends=True)
|
||||||
|
match_lines = content[match.start():match.end()].splitlines(keepends=True)
|
||||||
|
|
||||||
|
end_line = len(content_lines)
|
||||||
|
start_line = end_line - len(match_lines)
|
||||||
|
|
||||||
|
if end_line - start_line <= 1:
|
||||||
|
# Match is empty or in the middle of the line
|
||||||
|
ignored_lines.append(start_line)
|
||||||
|
else:
|
||||||
|
for i in range(start_line, end_line):
|
||||||
|
ignored_lines.append(i)
|
||||||
|
|
||||||
|
line_index = 0
|
||||||
|
lines = content.splitlines(keepends=True)
|
||||||
|
for line in lines:
|
||||||
# Always ignore blank lines in this mode. (when this function gets called)
|
# Always ignore blank lines in this mode. (when this function gets called)
|
||||||
got_match = False
|
got_match = False
|
||||||
for l in ignore_text:
|
for l in ignore_text:
|
||||||
@@ -390,17 +412,19 @@ def strip_ignore_text(content, wordlist, mode="content"):
|
|||||||
if r.search(line):
|
if r.search(line):
|
||||||
got_match = True
|
got_match = True
|
||||||
|
|
||||||
if not got_match:
|
if got_match:
|
||||||
# Not ignored, and should preserve "keepends"
|
ignored_lines.append(line_index)
|
||||||
output.append(line)
|
|
||||||
else:
|
line_index += 1
|
||||||
ignored_line_numbers.append(i)
|
|
||||||
|
ignored_lines = set([i for i in ignored_lines if i >= 0 and i < len(lines)])
|
||||||
|
|
||||||
# Used for finding out what to highlight
|
# Used for finding out what to highlight
|
||||||
if mode == "line numbers":
|
if mode == "line numbers":
|
||||||
return ignored_line_numbers
|
return [i + 1 for i in ignored_lines]
|
||||||
|
|
||||||
return ''.join(output)
|
output_lines = set(range(len(lines))) - ignored_lines
|
||||||
|
return ''.join([lines[i] for i in output_lines])
|
||||||
|
|
||||||
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
|
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
|
||||||
from xml.sax.saxutils import escape as xml_escape
|
from xml.sax.saxutils import escape as xml_escape
|
||||||
@@ -411,7 +435,9 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
|
|||||||
|
|
||||||
return re.sub(pattern, repl, html_content)
|
return re.sub(pattern, repl, html_content)
|
||||||
|
|
||||||
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str:
|
|
||||||
|
def html_to_text_sub_worker(conn, html_content: str, render_anchor_tag_content=False, is_rss=False):
|
||||||
|
|
||||||
from inscriptis import get_text
|
from inscriptis import get_text
|
||||||
from inscriptis.model.config import ParserConfig
|
from inscriptis.model.config import ParserConfig
|
||||||
|
|
||||||
@@ -446,15 +472,27 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals
|
|||||||
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
html_content = re.sub(r'</title>', r'</h1>', html_content)
|
||||||
|
|
||||||
text_content = get_text(html_content, config=parser_config)
|
text_content = get_text(html_content, config=parser_config)
|
||||||
|
conn.send(text_content)
|
||||||
|
conn.close()
|
||||||
|
|
||||||
return text_content
|
# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
|
||||||
|
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False):
|
||||||
|
from multiprocessing import Process, Pipe
|
||||||
|
|
||||||
|
parent_conn, child_conn = Pipe()
|
||||||
|
p = Process(target=html_to_text_sub_worker, args=(child_conn, html_content, render_anchor_tag_content, is_rss))
|
||||||
|
p.start()
|
||||||
|
text = parent_conn.recv()
|
||||||
|
p.join()
|
||||||
|
return text
|
||||||
|
|
||||||
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
|
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
|
||||||
def has_ldjson_product_info(content):
|
def has_ldjson_product_info(content):
|
||||||
try:
|
try:
|
||||||
lc = content.lower()
|
# Better than .lower() which can use a lot of ram
|
||||||
if 'application/ld+json' in lc and lc.count('"price"') == 1 and '"pricecurrency"' in lc:
|
if (re.search(r'application/ld\+json', content, re.IGNORECASE) and
|
||||||
|
re.search(r'"price"', content, re.IGNORECASE) and
|
||||||
|
re.search(r'"pricecurrency"', content, re.IGNORECASE)):
|
||||||
return True
|
return True
|
||||||
|
|
||||||
# On some pages this is really terribly expensive when they dont really need it
|
# On some pages this is really terribly expensive when they dont really need it
|
||||||
|
|||||||
@@ -1,4 +1,7 @@
|
|||||||
from os import getenv
|
from os import getenv
|
||||||
|
|
||||||
|
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
|
||||||
|
|
||||||
from changedetectionio.notification import (
|
from changedetectionio.notification import (
|
||||||
default_notification_body,
|
default_notification_body,
|
||||||
default_notification_format,
|
default_notification_format,
|
||||||
@@ -9,6 +12,8 @@ from changedetectionio.notification import (
|
|||||||
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
|
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
|
||||||
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
|
DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
class model(dict):
|
class model(dict):
|
||||||
base_config = {
|
base_config = {
|
||||||
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
|
||||||
@@ -48,12 +53,16 @@ class model(dict):
|
|||||||
'password': False,
|
'password': False,
|
||||||
'render_anchor_tag_content': False,
|
'render_anchor_tag_content': False,
|
||||||
'rss_access_token': None,
|
'rss_access_token': None,
|
||||||
|
'rss_content_format': RSS_FORMAT_TYPES[0][0],
|
||||||
'rss_hide_muted_watches': True,
|
'rss_hide_muted_watches': True,
|
||||||
'schema_version' : 0,
|
'schema_version' : 0,
|
||||||
'shared_diff_access': False,
|
'shared_diff_access': False,
|
||||||
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
'webdriver_delay': None , # Extra delay in seconds before extracting text
|
||||||
'tags': {}, #@todo use Tag.model initialisers
|
'tags': {}, #@todo use Tag.model initialisers
|
||||||
'timezone': None, # Default IANA timezone name
|
'timezone': None, # Default IANA timezone name
|
||||||
|
'ui': {
|
||||||
|
'open_diff_in_new_tab': True,
|
||||||
|
},
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -69,7 +78,7 @@ def parse_headers_from_text_file(filepath):
|
|||||||
for l in f.readlines():
|
for l in f.readlines():
|
||||||
l = l.strip()
|
l = l.strip()
|
||||||
if not l.startswith('#') and ':' in l:
|
if not l.startswith('#') and ':' in l:
|
||||||
(k, v) = l.split(':')
|
(k, v) = l.split(':', 1) # Split only on the first colon
|
||||||
headers[k.strip()] = v.strip()
|
headers[k.strip()] = v.strip()
|
||||||
|
|
||||||
return headers
|
return headers
|
||||||
@@ -83,7 +83,7 @@ class model(watch_base):
|
|||||||
flash, Markup, url_for
|
flash, Markup, url_for
|
||||||
)
|
)
|
||||||
message = Markup('<a href="{}#general">The URL {} is invalid and cannot be used, click to edit</a>'.format(
|
message = Markup('<a href="{}#general">The URL {} is invalid and cannot be used, click to edit</a>'.format(
|
||||||
url_for('edit_page', uuid=self.get('uuid')), self.get('url', '')))
|
url_for('ui.ui_edit.edit_page', uuid=self.get('uuid')), self.get('url', '')))
|
||||||
flash(message, 'error')
|
flash(message, 'error')
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
@@ -247,37 +247,32 @@ class model(watch_base):
|
|||||||
bump = self.history
|
bump = self.history
|
||||||
return self.__newest_history_key
|
return self.__newest_history_key
|
||||||
|
|
||||||
# Given an arbitrary timestamp, find the closest next key
|
# Given an arbitrary timestamp, find the best history key for the [diff] button so it can preset a smarter from_version
|
||||||
# For example, last_viewed = 1000 so it should return the next 1001 timestamp
|
|
||||||
#
|
|
||||||
# used for the [diff] button so it can preset a smarter from_version
|
|
||||||
@property
|
@property
|
||||||
def get_next_snapshot_key_to_last_viewed(self):
|
def get_from_version_based_on_last_viewed(self):
|
||||||
|
|
||||||
"""Unfortunately for now timestamp is stored as string key"""
|
"""Unfortunately for now timestamp is stored as string key"""
|
||||||
keys = list(self.history.keys())
|
keys = list(self.history.keys())
|
||||||
if not keys:
|
if not keys:
|
||||||
return None
|
return None
|
||||||
|
if len(keys) == 1:
|
||||||
|
return keys[0]
|
||||||
|
|
||||||
last_viewed = int(self.get('last_viewed'))
|
last_viewed = int(self.get('last_viewed'))
|
||||||
prev_k = keys[0]
|
|
||||||
sorted_keys = sorted(keys, key=lambda x: int(x))
|
sorted_keys = sorted(keys, key=lambda x: int(x))
|
||||||
sorted_keys.reverse()
|
sorted_keys.reverse()
|
||||||
|
|
||||||
# When the 'last viewed' timestamp is greater than the newest snapshot, return second last
|
# When the 'last viewed' timestamp is greater than or equal the newest snapshot, return second newest
|
||||||
if last_viewed > int(sorted_keys[0]):
|
if last_viewed >= int(sorted_keys[0]):
|
||||||
return sorted_keys[1]
|
return sorted_keys[1]
|
||||||
|
|
||||||
|
# When the 'last viewed' timestamp is between snapshots, return the older snapshot
|
||||||
|
for newer, older in list(zip(sorted_keys[0:], sorted_keys[1:])):
|
||||||
|
if last_viewed < int(newer) and last_viewed >= int(older):
|
||||||
|
return older
|
||||||
|
|
||||||
for k in sorted_keys:
|
# When the 'last viewed' timestamp is less than the oldest snapshot, return oldest
|
||||||
if int(k) < last_viewed:
|
return sorted_keys[-1]
|
||||||
if prev_k == sorted_keys[0]:
|
|
||||||
# Return the second last one so we dont recommend the same version compares itself
|
|
||||||
return sorted_keys[1]
|
|
||||||
|
|
||||||
return prev_k
|
|
||||||
prev_k = k
|
|
||||||
|
|
||||||
return keys[0]
|
|
||||||
|
|
||||||
def get_history_snapshot(self, timestamp):
|
def get_history_snapshot(self, timestamp):
|
||||||
import brotli
|
import brotli
|
||||||
@@ -301,11 +296,11 @@ class model(watch_base):
|
|||||||
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
with open(filepath, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
|
|
||||||
# Save some text file to the appropriate path and bump the history
|
# Save some text file to the appropriate path and bump the history
|
||||||
# result_obj from fetch_site_status.run()
|
# result_obj from fetch_site_status.run()
|
||||||
def save_history_text(self, contents, timestamp, snapshot_id):
|
def save_history_text(self, contents, timestamp, snapshot_id):
|
||||||
import brotli
|
import brotli
|
||||||
|
import tempfile
|
||||||
logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")
|
logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}")
|
||||||
|
|
||||||
self.ensure_data_dir_exists()
|
self.ensure_data_dir_exists()
|
||||||
@@ -313,26 +308,37 @@ class model(watch_base):
|
|||||||
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024))
|
||||||
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
|
skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False'))
|
||||||
|
|
||||||
|
# Decide on snapshot filename and destination path
|
||||||
if not skip_brotli and len(contents) > threshold:
|
if not skip_brotli and len(contents) > threshold:
|
||||||
snapshot_fname = f"{snapshot_id}.txt.br"
|
snapshot_fname = f"{snapshot_id}.txt.br"
|
||||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)
|
||||||
if not os.path.exists(dest):
|
|
||||||
with open(dest, 'wb') as f:
|
|
||||||
f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT))
|
|
||||||
else:
|
else:
|
||||||
snapshot_fname = f"{snapshot_id}.txt"
|
snapshot_fname = f"{snapshot_id}.txt"
|
||||||
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
encoded_data = contents.encode('utf-8')
|
||||||
if not os.path.exists(dest):
|
|
||||||
with open(dest, 'wb') as f:
|
|
||||||
f.write(contents.encode('utf-8'))
|
|
||||||
|
|
||||||
# Append to index
|
dest = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||||
# @todo check last char was \n
|
|
||||||
|
# Write snapshot file atomically if it doesn't exist
|
||||||
|
if not os.path.exists(dest):
|
||||||
|
with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp:
|
||||||
|
tmp.write(encoded_data)
|
||||||
|
tmp.flush()
|
||||||
|
os.fsync(tmp.fileno())
|
||||||
|
tmp_path = tmp.name
|
||||||
|
os.rename(tmp_path, dest)
|
||||||
|
|
||||||
|
# Append to history.txt atomically
|
||||||
index_fname = os.path.join(self.watch_data_dir, "history.txt")
|
index_fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||||
with open(index_fname, 'a') as f:
|
index_line = f"{timestamp},{snapshot_fname}\n"
|
||||||
f.write("{},{}\n".format(timestamp, snapshot_fname))
|
|
||||||
f.close()
|
|
||||||
|
|
||||||
|
# Lets try force flush here since it's usually a very small file
|
||||||
|
# If this still fails in the future then try reading all to memory first, re-writing etc
|
||||||
|
with open(index_fname, 'a', encoding='utf-8') as f:
|
||||||
|
f.write(index_line)
|
||||||
|
f.flush()
|
||||||
|
os.fsync(f.fileno())
|
||||||
|
|
||||||
|
# Update internal state
|
||||||
self.__newest_history_key = timestamp
|
self.__newest_history_key = timestamp
|
||||||
self.__history_n += 1
|
self.__history_n += 1
|
||||||
|
|
||||||
@@ -357,7 +363,7 @@ class model(watch_base):
|
|||||||
# Iterate over all history texts and see if something new exists
|
# Iterate over all history texts and see if something new exists
|
||||||
# Always applying .strip() to start/end but optionally replace any other whitespace
|
# Always applying .strip() to start/end but optionally replace any other whitespace
|
||||||
def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False):
|
def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False):
|
||||||
local_lines = []
|
local_lines = set([])
|
||||||
if lines:
|
if lines:
|
||||||
if ignore_whitespace:
|
if ignore_whitespace:
|
||||||
if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk
|
if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk
|
||||||
@@ -532,7 +538,7 @@ class model(watch_base):
|
|||||||
def save_error_text(self, contents):
|
def save_error_text(self, contents):
|
||||||
self.ensure_data_dir_exists()
|
self.ensure_data_dir_exists()
|
||||||
target_path = os.path.join(self.watch_data_dir, "last-error.txt")
|
target_path = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||||
with open(target_path, 'w') as f:
|
with open(target_path, 'w', encoding='utf-8') as f:
|
||||||
f.write(contents)
|
f.write(contents)
|
||||||
|
|
||||||
def save_xpath_data(self, data, as_error=False):
|
def save_xpath_data(self, data, as_error=False):
|
||||||
@@ -547,7 +553,10 @@ class model(watch_base):
|
|||||||
self.ensure_data_dir_exists()
|
self.ensure_data_dir_exists()
|
||||||
|
|
||||||
with open(target_path, 'wb') as f:
|
with open(target_path, 'wb') as f:
|
||||||
f.write(zlib.compress(json.dumps(data).encode()))
|
if not isinstance(data, str):
|
||||||
|
f.write(zlib.compress(json.dumps(data).encode()))
|
||||||
|
else:
|
||||||
|
f.write(zlib.compress(data.encode()))
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
# Save as PNG, PNG is larger but better for doing visual diff in the future
|
# Save as PNG, PNG is larger but better for doing visual diff in the future
|
||||||
@@ -569,7 +578,7 @@ class model(watch_base):
|
|||||||
import brotli
|
import brotli
|
||||||
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
filepath = os.path.join(self.watch_data_dir, 'last-fetched.br')
|
||||||
|
|
||||||
if not os.path.isfile(filepath):
|
if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0:
|
||||||
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
# If a previous attempt doesnt yet exist, just snarf the previous snapshot instead
|
||||||
dates = list(self.history.keys())
|
dates = list(self.history.keys())
|
||||||
if len(dates):
|
if len(dates):
|
||||||
|
|||||||
@@ -2,7 +2,7 @@ import os
|
|||||||
import uuid
|
import uuid
|
||||||
|
|
||||||
from changedetectionio import strtobool
|
from changedetectionio import strtobool
|
||||||
from changedetectionio.notification import default_notification_format_for_watch
|
default_notification_format_for_watch = 'System default'
|
||||||
|
|
||||||
class watch_base(dict):
|
class watch_base(dict):
|
||||||
|
|
||||||
|
|||||||
35
changedetectionio/notification/__init__.py
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
from changedetectionio.model import default_notification_format_for_watch
|
||||||
|
|
||||||
|
ult_notification_format_for_watch = 'System default'
|
||||||
|
default_notification_format = 'HTML Color'
|
||||||
|
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
|
||||||
|
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
|
||||||
|
|
||||||
|
# The values (markdown etc) are from apprise NotifyFormat,
|
||||||
|
# But to avoid importing the whole heavy module just use the same strings here.
|
||||||
|
valid_notification_formats = {
|
||||||
|
'Text': 'text',
|
||||||
|
'Markdown': 'markdown',
|
||||||
|
'HTML': 'html',
|
||||||
|
'HTML Color': 'htmlcolor',
|
||||||
|
# Used only for editing a watch (not for global)
|
||||||
|
default_notification_format_for_watch: default_notification_format_for_watch
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
valid_tokens = {
|
||||||
|
'base_url': '',
|
||||||
|
'current_snapshot': '',
|
||||||
|
'diff': '',
|
||||||
|
'diff_added': '',
|
||||||
|
'diff_full': '',
|
||||||
|
'diff_patch': '',
|
||||||
|
'diff_removed': '',
|
||||||
|
'diff_url': '',
|
||||||
|
'preview_url': '',
|
||||||
|
'triggered_text': '',
|
||||||
|
'watch_tag': '',
|
||||||
|
'watch_title': '',
|
||||||
|
'watch_url': '',
|
||||||
|
'watch_uuid': '',
|
||||||
|
}
|
||||||
16
changedetectionio/notification/apprise_plugin/assets.py
Normal file
@@ -0,0 +1,16 @@
|
|||||||
|
from apprise import AppriseAsset
|
||||||
|
|
||||||
|
# Refer to:
|
||||||
|
# https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object
|
||||||
|
|
||||||
|
APPRISE_APP_ID = "changedetection.io"
|
||||||
|
APPRISE_APP_DESC = "ChangeDetection.io best and simplest website monitoring and change detection"
|
||||||
|
APPRISE_APP_URL = "https://changedetection.io"
|
||||||
|
APPRISE_AVATAR_URL = "https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png"
|
||||||
|
|
||||||
|
apprise_asset = AppriseAsset(
|
||||||
|
app_id=APPRISE_APP_ID,
|
||||||
|
app_desc=APPRISE_APP_DESC,
|
||||||
|
app_url=APPRISE_APP_URL,
|
||||||
|
image_url_logo=APPRISE_AVATAR_URL,
|
||||||
|
)
|
||||||
112
changedetectionio/notification/apprise_plugin/custom_handlers.py
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
import json
|
||||||
|
import re
|
||||||
|
from urllib.parse import unquote_plus
|
||||||
|
|
||||||
|
import requests
|
||||||
|
from apprise.decorators import notify
|
||||||
|
from apprise.utils.parse import parse_url as apprise_parse_url
|
||||||
|
from loguru import logger
|
||||||
|
from requests.structures import CaseInsensitiveDict
|
||||||
|
|
||||||
|
SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"}
|
||||||
|
|
||||||
|
|
||||||
|
def notify_supported_methods(func):
|
||||||
|
for method in SUPPORTED_HTTP_METHODS:
|
||||||
|
func = notify(on=method)(func)
|
||||||
|
# Add support for https, for each supported http method
|
||||||
|
func = notify(on=f"{method}s")(func)
|
||||||
|
return func
|
||||||
|
|
||||||
|
|
||||||
|
def _get_auth(parsed_url: dict) -> str | tuple[str, str]:
|
||||||
|
user: str | None = parsed_url.get("user")
|
||||||
|
password: str | None = parsed_url.get("password")
|
||||||
|
|
||||||
|
if user is not None and password is not None:
|
||||||
|
return (unquote_plus(user), unquote_plus(password))
|
||||||
|
|
||||||
|
if user is not None:
|
||||||
|
return unquote_plus(user)
|
||||||
|
|
||||||
|
return ""
|
||||||
|
|
||||||
|
|
||||||
|
def _get_headers(parsed_url: dict, body: str) -> CaseInsensitiveDict:
|
||||||
|
headers = CaseInsensitiveDict(
|
||||||
|
{unquote_plus(k).title(): unquote_plus(v) for k, v in parsed_url["qsd+"].items()}
|
||||||
|
)
|
||||||
|
|
||||||
|
# If Content-Type is not specified, guess if the body is a valid JSON
|
||||||
|
if headers.get("Content-Type") is None:
|
||||||
|
try:
|
||||||
|
json.loads(body)
|
||||||
|
headers["Content-Type"] = "application/json; charset=utf-8"
|
||||||
|
except Exception:
|
||||||
|
pass
|
||||||
|
|
||||||
|
return headers
|
||||||
|
|
||||||
|
|
||||||
|
def _get_params(parsed_url: dict) -> CaseInsensitiveDict:
|
||||||
|
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
|
||||||
|
# In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
|
||||||
|
# but here we are making straight requests, so we need todo convert this against apprise's logic
|
||||||
|
params = CaseInsensitiveDict(
|
||||||
|
{
|
||||||
|
unquote_plus(k): unquote_plus(v)
|
||||||
|
for k, v in parsed_url["qsd"].items()
|
||||||
|
if k.strip("-") not in parsed_url["qsd-"]
|
||||||
|
and k.strip("+") not in parsed_url["qsd+"]
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
return params
|
||||||
|
|
||||||
|
|
||||||
|
@notify_supported_methods
|
||||||
|
def apprise_http_custom_handler(
|
||||||
|
body: str,
|
||||||
|
title: str,
|
||||||
|
notify_type: str,
|
||||||
|
meta: dict,
|
||||||
|
*args,
|
||||||
|
**kwargs,
|
||||||
|
) -> bool:
|
||||||
|
url: str = meta.get("url")
|
||||||
|
schema: str = meta.get("schema")
|
||||||
|
method: str = re.sub(r"s$", "", schema).upper()
|
||||||
|
|
||||||
|
# Convert /foobar?+some-header=hello to proper header dictionary
|
||||||
|
parsed_url: dict[str, str | dict | None] | None = apprise_parse_url(url)
|
||||||
|
if parsed_url is None:
|
||||||
|
return False
|
||||||
|
|
||||||
|
auth = _get_auth(parsed_url=parsed_url)
|
||||||
|
headers = _get_headers(parsed_url=parsed_url, body=body)
|
||||||
|
params = _get_params(parsed_url=parsed_url)
|
||||||
|
|
||||||
|
url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))
|
||||||
|
|
||||||
|
try:
|
||||||
|
response = requests.request(
|
||||||
|
method=method,
|
||||||
|
url=url,
|
||||||
|
auth=auth,
|
||||||
|
headers=headers,
|
||||||
|
params=params,
|
||||||
|
data=body.encode("utf-8") if isinstance(body, str) else body,
|
||||||
|
)
|
||||||
|
|
||||||
|
response.raise_for_status()
|
||||||
|
|
||||||
|
logger.info(f"Successfully sent custom notification to {url}")
|
||||||
|
return True
|
||||||
|
|
||||||
|
except requests.RequestException as e:
|
||||||
|
logger.error(f"Remote host error while sending custom notification to {url}: {e}")
|
||||||
|
return False
|
||||||
|
|
||||||
|
except Exception as e:
|
||||||
|
logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}")
|
||||||
|
return False
|
||||||
@@ -1,48 +1,17 @@
|
|||||||
|
|
||||||
import time
|
import time
|
||||||
from apprise import NotifyFormat
|
|
||||||
import apprise
|
import apprise
|
||||||
from loguru import logger
|
from loguru import logger
|
||||||
|
|
||||||
|
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
|
||||||
valid_tokens = {
|
|
||||||
'base_url': '',
|
|
||||||
'current_snapshot': '',
|
|
||||||
'diff': '',
|
|
||||||
'diff_added': '',
|
|
||||||
'diff_full': '',
|
|
||||||
'diff_patch': '',
|
|
||||||
'diff_removed': '',
|
|
||||||
'diff_url': '',
|
|
||||||
'preview_url': '',
|
|
||||||
'triggered_text': '',
|
|
||||||
'watch_tag': '',
|
|
||||||
'watch_title': '',
|
|
||||||
'watch_url': '',
|
|
||||||
'watch_uuid': '',
|
|
||||||
}
|
|
||||||
|
|
||||||
default_notification_format_for_watch = 'System default'
|
|
||||||
default_notification_format = 'HTML Color'
|
|
||||||
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
|
|
||||||
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
|
|
||||||
|
|
||||||
valid_notification_formats = {
|
|
||||||
'Text': NotifyFormat.TEXT,
|
|
||||||
'Markdown': NotifyFormat.MARKDOWN,
|
|
||||||
'HTML': NotifyFormat.HTML,
|
|
||||||
'HTML Color': 'htmlcolor',
|
|
||||||
# Used only for editing a watch (not for global)
|
|
||||||
default_notification_format_for_watch: default_notification_format_for_watch
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
def process_notification(n_object, datastore):
|
def process_notification(n_object, datastore):
|
||||||
# so that the custom endpoints are registered
|
from changedetectionio.safe_jinja import render as jinja_render
|
||||||
from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper
|
from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats
|
||||||
|
# be sure its registered
|
||||||
|
from .apprise_plugin.custom_handlers import apprise_http_custom_handler
|
||||||
|
|
||||||
from .safe_jinja import render as jinja_render
|
|
||||||
now = time.time()
|
now = time.time()
|
||||||
if n_object.get('notification_timestamp'):
|
if n_object.get('notification_timestamp'):
|
||||||
logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s")
|
logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s")
|
||||||
@@ -59,15 +28,18 @@ def process_notification(n_object, datastore):
|
|||||||
# Initially text or whatever
|
# Initially text or whatever
|
||||||
n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format])
|
n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format])
|
||||||
|
|
||||||
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.3f}s")
|
logger.trace(f"Complete notification body including Jinja and placeholders calculated in {time.time() - now:.2f}s")
|
||||||
|
|
||||||
# https://github.com/caronc/apprise/wiki/Development_LogCapture
|
# https://github.com/caronc/apprise/wiki/Development_LogCapture
|
||||||
# Anything higher than or equal to WARNING (which covers things like Connection errors)
|
# Anything higher than or equal to WARNING (which covers things like Connection errors)
|
||||||
# raise it as an exception
|
# raise it as an exception
|
||||||
|
|
||||||
sent_objs = []
|
sent_objs = []
|
||||||
from .apprise_asset import asset
|
|
||||||
apobj = apprise.Apprise(debug=True, asset=asset)
|
if 'as_async' in n_object:
|
||||||
|
apprise_asset.async_mode = n_object.get('as_async')
|
||||||
|
|
||||||
|
apobj = apprise.Apprise(debug=True, asset=apprise_asset)
|
||||||
|
|
||||||
if not n_object.get('notification_urls'):
|
if not n_object.get('notification_urls'):
|
||||||
return None
|
return None
|
||||||
@@ -108,7 +80,7 @@ def process_notification(n_object, datastore):
|
|||||||
and not url.startswith('get') \
|
and not url.startswith('get') \
|
||||||
and not url.startswith('delete') \
|
and not url.startswith('delete') \
|
||||||
and not url.startswith('put'):
|
and not url.startswith('put'):
|
||||||
url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
|
url += k + f"avatar_url={APPRISE_AVATAR_URL}"
|
||||||
|
|
||||||
if url.startswith('tgram://'):
|
if url.startswith('tgram://'):
|
||||||
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
|
# Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
|
||||||
@@ -157,8 +129,6 @@ def process_notification(n_object, datastore):
|
|||||||
attach=n_object.get('screenshot', None)
|
attach=n_object.get('screenshot', None)
|
||||||
)
|
)
|
||||||
|
|
||||||
# Give apprise time to register an error
|
|
||||||
time.sleep(3)
|
|
||||||
|
|
||||||
# Returns empty string if nothing found, multi-line string otherwise
|
# Returns empty string if nothing found, multi-line string otherwise
|
||||||
log_value = logs.getvalue()
|
log_value = logs.getvalue()
|
||||||
@@ -175,6 +145,7 @@ def process_notification(n_object, datastore):
|
|||||||
# ( Where we prepare the tokens in the notification to be replaced with actual values )
|
# ( Where we prepare the tokens in the notification to be replaced with actual values )
|
||||||
def create_notification_parameters(n_object, datastore):
|
def create_notification_parameters(n_object, datastore):
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
from . import valid_tokens
|
||||||
|
|
||||||
# in the case we send a test notification from the main settings, there is no UUID.
|
# in the case we send a test notification from the main settings, there is no UUID.
|
||||||
uuid = n_object['uuid'] if 'uuid' in n_object else ''
|
uuid = n_object['uuid'] if 'uuid' in n_object else ''
|
||||||
82
changedetectionio/pluggy_interface.py
Normal file
@@ -0,0 +1,82 @@
|
|||||||
|
import pluggy
|
||||||
|
import os
|
||||||
|
import importlib
|
||||||
|
import sys
|
||||||
|
|
||||||
|
# Global plugin namespace for changedetection.io
|
||||||
|
PLUGIN_NAMESPACE = "changedetectionio"
|
||||||
|
|
||||||
|
hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE)
|
||||||
|
hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE)
|
||||||
|
|
||||||
|
|
||||||
|
class ChangeDetectionSpec:
|
||||||
|
"""Hook specifications for extending changedetection.io functionality."""
|
||||||
|
|
||||||
|
@hookspec
|
||||||
|
def ui_edit_stats_extras(watch):
|
||||||
|
"""Return HTML content to add to the stats tab in the edit view.
|
||||||
|
|
||||||
|
Args:
|
||||||
|
watch: The watch object being edited
|
||||||
|
|
||||||
|
Returns:
|
||||||
|
str: HTML content to be inserted in the stats tab
|
||||||
|
"""
|
||||||
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
# Set up Plugin Manager
|
||||||
|
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||||
|
|
||||||
|
# Register hookspecs
|
||||||
|
plugin_manager.add_hookspecs(ChangeDetectionSpec)
|
||||||
|
|
||||||
|
# Load plugins from subdirectories
|
||||||
|
def load_plugins_from_directories():
|
||||||
|
# Dictionary of directories to scan for plugins
|
||||||
|
plugin_dirs = {
|
||||||
|
'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
|
||||||
|
# Add more plugin directories here as needed
|
||||||
|
}
|
||||||
|
|
||||||
|
# Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory
|
||||||
|
|
||||||
|
for dir_name, dir_path in plugin_dirs.items():
|
||||||
|
if not os.path.exists(dir_path):
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Get all Python files (excluding __init__.py)
|
||||||
|
for filename in os.listdir(dir_path):
|
||||||
|
if filename.endswith(".py") and filename != "__init__.py":
|
||||||
|
module_name = filename[:-3] # Remove .py extension
|
||||||
|
module_path = f"changedetectionio.{dir_name}.plugins.{module_name}"
|
||||||
|
|
||||||
|
try:
|
||||||
|
module = importlib.import_module(module_path)
|
||||||
|
# Register the plugin with pluggy
|
||||||
|
plugin_manager.register(module, module_name)
|
||||||
|
except (ImportError, AttributeError) as e:
|
||||||
|
print(f"Error loading plugin {module_name}: {e}")
|
||||||
|
|
||||||
|
# Load plugins
|
||||||
|
load_plugins_from_directories()
|
||||||
|
|
||||||
|
# Discover installed plugins from external packages (if any)
|
||||||
|
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
|
||||||
|
|
||||||
|
# Helper function to collect UI stats extras from all plugins
|
||||||
|
def collect_ui_edit_stats_extras(watch):
|
||||||
|
"""Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
|
||||||
|
extras_content = []
|
||||||
|
|
||||||
|
# Get all plugins that implement the ui_edit_stats_extras hook
|
||||||
|
results = plugin_manager.hook.ui_edit_stats_extras(watch=watch)
|
||||||
|
|
||||||
|
# If we have results, add them to our content
|
||||||
|
if results:
|
||||||
|
for result in results:
|
||||||
|
if result: # Skip empty results
|
||||||
|
extras_content.append(result)
|
||||||
|
|
||||||
|
return "\n".join(extras_content) if extras_content else ""
|
||||||
@@ -33,8 +33,8 @@ class difference_detection_processor():
|
|||||||
|
|
||||||
url = self.watch.link
|
url = self.watch.link
|
||||||
|
|
||||||
# Protect against file://, file:/ access, check the real "link" without any meta "source:" etc prepended.
|
# Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended.
|
||||||
if re.search(r'^file:/', url.strip(), re.IGNORECASE):
|
if re.search(r'^file:', url.strip(), re.IGNORECASE):
|
||||||
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"file:// type access is denied for security reasons."
|
"file:// type access is denied for security reasons."
|
||||||
@@ -159,7 +159,7 @@ class difference_detection_processor():
|
|||||||
)
|
)
|
||||||
|
|
||||||
#@todo .quit here could go on close object, so we can run JS if change-detected
|
#@todo .quit here could go on close object, so we can run JS if change-detected
|
||||||
self.fetcher.quit()
|
self.fetcher.quit(watch=self.watch)
|
||||||
|
|
||||||
# After init, call run_changedetection() which will do the actual change-detection
|
# After init, call run_changedetection() which will do the actual change-detection
|
||||||
|
|
||||||
|
|||||||
@@ -28,13 +28,13 @@ def _task(watch, update_handler):
|
|||||||
return text_after_filter
|
return text_after_filter
|
||||||
|
|
||||||
|
|
||||||
def prepare_filter_prevew(datastore, watch_uuid):
|
def prepare_filter_prevew(datastore, watch_uuid, form_data):
|
||||||
'''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
|
'''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
|
||||||
from changedetectionio import forms, html_tools
|
from changedetectionio import forms, html_tools
|
||||||
from changedetectionio.model.Watch import model as watch_model
|
from changedetectionio.model.Watch import model as watch_model
|
||||||
from concurrent.futures import ProcessPoolExecutor
|
from concurrent.futures import ProcessPoolExecutor
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from flask import request, jsonify
|
from flask import request
|
||||||
import brotli
|
import brotli
|
||||||
import importlib
|
import importlib
|
||||||
import os
|
import os
|
||||||
@@ -50,12 +50,12 @@ def prepare_filter_prevew(datastore, watch_uuid):
|
|||||||
|
|
||||||
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
|
if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir):
|
||||||
# Splice in the temporary stuff from the form
|
# Splice in the temporary stuff from the form
|
||||||
form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None,
|
form = forms.processor_text_json_diff_form(formdata=form_data if request.method == 'POST' else None,
|
||||||
data=request.form
|
data=form_data
|
||||||
)
|
)
|
||||||
|
|
||||||
# Only update vars that came in via the AJAX post
|
# Only update vars that came in via the AJAX post
|
||||||
p = {k: v for k, v in form.data.items() if k in request.form.keys()}
|
p = {k: v for k, v in form.data.items() if k in form_data.keys()}
|
||||||
tmp_watch.update(p)
|
tmp_watch.update(p)
|
||||||
blank_watch_no_filters = watch_model()
|
blank_watch_no_filters = watch_model()
|
||||||
blank_watch_no_filters['url'] = tmp_watch.get('url')
|
blank_watch_no_filters['url'] = tmp_watch.get('url')
|
||||||
@@ -103,13 +103,12 @@ def prepare_filter_prevew(datastore, watch_uuid):
|
|||||||
|
|
||||||
logger.trace(f"Parsed in {time.time() - now:.3f}s")
|
logger.trace(f"Parsed in {time.time() - now:.3f}s")
|
||||||
|
|
||||||
return jsonify(
|
return ({
|
||||||
{
|
|
||||||
'after_filter': text_after_filter,
|
'after_filter': text_after_filter,
|
||||||
'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
|
'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter,
|
||||||
'duration': time.time() - now,
|
'duration': time.time() - now,
|
||||||
'trigger_line_numbers': trigger_line_numbers,
|
'trigger_line_numbers': trigger_line_numbers,
|
||||||
'ignore_line_numbers': ignore_line_numbers,
|
'ignore_line_numbers': ignore_line_numbers,
|
||||||
}
|
})
|
||||||
)
|
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ import os
|
|||||||
import re
|
import re
|
||||||
import urllib3
|
import urllib3
|
||||||
|
|
||||||
|
from changedetectionio.conditions import execute_ruleset_against_all_plugins
|
||||||
from changedetectionio.processors import difference_detection_processor
|
from changedetectionio.processors import difference_detection_processor
|
||||||
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
|
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
|
||||||
from changedetectionio import html_tools, content_fetchers
|
from changedetectionio import html_tools, content_fetchers
|
||||||
@@ -251,6 +252,7 @@ class perform_site_check(difference_detection_processor):
|
|||||||
|
|
||||||
# 615 Extract text by regex
|
# 615 Extract text by regex
|
||||||
extract_text = watch.get('extract_text', [])
|
extract_text = watch.get('extract_text', [])
|
||||||
|
extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text')
|
||||||
if len(extract_text) > 0:
|
if len(extract_text) > 0:
|
||||||
regex_matched_output = []
|
regex_matched_output = []
|
||||||
for s_re in extract_text:
|
for s_re in extract_text:
|
||||||
@@ -295,6 +297,8 @@ class perform_site_check(difference_detection_processor):
|
|||||||
### CALCULATE MD5
|
### CALCULATE MD5
|
||||||
# If there's text to ignore
|
# If there's text to ignore
|
||||||
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
|
text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
|
||||||
|
text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text')
|
||||||
|
|
||||||
text_for_checksuming = stripped_text_from_html
|
text_for_checksuming = stripped_text_from_html
|
||||||
if text_to_ignore:
|
if text_to_ignore:
|
||||||
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
|
text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
|
||||||
@@ -307,8 +311,8 @@ class perform_site_check(difference_detection_processor):
|
|||||||
|
|
||||||
############ Blocking rules, after checksum #################
|
############ Blocking rules, after checksum #################
|
||||||
blocked = False
|
blocked = False
|
||||||
|
|
||||||
trigger_text = watch.get('trigger_text', [])
|
trigger_text = watch.get('trigger_text', [])
|
||||||
|
trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text')
|
||||||
if len(trigger_text):
|
if len(trigger_text):
|
||||||
# Assume blocked
|
# Assume blocked
|
||||||
blocked = True
|
blocked = True
|
||||||
@@ -323,6 +327,7 @@ class perform_site_check(difference_detection_processor):
|
|||||||
blocked = False
|
blocked = False
|
||||||
|
|
||||||
text_should_not_be_present = watch.get('text_should_not_be_present', [])
|
text_should_not_be_present = watch.get('text_should_not_be_present', [])
|
||||||
|
text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present')
|
||||||
if len(text_should_not_be_present):
|
if len(text_should_not_be_present):
|
||||||
# If anything matched, then we should block a change from happening
|
# If anything matched, then we should block a change from happening
|
||||||
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
||||||
@@ -331,6 +336,18 @@ class perform_site_check(difference_detection_processor):
|
|||||||
if result:
|
if result:
|
||||||
blocked = True
|
blocked = True
|
||||||
|
|
||||||
|
# And check if 'conditions' will let this pass through
|
||||||
|
if watch.get('conditions') and watch.get('conditions_match_logic'):
|
||||||
|
conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
|
||||||
|
application_datastruct=self.datastore.data,
|
||||||
|
ephemeral_data={
|
||||||
|
'text': stripped_text_from_html
|
||||||
|
}
|
||||||
|
)
|
||||||
|
|
||||||
|
if not conditions_result.get('result'):
|
||||||
|
# Conditions say "Condition not met" so we block it.
|
||||||
|
blocked = True
|
||||||
|
|
||||||
# Looks like something changed, but did it match all the rules?
|
# Looks like something changed, but did it match all the rules?
|
||||||
if blocked:
|
if blocked:
|
||||||
|
|||||||
@@ -14,7 +14,8 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
|||||||
find tests/test_*py -type f|while read test_name
|
find tests/test_*py -type f|while read test_name
|
||||||
do
|
do
|
||||||
echo "TEST RUNNING $test_name"
|
echo "TEST RUNNING $test_name"
|
||||||
pytest $test_name
|
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
|
||||||
|
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name
|
||||||
done
|
done
|
||||||
|
|
||||||
echo "RUNNING WITH BASE_URL SET"
|
echo "RUNNING WITH BASE_URL SET"
|
||||||
@@ -22,7 +23,7 @@ echo "RUNNING WITH BASE_URL SET"
|
|||||||
# Now re-run some tests with BASE_URL enabled
|
# Now re-run some tests with BASE_URL enabled
|
||||||
# Re #65 - Ability to include a link back to the installation, in the notification.
|
# Re #65 - Ability to include a link back to the installation, in the notification.
|
||||||
export BASE_URL="https://really-unique-domain.io"
|
export BASE_URL="https://really-unique-domain.io"
|
||||||
pytest tests/test_notification.py
|
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
|
||||||
|
|
||||||
|
|
||||||
# Re-run with HIDE_REFERER set - could affect login
|
# Re-run with HIDE_REFERER set - could affect login
|
||||||
@@ -32,7 +33,7 @@ pytest tests/test_access_control.py
|
|||||||
# Re-run a few tests that will trigger brotli based storage
|
# Re-run a few tests that will trigger brotli based storage
|
||||||
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
|
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
|
||||||
pytest tests/test_access_control.py
|
pytest tests/test_access_control.py
|
||||||
pytest tests/test_notification.py
|
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
|
||||||
pytest tests/test_backend.py
|
pytest tests/test_backend.py
|
||||||
pytest tests/test_rss.py
|
pytest tests/test_rss.py
|
||||||
pytest tests/test_unique_lines.py
|
pytest tests/test_unique_lines.py
|
||||||
|
|||||||
@@ -82,3 +82,26 @@ done
|
|||||||
|
|
||||||
|
|
||||||
docker kill squid-one squid-two squid-custom
|
docker kill squid-one squid-two squid-custom
|
||||||
|
|
||||||
|
# Test that the UI is returning the correct error message when a proxy is not available
|
||||||
|
|
||||||
|
# Requests
|
||||||
|
docker run --network changedet-network \
|
||||||
|
test-changedetectionio \
|
||||||
|
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py'
|
||||||
|
|
||||||
|
# Playwright
|
||||||
|
docker run --network changedet-network \
|
||||||
|
test-changedetectionio \
|
||||||
|
bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
|
||||||
|
|
||||||
|
# Puppeteer fast
|
||||||
|
docker run --network changedet-network \
|
||||||
|
test-changedetectionio \
|
||||||
|
bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py'
|
||||||
|
|
||||||
|
# Selenium - todo - fix proxies
|
||||||
|
docker run --network changedet-network \
|
||||||
|
-e "WEBDRIVER_URL=http://selenium:4444/wd/hub" \
|
||||||
|
test-changedetectionio \
|
||||||
|
bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py'
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
<svg
|
<svg
|
||||||
version="1.1"
|
version="1.1"
|
||||||
id="Layer_1"
|
id="copy"
|
||||||
x="0px"
|
x="0px"
|
||||||
y="0px"
|
y="0px"
|
||||||
viewBox="0 0 115.77 122.88"
|
viewBox="0 0 115.77 122.88"
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 2.5 KiB After Width: | Height: | Size: 2.5 KiB |
@@ -6,7 +6,7 @@
|
|||||||
height="7.5005589"
|
height="7.5005589"
|
||||||
width="11.248507"
|
width="11.248507"
|
||||||
version="1.1"
|
version="1.1"
|
||||||
id="Layer_1"
|
id="email"
|
||||||
viewBox="0 0 7.1975545 4.7993639"
|
viewBox="0 0 7.1975545 4.7993639"
|
||||||
xml:space="preserve"
|
xml:space="preserve"
|
||||||
xmlns="http://www.w3.org/2000/svg"
|
xmlns="http://www.w3.org/2000/svg"
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 1.9 KiB After Width: | Height: | Size: 1.9 KiB |
|
Before Width: | Height: | Size: 569 B After Width: | Height: | Size: 569 B |
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 6.2 KiB After Width: | Height: | Size: 6.2 KiB |
@@ -1,7 +1,7 @@
|
|||||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
|
||||||
<svg
|
<svg
|
||||||
version="1.1"
|
version="1.1"
|
||||||
id="Layer_1"
|
id="schedule"
|
||||||
x="0px"
|
x="0px"
|
||||||
y="0px"
|
y="0px"
|
||||||
viewBox="0 0 661.20001 665.40002"
|
viewBox="0 0 661.20001 665.40002"
|
||||||
|
|||||||
|
Before Width: | Height: | Size: 5.9 KiB After Width: | Height: | Size: 5.9 KiB |
@@ -211,7 +211,14 @@ $(document).ready(function () {
|
|||||||
$('input[type=text]', first_available).first().val(x['xpath']);
|
$('input[type=text]', first_available).first().val(x['xpath']);
|
||||||
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
|
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
|
||||||
found_something = true;
|
found_something = true;
|
||||||
} else {
|
}
|
||||||
|
else if (x['tagName'] === 'select') {
|
||||||
|
$('select', first_available).val('<select> by option text').change();
|
||||||
|
$('input[type=text]', first_available).first().val(x['xpath']);
|
||||||
|
$('input[placeholder="Value"]', first_available).addClass('ok').click().focus();
|
||||||
|
found_something = true;
|
||||||
|
}
|
||||||
|
else {
|
||||||
// There's no good way (that I know) to find if this
|
// There's no good way (that I know) to find if this
|
||||||
// see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging
|
// see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging
|
||||||
// https://codepen.io/azaslavsky/pen/DEJVWv
|
// https://codepen.io/azaslavsky/pen/DEJVWv
|
||||||
@@ -221,7 +228,7 @@ $(document).ready(function () {
|
|||||||
// If you switch to "Click X,y" after an element here is setup, it will give the last co-ords anyway
|
// If you switch to "Click X,y" after an element here is setup, it will give the last co-ords anyway
|
||||||
//if (x['isClickable'] || x['tagName'].startsWith('h') || x['tagName'] === 'a' || x['tagName'] === 'button' || x['tagtype'] === 'submit' || x['tagtype'] === 'checkbox' || x['tagtype'] === 'radio' || x['tagtype'] === 'li') {
|
//if (x['isClickable'] || x['tagName'].startsWith('h') || x['tagName'] === 'a' || x['tagName'] === 'button' || x['tagtype'] === 'submit' || x['tagtype'] === 'checkbox' || x['tagtype'] === 'radio' || x['tagtype'] === 'li') {
|
||||||
$('select', first_available).val('Click element').change();
|
$('select', first_available).val('Click element').change();
|
||||||
$('input[type=text]', first_available).first().val(x['xpath']);
|
$('input[type=text]', first_available).first().val(x['xpath']).focus();
|
||||||
found_something = true;
|
found_something = true;
|
||||||
//}
|
//}
|
||||||
}
|
}
|
||||||
@@ -251,6 +258,10 @@ $(document).ready(function () {
|
|||||||
400: function () {
|
400: function () {
|
||||||
// More than likely the CSRF token was lost when the server restarted
|
// More than likely the CSRF token was lost when the server restarted
|
||||||
alert("There was a problem processing the request, please reload the page.");
|
alert("There was a problem processing the request, please reload the page.");
|
||||||
|
},
|
||||||
|
401: function (err) {
|
||||||
|
// This will be a custom error
|
||||||
|
alert(err.responseText);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}).done(function (data) {
|
}).done(function (data) {
|
||||||
@@ -305,7 +316,7 @@ $(document).ready(function () {
|
|||||||
|
|
||||||
if ($(this).val() === 'Click X,Y' && last_click_xy['x'] > 0 && $(elem_value).val().length === 0) {
|
if ($(this).val() === 'Click X,Y' && last_click_xy['x'] > 0 && $(elem_value).val().length === 0) {
|
||||||
// @todo handle scale
|
// @todo handle scale
|
||||||
$(elem_value).val(last_click_xy['x'] + ',' + last_click_xy['y']);
|
$(elem_value).val(last_click_xy['x'] + ',' + last_click_xy['y']).focus();
|
||||||
}
|
}
|
||||||
}).change();
|
}).change();
|
||||||
|
|
||||||
|
|||||||
154
changedetectionio/static/js/conditions.js
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
$(document).ready(function () {
|
||||||
|
// Function to set up button event handlers
|
||||||
|
function setupButtonHandlers() {
|
||||||
|
// Unbind existing handlers first to prevent duplicates
|
||||||
|
$(".addRuleRow, .removeRuleRow, .verifyRuleRow").off("click");
|
||||||
|
|
||||||
|
// Add row button handler
|
||||||
|
$(".addRuleRow").on("click", function(e) {
|
||||||
|
e.preventDefault();
|
||||||
|
|
||||||
|
let currentRow = $(this).closest(".fieldlist-row");
|
||||||
|
|
||||||
|
// Clone without events
|
||||||
|
let newRow = currentRow.clone(false);
|
||||||
|
|
||||||
|
// Reset input values in the cloned row
|
||||||
|
newRow.find("input").val("");
|
||||||
|
newRow.find("select").prop("selectedIndex", 0);
|
||||||
|
|
||||||
|
// Insert the new row after the current one
|
||||||
|
currentRow.after(newRow);
|
||||||
|
|
||||||
|
// Reindex all rows
|
||||||
|
reindexRules();
|
||||||
|
});
|
||||||
|
|
||||||
|
// Remove row button handler
|
||||||
|
$(".removeRuleRow").on("click", function(e) {
|
||||||
|
e.preventDefault();
|
||||||
|
|
||||||
|
// Only remove if there's more than one row
|
||||||
|
if ($("#rulesTable .fieldlist-row").length > 1) {
|
||||||
|
$(this).closest(".fieldlist-row").remove();
|
||||||
|
reindexRules();
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Verify rule button handler
|
||||||
|
$(".verifyRuleRow").on("click", function(e) {
|
||||||
|
e.preventDefault();
|
||||||
|
|
||||||
|
let row = $(this).closest(".fieldlist-row");
|
||||||
|
let field = row.find("select[name$='field']").val();
|
||||||
|
let operator = row.find("select[name$='operator']").val();
|
||||||
|
let value = row.find("input[name$='value']").val();
|
||||||
|
|
||||||
|
// Validate that all fields are filled
|
||||||
|
if (!field || field === "None" || !operator || operator === "None" || !value) {
|
||||||
|
alert("Please fill in all fields (Field, Operator, and Value) before verifying.");
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Create a rule object
|
||||||
|
let rule = {
|
||||||
|
field: field,
|
||||||
|
operator: operator,
|
||||||
|
value: value
|
||||||
|
};
|
||||||
|
|
||||||
|
// Show a spinner or some indication that verification is in progress
|
||||||
|
const $button = $(this);
|
||||||
|
const originalHTML = $button.html();
|
||||||
|
$button.html("⌛").prop("disabled", true);
|
||||||
|
|
||||||
|
// Collect form data - similar to request_textpreview_update() in watch-settings.js
|
||||||
|
let formData = new FormData();
|
||||||
|
$('#edit-text-filter textarea, #edit-text-filter input').each(function() {
|
||||||
|
const $element = $(this);
|
||||||
|
const name = $element.attr('name');
|
||||||
|
if (name) {
|
||||||
|
if ($element.is(':checkbox')) {
|
||||||
|
formData.append(name, $element.is(':checked') ? $element.val() : false);
|
||||||
|
} else {
|
||||||
|
formData.append(name, $element.val());
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
// Also collect select values
|
||||||
|
$('#edit-text-filter select').each(function() {
|
||||||
|
const $element = $(this);
|
||||||
|
const name = $element.attr('name');
|
||||||
|
if (name) {
|
||||||
|
formData.append(name, $element.val());
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
// Send the request to verify the rule
|
||||||
|
$.ajax({
|
||||||
|
url: verify_condition_rule_url+"?"+ new URLSearchParams({ rule: JSON.stringify(rule) }).toString(),
|
||||||
|
type: "POST",
|
||||||
|
data: formData,
|
||||||
|
processData: false, // Prevent jQuery from converting FormData to a string
|
||||||
|
contentType: false, // Let the browser set the correct content type
|
||||||
|
success: function (response) {
|
||||||
|
if (response.status === "success") {
|
||||||
|
if(rule['field'] !== "page_filtered_text") {
|
||||||
|
// A little debug helper for the user
|
||||||
|
$('#verify-state-text').text(`${rule['field']} was value "${response.data[rule['field']]}"`)
|
||||||
|
}
|
||||||
|
if (response.result) {
|
||||||
|
alert("✅ Condition PASSES verification against current snapshot!");
|
||||||
|
} else {
|
||||||
|
alert("❌ Condition FAILS verification against current snapshot.");
|
||||||
|
}
|
||||||
|
} else {
|
||||||
|
alert("Error: " + response.message);
|
||||||
|
}
|
||||||
|
$button.html(originalHTML).prop("disabled", false);
|
||||||
|
},
|
||||||
|
error: function (xhr) {
|
||||||
|
let errorMsg = "Error verifying condition.";
|
||||||
|
if (xhr.responseJSON && xhr.responseJSON.message) {
|
||||||
|
errorMsg = xhr.responseJSON.message;
|
||||||
|
}
|
||||||
|
alert(errorMsg);
|
||||||
|
$button.html(originalHTML).prop("disabled", false);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
// Function to reindex form elements and re-setup event handlers
|
||||||
|
function reindexRules() {
|
||||||
|
// Unbind all button handlers first
|
||||||
|
$(".addRuleRow, .removeRuleRow, .verifyRuleRow").off("click");
|
||||||
|
|
||||||
|
// Reindex all form elements
|
||||||
|
$("#rulesTable .fieldlist-row").each(function(index) {
|
||||||
|
$(this).find("select, input").each(function() {
|
||||||
|
let oldName = $(this).attr("name");
|
||||||
|
let oldId = $(this).attr("id");
|
||||||
|
|
||||||
|
if (oldName) {
|
||||||
|
let newName = oldName.replace(/\d+/, index);
|
||||||
|
$(this).attr("name", newName);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (oldId) {
|
||||||
|
let newId = oldId.replace(/\d+/, index);
|
||||||
|
$(this).attr("id", newId);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
});
|
||||||
|
|
||||||
|
// Reattach event handlers after reindexing
|
||||||
|
setupButtonHandlers();
|
||||||
|
}
|
||||||
|
|
||||||
|
// Initial setup of button handlers
|
||||||
|
setupButtonHandlers();
|
||||||
|
});
|
||||||
@@ -1,42 +1,52 @@
|
|||||||
$(document).ready(function() {
|
$(document).ready(function () {
|
||||||
|
|
||||||
$('#add-email-helper').click(function (e) {
|
$('#add-email-helper').click(function (e) {
|
||||||
e.preventDefault();
|
e.preventDefault();
|
||||||
email = prompt("Destination email");
|
email = prompt("Destination email");
|
||||||
if(email) {
|
if (email) {
|
||||||
var n = $(".notification-urls");
|
var n = $(".notification-urls");
|
||||||
var p=email_notification_prefix;
|
var p = email_notification_prefix;
|
||||||
$(n).val( $.trim( $(n).val() )+"\n"+email_notification_prefix+email );
|
$(n).val($.trim($(n).val()) + "\n" + email_notification_prefix + email);
|
||||||
}
|
|
||||||
});
|
|
||||||
|
|
||||||
$('#send-test-notification').click(function (e) {
|
|
||||||
e.preventDefault();
|
|
||||||
|
|
||||||
data = {
|
|
||||||
notification_body: $('#notification_body').val(),
|
|
||||||
notification_format: $('#notification_format').val(),
|
|
||||||
notification_title: $('#notification_title').val(),
|
|
||||||
notification_urls: $('.notification-urls').val(),
|
|
||||||
tags: $('#tags').val(),
|
|
||||||
window_url: window.location.href,
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
$.ajax({
|
|
||||||
type: "POST",
|
|
||||||
url: notification_base_url,
|
|
||||||
data : data,
|
|
||||||
statusCode: {
|
|
||||||
400: function(data) {
|
|
||||||
// More than likely the CSRF token was lost when the server restarted
|
|
||||||
alert(data.responseText);
|
|
||||||
}
|
}
|
||||||
}
|
});
|
||||||
}).done(function(data){
|
|
||||||
console.log(data);
|
$('#send-test-notification').click(function (e) {
|
||||||
alert(data);
|
e.preventDefault();
|
||||||
})
|
|
||||||
});
|
data = {
|
||||||
|
notification_body: $('#notification_body').val(),
|
||||||
|
notification_format: $('#notification_format').val(),
|
||||||
|
notification_title: $('#notification_title').val(),
|
||||||
|
notification_urls: $('.notification-urls').val(),
|
||||||
|
tags: $('#tags').val(),
|
||||||
|
window_url: window.location.href,
|
||||||
|
}
|
||||||
|
|
||||||
|
$('.notifications-wrapper .spinner').fadeIn();
|
||||||
|
$('#notification-test-log').show();
|
||||||
|
$.ajax({
|
||||||
|
type: "POST",
|
||||||
|
url: notification_base_url,
|
||||||
|
data: data,
|
||||||
|
statusCode: {
|
||||||
|
400: function (data) {
|
||||||
|
$("#notification-test-log>span").text(data.responseText);
|
||||||
|
},
|
||||||
|
}
|
||||||
|
}).done(function (data) {
|
||||||
|
$("#notification-test-log>span").text(data);
|
||||||
|
}).fail(function (jqXHR, textStatus, errorThrown) {
|
||||||
|
// Handle connection refused or other errors
|
||||||
|
if (textStatus === "error" && errorThrown === "") {
|
||||||
|
console.error("Connection refused or server unreachable");
|
||||||
|
$("#notification-test-log>span").text("Error: Connection refused or server is unreachable.");
|
||||||
|
} else {
|
||||||
|
console.error("Error:", textStatus, errorThrown);
|
||||||
|
$("#notification-test-log>span").text("An error occurred: " + textStatus);
|
||||||
|
}
|
||||||
|
}).always(function () {
|
||||||
|
$('.notifications-wrapper .spinner').hide();
|
||||||
|
})
|
||||||
|
});
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -26,7 +26,6 @@ function set_active_tab() {
|
|||||||
if (tab.length) {
|
if (tab.length) {
|
||||||
tab[0].parentElement.className = "active";
|
tab[0].parentElement.className = "active";
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function focus_error_tab() {
|
function focus_error_tab() {
|
||||||
|
|||||||
@@ -48,6 +48,8 @@ $(function () {
|
|||||||
$('input[type=checkbox]').not(this).prop('checked', this.checked);
|
$('input[type=checkbox]').not(this).prop('checked', this.checked);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
const time_check_step_size_seconds=1;
|
||||||
|
|
||||||
// checkboxes - show/hide buttons
|
// checkboxes - show/hide buttons
|
||||||
$("input[type=checkbox]").click(function (e) {
|
$("input[type=checkbox]").click(function (e) {
|
||||||
if ($('input[type=checkbox]:checked').length) {
|
if ($('input[type=checkbox]:checked').length) {
|
||||||
@@ -57,5 +59,30 @@ $(function () {
|
|||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
setInterval(function () {
|
||||||
|
// Background ETA completion for 'checking now'
|
||||||
|
$(".watch-table .checking-now .last-checked").each(function () {
|
||||||
|
const eta_complete = parseFloat($(this).data('eta_complete'));
|
||||||
|
const fetch_duration = parseInt($(this).data('fetchduration'));
|
||||||
|
|
||||||
|
if (eta_complete + 2 > nowtimeserver && fetch_duration > 3) {
|
||||||
|
const remaining_seconds = Math.abs(eta_complete) - nowtimeserver - 1;
|
||||||
|
|
||||||
|
let r = (1.0 - (remaining_seconds / fetch_duration)) * 100;
|
||||||
|
if (r < 10) {
|
||||||
|
r = 10;
|
||||||
|
}
|
||||||
|
if (r >= 90) {
|
||||||
|
r = 100;
|
||||||
|
}
|
||||||
|
$(this).css('background-size', `${r}% 100%`);
|
||||||
|
//$(this).text(`${r}% remain ${remaining_seconds}`);
|
||||||
|
} else {
|
||||||
|
$(this).css('background-size', `100% 100%`);
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
nowtimeserver = nowtimeserver + time_check_step_size_seconds;
|
||||||
|
}, time_check_step_size_seconds * 1000);
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|||||||
@@ -40,19 +40,22 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@media only screen and (min-width: 760px) {
|
||||||
|
|
||||||
#browser-steps .flex-wrapper {
|
#browser-steps .flex-wrapper {
|
||||||
display: flex;
|
display: flex;
|
||||||
flex-flow: row;
|
flex-flow: row;
|
||||||
height: 70vh;
|
height: 70vh;
|
||||||
font-size: 80%;
|
font-size: 80%;
|
||||||
#browser-steps-ui {
|
|
||||||
flex-grow: 1; /* Allow it to grow and fill the available space */
|
|
||||||
flex-shrink: 1; /* Allow it to shrink if needed */
|
|
||||||
flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
|
|
||||||
background-color: #eee;
|
|
||||||
border-radius: 5px;
|
|
||||||
|
|
||||||
|
#browser-steps-ui {
|
||||||
|
flex-grow: 1; /* Allow it to grow and fill the available space */
|
||||||
|
flex-shrink: 1; /* Allow it to shrink if needed */
|
||||||
|
flex-basis: 0; /* Start with 0 base width so it stretches as much as possible */
|
||||||
|
background-color: #eee;
|
||||||
|
border-radius: 5px;
|
||||||
|
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#browser-steps-fieldlist {
|
#browser-steps-fieldlist {
|
||||||
@@ -63,15 +66,21 @@
|
|||||||
padding-left: 1rem;
|
padding-left: 1rem;
|
||||||
overflow-y: scroll;
|
overflow-y: scroll;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* this is duplicate :( */
|
||||||
|
#browsersteps-selector-wrapper {
|
||||||
|
height: 100% !important;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
/* this is duplicate :( */
|
/* this is duplicate :( */
|
||||||
#browsersteps-selector-wrapper {
|
#browsersteps-selector-wrapper {
|
||||||
height: 100%;
|
|
||||||
width: 100%;
|
width: 100%;
|
||||||
overflow-y: scroll;
|
overflow-y: scroll;
|
||||||
position: relative;
|
position: relative;
|
||||||
//width: 100%;
|
height: 80vh;
|
||||||
|
|
||||||
> img {
|
> img {
|
||||||
position: absolute;
|
position: absolute;
|
||||||
max-width: 100%;
|
max-width: 100%;
|
||||||
@@ -91,7 +100,6 @@
|
|||||||
left: 50%;
|
left: 50%;
|
||||||
top: 50%;
|
top: 50%;
|
||||||
transform: translate(-50%, -50%);
|
transform: translate(-50%, -50%);
|
||||||
margin-left: -40px;
|
|
||||||
z-index: 100;
|
z-index: 100;
|
||||||
max-width: 350px;
|
max-width: 350px;
|
||||||
text-align: center;
|
text-align: center;
|
||||||
|
|||||||
@@ -0,0 +1,135 @@
|
|||||||
|
/* Styles for the flexbox-based table replacement for conditions */
|
||||||
|
.fieldlist_formfields {
|
||||||
|
width: 100%;
|
||||||
|
background-color: var(--color-background, #fff);
|
||||||
|
border-radius: 4px;
|
||||||
|
border: 1px solid var(--color-border-table-cell, #cbcbcb);
|
||||||
|
|
||||||
|
/* Header row */
|
||||||
|
.fieldlist-header {
|
||||||
|
display: flex;
|
||||||
|
background-color: var(--color-background-table-thead, #e0e0e0);
|
||||||
|
font-weight: bold;
|
||||||
|
border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb);
|
||||||
|
}
|
||||||
|
|
||||||
|
.fieldlist-header-cell {
|
||||||
|
flex: 1;
|
||||||
|
padding: 0.5em 1em;
|
||||||
|
text-align: left;
|
||||||
|
|
||||||
|
&:last-child {
|
||||||
|
flex: 0 0 120px; /* Fixed width for actions column */
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Body rows */
|
||||||
|
.fieldlist-body {
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.fieldlist-row {
|
||||||
|
display: flex;
|
||||||
|
border-bottom: 1px solid var(--color-border-table-cell, #cbcbcb);
|
||||||
|
|
||||||
|
&:last-child {
|
||||||
|
border-bottom: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
&:nth-child(2n-1) {
|
||||||
|
background-color: var(--color-table-stripe, #f2f2f2);
|
||||||
|
}
|
||||||
|
|
||||||
|
&.error-row {
|
||||||
|
background-color: var(--color-error-input, #ffdddd);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
.fieldlist-cell {
|
||||||
|
flex: 1;
|
||||||
|
padding: 0.5em 1em;
|
||||||
|
display: flex;
|
||||||
|
flex-direction: column;
|
||||||
|
justify-content: center;
|
||||||
|
|
||||||
|
/* Make inputs take up full width of their cell */
|
||||||
|
input, select {
|
||||||
|
width: 100%;
|
||||||
|
}
|
||||||
|
|
||||||
|
&.fieldlist-actions {
|
||||||
|
flex: 0 0 120px; /* Fixed width for actions column */
|
||||||
|
display: flex;
|
||||||
|
flex-direction: row;
|
||||||
|
align-items: center;
|
||||||
|
gap: 4px;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Error styling */
|
||||||
|
ul.errors {
|
||||||
|
margin-top: 0.5em;
|
||||||
|
margin-bottom: 0;
|
||||||
|
padding: 0.5em;
|
||||||
|
background-color: var(--color-error-background-snapshot-age, #ffdddd);
|
||||||
|
border-radius: 4px;
|
||||||
|
list-style-position: inside;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Responsive styles */
|
||||||
|
@media only screen and (max-width: 760px) {
|
||||||
|
.fieldlist-header, .fieldlist-row {
|
||||||
|
flex-direction: column;
|
||||||
|
}
|
||||||
|
|
||||||
|
.fieldlist-header-cell {
|
||||||
|
display: none;
|
||||||
|
}
|
||||||
|
|
||||||
|
.fieldlist-row {
|
||||||
|
padding: 0.5em 0;
|
||||||
|
border-bottom: 2px solid var(--color-border-table-cell, #cbcbcb);
|
||||||
|
}
|
||||||
|
|
||||||
|
.fieldlist-cell {
|
||||||
|
padding: 0.25em 0.5em;
|
||||||
|
|
||||||
|
&.fieldlist-actions {
|
||||||
|
flex: 1;
|
||||||
|
justify-content: flex-start;
|
||||||
|
padding-top: 0.5em;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Add some spacing between fields on mobile */
|
||||||
|
.fieldlist-cell:not(:last-child) {
|
||||||
|
margin-bottom: 0.5em;
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Label each cell on mobile view */
|
||||||
|
.fieldlist-cell::before {
|
||||||
|
content: attr(data-label);
|
||||||
|
font-weight: bold;
|
||||||
|
margin-bottom: 0.25em;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Button styling */
|
||||||
|
.fieldlist_formfields {
|
||||||
|
.addRuleRow, .removeRuleRow, .verifyRuleRow {
|
||||||
|
cursor: pointer;
|
||||||
|
border: none;
|
||||||
|
padding: 4px 8px;
|
||||||
|
border-radius: 3px;
|
||||||
|
font-weight: bold;
|
||||||
|
background-color: #aaa;
|
||||||
|
color: var(--color-foreground-text, #fff);
|
||||||
|
|
||||||
|
&:hover {
|
||||||
|
background-color: #999;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
}
|
||||||
9
changedetectionio/static/styles/scss/parts/_edit.scss
Normal file
@@ -0,0 +1,9 @@
|
|||||||
|
ul#conditions_match_logic {
|
||||||
|
list-style: none;
|
||||||
|
input, label, li {
|
||||||
|
display: inline-block;
|
||||||
|
}
|
||||||
|
li {
|
||||||
|
padding-right: 1em;
|
||||||
|
}
|
||||||
|
}
|
||||||
@@ -13,6 +13,8 @@
|
|||||||
@import "parts/_menu";
|
@import "parts/_menu";
|
||||||
@import "parts/_love";
|
@import "parts/_love";
|
||||||
@import "parts/preview_text_filter";
|
@import "parts/preview_text_filter";
|
||||||
|
@import "parts/_edit";
|
||||||
|
@import "parts/_conditions_table";
|
||||||
|
|
||||||
body {
|
body {
|
||||||
color: var(--color-text);
|
color: var(--color-text);
|
||||||
@@ -380,7 +382,15 @@ a.pure-button-selected {
|
|||||||
}
|
}
|
||||||
|
|
||||||
.notifications-wrapper {
|
.notifications-wrapper {
|
||||||
padding: 0.5rem 0 1rem 0;
|
padding-top: 0.5rem;
|
||||||
|
#notification-test-log {
|
||||||
|
padding-top: 1rem;
|
||||||
|
white-space: pre-wrap;
|
||||||
|
word-break: break-word;
|
||||||
|
overflow-wrap: break-word;
|
||||||
|
max-width: 100%;
|
||||||
|
box-sizing: border-box;
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
label {
|
label {
|
||||||
|
|||||||