mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-10 19:46:22 +00:00
Compare commits
68 Commits
proxies-js
...
test-impro
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
b3f9d16e8b | ||
|
|
f066a1c38f | ||
|
|
d0d191a7d1 | ||
|
|
d7482c8d6a | ||
|
|
bcf7417f63 | ||
|
|
df6e835035 | ||
|
|
ab28f20eba | ||
|
|
1174b95ab4 | ||
|
|
a564475325 | ||
|
|
85d8d57997 | ||
|
|
359dcb63e3 | ||
|
|
b043d477dc | ||
|
|
06bcfb28e5 | ||
|
|
ca3b351bae | ||
|
|
b7e0f0a5e4 | ||
|
|
61f0ac2937 | ||
|
|
fca66eb558 | ||
|
|
359fc48fb4 | ||
|
|
d0efeb9770 | ||
|
|
3416532cd6 | ||
|
|
defc7a340e | ||
|
|
c197c062e1 | ||
|
|
77b59809ca | ||
|
|
f90b170e68 | ||
|
|
c93ca1841c | ||
|
|
57f604dff1 | ||
|
|
8499468749 | ||
|
|
7f6a13ea6c | ||
|
|
9874f0cbc7 | ||
|
|
72834a42fd | ||
|
|
724cb17224 | ||
|
|
4eb4b401a1 | ||
|
|
5d40e16c73 | ||
|
|
492bbce6b6 | ||
|
|
0394a56be5 | ||
|
|
7839551d6b | ||
|
|
9c5588c791 | ||
|
|
5a43a350de | ||
|
|
3c31f023ce | ||
|
|
4cbcc59461 | ||
|
|
4be0260381 | ||
|
|
957a3c1c16 | ||
|
|
85897e0bf9 | ||
|
|
63095f70ea | ||
|
|
8d5b0b5576 | ||
|
|
1b077abd93 | ||
|
|
32ea1a8721 | ||
|
|
fff32cef0d | ||
|
|
8fb146f3e4 | ||
|
|
770b0faa45 | ||
|
|
f6faa90340 | ||
|
|
669fd3ae0b | ||
|
|
17d37fb626 | ||
|
|
dfa7fc3a81 | ||
|
|
cd467df97a | ||
|
|
71bc2fed82 | ||
|
|
738fcfe01c | ||
|
|
3ebb2ab9ba | ||
|
|
ac98bc9144 | ||
|
|
3705ce6681 | ||
|
|
f7ea99412f | ||
|
|
d4715e2bc8 | ||
|
|
8567a83c47 | ||
|
|
77fdf59ae3 | ||
|
|
0e194aa4b4 | ||
|
|
2ba55bb477 | ||
|
|
4c759490da | ||
|
|
58a52c1f60 |
31
.github/test/Dockerfile-alpine
vendored
Normal file
31
.github/test/Dockerfile-alpine
vendored
Normal file
@@ -0,0 +1,31 @@
|
|||||||
|
# Taken from https://github.com/linuxserver/docker-changedetection.io/blob/main/Dockerfile
|
||||||
|
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
|
||||||
|
# Some packages wont install via pypi because they dont have a wheel available under this architecture.
|
||||||
|
|
||||||
|
FROM ghcr.io/linuxserver/baseimage-alpine:3.16
|
||||||
|
ENV PYTHONUNBUFFERED=1
|
||||||
|
|
||||||
|
COPY requirements.txt /requirements.txt
|
||||||
|
|
||||||
|
RUN \
|
||||||
|
apk add --update --no-cache --virtual=build-dependencies \
|
||||||
|
cargo \
|
||||||
|
g++ \
|
||||||
|
gcc \
|
||||||
|
libc-dev \
|
||||||
|
libffi-dev \
|
||||||
|
libxslt-dev \
|
||||||
|
make \
|
||||||
|
openssl-dev \
|
||||||
|
py3-wheel \
|
||||||
|
python3-dev \
|
||||||
|
zlib-dev && \
|
||||||
|
apk add --update --no-cache \
|
||||||
|
libxslt \
|
||||||
|
python3 \
|
||||||
|
py3-pip && \
|
||||||
|
echo "**** pip3 install test of changedetection.io ****" && \
|
||||||
|
pip3 install -U pip wheel setuptools && \
|
||||||
|
pip3 install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.16/ -r /requirements.txt && \
|
||||||
|
apk del --purge \
|
||||||
|
build-dependencies
|
||||||
66
.github/workflows/test-container-build.yml
vendored
Normal file
66
.github/workflows/test-container-build.yml
vendored
Normal file
@@ -0,0 +1,66 @@
|
|||||||
|
name: ChangeDetection.io Container Build Test
|
||||||
|
|
||||||
|
# Triggers the workflow on push or pull request events
|
||||||
|
|
||||||
|
# This line doesnt work, even tho it is the documented one
|
||||||
|
#on: [push, pull_request]
|
||||||
|
|
||||||
|
on:
|
||||||
|
push:
|
||||||
|
paths:
|
||||||
|
- requirements.txt
|
||||||
|
- Dockerfile
|
||||||
|
|
||||||
|
pull_request:
|
||||||
|
paths:
|
||||||
|
- requirements.txt
|
||||||
|
- Dockerfile
|
||||||
|
|
||||||
|
# Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
|
||||||
|
# @todo: some kind of path filter for requirements.txt and Dockerfile
|
||||||
|
jobs:
|
||||||
|
test-container-build:
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v2
|
||||||
|
- name: Set up Python 3.9
|
||||||
|
uses: actions/setup-python@v2
|
||||||
|
with:
|
||||||
|
python-version: 3.9
|
||||||
|
|
||||||
|
# Just test that the build works, some libraries won't compile on ARM/rPi etc
|
||||||
|
- name: Set up QEMU
|
||||||
|
uses: docker/setup-qemu-action@v1
|
||||||
|
with:
|
||||||
|
image: tonistiigi/binfmt:latest
|
||||||
|
platforms: all
|
||||||
|
|
||||||
|
- name: Set up Docker Buildx
|
||||||
|
id: buildx
|
||||||
|
uses: docker/setup-buildx-action@v1
|
||||||
|
with:
|
||||||
|
install: true
|
||||||
|
version: latest
|
||||||
|
driver-opts: image=moby/buildkit:master
|
||||||
|
|
||||||
|
# https://github.com/dgtlmoon/changedetection.io/pull/1067
|
||||||
|
# Check we can still build under alpine/musl
|
||||||
|
- name: Test that the docker containers can build (musl via alpine check)
|
||||||
|
id: docker_build_musl
|
||||||
|
uses: docker/build-push-action@v2
|
||||||
|
with:
|
||||||
|
context: ./
|
||||||
|
file: ./.github/test/Dockerfile-alpine
|
||||||
|
platforms: linux/amd64,linux/arm64
|
||||||
|
|
||||||
|
- name: Test that the docker containers can build
|
||||||
|
id: docker_build
|
||||||
|
uses: docker/build-push-action@v2
|
||||||
|
# https://github.com/docker/build-push-action#customizing
|
||||||
|
with:
|
||||||
|
context: ./
|
||||||
|
file: ./Dockerfile
|
||||||
|
platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64,
|
||||||
|
cache-from: type=local,src=/tmp/.buildx-cache
|
||||||
|
cache-to: type=local,dest=/tmp/.buildx-cache
|
||||||
|
|
||||||
12
.github/workflows/test-only.yml
vendored
12
.github/workflows/test-only.yml
vendored
@@ -1,28 +1,25 @@
|
|||||||
name: ChangeDetection.io Test
|
name: ChangeDetection.io App Test
|
||||||
|
|
||||||
# Triggers the workflow on push or pull request events
|
# Triggers the workflow on push or pull request events
|
||||||
on: [push, pull_request]
|
on: [push, pull_request]
|
||||||
|
|
||||||
jobs:
|
jobs:
|
||||||
test-build:
|
test-application:
|
||||||
runs-on: ubuntu-latest
|
runs-on: ubuntu-latest
|
||||||
steps:
|
steps:
|
||||||
|
|
||||||
- uses: actions/checkout@v2
|
- uses: actions/checkout@v2
|
||||||
- name: Set up Python 3.9
|
- name: Set up Python 3.9
|
||||||
uses: actions/setup-python@v2
|
uses: actions/setup-python@v2
|
||||||
with:
|
with:
|
||||||
python-version: 3.9
|
python-version: 3.9
|
||||||
|
|
||||||
- name: Show env vars
|
|
||||||
run: set
|
|
||||||
|
|
||||||
- name: Install dependencies
|
- name: Install dependencies
|
||||||
run: |
|
run: |
|
||||||
python -m pip install --upgrade pip
|
python -m pip install --upgrade pip
|
||||||
pip install flake8 pytest
|
pip install flake8 pytest
|
||||||
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
|
||||||
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
|
if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi
|
||||||
|
|
||||||
- name: Lint with flake8
|
- name: Lint with flake8
|
||||||
run: |
|
run: |
|
||||||
# stop the build if there are Python syntax errors or undefined names
|
# stop the build if there are Python syntax errors or undefined names
|
||||||
@@ -39,7 +36,4 @@ jobs:
|
|||||||
# Each test is totally isolated and performs its own cleanup/reset
|
# Each test is totally isolated and performs its own cleanup/reset
|
||||||
cd changedetectionio; ./run_all_tests.sh
|
cd changedetectionio; ./run_all_tests.sh
|
||||||
|
|
||||||
# https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ?
|
|
||||||
# https://github.com/docker/buildx/issues/59 ? Needs to be one platform?
|
|
||||||
|
|
||||||
# https://github.com/docker/buildx/issues/495#issuecomment-918925854
|
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ Otherwise, it's always best to PR into the `dev` branch.
|
|||||||
|
|
||||||
Please be sure that all new functionality has a matching test!
|
Please be sure that all new functionality has a matching test!
|
||||||
|
|
||||||
Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notifications.py` for example
|
Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example
|
||||||
|
|
||||||
```
|
```
|
||||||
pip3 install -r requirements-dev
|
pip3 install -r requirements-dev
|
||||||
|
|||||||
13
Dockerfile
13
Dockerfile
@@ -5,13 +5,14 @@ FROM python:3.8-slim as builder
|
|||||||
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
|
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
|
||||||
|
|
||||||
RUN apt-get update && apt-get install -y --no-install-recommends \
|
RUN apt-get update && apt-get install -y --no-install-recommends \
|
||||||
libssl-dev \
|
g++ \
|
||||||
libffi-dev \
|
|
||||||
gcc \
|
gcc \
|
||||||
libc-dev \
|
libc-dev \
|
||||||
|
libffi-dev \
|
||||||
|
libssl-dev \
|
||||||
libxslt-dev \
|
libxslt-dev \
|
||||||
zlib1g-dev \
|
make \
|
||||||
g++
|
zlib1g-dev
|
||||||
|
|
||||||
RUN mkdir /install
|
RUN mkdir /install
|
||||||
WORKDIR /install
|
WORKDIR /install
|
||||||
@@ -22,7 +23,8 @@ RUN pip install --target=/dependencies -r /requirements.txt
|
|||||||
|
|
||||||
# Playwright is an alternative to Selenium
|
# Playwright is an alternative to Selenium
|
||||||
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
|
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
|
||||||
RUN pip install --target=/dependencies playwright~=1.24 \
|
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
|
||||||
|
RUN pip install --target=/dependencies playwright~=1.26 \
|
||||||
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
|| echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
|
||||||
|
|
||||||
# Final image stage
|
# Final image stage
|
||||||
@@ -58,6 +60,7 @@ EXPOSE 5000
|
|||||||
|
|
||||||
# The actual flask app
|
# The actual flask app
|
||||||
COPY changedetectionio /app/changedetectionio
|
COPY changedetectionio /app/changedetectionio
|
||||||
|
|
||||||
# The eventlet server wrapper
|
# The eventlet server wrapper
|
||||||
COPY changedetection.py /app/changedetection.py
|
COPY changedetection.py /app/changedetection.py
|
||||||
|
|
||||||
|
|||||||
@@ -2,6 +2,8 @@ recursive-include changedetectionio/api *
|
|||||||
recursive-include changedetectionio/templates *
|
recursive-include changedetectionio/templates *
|
||||||
recursive-include changedetectionio/static *
|
recursive-include changedetectionio/static *
|
||||||
recursive-include changedetectionio/model *
|
recursive-include changedetectionio/model *
|
||||||
|
recursive-include changedetectionio/tests *
|
||||||
|
recursive-include changedetectionio/res *
|
||||||
include changedetection.py
|
include changedetection.py
|
||||||
global-exclude *.pyc
|
global-exclude *.pyc
|
||||||
global-exclude node_modules
|
global-exclude node_modules
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
|||||||
#### Key Features
|
#### Key Features
|
||||||
|
|
||||||
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
||||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
|
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
|
||||||
- Switch between fast non-JS and Chrome JS based "fetchers"
|
- Switch between fast non-JS and Chrome JS based "fetchers"
|
||||||
- Easily specify how often a site should be checked
|
- Easily specify how often a site should be checked
|
||||||
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
|
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
|
||||||
|
|||||||
44
README.md
44
README.md
@@ -1,6 +1,7 @@
|
|||||||
## Web Site Change Detection, Monitoring and Notification.
|
## Web Site Change Detection, Monitoring and Notification.
|
||||||
|
|
||||||
Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
|
_Live your data-life pro-actively, Detect website changes and perform meaningful actions, trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._
|
||||||
|
|
||||||
|
|
||||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start?src=github)
|
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start?src=github)
|
||||||
|
|
||||||
@@ -8,15 +9,16 @@ Live your data-life pro-actively, track website content changes and receive noti
|
|||||||
|
|
||||||

|

|
||||||
|
|
||||||
Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more
|
|
||||||
|
|
||||||
[**Don't have time? Let us host it for you! try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
|
[**Don't have time? Let us host it for you! try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
|
||||||
|
|
||||||
|
- Chrome browser included.
|
||||||
|
- Super fast, no registration needed setup.
|
||||||
|
- Start watching and receiving change notifications instantly.
|
||||||
|
|
||||||
|
|
||||||
- Automatic Updates, Automatic Backups, No Heroku "paused application", don't miss a change!
|
Easily see what changed, examine by word, line, or individual character.
|
||||||
- Javascript browser included
|
|
||||||
- Unlimited checks and watches!
|
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />
|
||||||
|
|
||||||
|
|
||||||
#### Example use cases
|
#### Example use cases
|
||||||
@@ -44,22 +46,18 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
|
|||||||
#### Key Features
|
#### Key Features
|
||||||
|
|
||||||
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
|
||||||
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
|
- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
|
||||||
- Switch between fast non-JS and Chrome JS based "fetchers"
|
- Switch between fast non-JS and Chrome JS based "fetchers"
|
||||||
- Easily specify how often a site should be checked
|
- Easily specify how often a site should be checked
|
||||||
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
|
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
|
||||||
- Override Request Headers, Specify `POST` or `GET` and other methods
|
- Override Request Headers, Specify `POST` or `GET` and other methods
|
||||||
- Use the "Visual Selector" to help target specific elements
|
- Use the "Visual Selector" to help target specific elements
|
||||||
|
- Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
|
||||||
|
|
||||||
|
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
|
||||||
|
|
||||||
## Screenshots
|
## Screenshots
|
||||||
|
|
||||||
### Examine differences in content.
|
|
||||||
|
|
||||||
Easily see what changed, examine by word, line, or individual character.
|
|
||||||
|
|
||||||
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />
|
|
||||||
|
|
||||||
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
|
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
|
||||||
|
|
||||||
### Filter by elements using the Visual Selector tool.
|
### Filter by elements using the Visual Selector tool.
|
||||||
@@ -122,8 +120,8 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
|
|||||||
|
|
||||||
|
|
||||||
## Filters
|
## Filters
|
||||||
XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
|
|
||||||
|
|
||||||
|
XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
|
||||||
(We support LXML `re:test`, `re:math` and `re:replace`.)
|
(We support LXML `re:test`, `re:math` and `re:replace`.)
|
||||||
|
|
||||||
## Notifications
|
## Notifications
|
||||||
@@ -152,7 +150,7 @@ Now you can also customise your notification content!
|
|||||||
|
|
||||||
## JSON API Monitoring
|
## JSON API Monitoring
|
||||||
|
|
||||||
Detect changes and monitor data in JSON API's by using the built-in JSONPath selectors as a filter / selector.
|
Detect changes and monitor data in JSON API's by using either JSONPath or jq to filter, parse, and restructure JSON as needed.
|
||||||
|
|
||||||

|

|
||||||
|
|
||||||
@@ -160,9 +158,17 @@ This will re-parse the JSON and apply formatting to the text, making it super ea
|
|||||||
|
|
||||||

|

|
||||||
|
|
||||||
|
### JSONPath or jq?
|
||||||
|
|
||||||
|
For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more specifc information on jq.
|
||||||
|
|
||||||
|
One big advantage of `jq` is that you can use logic in your JSON filter, such as filters to only show items that have a value greater than/less than etc.
|
||||||
|
|
||||||
|
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples
|
||||||
|
|
||||||
### Parse JSON embedded in HTML!
|
### Parse JSON embedded in HTML!
|
||||||
|
|
||||||
When you enable a `json:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
|
When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
|
||||||
|
|
||||||
```
|
```
|
||||||
<html>
|
<html>
|
||||||
@@ -172,11 +178,11 @@ When you enable a `json:` filter, you can even automatically extract and parse e
|
|||||||
</script>
|
</script>
|
||||||
```
|
```
|
||||||
|
|
||||||
`json:$.price` would give `23.50`, or you can extract the whole structure
|
`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure
|
||||||
|
|
||||||
## Proxy configuration
|
## Proxy Configuration
|
||||||
|
|
||||||
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration
|
See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration , we also support using [BrightData proxy services where possible]( https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support)
|
||||||
|
|
||||||
## Raspberry Pi support?
|
## Raspberry Pi support?
|
||||||
|
|
||||||
|
|||||||
@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
|
|||||||
from changedetectionio import html_tools
|
from changedetectionio import html_tools
|
||||||
from changedetectionio.api import api_v1
|
from changedetectionio.api import api_v1
|
||||||
|
|
||||||
__version__ = '0.39.19.1'
|
__version__ = '0.39.21.1'
|
||||||
|
|
||||||
datastore = None
|
datastore = None
|
||||||
|
|
||||||
@@ -194,7 +194,8 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>',
|
watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>',
|
||||||
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||||
|
|
||||||
|
watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo',
|
||||||
|
resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
@@ -547,6 +548,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
# Defaults for proxy choice
|
# Defaults for proxy choice
|
||||||
if datastore.proxy_list is not None: # When enabled
|
if datastore.proxy_list is not None: # When enabled
|
||||||
|
# @todo
|
||||||
# Radio needs '' not None, or incase that the chosen one no longer exists
|
# Radio needs '' not None, or incase that the chosen one no longer exists
|
||||||
if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list):
|
if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list):
|
||||||
default['proxy'] = ''
|
default['proxy'] = ''
|
||||||
@@ -560,7 +562,10 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
||||||
del form.proxy
|
del form.proxy
|
||||||
else:
|
else:
|
||||||
form.proxy.choices = [('', 'Default')] + datastore.proxy_list
|
form.proxy.choices = [('', 'Default')]
|
||||||
|
for p in datastore.proxy_list:
|
||||||
|
form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
|
||||||
|
|
||||||
|
|
||||||
if request.method == 'POST' and form.validate():
|
if request.method == 'POST' and form.validate():
|
||||||
extra_update_obj = {}
|
extra_update_obj = {}
|
||||||
@@ -594,7 +599,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
|
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
|
||||||
|
|
||||||
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
|
# Reset the previous_md5 so we process a new snapshot including stripping ignore text.
|
||||||
if form.css_filter.data.strip() != datastore.data['watching'][uuid]['css_filter']:
|
if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
|
||||||
if len(datastore.data['watching'][uuid].history):
|
if len(datastore.data['watching'][uuid].history):
|
||||||
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
|
extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)
|
||||||
|
|
||||||
@@ -632,20 +637,27 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
# Only works reliably with Playwright
|
# Only works reliably with Playwright
|
||||||
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
|
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'
|
||||||
|
|
||||||
|
# JQ is difficult to install on windows and must be manually added (outside requirements.txt)
|
||||||
|
jq_support = True
|
||||||
|
try:
|
||||||
|
import jq
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
jq_support = False
|
||||||
|
|
||||||
output = render_template("edit.html",
|
output = render_template("edit.html",
|
||||||
uuid=uuid,
|
|
||||||
watch=datastore.data['watching'][uuid],
|
|
||||||
form=form,
|
|
||||||
has_empty_checktime=using_default_check_time,
|
|
||||||
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
|
||||||
using_global_webdriver_wait=default['webdriver_delay'] is None,
|
|
||||||
current_base_url=datastore.data['settings']['application']['base_url'],
|
current_base_url=datastore.data['settings']['application']['base_url'],
|
||||||
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
|
||||||
|
form=form,
|
||||||
|
has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
|
||||||
|
has_empty_checktime=using_default_check_time,
|
||||||
|
jq_support=jq_support,
|
||||||
|
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
|
||||||
settings_application=datastore.data['settings']['application'],
|
settings_application=datastore.data['settings']['application'],
|
||||||
|
using_global_webdriver_wait=default['webdriver_delay'] is None,
|
||||||
|
uuid=uuid,
|
||||||
visualselector_data_is_ready=visualselector_data_is_ready,
|
visualselector_data_is_ready=visualselector_data_is_ready,
|
||||||
visualselector_enabled=visualselector_enabled,
|
visualselector_enabled=visualselector_enabled,
|
||||||
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
watch=datastore.data['watching'][uuid],
|
||||||
)
|
)
|
||||||
|
|
||||||
return output
|
return output
|
||||||
@@ -657,15 +669,16 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
default = deepcopy(datastore.data['settings'])
|
default = deepcopy(datastore.data['settings'])
|
||||||
if datastore.proxy_list is not None:
|
if datastore.proxy_list is not None:
|
||||||
|
available_proxies = list(datastore.proxy_list.keys())
|
||||||
# When enabled
|
# When enabled
|
||||||
system_proxy = datastore.data['settings']['requests']['proxy']
|
system_proxy = datastore.data['settings']['requests']['proxy']
|
||||||
# In the case it doesnt exist anymore
|
# In the case it doesnt exist anymore
|
||||||
if not any([system_proxy in tup for tup in datastore.proxy_list]):
|
if not system_proxy in available_proxies:
|
||||||
system_proxy = None
|
system_proxy = None
|
||||||
|
|
||||||
default['requests']['proxy'] = system_proxy if system_proxy is not None else datastore.proxy_list[0][0]
|
default['requests']['proxy'] = system_proxy if system_proxy is not None else available_proxies[0]
|
||||||
# Used by the form handler to keep or remove the proxy settings
|
# Used by the form handler to keep or remove the proxy settings
|
||||||
default['proxy_list'] = datastore.proxy_list
|
default['proxy_list'] = available_proxies[0]
|
||||||
|
|
||||||
|
|
||||||
# Don't use form.data on POST so that it doesnt overrid the checkbox status from the POST status
|
# Don't use form.data on POST so that it doesnt overrid the checkbox status from the POST status
|
||||||
@@ -680,7 +693,10 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
# @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
|
||||||
del form.requests.form.proxy
|
del form.requests.form.proxy
|
||||||
else:
|
else:
|
||||||
form.requests.form.proxy.choices = datastore.proxy_list
|
form.requests.form.proxy.choices = []
|
||||||
|
for p in datastore.proxy_list:
|
||||||
|
form.requests.form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))
|
||||||
|
|
||||||
|
|
||||||
if request.method == 'POST':
|
if request.method == 'POST':
|
||||||
# Password unset is a GET, but we can lock the session to a salted env password to always need the password
|
# Password unset is a GET, but we can lock the session to a salted env password to always need the password
|
||||||
@@ -801,8 +817,10 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
newest_file = history[dates[-1]]
|
newest_file = history[dates[-1]]
|
||||||
|
|
||||||
|
# Read as binary and force decode as UTF-8
|
||||||
|
# Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
|
||||||
try:
|
try:
|
||||||
with open(newest_file, 'r') as f:
|
with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
newest_version_file_contents = f.read()
|
newest_version_file_contents = f.read()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
|
newest_version_file_contents = "Unable to read {}.\n".format(newest_file)
|
||||||
@@ -815,7 +833,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
previous_file = history[dates[-2]]
|
previous_file = history[dates[-2]]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
with open(previous_file, 'r') as f:
|
with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
previous_version_file_contents = f.read()
|
previous_version_file_contents = f.read()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
|
previous_version_file_contents = "Unable to read {}.\n".format(previous_file)
|
||||||
@@ -892,7 +910,7 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
timestamp = list(watch.history.keys())[-1]
|
timestamp = list(watch.history.keys())[-1]
|
||||||
filename = watch.history[timestamp]
|
filename = watch.history[timestamp]
|
||||||
try:
|
try:
|
||||||
with open(filename, 'r') as f:
|
with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
|
||||||
tmp = f.readlines()
|
tmp = f.readlines()
|
||||||
|
|
||||||
# Get what needs to be highlighted
|
# Get what needs to be highlighted
|
||||||
@@ -967,9 +985,6 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
# create a ZipFile object
|
# create a ZipFile object
|
||||||
backupname = "changedetection-backup-{}.zip".format(int(time.time()))
|
backupname = "changedetection-backup-{}.zip".format(int(time.time()))
|
||||||
|
|
||||||
# We only care about UUIDS from the current index file
|
|
||||||
uuids = list(datastore.data['watching'].keys())
|
|
||||||
backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
|
backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
|
||||||
|
|
||||||
with zipfile.ZipFile(backup_filepath, "w",
|
with zipfile.ZipFile(backup_filepath, "w",
|
||||||
@@ -985,12 +1000,12 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
# Add the flask app secret
|
# Add the flask app secret
|
||||||
zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt")
|
zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt")
|
||||||
|
|
||||||
# Add any snapshot data we find, use the full path to access the file, but make the file 'relative' in the Zip.
|
# Add any data in the watch data directory.
|
||||||
for txt_file_path in Path(datastore_o.datastore_path).rglob('*.txt'):
|
for uuid, w in datastore.data['watching'].items():
|
||||||
parent_p = txt_file_path.parent
|
for f in Path(w.watch_data_dir).glob('*'):
|
||||||
if parent_p.name in uuids:
|
zipObj.write(f,
|
||||||
zipObj.write(txt_file_path,
|
# Use the full path to access the file, but make the file 'relative' in the Zip.
|
||||||
arcname=str(txt_file_path).replace(datastore_o.datastore_path, ''),
|
arcname=os.path.join(f.parts[-2], f.parts[-1]),
|
||||||
compress_type=zipfile.ZIP_DEFLATED,
|
compress_type=zipfile.ZIP_DEFLATED,
|
||||||
compresslevel=8)
|
compresslevel=8)
|
||||||
|
|
||||||
@@ -1292,8 +1307,8 @@ def changedetection_app(config=None, datastore_o=None):
|
|||||||
|
|
||||||
threading.Thread(target=notification_runner).start()
|
threading.Thread(target=notification_runner).start()
|
||||||
|
|
||||||
# Check for new release version, but not when running in test/build
|
# Check for new release version, but not when running in test/build or pytest
|
||||||
if not os.getenv("GITHUB_REF", False):
|
if not os.getenv("GITHUB_REF", False) and not config.get('disable_checkver') == True:
|
||||||
threading.Thread(target=check_for_new_version).start()
|
threading.Thread(target=check_for_new_version).start()
|
||||||
|
|
||||||
return app
|
return app
|
||||||
@@ -1353,7 +1368,7 @@ def notification_runner():
|
|||||||
# UUID wont be present when we submit a 'test' from the global settings
|
# UUID wont be present when we submit a 'test' from the global settings
|
||||||
if 'uuid' in n_object:
|
if 'uuid' in n_object:
|
||||||
datastore.update_watch(uuid=n_object['uuid'],
|
datastore.update_watch(uuid=n_object['uuid'],
|
||||||
update_obj={'last_notification_error': "Notification error detected, please see logs."})
|
update_obj={'last_notification_error': "Notification error detected, goto notification log."})
|
||||||
|
|
||||||
log_lines = str(e).splitlines()
|
log_lines = str(e).splitlines()
|
||||||
notification_debug_log += log_lines
|
notification_debug_log += log_lines
|
||||||
@@ -1368,6 +1383,8 @@ def ticker_thread_check_time_launch_checks():
|
|||||||
import random
|
import random
|
||||||
from changedetectionio import update_worker
|
from changedetectionio import update_worker
|
||||||
|
|
||||||
|
proxy_last_called_time = {}
|
||||||
|
|
||||||
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
|
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
|
||||||
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
|
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
|
||||||
|
|
||||||
@@ -1428,10 +1445,30 @@ def ticker_thread_check_time_launch_checks():
|
|||||||
if watch.jitter_seconds == 0:
|
if watch.jitter_seconds == 0:
|
||||||
watch.jitter_seconds = random.uniform(-abs(jitter), jitter)
|
watch.jitter_seconds = random.uniform(-abs(jitter), jitter)
|
||||||
|
|
||||||
|
|
||||||
seconds_since_last_recheck = now - watch['last_checked']
|
seconds_since_last_recheck = now - watch['last_checked']
|
||||||
|
|
||||||
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
|
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
|
||||||
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
|
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
|
||||||
|
|
||||||
|
# Proxies can be set to have a limit on seconds between which they can be called
|
||||||
|
watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||||
|
if watch_proxy and watch_proxy in list(datastore.proxy_list.keys()):
|
||||||
|
# Proxy may also have some threshold minimum
|
||||||
|
proxy_list_reuse_time_minimum = int(datastore.proxy_list.get(watch_proxy, {}).get('reuse_time_minimum', 0))
|
||||||
|
if proxy_list_reuse_time_minimum:
|
||||||
|
proxy_last_used_time = proxy_last_called_time.get(watch_proxy, 0)
|
||||||
|
time_since_proxy_used = int(time.time() - proxy_last_used_time)
|
||||||
|
if time_since_proxy_used < proxy_list_reuse_time_minimum:
|
||||||
|
# Not enough time difference reached, skip this watch
|
||||||
|
print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
|
||||||
|
watch_proxy,
|
||||||
|
time_since_proxy_used,
|
||||||
|
proxy_list_reuse_time_minimum))
|
||||||
|
continue
|
||||||
|
else:
|
||||||
|
# Record the last used time
|
||||||
|
proxy_last_called_time[watch_proxy] = int(time.time())
|
||||||
|
|
||||||
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
||||||
priority = int(time.time())
|
priority = int(time.time())
|
||||||
print(
|
print(
|
||||||
|
|||||||
@@ -122,3 +122,37 @@ class CreateWatch(Resource):
|
|||||||
return {'status': "OK"}, 200
|
return {'status': "OK"}, 200
|
||||||
|
|
||||||
return list, 200
|
return list, 200
|
||||||
|
|
||||||
|
class SystemInfo(Resource):
|
||||||
|
def __init__(self, **kwargs):
|
||||||
|
# datastore is a black box dependency
|
||||||
|
self.datastore = kwargs['datastore']
|
||||||
|
self.update_q = kwargs['update_q']
|
||||||
|
|
||||||
|
@auth.check_token
|
||||||
|
def get(self):
|
||||||
|
import time
|
||||||
|
overdue_watches = []
|
||||||
|
|
||||||
|
# Check all watches and report which have not been checked but should have been
|
||||||
|
|
||||||
|
for uuid, watch in self.datastore.data.get('watching', {}).items():
|
||||||
|
# see if now - last_checked is greater than the time that should have been
|
||||||
|
# this is not super accurate (maybe they just edited it) but better than nothing
|
||||||
|
t = watch.threshold_seconds()
|
||||||
|
if not t:
|
||||||
|
# Use the system wide default
|
||||||
|
t = self.datastore.threshold_seconds
|
||||||
|
|
||||||
|
time_since_check = time.time() - watch.get('last_checked')
|
||||||
|
|
||||||
|
# Allow 5 minutes of grace time before we decide it's overdue
|
||||||
|
if time_since_check - (5 * 60) > t:
|
||||||
|
overdue_watches.append(uuid)
|
||||||
|
|
||||||
|
return {
|
||||||
|
'queue_size': self.update_q.qsize(),
|
||||||
|
'overdue_watches': overdue_watches,
|
||||||
|
'uptime': round(time.time() - self.datastore.start_time, 2),
|
||||||
|
'watch_count': len(self.datastore.data.get('watching', {}))
|
||||||
|
}, 200
|
||||||
|
|||||||
@@ -2,19 +2,20 @@
|
|||||||
|
|
||||||
# Launch as a eventlet.wsgi server instance.
|
# Launch as a eventlet.wsgi server instance.
|
||||||
|
|
||||||
|
from distutils.util import strtobool
|
||||||
|
import eventlet
|
||||||
|
import eventlet.wsgi
|
||||||
import getopt
|
import getopt
|
||||||
import os
|
import os
|
||||||
import signal
|
import signal
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
import eventlet
|
|
||||||
import eventlet.wsgi
|
|
||||||
from . import store, changedetection_app, content_fetcher
|
from . import store, changedetection_app, content_fetcher
|
||||||
from . import __version__
|
from . import __version__
|
||||||
|
|
||||||
# Only global so we can access it in the signal handler
|
# Only global so we can access it in the signal handler
|
||||||
datastore = None
|
|
||||||
app = None
|
app = None
|
||||||
|
datastore = None
|
||||||
|
|
||||||
def sigterm_handler(_signo, _stack_frame):
|
def sigterm_handler(_signo, _stack_frame):
|
||||||
global app
|
global app
|
||||||
@@ -102,6 +103,15 @@ def main():
|
|||||||
has_password=datastore.data['settings']['application']['password'] != False
|
has_password=datastore.data['settings']['application']['password'] != False
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
|
||||||
|
# @Note: Incompatible with password login (and maybe other features) for now, submit a PR!
|
||||||
|
@app.after_request
|
||||||
|
def hide_referrer(response):
|
||||||
|
if strtobool(os.getenv("HIDE_REFERER", 'false')):
|
||||||
|
response.headers["Referrer-Policy"] = "no-referrer"
|
||||||
|
|
||||||
|
return response
|
||||||
|
|
||||||
# Proxy sub-directory support
|
# Proxy sub-directory support
|
||||||
# Set environment var USE_X_SETTINGS=1 on this script
|
# Set environment var USE_X_SETTINGS=1 on this script
|
||||||
# And then in your proxy_pass settings
|
# And then in your proxy_pass settings
|
||||||
|
|||||||
@@ -1,11 +1,11 @@
|
|||||||
from abc import ABC, abstractmethod
|
from abc import abstractmethod
|
||||||
|
from pkg_resources import resource_string
|
||||||
import chardet
|
import chardet
|
||||||
import json
|
import json
|
||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
import time
|
|
||||||
import sys
|
import sys
|
||||||
|
import time
|
||||||
|
|
||||||
class Non200ErrorCodeReceived(Exception):
|
class Non200ErrorCodeReceived(Exception):
|
||||||
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
|
def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None):
|
||||||
@@ -73,131 +73,8 @@ class Fetcher():
|
|||||||
|
|
||||||
fetcher_description = "No description"
|
fetcher_description = "No description"
|
||||||
webdriver_js_execute_code = None
|
webdriver_js_execute_code = None
|
||||||
xpath_element_js = """
|
xpath_element_js = ""
|
||||||
// Include the getXpath script directly, easier than fetching
|
|
||||||
!function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
|
|
||||||
|
|
||||||
|
|
||||||
const findUpTag = (el) => {
|
|
||||||
let r = el
|
|
||||||
chained_css = [];
|
|
||||||
depth=0;
|
|
||||||
|
|
||||||
// Strategy 1: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
|
|
||||||
while (r.parentNode) {
|
|
||||||
if(depth==5) {
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
if('' !==r.id) {
|
|
||||||
chained_css.unshift("#"+CSS.escape(r.id));
|
|
||||||
final_selector= chained_css.join(' > ');
|
|
||||||
// Be sure theres only one, some sites have multiples of the same ID tag :-(
|
|
||||||
if (window.document.querySelectorAll(final_selector).length ==1 ) {
|
|
||||||
return final_selector;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
} else {
|
|
||||||
chained_css.unshift(r.tagName.toLowerCase());
|
|
||||||
}
|
|
||||||
r=r.parentNode;
|
|
||||||
depth+=1;
|
|
||||||
}
|
|
||||||
return null;
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// @todo - if it's SVG or IMG, go into image diff mode
|
|
||||||
var elements = window.document.querySelectorAll("div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary");
|
|
||||||
var size_pos=[];
|
|
||||||
// after page fetch, inject this JS
|
|
||||||
// build a map of all elements and their positions (maybe that only include text?)
|
|
||||||
var bbox;
|
|
||||||
for (var i = 0; i < elements.length; i++) {
|
|
||||||
bbox = elements[i].getBoundingClientRect();
|
|
||||||
|
|
||||||
// forget really small ones
|
|
||||||
if (bbox['width'] <20 && bbox['height'] < 20 ) {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
|
|
||||||
// it should not traverse when we know we can anchor off just an ID one level up etc..
|
|
||||||
// maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
|
|
||||||
|
|
||||||
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
|
|
||||||
xpath_result=false;
|
|
||||||
|
|
||||||
try {
|
|
||||||
var d= findUpTag(elements[i]);
|
|
||||||
if (d) {
|
|
||||||
xpath_result =d;
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
console.log(e);
|
|
||||||
}
|
|
||||||
|
|
||||||
// You could swap it and default to getXpath and then try the smarter one
|
|
||||||
// default back to the less intelligent one
|
|
||||||
if (!xpath_result) {
|
|
||||||
try {
|
|
||||||
// I've seen on FB and eBay that this doesnt work
|
|
||||||
// ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
|
|
||||||
xpath_result = getXPath(elements[i]);
|
|
||||||
} catch (e) {
|
|
||||||
console.log(e);
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if(window.getComputedStyle(elements[i]).visibility === "hidden") {
|
|
||||||
continue;
|
|
||||||
}
|
|
||||||
|
|
||||||
size_pos.push({
|
|
||||||
xpath: xpath_result,
|
|
||||||
width: Math.round(bbox['width']),
|
|
||||||
height: Math.round(bbox['height']),
|
|
||||||
left: Math.floor(bbox['left']),
|
|
||||||
top: Math.floor(bbox['top']),
|
|
||||||
childCount: elements[i].childElementCount
|
|
||||||
});
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
// inject the current one set in the css_filter, which may be a CSS rule
|
|
||||||
// used for displaying the current one in VisualSelector, where its not one we generated.
|
|
||||||
if (css_filter.length) {
|
|
||||||
q=false;
|
|
||||||
try {
|
|
||||||
// is it xpath?
|
|
||||||
if (css_filter.startsWith('/') || css_filter.startsWith('xpath:')) {
|
|
||||||
q=document.evaluate(css_filter.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
|
||||||
} else {
|
|
||||||
q=document.querySelector(css_filter);
|
|
||||||
}
|
|
||||||
} catch (e) {
|
|
||||||
// Maybe catch DOMException and alert?
|
|
||||||
console.log(e);
|
|
||||||
}
|
|
||||||
bbox=false;
|
|
||||||
if(q) {
|
|
||||||
bbox = q.getBoundingClientRect();
|
|
||||||
}
|
|
||||||
|
|
||||||
if (bbox && bbox['width'] >0 && bbox['height']>0) {
|
|
||||||
size_pos.push({
|
|
||||||
xpath: css_filter,
|
|
||||||
width: bbox['width'],
|
|
||||||
height: bbox['height'],
|
|
||||||
left: bbox['left'],
|
|
||||||
top: bbox['top'],
|
|
||||||
childCount: q.childElementCount
|
|
||||||
});
|
|
||||||
}
|
|
||||||
}
|
|
||||||
// Window.width required for proper scaling in the frontend
|
|
||||||
return {'size_pos':size_pos, 'browser_width': window.innerWidth};
|
|
||||||
"""
|
|
||||||
xpath_data = None
|
xpath_data = None
|
||||||
|
|
||||||
# Will be needed in the future by the VisualSelector, always get this where possible.
|
# Will be needed in the future by the VisualSelector, always get this where possible.
|
||||||
@@ -208,6 +85,10 @@ class Fetcher():
|
|||||||
# Time ONTOP of the system defined env minimum time
|
# Time ONTOP of the system defined env minimum time
|
||||||
render_extract_delay = 0
|
render_extract_delay = 0
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
# The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
|
||||||
|
self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def get_error(self):
|
def get_error(self):
|
||||||
return self.error
|
return self.error
|
||||||
@@ -220,7 +101,7 @@ class Fetcher():
|
|||||||
request_body,
|
request_body,
|
||||||
request_method,
|
request_method,
|
||||||
ignore_status_codes=False,
|
ignore_status_codes=False,
|
||||||
current_css_filter=None):
|
current_include_filters=None):
|
||||||
# Should set self.error, self.status_code and self.content
|
# Should set self.error, self.status_code and self.content
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@@ -273,7 +154,7 @@ class base_html_playwright(Fetcher):
|
|||||||
proxy = None
|
proxy = None
|
||||||
|
|
||||||
def __init__(self, proxy_override=None):
|
def __init__(self, proxy_override=None):
|
||||||
|
super().__init__()
|
||||||
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
||||||
self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
|
self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
|
||||||
self.command_executor = os.getenv(
|
self.command_executor = os.getenv(
|
||||||
@@ -310,12 +191,13 @@ class base_html_playwright(Fetcher):
|
|||||||
request_body,
|
request_body,
|
||||||
request_method,
|
request_method,
|
||||||
ignore_status_codes=False,
|
ignore_status_codes=False,
|
||||||
current_css_filter=None):
|
current_include_filters=None):
|
||||||
|
|
||||||
from playwright.sync_api import sync_playwright
|
from playwright.sync_api import sync_playwright
|
||||||
import playwright._impl._api_types
|
import playwright._impl._api_types
|
||||||
from playwright._impl._api_types import Error, TimeoutError
|
from playwright._impl._api_types import Error, TimeoutError
|
||||||
response = None
|
response = None
|
||||||
|
|
||||||
with sync_playwright() as p:
|
with sync_playwright() as p:
|
||||||
browser_type = getattr(p, self.browser_type)
|
browser_type = getattr(p, self.browser_type)
|
||||||
|
|
||||||
@@ -373,8 +255,11 @@ class base_html_playwright(Fetcher):
|
|||||||
print("response object was none")
|
print("response object was none")
|
||||||
raise EmptyReply(url=url, status_code=None)
|
raise EmptyReply(url=url, status_code=None)
|
||||||
|
|
||||||
# Bug 2(?) Set the viewport size AFTER loading the page
|
|
||||||
page.set_viewport_size({"width": 1280, "height": 1024})
|
# Removed browser-set-size, seemed to be needed to make screenshots work reliably in older playwright versions
|
||||||
|
# Was causing exceptions like 'waiting for page but content is changing' etc
|
||||||
|
# https://www.browserstack.com/docs/automate/playwright/change-browser-window-size 1280x720 should be the default
|
||||||
|
|
||||||
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
|
||||||
time.sleep(extra_wait)
|
time.sleep(extra_wait)
|
||||||
|
|
||||||
@@ -398,14 +283,21 @@ class base_html_playwright(Fetcher):
|
|||||||
|
|
||||||
raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url)
|
raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url)
|
||||||
|
|
||||||
|
else:
|
||||||
|
# JS eval was run, now we also wait some time if possible to let the page settle
|
||||||
|
if self.render_extract_delay:
|
||||||
|
page.wait_for_timeout(self.render_extract_delay * 1000)
|
||||||
|
|
||||||
|
page.wait_for_timeout(500)
|
||||||
|
|
||||||
self.content = page.content()
|
self.content = page.content()
|
||||||
self.status_code = response.status
|
self.status_code = response.status
|
||||||
self.headers = response.all_headers()
|
self.headers = response.all_headers()
|
||||||
|
|
||||||
if current_css_filter is not None:
|
if current_include_filters is not None:
|
||||||
page.evaluate("var css_filter={}".format(json.dumps(current_css_filter)))
|
page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
|
||||||
else:
|
else:
|
||||||
page.evaluate("var css_filter=''")
|
page.evaluate("var include_filters=''")
|
||||||
|
|
||||||
self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}")
|
self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}")
|
||||||
|
|
||||||
@@ -454,6 +346,7 @@ class base_html_webdriver(Fetcher):
|
|||||||
proxy = None
|
proxy = None
|
||||||
|
|
||||||
def __init__(self, proxy_override=None):
|
def __init__(self, proxy_override=None):
|
||||||
|
super().__init__()
|
||||||
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
|
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
|
||||||
|
|
||||||
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
# .strip('"') is going to save someone a lot of time when they accidently wrap the env value
|
||||||
@@ -486,7 +379,7 @@ class base_html_webdriver(Fetcher):
|
|||||||
request_body,
|
request_body,
|
||||||
request_method,
|
request_method,
|
||||||
ignore_status_codes=False,
|
ignore_status_codes=False,
|
||||||
current_css_filter=None):
|
current_include_filters=None):
|
||||||
|
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
|
||||||
@@ -514,8 +407,6 @@ class base_html_webdriver(Fetcher):
|
|||||||
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
|
||||||
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
|
||||||
|
|
||||||
self.screenshot = self.driver.get_screenshot_as_png()
|
|
||||||
|
|
||||||
# @todo - how to check this? is it possible?
|
# @todo - how to check this? is it possible?
|
||||||
self.status_code = 200
|
self.status_code = 200
|
||||||
# @todo somehow we should try to get this working for WebDriver
|
# @todo somehow we should try to get this working for WebDriver
|
||||||
@@ -526,6 +417,8 @@ class base_html_webdriver(Fetcher):
|
|||||||
self.content = self.driver.page_source
|
self.content = self.driver.page_source
|
||||||
self.headers = {}
|
self.headers = {}
|
||||||
|
|
||||||
|
self.screenshot = self.driver.get_screenshot_as_png()
|
||||||
|
|
||||||
# Does the connection to the webdriver work? run a test connection.
|
# Does the connection to the webdriver work? run a test connection.
|
||||||
def is_ready(self):
|
def is_ready(self):
|
||||||
from selenium import webdriver
|
from selenium import webdriver
|
||||||
@@ -562,7 +455,12 @@ class html_requests(Fetcher):
|
|||||||
request_body,
|
request_body,
|
||||||
request_method,
|
request_method,
|
||||||
ignore_status_codes=False,
|
ignore_status_codes=False,
|
||||||
current_css_filter=None):
|
current_include_filters=None):
|
||||||
|
|
||||||
|
# Make requests use a more modern looking user-agent
|
||||||
|
if not 'User-Agent' in request_headers:
|
||||||
|
request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
|
||||||
|
'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')
|
||||||
|
|
||||||
proxies = {}
|
proxies = {}
|
||||||
|
|
||||||
|
|||||||
@@ -10,6 +10,11 @@ from changedetectionio import content_fetcher, html_tools
|
|||||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||||
|
|
||||||
|
|
||||||
|
class FilterNotFoundInResponse(ValueError):
|
||||||
|
def __init__(self, msg):
|
||||||
|
ValueError.__init__(self, msg)
|
||||||
|
|
||||||
|
|
||||||
# Some common stuff here that can be moved to a base class
|
# Some common stuff here that can be moved to a base class
|
||||||
# (set_proxy_from_list)
|
# (set_proxy_from_list)
|
||||||
class perform_site_check():
|
class perform_site_check():
|
||||||
@@ -20,34 +25,6 @@ class perform_site_check():
|
|||||||
super().__init__(*args, **kwargs)
|
super().__init__(*args, **kwargs)
|
||||||
self.datastore = datastore
|
self.datastore = datastore
|
||||||
|
|
||||||
# If there was a proxy list enabled, figure out what proxy_args/which proxy to use
|
|
||||||
# if watch.proxy use that
|
|
||||||
# fetcher.proxy_override = watch.proxy or main config proxy
|
|
||||||
# Allows override the proxy on a per-request basis
|
|
||||||
# ALWAYS use the first one is nothing selected
|
|
||||||
|
|
||||||
def set_proxy_from_list(self, watch):
|
|
||||||
proxy_args = None
|
|
||||||
if self.datastore.proxy_list is None:
|
|
||||||
return None
|
|
||||||
|
|
||||||
# If its a valid one
|
|
||||||
if any([watch['proxy'] in p for p in self.datastore.proxy_list]):
|
|
||||||
proxy_args = watch['proxy']
|
|
||||||
|
|
||||||
# not valid (including None), try the system one
|
|
||||||
else:
|
|
||||||
system_proxy = self.datastore.data['settings']['requests']['proxy']
|
|
||||||
# Is not None and exists
|
|
||||||
if any([system_proxy in p for p in self.datastore.proxy_list]):
|
|
||||||
proxy_args = system_proxy
|
|
||||||
|
|
||||||
# Fallback - Did not resolve anything, use the first available
|
|
||||||
if proxy_args is None:
|
|
||||||
proxy_args = self.datastore.proxy_list[0][0]
|
|
||||||
|
|
||||||
return proxy_args
|
|
||||||
|
|
||||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
# Doesn't look like python supports forward slash auto enclosure in re.findall
|
||||||
# So convert it to inline flag "foobar(?i)" type configuration
|
# So convert it to inline flag "foobar(?i)" type configuration
|
||||||
def forward_slash_enclosed_regex_to_options(self, regex):
|
def forward_slash_enclosed_regex_to_options(self, regex):
|
||||||
@@ -61,16 +38,20 @@ class perform_site_check():
|
|||||||
|
|
||||||
return regex
|
return regex
|
||||||
|
|
||||||
|
|
||||||
def run(self, uuid):
|
def run(self, uuid):
|
||||||
|
from copy import deepcopy
|
||||||
changed_detected = False
|
changed_detected = False
|
||||||
screenshot = False # as bytes
|
screenshot = False # as bytes
|
||||||
stripped_text_from_html = ""
|
stripped_text_from_html = ""
|
||||||
|
|
||||||
watch = self.datastore.data['watching'].get(uuid)
|
# DeepCopy so we can be sure we don't accidently change anything by reference
|
||||||
|
watch = deepcopy(self.datastore.data['watching'].get(uuid))
|
||||||
|
|
||||||
|
if not watch:
|
||||||
|
return
|
||||||
|
|
||||||
# Protect against file:// access
|
# Protect against file:// access
|
||||||
if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
|
if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
|
||||||
raise Exception(
|
raise Exception(
|
||||||
"file:// type access is denied for security reasons."
|
"file:// type access is denied for security reasons."
|
||||||
)
|
)
|
||||||
@@ -78,10 +59,10 @@ class perform_site_check():
|
|||||||
# Unset any existing notification error
|
# Unset any existing notification error
|
||||||
update_obj = {'last_notification_error': False, 'last_error': False}
|
update_obj = {'last_notification_error': False, 'last_error': False}
|
||||||
|
|
||||||
extra_headers =self.datastore.data['watching'][uuid].get('headers')
|
extra_headers = watch.get('headers', [])
|
||||||
|
|
||||||
# Tweak the base config with the per-watch ones
|
# Tweak the base config with the per-watch ones
|
||||||
request_headers = self.datastore.data['settings']['headers'].copy()
|
request_headers = deepcopy(self.datastore.data['settings']['headers'])
|
||||||
request_headers.update(extra_headers)
|
request_headers.update(extra_headers)
|
||||||
|
|
||||||
# https://github.com/psf/requests/issues/4525
|
# https://github.com/psf/requests/issues/4525
|
||||||
@@ -90,8 +71,10 @@ class perform_site_check():
|
|||||||
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
|
if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
|
||||||
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
|
request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')
|
||||||
|
|
||||||
timeout = self.datastore.data['settings']['requests']['timeout']
|
timeout = self.datastore.data['settings']['requests'].get('timeout')
|
||||||
url = watch.get('url')
|
|
||||||
|
url = watch.link
|
||||||
|
|
||||||
request_body = self.datastore.data['watching'][uuid].get('body')
|
request_body = self.datastore.data['watching'][uuid].get('body')
|
||||||
request_method = self.datastore.data['watching'][uuid].get('method')
|
request_method = self.datastore.data['watching'][uuid].get('method')
|
||||||
ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
|
ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)
|
||||||
@@ -103,28 +86,32 @@ class perform_site_check():
|
|||||||
is_source = True
|
is_source = True
|
||||||
|
|
||||||
# Pluggable content fetcher
|
# Pluggable content fetcher
|
||||||
prefer_backend = watch['fetch_backend']
|
prefer_backend = watch.get('fetch_backend')
|
||||||
if hasattr(content_fetcher, prefer_backend):
|
if hasattr(content_fetcher, prefer_backend):
|
||||||
klass = getattr(content_fetcher, prefer_backend)
|
klass = getattr(content_fetcher, prefer_backend)
|
||||||
else:
|
else:
|
||||||
# If the klass doesnt exist, just use a default
|
# If the klass doesnt exist, just use a default
|
||||||
klass = getattr(content_fetcher, "html_requests")
|
klass = getattr(content_fetcher, "html_requests")
|
||||||
|
|
||||||
|
proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
|
||||||
|
proxy_url = None
|
||||||
|
if proxy_id:
|
||||||
|
proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
|
||||||
|
print("UUID {} Using proxy {}".format(uuid, proxy_url))
|
||||||
|
|
||||||
proxy_args = self.set_proxy_from_list(watch)
|
fetcher = klass(proxy_override=proxy_url)
|
||||||
fetcher = klass(proxy_override=proxy_args)
|
|
||||||
|
|
||||||
# Configurable per-watch or global extra delay before extracting text (for webDriver types)
|
# Configurable per-watch or global extra delay before extracting text (for webDriver types)
|
||||||
system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
|
system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
|
||||||
if watch['webdriver_delay'] is not None:
|
if watch['webdriver_delay'] is not None:
|
||||||
fetcher.render_extract_delay = watch['webdriver_delay']
|
fetcher.render_extract_delay = watch.get('webdriver_delay')
|
||||||
elif system_webdriver_delay is not None:
|
elif system_webdriver_delay is not None:
|
||||||
fetcher.render_extract_delay = system_webdriver_delay
|
fetcher.render_extract_delay = system_webdriver_delay
|
||||||
|
|
||||||
if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
|
if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip():
|
||||||
fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']
|
fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code')
|
||||||
|
|
||||||
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
|
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'))
|
||||||
fetcher.quit()
|
fetcher.quit()
|
||||||
|
|
||||||
self.screenshot = fetcher.screenshot
|
self.screenshot = fetcher.screenshot
|
||||||
@@ -148,27 +135,30 @@ class perform_site_check():
|
|||||||
is_html = False
|
is_html = False
|
||||||
is_json = False
|
is_json = False
|
||||||
|
|
||||||
css_filter_rule = watch['css_filter']
|
include_filters_rule = watch.get('include_filters', [])
|
||||||
|
# include_filters_rule = watch['include_filters']
|
||||||
subtractive_selectors = watch.get(
|
subtractive_selectors = watch.get(
|
||||||
"subtractive_selectors", []
|
"subtractive_selectors", []
|
||||||
) + self.datastore.data["settings"]["application"].get(
|
) + self.datastore.data["settings"]["application"].get(
|
||||||
"global_subtractive_selectors", []
|
"global_subtractive_selectors", []
|
||||||
)
|
)
|
||||||
|
|
||||||
has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
|
has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip())
|
||||||
has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())
|
has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())
|
||||||
|
|
||||||
if is_json and not has_filter_rule:
|
if is_json and not has_filter_rule:
|
||||||
css_filter_rule = "json:$"
|
include_filters_rule.append("json:$")
|
||||||
has_filter_rule = True
|
has_filter_rule = True
|
||||||
|
|
||||||
if has_filter_rule:
|
if has_filter_rule:
|
||||||
if 'json:' in css_filter_rule:
|
json_filter_prefixes = ['json:', 'jq:']
|
||||||
stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
|
for filter in include_filters_rule:
|
||||||
is_html = False
|
if any(prefix in filter for prefix in json_filter_prefixes):
|
||||||
|
stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
|
||||||
|
is_html = False
|
||||||
|
|
||||||
if is_html or is_source:
|
if is_html or is_source:
|
||||||
|
|
||||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||||
fetcher.content = html_tools.workarounds_for_obfuscations(fetcher.content)
|
fetcher.content = html_tools.workarounds_for_obfuscations(fetcher.content)
|
||||||
html_content = fetcher.content
|
html_content = fetcher.content
|
||||||
@@ -180,33 +170,36 @@ class perform_site_check():
|
|||||||
else:
|
else:
|
||||||
# Then we assume HTML
|
# Then we assume HTML
|
||||||
if has_filter_rule:
|
if has_filter_rule:
|
||||||
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
html_content = ""
|
||||||
if css_filter_rule[0] == '/' or css_filter_rule.startswith('xpath:'):
|
for filter_rule in include_filters_rule:
|
||||||
html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''),
|
# For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
|
||||||
html_content=fetcher.content)
|
if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
|
||||||
else:
|
html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
|
||||||
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
html_content=fetcher.content,
|
||||||
html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
|
append_pretty_line_formatting=not is_source)
|
||||||
|
else:
|
||||||
|
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
|
||||||
|
html_content += html_tools.include_filters(include_filters=filter_rule,
|
||||||
|
html_content=fetcher.content,
|
||||||
|
append_pretty_line_formatting=not is_source)
|
||||||
|
|
||||||
|
if not html_content.strip():
|
||||||
|
raise FilterNotFoundInResponse(include_filters_rule)
|
||||||
|
|
||||||
if has_subtractive_selectors:
|
if has_subtractive_selectors:
|
||||||
html_content = html_tools.element_removal(subtractive_selectors, html_content)
|
html_content = html_tools.element_removal(subtractive_selectors, html_content)
|
||||||
|
|
||||||
if not is_source:
|
if is_source:
|
||||||
|
stripped_text_from_html = html_content
|
||||||
|
else:
|
||||||
# extract text
|
# extract text
|
||||||
|
do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
|
||||||
stripped_text_from_html = \
|
stripped_text_from_html = \
|
||||||
html_tools.html_to_text(
|
html_tools.html_to_text(
|
||||||
html_content,
|
html_content,
|
||||||
render_anchor_tag_content=self.datastore.data["settings"][
|
render_anchor_tag_content=do_anchor
|
||||||
"application"].get(
|
|
||||||
"render_anchor_tag_content", False)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
elif is_source:
|
|
||||||
stripped_text_from_html = html_content
|
|
||||||
|
|
||||||
# Re #340 - return the content before the 'ignore text' was applied
|
|
||||||
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
|
||||||
|
|
||||||
# Re #340 - return the content before the 'ignore text' was applied
|
# Re #340 - return the content before the 'ignore text' was applied
|
||||||
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
||||||
|
|
||||||
@@ -239,7 +232,7 @@ class perform_site_check():
|
|||||||
|
|
||||||
for l in result:
|
for l in result:
|
||||||
if type(l) is tuple:
|
if type(l) is tuple:
|
||||||
#@todo - some formatter option default (between groups)
|
# @todo - some formatter option default (between groups)
|
||||||
regex_matched_output += list(l) + [b'\n']
|
regex_matched_output += list(l) + [b'\n']
|
||||||
else:
|
else:
|
||||||
# @todo - some formatter option default (between each ungrouped result)
|
# @todo - some formatter option default (between each ungrouped result)
|
||||||
@@ -253,7 +246,6 @@ class perform_site_check():
|
|||||||
stripped_text_from_html = b''.join(regex_matched_output)
|
stripped_text_from_html = b''.join(regex_matched_output)
|
||||||
text_content_before_ignored_filter = stripped_text_from_html
|
text_content_before_ignored_filter = stripped_text_from_html
|
||||||
|
|
||||||
|
|
||||||
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
|
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
|
||||||
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
|
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
|
||||||
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
|
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
|
||||||
@@ -263,29 +255,30 @@ class perform_site_check():
|
|||||||
############ Blocking rules, after checksum #################
|
############ Blocking rules, after checksum #################
|
||||||
blocked = False
|
blocked = False
|
||||||
|
|
||||||
if len(watch['trigger_text']):
|
trigger_text = watch.get('trigger_text', [])
|
||||||
|
if len(trigger_text):
|
||||||
# Assume blocked
|
# Assume blocked
|
||||||
blocked = True
|
blocked = True
|
||||||
# Filter and trigger works the same, so reuse it
|
# Filter and trigger works the same, so reuse it
|
||||||
# It should return the line numbers that match
|
# It should return the line numbers that match
|
||||||
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
||||||
wordlist=watch['trigger_text'],
|
wordlist=trigger_text,
|
||||||
mode="line numbers")
|
mode="line numbers")
|
||||||
# Unblock if the trigger was found
|
# Unblock if the trigger was found
|
||||||
if result:
|
if result:
|
||||||
blocked = False
|
blocked = False
|
||||||
|
|
||||||
|
text_should_not_be_present = watch.get('text_should_not_be_present', [])
|
||||||
if len(watch['text_should_not_be_present']):
|
if len(text_should_not_be_present):
|
||||||
# If anything matched, then we should block a change from happening
|
# If anything matched, then we should block a change from happening
|
||||||
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
|
||||||
wordlist=watch['text_should_not_be_present'],
|
wordlist=text_should_not_be_present,
|
||||||
mode="line numbers")
|
mode="line numbers")
|
||||||
if result:
|
if result:
|
||||||
blocked = True
|
blocked = True
|
||||||
|
|
||||||
# The main thing that all this at the moment comes down to :)
|
# The main thing that all this at the moment comes down to :)
|
||||||
if watch['previous_md5'] != fetched_md5:
|
if watch.get('previous_md5') != fetched_md5:
|
||||||
changed_detected = True
|
changed_detected = True
|
||||||
|
|
||||||
# Looks like something changed, but did it match all the rules?
|
# Looks like something changed, but did it match all the rules?
|
||||||
@@ -294,7 +287,7 @@ class perform_site_check():
|
|||||||
|
|
||||||
# Extract title as title
|
# Extract title as title
|
||||||
if is_html:
|
if is_html:
|
||||||
if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
|
if self.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
|
||||||
if not watch['title'] or not len(watch['title']):
|
if not watch['title'] or not len(watch['title']):
|
||||||
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
|
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
|
||||||
|
|
||||||
|
|||||||
@@ -303,6 +303,25 @@ class ValidateCSSJSONXPATHInput(object):
|
|||||||
|
|
||||||
# Re #265 - maybe in the future fetch the page and offer a
|
# Re #265 - maybe in the future fetch the page and offer a
|
||||||
# warning/notice that its possible the rule doesnt yet match anything?
|
# warning/notice that its possible the rule doesnt yet match anything?
|
||||||
|
if not self.allow_json:
|
||||||
|
raise ValidationError("jq not permitted in this field!")
|
||||||
|
|
||||||
|
if 'jq:' in line:
|
||||||
|
try:
|
||||||
|
import jq
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
# `jq` requires full compilation in windows and so isn't generally available
|
||||||
|
raise ValidationError("jq not support not found")
|
||||||
|
|
||||||
|
input = line.replace('jq:', '')
|
||||||
|
|
||||||
|
try:
|
||||||
|
jq.compile(input)
|
||||||
|
except (ValueError) as e:
|
||||||
|
message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
|
||||||
|
raise ValidationError(message % (input, str(e)))
|
||||||
|
except:
|
||||||
|
raise ValidationError("A system-error occurred when validating your jq expression")
|
||||||
|
|
||||||
|
|
||||||
class quickWatchForm(Form):
|
class quickWatchForm(Form):
|
||||||
@@ -330,7 +349,7 @@ class watchForm(commonSettingsForm):
|
|||||||
|
|
||||||
time_between_check = FormField(TimeBetweenCheckForm)
|
time_between_check = FormField(TimeBetweenCheckForm)
|
||||||
|
|
||||||
css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()], default='')
|
include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
|
||||||
|
|
||||||
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
|
subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
|
||||||
|
|
||||||
@@ -356,6 +375,7 @@ class watchForm(commonSettingsForm):
|
|||||||
'Send a notification when the filter can no longer be found on the page', default=False)
|
'Send a notification when the filter can no longer be found on the page', default=False)
|
||||||
|
|
||||||
notification_muted = BooleanField('Notifications Muted / Off', default=False)
|
notification_muted = BooleanField('Notifications Muted / Off', default=False)
|
||||||
|
notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
|
||||||
|
|
||||||
def validate(self, **kwargs):
|
def validate(self, **kwargs):
|
||||||
if not super().validate():
|
if not super().validate():
|
||||||
|
|||||||
@@ -1,32 +1,36 @@
|
|||||||
import json
|
|
||||||
from typing import List
|
|
||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
from jsonpath_ng.ext import parse
|
|
||||||
import re
|
|
||||||
from inscriptis import get_text
|
from inscriptis import get_text
|
||||||
from inscriptis.model.config import ParserConfig
|
from inscriptis.model.config import ParserConfig
|
||||||
|
from jsonpath_ng.ext import parse
|
||||||
|
from typing import List
|
||||||
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
class FilterNotFoundInResponse(ValueError):
|
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
|
||||||
def __init__(self, msg):
|
TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"
|
||||||
ValueError.__init__(self, msg)
|
|
||||||
|
|
||||||
class JSONNotFound(ValueError):
|
class JSONNotFound(ValueError):
|
||||||
def __init__(self, msg):
|
def __init__(self, msg):
|
||||||
ValueError.__init__(self, msg)
|
ValueError.__init__(self, msg)
|
||||||
|
|
||||||
|
|
||||||
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
|
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
|
||||||
def css_filter(css_filter, html_content):
|
def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
|
||||||
soup = BeautifulSoup(html_content, "html.parser")
|
soup = BeautifulSoup(html_content, "html.parser")
|
||||||
html_block = ""
|
html_block = ""
|
||||||
r = soup.select(css_filter, separator="")
|
r = soup.select(include_filters, separator="")
|
||||||
if len(html_content) > 0 and len(r) == 0:
|
|
||||||
raise FilterNotFoundInResponse(css_filter)
|
|
||||||
for item in r:
|
|
||||||
html_block += str(item)
|
|
||||||
|
|
||||||
return html_block + "\n"
|
for element in r:
|
||||||
|
# When there's more than 1 match, then add the suffix to separate each line
|
||||||
|
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||||
|
# (This way each 'match' reliably has a new-line in the diff)
|
||||||
|
# Divs are converted to 4 whitespaces by inscriptis
|
||||||
|
if append_pretty_line_formatting and len(html_block) and not element.name in (['br', 'hr', 'div', 'p']):
|
||||||
|
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||||
|
|
||||||
|
html_block += str(element)
|
||||||
|
|
||||||
|
return html_block
|
||||||
|
|
||||||
def subtractive_css_selector(css_selector, html_content):
|
def subtractive_css_selector(css_selector, html_content):
|
||||||
soup = BeautifulSoup(html_content, "html.parser")
|
soup = BeautifulSoup(html_content, "html.parser")
|
||||||
@@ -42,25 +46,29 @@ def element_removal(selectors: List[str], html_content):
|
|||||||
|
|
||||||
|
|
||||||
# Return str Utf-8 of matched rules
|
# Return str Utf-8 of matched rules
|
||||||
def xpath_filter(xpath_filter, html_content):
|
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False):
|
||||||
from lxml import etree, html
|
from lxml import etree, html
|
||||||
|
|
||||||
tree = html.fromstring(bytes(html_content, encoding='utf-8'))
|
tree = html.fromstring(bytes(html_content, encoding='utf-8'))
|
||||||
html_block = ""
|
html_block = ""
|
||||||
|
|
||||||
r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
|
r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
|
||||||
if len(html_content) > 0 and len(r) == 0:
|
|
||||||
raise FilterNotFoundInResponse(xpath_filter)
|
|
||||||
|
|
||||||
#@note: //title/text() wont work where <title>CDATA..
|
#@note: //title/text() wont work where <title>CDATA..
|
||||||
|
|
||||||
for element in r:
|
for element in r:
|
||||||
|
# When there's more than 1 match, then add the suffix to separate each line
|
||||||
|
# And where the matched result doesn't include something that will cause Inscriptis to add a newline
|
||||||
|
# (This way each 'match' reliably has a new-line in the diff)
|
||||||
|
# Divs are converted to 4 whitespaces by inscriptis
|
||||||
|
if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
|
||||||
|
html_block += TEXT_FILTER_LIST_LINE_SUFFIX
|
||||||
|
|
||||||
if type(element) == etree._ElementStringResult:
|
if type(element) == etree._ElementStringResult:
|
||||||
html_block += str(element) + "<br/>"
|
html_block += str(element)
|
||||||
elif type(element) == etree._ElementUnicodeResult:
|
elif type(element) == etree._ElementUnicodeResult:
|
||||||
html_block += str(element) + "<br/>"
|
html_block += str(element)
|
||||||
else:
|
else:
|
||||||
html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>"
|
html_block += etree.tostring(element, pretty_print=True).decode('utf-8')
|
||||||
|
|
||||||
return html_block
|
return html_block
|
||||||
|
|
||||||
@@ -79,19 +87,35 @@ def extract_element(find='title', html_content=''):
|
|||||||
return element_text
|
return element_text
|
||||||
|
|
||||||
#
|
#
|
||||||
def _parse_json(json_data, jsonpath_filter):
|
def _parse_json(json_data, json_filter):
|
||||||
s=[]
|
if 'json:' in json_filter:
|
||||||
jsonpath_expression = parse(jsonpath_filter.replace('json:', ''))
|
jsonpath_expression = parse(json_filter.replace('json:', ''))
|
||||||
match = jsonpath_expression.find(json_data)
|
match = jsonpath_expression.find(json_data)
|
||||||
|
return _get_stripped_text_from_json_match(match)
|
||||||
|
|
||||||
|
if 'jq:' in json_filter:
|
||||||
|
|
||||||
|
try:
|
||||||
|
import jq
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
# `jq` requires full compilation in windows and so isn't generally available
|
||||||
|
raise Exception("jq not support not found")
|
||||||
|
|
||||||
|
jq_expression = jq.compile(json_filter.replace('jq:', ''))
|
||||||
|
match = jq_expression.input(json_data).all()
|
||||||
|
|
||||||
|
return _get_stripped_text_from_json_match(match)
|
||||||
|
|
||||||
|
def _get_stripped_text_from_json_match(match):
|
||||||
|
s = []
|
||||||
# More than one result, we will return it as a JSON list.
|
# More than one result, we will return it as a JSON list.
|
||||||
if len(match) > 1:
|
if len(match) > 1:
|
||||||
for i in match:
|
for i in match:
|
||||||
s.append(i.value)
|
s.append(i.value if hasattr(i, 'value') else i)
|
||||||
|
|
||||||
# Single value, use just the value, as it could be later used in a token in notifications.
|
# Single value, use just the value, as it could be later used in a token in notifications.
|
||||||
if len(match) == 1:
|
if len(match) == 1:
|
||||||
s = match[0].value
|
s = match[0].value if hasattr(match[0], 'value') else match[0]
|
||||||
|
|
||||||
# Re #257 - Better handling where it does not exist, in the case the original 's' value was False..
|
# Re #257 - Better handling where it does not exist, in the case the original 's' value was False..
|
||||||
if not match:
|
if not match:
|
||||||
@@ -103,16 +127,16 @@ def _parse_json(json_data, jsonpath_filter):
|
|||||||
|
|
||||||
return stripped_text_from_html
|
return stripped_text_from_html
|
||||||
|
|
||||||
def extract_json_as_string(content, jsonpath_filter):
|
def extract_json_as_string(content, json_filter):
|
||||||
|
|
||||||
stripped_text_from_html = False
|
stripped_text_from_html = False
|
||||||
|
|
||||||
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
|
# Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
|
||||||
try:
|
try:
|
||||||
stripped_text_from_html = _parse_json(json.loads(content), jsonpath_filter)
|
stripped_text_from_html = _parse_json(json.loads(content), json_filter)
|
||||||
except json.JSONDecodeError:
|
except json.JSONDecodeError:
|
||||||
|
|
||||||
# Foreach <script json></script> blob.. just return the first that matches jsonpath_filter
|
# Foreach <script json></script> blob.. just return the first that matches json_filter
|
||||||
s = []
|
s = []
|
||||||
soup = BeautifulSoup(content, 'html.parser')
|
soup = BeautifulSoup(content, 'html.parser')
|
||||||
bs_result = soup.findAll('script')
|
bs_result = soup.findAll('script')
|
||||||
@@ -131,7 +155,7 @@ def extract_json_as_string(content, jsonpath_filter):
|
|||||||
# Just skip it
|
# Just skip it
|
||||||
continue
|
continue
|
||||||
else:
|
else:
|
||||||
stripped_text_from_html = _parse_json(json_data, jsonpath_filter)
|
stripped_text_from_html = _parse_json(json_data, json_filter)
|
||||||
if stripped_text_from_html:
|
if stripped_text_from_html:
|
||||||
break
|
break
|
||||||
|
|
||||||
|
|||||||
@@ -103,12 +103,12 @@ class import_distill_io_json(Importer):
|
|||||||
pass
|
pass
|
||||||
except IndexError:
|
except IndexError:
|
||||||
pass
|
pass
|
||||||
|
extras['include_filters'] = []
|
||||||
try:
|
try:
|
||||||
extras['css_filter'] = d_config['selections'][0]['frames'][0]['includes'][0]['expr']
|
|
||||||
if d_config['selections'][0]['frames'][0]['includes'][0]['type'] == 'xpath':
|
if d_config['selections'][0]['frames'][0]['includes'][0]['type'] == 'xpath':
|
||||||
extras['css_filter'] = 'xpath:' + extras['css_filter']
|
extras['include_filters'].append('xpath:' + d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
|
||||||
|
else:
|
||||||
|
extras['include_filters'].append(d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
|
||||||
except KeyError:
|
except KeyError:
|
||||||
pass
|
pass
|
||||||
except IndexError:
|
except IndexError:
|
||||||
|
|||||||
@@ -13,10 +13,6 @@ class model(dict):
|
|||||||
'watching': {},
|
'watching': {},
|
||||||
'settings': {
|
'settings': {
|
||||||
'headers': {
|
'headers': {
|
||||||
'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
|
|
||||||
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
|
|
||||||
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
|
|
||||||
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
|
|
||||||
},
|
},
|
||||||
'requests': {
|
'requests': {
|
||||||
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
|
||||||
|
|||||||
@@ -1,6 +1,8 @@
|
|||||||
import os
|
|
||||||
import uuid as uuid_builder
|
|
||||||
from distutils.util import strtobool
|
from distutils.util import strtobool
|
||||||
|
import logging
|
||||||
|
import os
|
||||||
|
import time
|
||||||
|
import uuid
|
||||||
|
|
||||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
|
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
|
||||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
|
||||||
@@ -14,42 +16,44 @@ class model(dict):
|
|||||||
__newest_history_key = None
|
__newest_history_key = None
|
||||||
__history_n=0
|
__history_n=0
|
||||||
__base_config = {
|
__base_config = {
|
||||||
'url': None,
|
#'history': {}, # Dict of timestamp and output stripped filename (removed)
|
||||||
'tag': None,
|
#'newest_history_key': 0, (removed, taken from history.txt index)
|
||||||
'last_checked': 0,
|
|
||||||
'paused': False,
|
|
||||||
'last_viewed': 0, # history key value of the last viewed via the [diff] link
|
|
||||||
#'newest_history_key': 0,
|
|
||||||
'title': None,
|
|
||||||
'previous_md5': False,
|
|
||||||
'uuid': str(uuid_builder.uuid4()),
|
|
||||||
'headers': {}, # Extra headers to send
|
|
||||||
'body': None,
|
'body': None,
|
||||||
'method': 'GET',
|
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
||||||
#'history': {}, # Dict of timestamp and output stripped filename
|
'check_count': 0,
|
||||||
|
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
||||||
|
'extract_text': [], # Extract text by regex after filters
|
||||||
|
'extract_title_as_title': False,
|
||||||
|
'fetch_backend': None,
|
||||||
|
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
||||||
|
'headers': {}, # Extra headers to send
|
||||||
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
'ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||||
# Custom notification content
|
'include_filters': [],
|
||||||
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
|
'last_checked': 0,
|
||||||
'notification_title': None,
|
'last_error': False,
|
||||||
|
'last_viewed': 0, # history key value of the last viewed via the [diff] link
|
||||||
|
'method': 'GET',
|
||||||
|
# Custom notification content
|
||||||
'notification_body': None,
|
'notification_body': None,
|
||||||
'notification_format': default_notification_format_for_watch,
|
'notification_format': default_notification_format_for_watch,
|
||||||
'notification_muted': False,
|
'notification_muted': False,
|
||||||
'css_filter': '',
|
'notification_title': None,
|
||||||
'last_error': False,
|
'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL
|
||||||
'extract_text': [], # Extract text by regex after filters
|
'notification_urls': [], # List of URLs to add to the notification Queue (Usually AppRise)
|
||||||
'subtractive_selectors': [],
|
'paused': False,
|
||||||
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
'previous_md5': False,
|
||||||
'text_should_not_be_present': [], # Text that should not present
|
|
||||||
'fetch_backend': None,
|
|
||||||
'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
|
|
||||||
'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
|
|
||||||
'extract_title_as_title': False,
|
|
||||||
'check_unique_lines': False, # On change-detected, compare against all history if its something new
|
|
||||||
'proxy': None, # Preferred proxy connection
|
'proxy': None, # Preferred proxy connection
|
||||||
|
'subtractive_selectors': [],
|
||||||
|
'tag': None,
|
||||||
|
'text_should_not_be_present': [], # Text that should not present
|
||||||
# Re #110, so then if this is set to None, we know to use the default value instead
|
# Re #110, so then if this is set to None, we know to use the default value instead
|
||||||
# Requires setting to None on submit if it's the same as the default
|
# Requires setting to None on submit if it's the same as the default
|
||||||
# Should be all None by default, so we use the system default in this case.
|
# Should be all None by default, so we use the system default in this case.
|
||||||
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
|
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
|
||||||
|
'title': None,
|
||||||
|
'trigger_text': [], # List of text or regex to wait for until a change is detected
|
||||||
|
'url': None,
|
||||||
|
'uuid': str(uuid.uuid4()),
|
||||||
'webdriver_delay': None,
|
'webdriver_delay': None,
|
||||||
'webdriver_js_execute_code': None, # Run before change-detection
|
'webdriver_js_execute_code': None, # Run before change-detection
|
||||||
}
|
}
|
||||||
@@ -60,7 +64,7 @@ class model(dict):
|
|||||||
self.update(self.__base_config)
|
self.update(self.__base_config)
|
||||||
self.__datastore_path = kw['datastore_path']
|
self.__datastore_path = kw['datastore_path']
|
||||||
|
|
||||||
self['uuid'] = str(uuid_builder.uuid4())
|
self['uuid'] = str(uuid.uuid4())
|
||||||
|
|
||||||
del kw['datastore_path']
|
del kw['datastore_path']
|
||||||
|
|
||||||
@@ -82,10 +86,19 @@ class model(dict):
|
|||||||
return False
|
return False
|
||||||
|
|
||||||
def ensure_data_dir_exists(self):
|
def ensure_data_dir_exists(self):
|
||||||
target_path = os.path.join(self.__datastore_path, self['uuid'])
|
if not os.path.isdir(self.watch_data_dir):
|
||||||
if not os.path.isdir(target_path):
|
print ("> Creating data dir {}".format(self.watch_data_dir))
|
||||||
print ("> Creating data dir {}".format(target_path))
|
os.mkdir(self.watch_data_dir)
|
||||||
os.mkdir(target_path)
|
|
||||||
|
@property
|
||||||
|
def link(self):
|
||||||
|
url = self.get('url', '')
|
||||||
|
if '{%' in url or '{{' in url:
|
||||||
|
from jinja2 import Environment
|
||||||
|
# Jinja2 available in URLs along with https://pypi.org/project/jinja2-time/
|
||||||
|
jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
|
||||||
|
return str(jinja2_env.from_string(url).render())
|
||||||
|
return url
|
||||||
|
|
||||||
@property
|
@property
|
||||||
def label(self):
|
def label(self):
|
||||||
@@ -109,16 +122,40 @@ class model(dict):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def history(self):
|
def history(self):
|
||||||
|
"""History index is just a text file as a list
|
||||||
|
{watch-uuid}/history.txt
|
||||||
|
|
||||||
|
contains a list like
|
||||||
|
|
||||||
|
{epoch-time},{filename}\n
|
||||||
|
|
||||||
|
We read in this list as the history information
|
||||||
|
|
||||||
|
"""
|
||||||
tmp_history = {}
|
tmp_history = {}
|
||||||
import logging
|
|
||||||
import time
|
|
||||||
|
|
||||||
# Read the history file as a dict
|
# Read the history file as a dict
|
||||||
fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
|
fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||||
if os.path.isfile(fname):
|
if os.path.isfile(fname):
|
||||||
logging.debug("Reading history index " + str(time.time()))
|
logging.debug("Reading history index " + str(time.time()))
|
||||||
with open(fname, "r") as f:
|
with open(fname, "r") as f:
|
||||||
tmp_history = dict(i.strip().split(',', 2) for i in f.readlines())
|
for i in f.readlines():
|
||||||
|
if ',' in i:
|
||||||
|
k, v = i.strip().split(',', 2)
|
||||||
|
|
||||||
|
# The index history could contain a relative path, so we need to make the fullpath
|
||||||
|
# so that python can read it
|
||||||
|
if not '/' in v and not '\'' in v:
|
||||||
|
v = os.path.join(self.watch_data_dir, v)
|
||||||
|
else:
|
||||||
|
# It's possible that they moved the datadir on older versions
|
||||||
|
# So the snapshot exists but is in a different path
|
||||||
|
snapshot_fname = v.split('/')[-1]
|
||||||
|
proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
|
||||||
|
if not os.path.exists(v) and os.path.exists(proposed_new_path):
|
||||||
|
v = proposed_new_path
|
||||||
|
|
||||||
|
tmp_history[k] = v
|
||||||
|
|
||||||
if len(tmp_history):
|
if len(tmp_history):
|
||||||
self.__newest_history_key = list(tmp_history.keys())[-1]
|
self.__newest_history_key = list(tmp_history.keys())[-1]
|
||||||
@@ -129,7 +166,7 @@ class model(dict):
|
|||||||
|
|
||||||
@property
|
@property
|
||||||
def has_history(self):
|
def has_history(self):
|
||||||
fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
|
fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||||
return os.path.isfile(fname)
|
return os.path.isfile(fname)
|
||||||
|
|
||||||
# Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0.
|
# Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0.
|
||||||
@@ -148,31 +185,33 @@ class model(dict):
|
|||||||
# Save some text file to the appropriate path and bump the history
|
# Save some text file to the appropriate path and bump the history
|
||||||
# result_obj from fetch_site_status.run()
|
# result_obj from fetch_site_status.run()
|
||||||
def save_history_text(self, contents, timestamp):
|
def save_history_text(self, contents, timestamp):
|
||||||
import uuid
|
|
||||||
import logging
|
|
||||||
|
|
||||||
output_path = "{}/{}".format(self.__datastore_path, self['uuid'])
|
|
||||||
|
|
||||||
self.ensure_data_dir_exists()
|
self.ensure_data_dir_exists()
|
||||||
|
|
||||||
snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
|
# Small hack so that we sleep just enough to allow 1 second between history snapshots
|
||||||
logging.debug("Saving history text {}".format(snapshot_fname))
|
# this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
|
||||||
|
if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
|
||||||
|
time.sleep(timestamp - self.__newest_history_key)
|
||||||
|
|
||||||
with open(snapshot_fname, 'wb') as f:
|
snapshot_fname = "{}.txt".format(str(uuid.uuid4()))
|
||||||
|
|
||||||
|
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
|
||||||
|
# most sites are utf-8 and some are even broken utf-8
|
||||||
|
with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f:
|
||||||
f.write(contents)
|
f.write(contents)
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
# Append to index
|
# Append to index
|
||||||
# @todo check last char was \n
|
# @todo check last char was \n
|
||||||
index_fname = "{}/history.txt".format(output_path)
|
index_fname = os.path.join(self.watch_data_dir, "history.txt")
|
||||||
with open(index_fname, 'a') as f:
|
with open(index_fname, 'a') as f:
|
||||||
f.write("{},{}\n".format(timestamp, snapshot_fname))
|
f.write("{},{}\n".format(timestamp, snapshot_fname))
|
||||||
f.close()
|
f.close()
|
||||||
|
|
||||||
self.__newest_history_key = timestamp
|
self.__newest_history_key = timestamp
|
||||||
self.__history_n+=1
|
self.__history_n += 1
|
||||||
|
|
||||||
#@todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
|
# @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
|
||||||
return snapshot_fname
|
return snapshot_fname
|
||||||
|
|
||||||
@property
|
@property
|
||||||
@@ -205,14 +244,14 @@ class model(dict):
|
|||||||
return not local_lines.issubset(existing_history)
|
return not local_lines.issubset(existing_history)
|
||||||
|
|
||||||
def get_screenshot(self):
|
def get_screenshot(self):
|
||||||
fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png")
|
fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
|
||||||
if os.path.isfile(fname):
|
if os.path.isfile(fname):
|
||||||
return fname
|
return fname
|
||||||
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
def __get_file_ctime(self, filename):
|
def __get_file_ctime(self, filename):
|
||||||
fname = os.path.join(self.__datastore_path, self['uuid'], filename)
|
fname = os.path.join(self.watch_data_dir, filename)
|
||||||
if os.path.isfile(fname):
|
if os.path.isfile(fname):
|
||||||
return int(os.path.getmtime(fname))
|
return int(os.path.getmtime(fname))
|
||||||
return False
|
return False
|
||||||
@@ -237,9 +276,14 @@ class model(dict):
|
|||||||
def snapshot_error_screenshot_ctime(self):
|
def snapshot_error_screenshot_ctime(self):
|
||||||
return self.__get_file_ctime('last-error-screenshot.png')
|
return self.__get_file_ctime('last-error-screenshot.png')
|
||||||
|
|
||||||
|
@property
|
||||||
|
def watch_data_dir(self):
|
||||||
|
# The base dir of the watch data
|
||||||
|
return os.path.join(self.__datastore_path, self['uuid'])
|
||||||
|
|
||||||
def get_error_text(self):
|
def get_error_text(self):
|
||||||
"""Return the text saved from a previous request that resulted in a non-200 error"""
|
"""Return the text saved from a previous request that resulted in a non-200 error"""
|
||||||
fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt")
|
fname = os.path.join(self.watch_data_dir, "last-error.txt")
|
||||||
if os.path.isfile(fname):
|
if os.path.isfile(fname):
|
||||||
with open(fname, 'r') as f:
|
with open(fname, 'r') as f:
|
||||||
return f.read()
|
return f.read()
|
||||||
@@ -247,7 +291,7 @@ class model(dict):
|
|||||||
|
|
||||||
def get_error_snapshot(self):
|
def get_error_snapshot(self):
|
||||||
"""Return path to the screenshot that resulted in a non-200 error"""
|
"""Return path to the screenshot that resulted in a non-200 error"""
|
||||||
fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png")
|
fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
|
||||||
if os.path.isfile(fname):
|
if os.path.isfile(fname):
|
||||||
return fname
|
return fname
|
||||||
return False
|
return False
|
||||||
|
|||||||
@@ -101,7 +101,10 @@ def process_notification(n_object, datastore):
|
|||||||
apobj.notify(
|
apobj.notify(
|
||||||
title=n_title,
|
title=n_title,
|
||||||
body=n_body,
|
body=n_body,
|
||||||
body_format=n_format)
|
body_format=n_format,
|
||||||
|
# False is not an option for AppRise, must be type None
|
||||||
|
attach=None if not n_object.get('screenshot') else n_object.get('screenshot')
|
||||||
|
)
|
||||||
|
|
||||||
apobj.clear()
|
apobj.clear()
|
||||||
|
|
||||||
|
|||||||
154
changedetectionio/res/xpath_element_scraper.js
Normal file
154
changedetectionio/res/xpath_element_scraper.js
Normal file
@@ -0,0 +1,154 @@
|
|||||||
|
// Include the getXpath script directly, easier than fetching
|
||||||
|
!function (e, n) {
|
||||||
|
"object" == typeof exports && "undefined" != typeof module ? module.exports = n() : "function" == typeof define && define.amd ? define(n) : (e = e || self).getXPath = n()
|
||||||
|
}(this, function () {
|
||||||
|
return function (e) {
|
||||||
|
var n = e;
|
||||||
|
if (n && n.id) return '//*[@id="' + n.id + '"]';
|
||||||
|
for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) {
|
||||||
|
for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling;
|
||||||
|
for (d = n.nextSibling; d;) {
|
||||||
|
if (d.nodeName === n.nodeName) {
|
||||||
|
r = !0;
|
||||||
|
break
|
||||||
|
}
|
||||||
|
d = d.nextSibling
|
||||||
|
}
|
||||||
|
o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode
|
||||||
|
}
|
||||||
|
return o.length ? "/" + o.reverse().join("/") : ""
|
||||||
|
}
|
||||||
|
});
|
||||||
|
|
||||||
|
|
||||||
|
const findUpTag = (el) => {
|
||||||
|
let r = el
|
||||||
|
chained_css = [];
|
||||||
|
depth = 0;
|
||||||
|
|
||||||
|
// Strategy 1: Keep going up until we hit an ID tag, imagine it's like #list-widget div h4
|
||||||
|
while (r.parentNode) {
|
||||||
|
if (depth == 5) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
if ('' !== r.id) {
|
||||||
|
chained_css.unshift("#" + CSS.escape(r.id));
|
||||||
|
final_selector = chained_css.join(' > ');
|
||||||
|
// Be sure theres only one, some sites have multiples of the same ID tag :-(
|
||||||
|
if (window.document.querySelectorAll(final_selector).length == 1) {
|
||||||
|
return final_selector;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
} else {
|
||||||
|
chained_css.unshift(r.tagName.toLowerCase());
|
||||||
|
}
|
||||||
|
r = r.parentNode;
|
||||||
|
depth += 1;
|
||||||
|
}
|
||||||
|
return null;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// @todo - if it's SVG or IMG, go into image diff mode
|
||||||
|
var elements = window.document.querySelectorAll("div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary");
|
||||||
|
var size_pos = [];
|
||||||
|
// after page fetch, inject this JS
|
||||||
|
// build a map of all elements and their positions (maybe that only include text?)
|
||||||
|
var bbox;
|
||||||
|
for (var i = 0; i < elements.length; i++) {
|
||||||
|
bbox = elements[i].getBoundingClientRect();
|
||||||
|
|
||||||
|
// forget really small ones
|
||||||
|
if (bbox['width'] < 15 && bbox['height'] < 15) {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
// @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
|
||||||
|
// it should not traverse when we know we can anchor off just an ID one level up etc..
|
||||||
|
// maybe, get current class or id, keep traversing up looking for only class or id until there is just one match
|
||||||
|
|
||||||
|
// 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
|
||||||
|
xpath_result = false;
|
||||||
|
|
||||||
|
try {
|
||||||
|
var d = findUpTag(elements[i]);
|
||||||
|
if (d) {
|
||||||
|
xpath_result = d;
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
// You could swap it and default to getXpath and then try the smarter one
|
||||||
|
// default back to the less intelligent one
|
||||||
|
if (!xpath_result) {
|
||||||
|
try {
|
||||||
|
// I've seen on FB and eBay that this doesnt work
|
||||||
|
// ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
|
||||||
|
xpath_result = getXPath(elements[i]);
|
||||||
|
} catch (e) {
|
||||||
|
console.log(e);
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
if (window.getComputedStyle(elements[i]).visibility === "hidden") {
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
size_pos.push({
|
||||||
|
xpath: xpath_result,
|
||||||
|
width: Math.round(bbox['width']),
|
||||||
|
height: Math.round(bbox['height']),
|
||||||
|
left: Math.floor(bbox['left']),
|
||||||
|
top: Math.floor(bbox['top'])
|
||||||
|
});
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
// Inject the current one set in the include_filters, which may be a CSS rule
|
||||||
|
// used for displaying the current one in VisualSelector, where its not one we generated.
|
||||||
|
if (include_filters.length) {
|
||||||
|
// Foreach filter, go and find it on the page and add it to the results so we can visualise it again
|
||||||
|
for (const f of include_filters) {
|
||||||
|
bbox = false;
|
||||||
|
q = false;
|
||||||
|
|
||||||
|
if (!f.length) {
|
||||||
|
console.log("xpath_element_scraper: Empty filter, skipping");
|
||||||
|
continue;
|
||||||
|
}
|
||||||
|
|
||||||
|
try {
|
||||||
|
// is it xpath?
|
||||||
|
if (f.startsWith('/') || f.startsWith('xpath:')) {
|
||||||
|
q = document.evaluate(f.replace('xpath:', ''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
||||||
|
} else {
|
||||||
|
q = document.querySelector(f);
|
||||||
|
}
|
||||||
|
} catch (e) {
|
||||||
|
// Maybe catch DOMException and alert?
|
||||||
|
console.log("xpath_element_scraper: Exception selecting element from filter "+f);
|
||||||
|
console.log(e);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (q) {
|
||||||
|
bbox = q.getBoundingClientRect();
|
||||||
|
} else {
|
||||||
|
console.log("xpath_element_scraper: filter element "+f+" was not found");
|
||||||
|
}
|
||||||
|
|
||||||
|
if (bbox && bbox['width'] > 0 && bbox['height'] > 0) {
|
||||||
|
size_pos.push({
|
||||||
|
xpath: f,
|
||||||
|
width: Math.round(bbox['width']),
|
||||||
|
height: Math.round(bbox['height']),
|
||||||
|
left: Math.floor(bbox['left']),
|
||||||
|
top: Math.floor(bbox['top'])
|
||||||
|
});
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
// Window.width required for proper scaling in the frontend
|
||||||
|
return {'size_pos': size_pos, 'browser_width': window.innerWidth};
|
||||||
@@ -9,6 +9,8 @@
|
|||||||
# exit when any command fails
|
# exit when any command fails
|
||||||
set -e
|
set -e
|
||||||
|
|
||||||
|
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
|
||||||
|
|
||||||
find tests/test_*py -type f|while read test_name
|
find tests/test_*py -type f|while read test_name
|
||||||
do
|
do
|
||||||
echo "TEST RUNNING $test_name"
|
echo "TEST RUNNING $test_name"
|
||||||
@@ -23,6 +25,11 @@ export BASE_URL="https://really-unique-domain.io"
|
|||||||
pytest tests/test_notification.py
|
pytest tests/test_notification.py
|
||||||
|
|
||||||
|
|
||||||
|
# Re-run with HIDE_REFERER set - could affect login
|
||||||
|
export HIDE_REFERER=True
|
||||||
|
pytest tests/test_access_control.py
|
||||||
|
|
||||||
|
|
||||||
# Now for the selenium and playwright/browserless fetchers
|
# Now for the selenium and playwright/browserless fetchers
|
||||||
# Note - this is not UI functional tests - just checking that each one can fetch the content
|
# Note - this is not UI functional tests - just checking that each one can fetch the content
|
||||||
|
|
||||||
@@ -38,7 +45,9 @@ docker kill $$-test_selenium
|
|||||||
|
|
||||||
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
|
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
|
||||||
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
|
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
|
||||||
pip3 install playwright~=1.24
|
PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+")
|
||||||
|
echo "using $PLAYWRIGHT_VERSION"
|
||||||
|
pip3 install "$PLAYWRIGHT_VERSION"
|
||||||
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
|
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
|
||||||
# takes a while to spin up
|
# takes a while to spin up
|
||||||
sleep 5
|
sleep 5
|
||||||
@@ -48,4 +57,48 @@ pytest tests/test_errorhandling.py
|
|||||||
pytest tests/visualselector/test_fetch_data.py
|
pytest tests/visualselector/test_fetch_data.py
|
||||||
|
|
||||||
unset PLAYWRIGHT_DRIVER_URL
|
unset PLAYWRIGHT_DRIVER_URL
|
||||||
docker kill $$-test_browserless
|
docker kill $$-test_browserless
|
||||||
|
|
||||||
|
# Test proxy list handling, starting two squids on different ports
|
||||||
|
# Each squid adds a different header to the response, which is the main thing we test for.
|
||||||
|
docker run -d --name $$-squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3128:3128 ubuntu/squid:4.13-21.10_edge
|
||||||
|
docker run -d --name $$-squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3129:3128 ubuntu/squid:4.13-21.10_edge
|
||||||
|
|
||||||
|
|
||||||
|
# So, basic HTTP as env var test
|
||||||
|
export HTTP_PROXY=http://localhost:3128
|
||||||
|
export HTTPS_PROXY=http://localhost:3128
|
||||||
|
pytest tests/proxy_list/test_proxy.py
|
||||||
|
docker logs $$-squid-one 2>/dev/null|grep one.changedetection.io
|
||||||
|
if [ $? -ne 0 ]
|
||||||
|
then
|
||||||
|
echo "Did not see a request to one.changedetection.io in the squid logs (while checking env vars HTTP_PROXY/HTTPS_PROXY)"
|
||||||
|
fi
|
||||||
|
unset HTTP_PROXY
|
||||||
|
unset HTTPS_PROXY
|
||||||
|
|
||||||
|
|
||||||
|
# 2nd test actually choose the preferred proxy from proxies.json
|
||||||
|
cp tests/proxy_list/proxies.json-example ./test-datastore/proxies.json
|
||||||
|
# Makes a watch use a preferred proxy
|
||||||
|
pytest tests/proxy_list/test_multiple_proxy.py
|
||||||
|
|
||||||
|
# Should be a request in the default "first" squid
|
||||||
|
docker logs $$-squid-one 2>/dev/null|grep chosen.changedetection.io
|
||||||
|
if [ $? -ne 0 ]
|
||||||
|
then
|
||||||
|
echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# And one in the 'second' squid (user selects this as preferred)
|
||||||
|
docker logs $$-squid-two 2>/dev/null|grep chosen.changedetection.io
|
||||||
|
if [ $? -ne 0 ]
|
||||||
|
then
|
||||||
|
echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# @todo - test system override proxy selection and watch defaults, setup a 3rd squid?
|
||||||
|
docker kill $$-squid-one
|
||||||
|
docker kill $$-squid-two
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
112
changedetectionio/static/js/diff-render.js
Normal file
112
changedetectionio/static/js/diff-render.js
Normal file
@@ -0,0 +1,112 @@
|
|||||||
|
var a = document.getElementById('a');
|
||||||
|
var b = document.getElementById('b');
|
||||||
|
var result = document.getElementById('result');
|
||||||
|
|
||||||
|
function changed() {
|
||||||
|
// https://github.com/kpdecker/jsdiff/issues/389
|
||||||
|
// I would love to use `{ignoreWhitespace: true}` here but it breaks the formatting
|
||||||
|
options = {ignoreWhitespace: document.getElementById('ignoreWhitespace').checked};
|
||||||
|
|
||||||
|
var diff = Diff[window.diffType](a.textContent, b.textContent, options);
|
||||||
|
var fragment = document.createDocumentFragment();
|
||||||
|
for (var i = 0; i < diff.length; i++) {
|
||||||
|
|
||||||
|
if (diff[i].added && diff[i + 1] && diff[i + 1].removed) {
|
||||||
|
var swap = diff[i];
|
||||||
|
diff[i] = diff[i + 1];
|
||||||
|
diff[i + 1] = swap;
|
||||||
|
}
|
||||||
|
|
||||||
|
var node;
|
||||||
|
if (diff[i].removed) {
|
||||||
|
node = document.createElement('del');
|
||||||
|
node.classList.add("change");
|
||||||
|
node.appendChild(document.createTextNode(diff[i].value));
|
||||||
|
|
||||||
|
} else if (diff[i].added) {
|
||||||
|
node = document.createElement('ins');
|
||||||
|
node.classList.add("change");
|
||||||
|
node.appendChild(document.createTextNode(diff[i].value));
|
||||||
|
} else {
|
||||||
|
node = document.createTextNode(diff[i].value);
|
||||||
|
}
|
||||||
|
fragment.appendChild(node);
|
||||||
|
}
|
||||||
|
|
||||||
|
result.textContent = '';
|
||||||
|
result.appendChild(fragment);
|
||||||
|
|
||||||
|
// Jump at start
|
||||||
|
inputs.current = 0;
|
||||||
|
next_diff();
|
||||||
|
}
|
||||||
|
|
||||||
|
window.onload = function () {
|
||||||
|
|
||||||
|
|
||||||
|
/* Convert what is options from UTC time.time() to local browser time */
|
||||||
|
var diffList = document.getElementById("diff-version");
|
||||||
|
if (typeof (diffList) != 'undefined' && diffList != null) {
|
||||||
|
for (var option of diffList.options) {
|
||||||
|
var dateObject = new Date(option.value * 1000);
|
||||||
|
option.label = dateObject.toLocaleString();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
/* Set current version date as local time in the browser also */
|
||||||
|
var current_v = document.getElementById("current-v-date");
|
||||||
|
var dateObject = new Date(newest_version_timestamp*1000);
|
||||||
|
current_v.innerHTML = dateObject.toLocaleString();
|
||||||
|
onDiffTypeChange(document.querySelector('#settings [name="diff_type"]:checked'));
|
||||||
|
changed();
|
||||||
|
};
|
||||||
|
|
||||||
|
a.onpaste = a.onchange =
|
||||||
|
b.onpaste = b.onchange = changed;
|
||||||
|
|
||||||
|
if ('oninput' in a) {
|
||||||
|
a.oninput = b.oninput = changed;
|
||||||
|
} else {
|
||||||
|
a.onkeyup = b.onkeyup = changed;
|
||||||
|
}
|
||||||
|
|
||||||
|
function onDiffTypeChange(radio) {
|
||||||
|
window.diffType = radio.value;
|
||||||
|
// Not necessary
|
||||||
|
// document.title = "Diff " + radio.value.slice(4);
|
||||||
|
}
|
||||||
|
|
||||||
|
var radio = document.getElementsByName('diff_type');
|
||||||
|
for (var i = 0; i < radio.length; i++) {
|
||||||
|
radio[i].onchange = function (e) {
|
||||||
|
onDiffTypeChange(e.target);
|
||||||
|
changed();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
document.getElementById('ignoreWhitespace').onchange = function (e) {
|
||||||
|
changed();
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
var inputs = document.getElementsByClassName('change');
|
||||||
|
inputs.current = 0;
|
||||||
|
|
||||||
|
|
||||||
|
function next_diff() {
|
||||||
|
|
||||||
|
var element = inputs[inputs.current];
|
||||||
|
var headerOffset = 80;
|
||||||
|
var elementPosition = element.getBoundingClientRect().top;
|
||||||
|
var offsetPosition = elementPosition - headerOffset + window.scrollY;
|
||||||
|
|
||||||
|
window.scrollTo({
|
||||||
|
top: offsetPosition,
|
||||||
|
behavior: "smooth"
|
||||||
|
});
|
||||||
|
|
||||||
|
inputs.current++;
|
||||||
|
if (inputs.current >= inputs.length) {
|
||||||
|
inputs.current = 0;
|
||||||
|
}
|
||||||
|
}
|
||||||
File diff suppressed because it is too large
Load Diff
38
changedetectionio/static/js/diff.min.js
vendored
Normal file
38
changedetectionio/static/js/diff.min.js
vendored
Normal file
File diff suppressed because one or more lines are too long
@@ -13,7 +13,7 @@ $(document).ready(function() {
|
|||||||
// redline highlight context
|
// redline highlight context
|
||||||
var ctx;
|
var ctx;
|
||||||
|
|
||||||
var current_default_xpath;
|
var current_default_xpath=[];
|
||||||
var x_scale=1;
|
var x_scale=1;
|
||||||
var y_scale=1;
|
var y_scale=1;
|
||||||
var selector_image;
|
var selector_image;
|
||||||
@@ -50,28 +50,31 @@ $(document).ready(function() {
|
|||||||
state_clicked=false;
|
state_clicked=false;
|
||||||
ctx.clearRect(0, 0, c.width, c.height);
|
ctx.clearRect(0, 0, c.width, c.height);
|
||||||
xctx.clearRect(0, 0, c.width, c.height);
|
xctx.clearRect(0, 0, c.width, c.height);
|
||||||
$("#css_filter").val('');
|
$("#include_filters").val('');
|
||||||
});
|
});
|
||||||
|
|
||||||
|
|
||||||
bootstrap_visualselector();
|
bootstrap_visualselector();
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
function bootstrap_visualselector() {
|
function bootstrap_visualselector() {
|
||||||
if ( 1 ) {
|
if (1) {
|
||||||
// bootstrap it, this will trigger everything else
|
// bootstrap it, this will trigger everything else
|
||||||
$("img#selector-background").bind('load', function () {
|
$("img#selector-background").bind('load', function () {
|
||||||
console.log("Loaded background...");
|
console.log("Loaded background...");
|
||||||
c = document.getElementById("selector-canvas");
|
c = document.getElementById("selector-canvas");
|
||||||
// greyed out fill context
|
// greyed out fill context
|
||||||
xctx = c.getContext("2d");
|
xctx = c.getContext("2d");
|
||||||
// redline highlight context
|
// redline highlight context
|
||||||
ctx = c.getContext("2d");
|
ctx = c.getContext("2d");
|
||||||
current_default_xpath =$("#css_filter").val();
|
if ($("#include_filters").val().trim().length) {
|
||||||
fetch_data();
|
current_default_xpath = $("#include_filters").val().split(/\r?\n/g);
|
||||||
$('#selector-canvas').off("mousemove mousedown");
|
} else {
|
||||||
// screenshot_url defined in the edit.html template
|
current_default_xpath = [];
|
||||||
|
}
|
||||||
|
fetch_data();
|
||||||
|
$('#selector-canvas').off("mousemove mousedown");
|
||||||
|
// screenshot_url defined in the edit.html template
|
||||||
}).attr("src", screenshot_url);
|
}).attr("src", screenshot_url);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -127,24 +130,30 @@ $(document).ready(function() {
|
|||||||
|
|
||||||
console.log(selector_data['size_pos'].length + " selectors found");
|
console.log(selector_data['size_pos'].length + " selectors found");
|
||||||
|
|
||||||
// highlight the default one if we can find it in the xPath list
|
// highlight the default one if we can find it in the xPath list
|
||||||
// or the xpath matches the default one
|
// or the xpath matches the default one
|
||||||
found = false;
|
found = false;
|
||||||
if(current_default_xpath.length) {
|
if (current_default_xpath.length) {
|
||||||
for (var i = selector_data['size_pos'].length; i!==0; i--) {
|
// Find the first one that matches
|
||||||
var sel = selector_data['size_pos'][i-1];
|
// @todo In the future paint all that match
|
||||||
if(selector_data['size_pos'][i - 1].xpath == current_default_xpath) {
|
for (const c of current_default_xpath) {
|
||||||
console.log("highlighting "+current_default_xpath);
|
for (var i = selector_data['size_pos'].length; i !== 0; i--) {
|
||||||
current_selected_i = i-1;
|
if (selector_data['size_pos'][i - 1].xpath === c) {
|
||||||
highlight_current_selected_i();
|
console.log("highlighting " + c);
|
||||||
found = true;
|
current_selected_i = i - 1;
|
||||||
break;
|
highlight_current_selected_i();
|
||||||
|
found = true;
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (found) {
|
||||||
|
break;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
if (!found) {
|
||||||
|
alert("Unfortunately your existing CSS/xPath Filter was no longer found!");
|
||||||
}
|
}
|
||||||
}
|
|
||||||
if(!found) {
|
|
||||||
alert("Unfortunately your existing CSS/xPath Filter was no longer found!");
|
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
$('#selector-canvas').bind('mousemove', function (e) {
|
$('#selector-canvas').bind('mousemove', function (e) {
|
||||||
@@ -205,9 +214,9 @@ $(document).ready(function() {
|
|||||||
var sel = selector_data['size_pos'][current_selected_i];
|
var sel = selector_data['size_pos'][current_selected_i];
|
||||||
if (sel[0] == '/') {
|
if (sel[0] == '/') {
|
||||||
// @todo - not sure just checking / is right
|
// @todo - not sure just checking / is right
|
||||||
$("#css_filter").val('xpath:'+sel.xpath);
|
$("#include_filters").val('xpath:'+sel.xpath);
|
||||||
} else {
|
} else {
|
||||||
$("#css_filter").val(sel.xpath);
|
$("#include_filters").val(sel.xpath);
|
||||||
}
|
}
|
||||||
xctx.fillStyle = 'rgba(205,205,205,0.95)';
|
xctx.fillStyle = 'rgba(205,205,205,0.95)';
|
||||||
xctx.strokeStyle = 'rgba(225,0,0,0.9)';
|
xctx.strokeStyle = 'rgba(225,0,0,0.9)';
|
||||||
|
|||||||
@@ -156,7 +156,7 @@ body:after, body:before {
|
|||||||
|
|
||||||
.fetch-error {
|
.fetch-error {
|
||||||
padding-top: 1em;
|
padding-top: 1em;
|
||||||
font-size: 60%;
|
font-size: 80%;
|
||||||
max-width: 400px;
|
max-width: 400px;
|
||||||
display: block;
|
display: block;
|
||||||
}
|
}
|
||||||
@@ -803,4 +803,4 @@ ul {
|
|||||||
padding: 0.5rem;
|
padding: 0.5rem;
|
||||||
border-radius: 5px;
|
border-radius: 5px;
|
||||||
color: #ff3300;
|
color: #ff3300;
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -27,17 +27,18 @@ class ChangeDetectionStore:
|
|||||||
# For when we edit, we should write to disk
|
# For when we edit, we should write to disk
|
||||||
needs_write_urgent = False
|
needs_write_urgent = False
|
||||||
|
|
||||||
|
__version_check = True
|
||||||
|
|
||||||
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
|
def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
|
||||||
# Should only be active for docker
|
# Should only be active for docker
|
||||||
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
|
# logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
|
||||||
self.needs_write = False
|
self.__data = App.model()
|
||||||
self.datastore_path = datastore_path
|
self.datastore_path = datastore_path
|
||||||
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
|
self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
|
||||||
|
self.needs_write = False
|
||||||
self.proxy_list = None
|
self.proxy_list = None
|
||||||
|
self.start_time = time.time()
|
||||||
self.stop_thread = False
|
self.stop_thread = False
|
||||||
|
|
||||||
self.__data = App.model()
|
|
||||||
|
|
||||||
# Base definition for all watchers
|
# Base definition for all watchers
|
||||||
# deepcopy part of #569 - not sure why its needed exactly
|
# deepcopy part of #569 - not sure why its needed exactly
|
||||||
self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
|
self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
|
||||||
@@ -81,10 +82,13 @@ class ChangeDetectionStore:
|
|||||||
except (FileNotFoundError, json.decoder.JSONDecodeError):
|
except (FileNotFoundError, json.decoder.JSONDecodeError):
|
||||||
if include_default_watches:
|
if include_default_watches:
|
||||||
print("Creating JSON store at", self.datastore_path)
|
print("Creating JSON store at", self.datastore_path)
|
||||||
|
self.add_watch(url='https://news.ycombinator.com/',
|
||||||
|
tag='Tech news',
|
||||||
|
extras={'fetch_backend': 'html_requests'})
|
||||||
|
|
||||||
self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
|
self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
|
||||||
self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
|
tag='changedetection.io',
|
||||||
self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')
|
extras={'fetch_backend': 'html_requests'})
|
||||||
|
|
||||||
self.__data['version_tag'] = version_tag
|
self.__data['version_tag'] = version_tag
|
||||||
|
|
||||||
@@ -113,9 +117,7 @@ class ChangeDetectionStore:
|
|||||||
self.__data['settings']['application']['api_access_token'] = secret
|
self.__data['settings']['application']['api_access_token'] = secret
|
||||||
|
|
||||||
# Proxy list support - available as a selection in settings when text file is imported
|
# Proxy list support - available as a selection in settings when text file is imported
|
||||||
# CSV list
|
proxy_list_file = "{}/proxies.json".format(self.datastore_path)
|
||||||
# "name, address", or just "name"
|
|
||||||
proxy_list_file = "{}/proxies.txt".format(self.datastore_path)
|
|
||||||
if path.isfile(proxy_list_file):
|
if path.isfile(proxy_list_file):
|
||||||
self.import_proxy_list(proxy_list_file)
|
self.import_proxy_list(proxy_list_file)
|
||||||
|
|
||||||
@@ -270,7 +272,7 @@ class ChangeDetectionStore:
|
|||||||
extras = {}
|
extras = {}
|
||||||
# should always be str
|
# should always be str
|
||||||
if tag is None or not tag:
|
if tag is None or not tag:
|
||||||
tag=''
|
tag = ''
|
||||||
|
|
||||||
# Incase these are copied across, assume it's a reference and deepcopy()
|
# Incase these are copied across, assume it's a reference and deepcopy()
|
||||||
apply_extras = deepcopy(extras)
|
apply_extras = deepcopy(extras)
|
||||||
@@ -285,17 +287,31 @@ class ChangeDetectionStore:
|
|||||||
res = r.json()
|
res = r.json()
|
||||||
|
|
||||||
# List of permissible attributes we accept from the wild internet
|
# List of permissible attributes we accept from the wild internet
|
||||||
for k in ['url', 'tag',
|
for k in [
|
||||||
'paused', 'title',
|
'body',
|
||||||
'previous_md5', 'headers',
|
'css_filter',
|
||||||
'body', 'method',
|
'extract_text',
|
||||||
'ignore_text', 'css_filter',
|
'extract_title_as_title',
|
||||||
'subtractive_selectors', 'trigger_text',
|
'headers',
|
||||||
'extract_title_as_title', 'extract_text',
|
'ignore_text',
|
||||||
'text_should_not_be_present',
|
'include_filters',
|
||||||
'webdriver_js_execute_code']:
|
'method',
|
||||||
|
'paused',
|
||||||
|
'previous_md5',
|
||||||
|
'subtractive_selectors',
|
||||||
|
'tag',
|
||||||
|
'text_should_not_be_present',
|
||||||
|
'title',
|
||||||
|
'trigger_text',
|
||||||
|
'webdriver_js_execute_code',
|
||||||
|
'url',
|
||||||
|
]:
|
||||||
if res.get(k):
|
if res.get(k):
|
||||||
apply_extras[k] = res[k]
|
if k != 'css_filter':
|
||||||
|
apply_extras[k] = res[k]
|
||||||
|
else:
|
||||||
|
# We renamed the field and made it a list
|
||||||
|
apply_extras['include_filters'] = [res['css_filter']]
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error("Error fetching metadata for shared watch link", url, str(e))
|
logging.error("Error fetching metadata for shared watch link", url, str(e))
|
||||||
@@ -318,12 +334,13 @@ class ChangeDetectionStore:
|
|||||||
del apply_extras[k]
|
del apply_extras[k]
|
||||||
|
|
||||||
new_watch.update(apply_extras)
|
new_watch.update(apply_extras)
|
||||||
self.__data['watching'][new_uuid]=new_watch
|
self.__data['watching'][new_uuid] = new_watch
|
||||||
|
|
||||||
self.__data['watching'][new_uuid].ensure_data_dir_exists()
|
self.__data['watching'][new_uuid].ensure_data_dir_exists()
|
||||||
|
|
||||||
if write_to_disk_now:
|
if write_to_disk_now:
|
||||||
self.sync_to_json()
|
self.sync_to_json()
|
||||||
|
|
||||||
return new_uuid
|
return new_uuid
|
||||||
|
|
||||||
def visualselector_data_is_ready(self, watch_uuid):
|
def visualselector_data_is_ready(self, watch_uuid):
|
||||||
@@ -437,20 +454,42 @@ class ChangeDetectionStore:
|
|||||||
unlink(item)
|
unlink(item)
|
||||||
|
|
||||||
def import_proxy_list(self, filename):
|
def import_proxy_list(self, filename):
|
||||||
import csv
|
with open(filename) as f:
|
||||||
with open(filename, newline='') as f:
|
self.proxy_list = json.load(f)
|
||||||
reader = csv.reader(f, skipinitialspace=True)
|
print ("Registered proxy list", list(self.proxy_list.keys()))
|
||||||
# @todo This loop can could be improved
|
|
||||||
l = []
|
|
||||||
for row in reader:
|
|
||||||
if len(row):
|
|
||||||
if len(row)>=2:
|
|
||||||
l.append(tuple(row[:2]))
|
|
||||||
else:
|
|
||||||
l.append(tuple([row[0], row[0]]))
|
|
||||||
self.proxy_list = l if len(l) else None
|
|
||||||
|
|
||||||
|
|
||||||
|
def get_preferred_proxy_for_watch(self, uuid):
|
||||||
|
"""
|
||||||
|
Returns the preferred proxy by ID key
|
||||||
|
:param uuid: UUID
|
||||||
|
:return: proxy "key" id
|
||||||
|
"""
|
||||||
|
|
||||||
|
proxy_id = None
|
||||||
|
if self.proxy_list is None:
|
||||||
|
return None
|
||||||
|
|
||||||
|
# If its a valid one
|
||||||
|
watch = self.data['watching'].get(uuid)
|
||||||
|
|
||||||
|
if watch.get('proxy') and watch.get('proxy') in list(self.proxy_list.keys()):
|
||||||
|
return watch.get('proxy')
|
||||||
|
|
||||||
|
# not valid (including None), try the system one
|
||||||
|
else:
|
||||||
|
system_proxy_id = self.data['settings']['requests'].get('proxy')
|
||||||
|
# Is not None and exists
|
||||||
|
if self.proxy_list.get(system_proxy_id):
|
||||||
|
return system_proxy_id
|
||||||
|
|
||||||
|
# Fallback - Did not resolve anything, use the first available
|
||||||
|
if system_proxy_id is None:
|
||||||
|
first_default = list(self.proxy_list)[0]
|
||||||
|
return first_default
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
# Run all updates
|
# Run all updates
|
||||||
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
|
# IMPORTANT - Each update could be run even when they have a new install and the schema is correct
|
||||||
# So therefor - each `update_n` should be very careful about checking if it needs to actually run
|
# So therefor - each `update_n` should be very careful about checking if it needs to actually run
|
||||||
@@ -557,3 +596,22 @@ class ChangeDetectionStore:
|
|||||||
continue
|
continue
|
||||||
return
|
return
|
||||||
|
|
||||||
|
|
||||||
|
# We incorrectly used common header overrides that should only apply to Requests
|
||||||
|
# These are now handled in content_fetcher::html_requests and shouldnt be passed to Playwright/Selenium
|
||||||
|
def update_7(self):
|
||||||
|
# These were hard-coded in early versions
|
||||||
|
for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
|
||||||
|
if self.data['settings']['headers'].get(v):
|
||||||
|
del self.data['settings']['headers'][v]
|
||||||
|
|
||||||
|
# Convert filters to a list of filters css_filter -> include_filters
|
||||||
|
def update_8(self):
|
||||||
|
for uuid, watch in self.data['watching'].items():
|
||||||
|
try:
|
||||||
|
existing_filter = watch.get('css_filter', '')
|
||||||
|
if existing_filter:
|
||||||
|
watch['include_filters'] = [existing_filter]
|
||||||
|
except:
|
||||||
|
continue
|
||||||
|
return
|
||||||
@@ -21,6 +21,9 @@
|
|||||||
|
|
||||||
<label for="diffChars" class="pure-checkbox">
|
<label for="diffChars" class="pure-checkbox">
|
||||||
<input type="radio" name="diff_type" id="diffChars" value="diffChars"/> Chars</label>
|
<input type="radio" name="diff_type" id="diffChars" value="diffChars"/> Chars</label>
|
||||||
|
<!-- @todo - when mimetype is JSON, select this by default? -->
|
||||||
|
<label for="diffJson" class="pure-checkbox">
|
||||||
|
<input type="radio" name="diff_type" id="diffJson" value="diffJson" /> JSON</label>
|
||||||
|
|
||||||
{% if versions|length >= 1 %}
|
{% if versions|length >= 1 %}
|
||||||
<label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
|
<label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
|
||||||
@@ -37,6 +40,11 @@
|
|||||||
</form>
|
</form>
|
||||||
<del>Removed text</del>
|
<del>Removed text</del>
|
||||||
<ins>Inserted Text</ins>
|
<ins>Inserted Text</ins>
|
||||||
|
<span>
|
||||||
|
<!-- https://github.com/kpdecker/jsdiff/issues/389 ? -->
|
||||||
|
<label for="ignoreWhitespace" class="pure-checkbox" id="label-diff-ignorewhitespace">
|
||||||
|
<input type="checkbox" id="ignoreWhitespace" name="ignoreWhitespace"/> Ignore Whitespace</label>
|
||||||
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div id="diff-jump">
|
<div id="diff-jump">
|
||||||
@@ -102,122 +110,12 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<script>
|
||||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff.js')}}"></script>
|
const newest_version_timestamp = {{newest_version_timestamp}};
|
||||||
|
|
||||||
<script defer="">
|
|
||||||
|
|
||||||
var a = document.getElementById('a');
|
|
||||||
var b = document.getElementById('b');
|
|
||||||
var result = document.getElementById('result');
|
|
||||||
|
|
||||||
function changed() {
|
|
||||||
var diff = JsDiff[window.diffType](a.textContent, b.textContent);
|
|
||||||
var fragment = document.createDocumentFragment();
|
|
||||||
for (var i=0; i < diff.length; i++) {
|
|
||||||
|
|
||||||
if (diff[i].added && diff[i + 1] && diff[i + 1].removed) {
|
|
||||||
var swap = diff[i];
|
|
||||||
diff[i] = diff[i + 1];
|
|
||||||
diff[i + 1] = swap;
|
|
||||||
}
|
|
||||||
|
|
||||||
var node;
|
|
||||||
if (diff[i].removed) {
|
|
||||||
node = document.createElement('del');
|
|
||||||
node.classList.add("change");
|
|
||||||
node.appendChild(document.createTextNode(diff[i].value));
|
|
||||||
|
|
||||||
} else if (diff[i].added) {
|
|
||||||
node = document.createElement('ins');
|
|
||||||
node.classList.add("change");
|
|
||||||
node.appendChild(document.createTextNode(diff[i].value));
|
|
||||||
} else {
|
|
||||||
node = document.createTextNode(diff[i].value);
|
|
||||||
}
|
|
||||||
fragment.appendChild(node);
|
|
||||||
}
|
|
||||||
|
|
||||||
result.textContent = '';
|
|
||||||
result.appendChild(fragment);
|
|
||||||
|
|
||||||
// Jump at start
|
|
||||||
inputs.current=0;
|
|
||||||
next_diff();
|
|
||||||
}
|
|
||||||
|
|
||||||
window.onload = function() {
|
|
||||||
|
|
||||||
|
|
||||||
/* Convert what is options from UTC time.time() to local browser time */
|
|
||||||
var diffList=document.getElementById("diff-version");
|
|
||||||
if (typeof(diffList) != 'undefined' && diffList != null) {
|
|
||||||
for (var option of diffList.options) {
|
|
||||||
var dateObject = new Date(option.value*1000);
|
|
||||||
option.label=dateObject.toLocaleString();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Set current version date as local time in the browser also */
|
|
||||||
var current_v = document.getElementById("current-v-date");
|
|
||||||
var dateObject = new Date({{ newest_version_timestamp }}*1000);
|
|
||||||
current_v.innerHTML=dateObject.toLocaleString();
|
|
||||||
|
|
||||||
|
|
||||||
onDiffTypeChange(document.querySelector('#settings [name="diff_type"]:checked'));
|
|
||||||
changed();
|
|
||||||
|
|
||||||
};
|
|
||||||
|
|
||||||
a.onpaste = a.onchange =
|
|
||||||
b.onpaste = b.onchange = changed;
|
|
||||||
|
|
||||||
if ('oninput' in a) {
|
|
||||||
a.oninput = b.oninput = changed;
|
|
||||||
} else {
|
|
||||||
a.onkeyup = b.onkeyup = changed;
|
|
||||||
}
|
|
||||||
|
|
||||||
function onDiffTypeChange(radio) {
|
|
||||||
window.diffType = radio.value;
|
|
||||||
// Not necessary
|
|
||||||
// document.title = "Diff " + radio.value.slice(4);
|
|
||||||
}
|
|
||||||
|
|
||||||
var radio = document.getElementsByName('diff_type');
|
|
||||||
for (var i = 0; i < radio.length; i++) {
|
|
||||||
radio[i].onchange = function(e) {
|
|
||||||
onDiffTypeChange(e.target);
|
|
||||||
changed();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
var inputs = document.getElementsByClassName('change');
|
|
||||||
inputs.current=0;
|
|
||||||
|
|
||||||
|
|
||||||
function next_diff() {
|
|
||||||
|
|
||||||
var element = inputs[inputs.current];
|
|
||||||
var headerOffset = 80;
|
|
||||||
var elementPosition = element.getBoundingClientRect().top;
|
|
||||||
var offsetPosition = elementPosition - headerOffset + window.scrollY;
|
|
||||||
|
|
||||||
window.scrollTo({
|
|
||||||
top: offsetPosition,
|
|
||||||
behavior: "smooth"
|
|
||||||
});
|
|
||||||
|
|
||||||
inputs.current++;
|
|
||||||
if(inputs.current >= inputs.length) {
|
|
||||||
inputs.current=0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
</script>
|
</script>
|
||||||
|
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff.min.js')}}"></script>
|
||||||
|
|
||||||
|
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-render.js')}}"></script>
|
||||||
|
|
||||||
|
|
||||||
{% endblock %}
|
{% endblock %}
|
||||||
@@ -40,7 +40,8 @@
|
|||||||
<fieldset>
|
<fieldset>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
|
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
|
||||||
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span>
|
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br/>
|
||||||
|
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br/>
|
||||||
</div>
|
</div>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{{ render_field(form.title, class="m-d") }}
|
{{ render_field(form.title, class="m-d") }}
|
||||||
@@ -77,6 +78,7 @@
|
|||||||
<span class="pure-form-message-inline">
|
<span class="pure-form-message-inline">
|
||||||
<p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
|
<p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
|
||||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||||
|
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
{% if form.proxy %}
|
{% if form.proxy %}
|
||||||
@@ -139,6 +141,9 @@ User-Agent: wonderbra 1.0") }}
|
|||||||
<div class="pure-control-group inline-radio">
|
<div class="pure-control-group inline-radio">
|
||||||
{{ render_checkbox_field(form.notification_muted) }}
|
{{ render_checkbox_field(form.notification_muted) }}
|
||||||
</div>
|
</div>
|
||||||
|
<div class="pure-control-group inline-radio">
|
||||||
|
{{ render_checkbox_field(form.notification_screenshot) }}
|
||||||
|
</div>
|
||||||
<div class="field-group" id="notification-field-group">
|
<div class="field-group" id="notification-field-group">
|
||||||
{% if has_default_notification_urls %}
|
{% if has_default_notification_urls %}
|
||||||
<div class="inline-warning">
|
<div class="inline-warning">
|
||||||
@@ -172,19 +177,29 @@ User-Agent: wonderbra 1.0") }}
|
|||||||
</div>
|
</div>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
<div class="pure-control-group">
|
<div class="pure-control-group">
|
||||||
{% set field = render_field(form.css_filter,
|
{% set field = render_field(form.include_filters,
|
||||||
placeholder=".class-name or #some-id, or other CSS selector rule.",
|
rows=5,
|
||||||
|
placeholder="#example
|
||||||
|
xpath://body/div/span[contains(@class, 'example-class')]",
|
||||||
class="m-d")
|
class="m-d")
|
||||||
%}
|
%}
|
||||||
{{ field }}
|
{{ field }}
|
||||||
{% if '/text()' in field %}
|
{% if '/text()' in field %}
|
||||||
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br/>
|
<span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br/>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
<span class="pure-form-message-inline">
|
<span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/>
|
||||||
<ul>
|
<ul>
|
||||||
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
<li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
|
||||||
<li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required, <a
|
<li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
|
||||||
href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
|
<ul>
|
||||||
|
<li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required, <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
|
||||||
|
{% if jq_support %}
|
||||||
|
<li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
|
||||||
|
{% else %}
|
||||||
|
<li>jq support not installed</li>
|
||||||
|
{% endif %}
|
||||||
|
</ul>
|
||||||
|
</li>
|
||||||
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
|
<li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
|
||||||
<ul>
|
<ul>
|
||||||
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
|
<li>Example: <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
|
||||||
@@ -193,7 +208,7 @@ User-Agent: wonderbra 1.0") }}
|
|||||||
</ul>
|
</ul>
|
||||||
</li>
|
</li>
|
||||||
</ul>
|
</ul>
|
||||||
Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
|
Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
|
||||||
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
|
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
|
||||||
</span>
|
</span>
|
||||||
</div>
|
</div>
|
||||||
|
|||||||
@@ -99,6 +99,8 @@
|
|||||||
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
|
<p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
|
||||||
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
<p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
|
||||||
</span>
|
</span>
|
||||||
|
<br/>
|
||||||
|
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
|
||||||
</div>
|
</div>
|
||||||
<fieldset class="pure-group" id="webdriver-override-options">
|
<fieldset class="pure-group" id="webdriver-override-options">
|
||||||
<div class="pure-form-message-inline">
|
<div class="pure-form-message-inline">
|
||||||
|
|||||||
@@ -87,7 +87,7 @@
|
|||||||
<a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
|
<a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
|
||||||
</td>
|
</td>
|
||||||
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
|
<td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
|
||||||
<a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
|
<a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
|
||||||
<a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a>
|
<a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a>
|
||||||
|
|
||||||
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}
|
{%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}
|
||||||
@@ -96,7 +96,7 @@
|
|||||||
<div class="fetch-error">{{ watch.last_error }}</div>
|
<div class="fetch-error">{{ watch.last_error }}</div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
|
{% if watch.last_notification_error is defined and watch.last_notification_error != False %}
|
||||||
<div class="fetch-error notification-error">{{ watch.last_notification_error }}</div>
|
<div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div>
|
||||||
{% endif %}
|
{% endif %}
|
||||||
{% if not active_tag %}
|
{% if not active_tag %}
|
||||||
<span class="watch-tag-list">{{ watch.tag}}</span>
|
<span class="watch-tag-list">{{ watch.tag}}</span>
|
||||||
|
|||||||
@@ -41,7 +41,7 @@ def app(request):
|
|||||||
|
|
||||||
cleanup(datastore_path)
|
cleanup(datastore_path)
|
||||||
|
|
||||||
app_config = {'datastore_path': datastore_path}
|
app_config = {'datastore_path': datastore_path, 'disable_checkver' : True}
|
||||||
cleanup(app_config['datastore_path'])
|
cleanup(app_config['datastore_path'])
|
||||||
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
|
datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
|
||||||
app = changedetection_app(app_config, datastore)
|
app = changedetection_app(app_config, datastore)
|
||||||
|
|||||||
2
changedetectionio/tests/proxy_list/__init__.py
Normal file
2
changedetectionio/tests/proxy_list/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
|||||||
|
"""Tests for the app."""
|
||||||
|
|
||||||
14
changedetectionio/tests/proxy_list/conftest.py
Normal file
14
changedetectionio/tests/proxy_list/conftest.py
Normal file
@@ -0,0 +1,14 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
from .. import conftest
|
||||||
|
|
||||||
|
#def pytest_addoption(parser):
|
||||||
|
# parser.addoption("--url_suffix", action="store", default="identifier for request")
|
||||||
|
|
||||||
|
|
||||||
|
#def pytest_generate_tests(metafunc):
|
||||||
|
# # This is called for every test. Only get/set command line arguments
|
||||||
|
# # if the argument is specified in the list of test "fixturenames".
|
||||||
|
# option_value = metafunc.config.option.url_suffix
|
||||||
|
# if 'url_suffix' in metafunc.fixturenames and option_value is not None:
|
||||||
|
# metafunc.parametrize("url_suffix", [option_value])
|
||||||
10
changedetectionio/tests/proxy_list/proxies.json-example
Normal file
10
changedetectionio/tests/proxy_list/proxies.json-example
Normal file
@@ -0,0 +1,10 @@
|
|||||||
|
{
|
||||||
|
"proxy-one": {
|
||||||
|
"label": "One",
|
||||||
|
"url": "http://127.0.0.1:3128"
|
||||||
|
},
|
||||||
|
"proxy-two": {
|
||||||
|
"label": "two",
|
||||||
|
"url": "http://127.0.0.1:3129"
|
||||||
|
}
|
||||||
|
}
|
||||||
41
changedetectionio/tests/proxy_list/squid.conf
Normal file
41
changedetectionio/tests/proxy_list/squid.conf
Normal file
@@ -0,0 +1,41 @@
|
|||||||
|
acl localnet src 0.0.0.1-0.255.255.255 # RFC 1122 "this" network (LAN)
|
||||||
|
acl localnet src 10.0.0.0/8 # RFC 1918 local private network (LAN)
|
||||||
|
acl localnet src 100.64.0.0/10 # RFC 6598 shared address space (CGN)
|
||||||
|
acl localnet src 169.254.0.0/16 # RFC 3927 link-local (directly plugged) machines
|
||||||
|
acl localnet src 172.16.0.0/12 # RFC 1918 local private network (LAN)
|
||||||
|
acl localnet src 192.168.0.0/16 # RFC 1918 local private network (LAN)
|
||||||
|
acl localnet src fc00::/7 # RFC 4193 local private network range
|
||||||
|
acl localnet src fe80::/10 # RFC 4291 link-local (directly plugged) machines
|
||||||
|
acl localnet src 159.65.224.174
|
||||||
|
acl SSL_ports port 443
|
||||||
|
acl Safe_ports port 80 # http
|
||||||
|
acl Safe_ports port 21 # ftp
|
||||||
|
acl Safe_ports port 443 # https
|
||||||
|
acl Safe_ports port 70 # gopher
|
||||||
|
acl Safe_ports port 210 # wais
|
||||||
|
acl Safe_ports port 1025-65535 # unregistered ports
|
||||||
|
acl Safe_ports port 280 # http-mgmt
|
||||||
|
acl Safe_ports port 488 # gss-http
|
||||||
|
acl Safe_ports port 591 # filemaker
|
||||||
|
acl Safe_ports port 777 # multiling http
|
||||||
|
acl CONNECT method CONNECT
|
||||||
|
|
||||||
|
http_access deny !Safe_ports
|
||||||
|
http_access deny CONNECT !SSL_ports
|
||||||
|
http_access allow localhost manager
|
||||||
|
http_access deny manager
|
||||||
|
http_access allow localhost
|
||||||
|
http_access allow localnet
|
||||||
|
http_access deny all
|
||||||
|
http_port 3128
|
||||||
|
coredump_dir /var/spool/squid
|
||||||
|
refresh_pattern ^ftp: 1440 20% 10080
|
||||||
|
refresh_pattern ^gopher: 1440 0% 1440
|
||||||
|
refresh_pattern -i (/cgi-bin/|\?) 0 0% 0
|
||||||
|
refresh_pattern \/(Packages|Sources)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
|
||||||
|
refresh_pattern \/Release(|\.gpg)$ 0 0% 0 refresh-ims
|
||||||
|
refresh_pattern \/InRelease$ 0 0% 0 refresh-ims
|
||||||
|
refresh_pattern \/(Translation-.*)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
|
||||||
|
refresh_pattern . 0 20% 4320
|
||||||
|
logfile_rotate 0
|
||||||
|
|
||||||
38
changedetectionio/tests/proxy_list/test_multiple_proxy.py
Normal file
38
changedetectionio/tests/proxy_list/test_multiple_proxy.py
Normal file
@@ -0,0 +1,38 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from ..util import live_server_setup
|
||||||
|
|
||||||
|
def test_preferred_proxy(client, live_server):
|
||||||
|
time.sleep(1)
|
||||||
|
live_server_setup(live_server)
|
||||||
|
time.sleep(1)
|
||||||
|
url = "http://chosen.changedetection.io"
|
||||||
|
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
# Because a URL wont show in squid/proxy logs due it being SSLed
|
||||||
|
# Use plain HTTP or a specific domain-name here
|
||||||
|
data={"urls": url},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
|
time.sleep(2)
|
||||||
|
res = client.post(
|
||||||
|
url_for("edit_page", uuid="first"),
|
||||||
|
data={
|
||||||
|
"include_filters": "",
|
||||||
|
"fetch_backend": "html_requests",
|
||||||
|
"headers": "",
|
||||||
|
"proxy": "proxy-two",
|
||||||
|
"tag": "",
|
||||||
|
"url": url,
|
||||||
|
},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"Updated watch." in res.data
|
||||||
|
time.sleep(2)
|
||||||
|
# Now the request should appear in the second-squid logs
|
||||||
19
changedetectionio/tests/proxy_list/test_proxy.py
Normal file
19
changedetectionio/tests/proxy_list/test_proxy.py
Normal file
@@ -0,0 +1,19 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||||
|
|
||||||
|
# just make a request, we will grep in the docker logs to see it actually got called
|
||||||
|
def test_check_basic_change_detection_functionality(client, live_server):
|
||||||
|
live_server_setup(live_server)
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
# Because a URL wont show in squid/proxy logs due it being SSLed
|
||||||
|
# Use plain HTTP or a specific domain-name here
|
||||||
|
data={"urls": "http://one.changedetection.io"},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
time.sleep(3)
|
||||||
@@ -147,6 +147,16 @@ def test_api_simple(client, live_server):
|
|||||||
# @todo how to handle None/default global values?
|
# @todo how to handle None/default global values?
|
||||||
assert watch['history_n'] == 2, "Found replacement history section, which is in its own API"
|
assert watch['history_n'] == 2, "Found replacement history section, which is in its own API"
|
||||||
|
|
||||||
|
# basic systeminfo check
|
||||||
|
res = client.get(
|
||||||
|
url_for("systeminfo"),
|
||||||
|
headers={'x-api-key': api_key},
|
||||||
|
)
|
||||||
|
info = json.loads(res.data)
|
||||||
|
assert info.get('watch_count') == 1
|
||||||
|
assert info.get('uptime') > 0.5
|
||||||
|
|
||||||
|
|
||||||
# Finally delete the watch
|
# Finally delete the watch
|
||||||
res = client.delete(
|
res = client.delete(
|
||||||
url_for("watch", uuid=watch_uuid),
|
url_for("watch", uuid=watch_uuid),
|
||||||
|
|||||||
@@ -19,17 +19,16 @@ def test_basic_auth(client, live_server):
|
|||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"1 Imported" in res.data
|
assert b"1 Imported" in res.data
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
# Check form validation
|
# Check form validation
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
data={"include_filters": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"Updated watch." in res.data
|
assert b"Updated watch." in res.data
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("preview_page", uuid="first"),
|
url_for("preview_page", uuid="first"),
|
||||||
|
|||||||
@@ -3,7 +3,7 @@
|
|||||||
import time
|
import time
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
from .util import set_original_response, set_modified_response, live_server_setup
|
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
|
||||||
|
|
||||||
sleep_time_for_fetch_thread = 3
|
sleep_time_for_fetch_thread = 3
|
||||||
|
|
||||||
@@ -36,7 +36,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
|
|||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
# It should report nothing found (no new 'unviewed' class)
|
# It should report nothing found (no new 'unviewed' class)
|
||||||
res = client.get(url_for("index"))
|
res = client.get(url_for("index"))
|
||||||
@@ -69,7 +69,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
|
|||||||
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||||
assert b'1 watches are queued for rechecking.' in res.data
|
assert b'1 watches are queued for rechecking.' in res.data
|
||||||
|
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
# Now something should be ready, indicated by having a 'unviewed' class
|
# Now something should be ready, indicated by having a 'unviewed' class
|
||||||
res = client.get(url_for("index"))
|
res = client.get(url_for("index"))
|
||||||
@@ -98,14 +98,14 @@ def test_check_basic_change_detection_functionality(client, live_server):
|
|||||||
assert b'which has this one new line' in res.data
|
assert b'which has this one new line' in res.data
|
||||||
assert b'Which is across multiple lines' not in res.data
|
assert b'Which is across multiple lines' not in res.data
|
||||||
|
|
||||||
time.sleep(2)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
# Do this a few times.. ensures we dont accidently set the status
|
# Do this a few times.. ensures we dont accidently set the status
|
||||||
for n in range(2):
|
for n in range(2):
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
# It should report nothing found (no new 'unviewed' class)
|
# It should report nothing found (no new 'unviewed' class)
|
||||||
res = client.get(url_for("index"))
|
res = client.get(url_for("index"))
|
||||||
@@ -125,7 +125,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
|
|||||||
)
|
)
|
||||||
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
wait_for_all_checks(client)
|
||||||
|
|
||||||
res = client.get(url_for("index"))
|
res = client.get(url_for("index"))
|
||||||
assert b'unviewed' in res.data
|
assert b'unviewed' in res.data
|
||||||
|
|||||||
@@ -1,18 +1,31 @@
|
|||||||
#!/usr/bin/python3
|
#!/usr/bin/python3
|
||||||
|
|
||||||
import time
|
from .util import set_original_response, set_modified_response, live_server_setup
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from urllib.request import urlopen
|
from urllib.request import urlopen
|
||||||
from . util import set_original_response, set_modified_response, live_server_setup
|
from zipfile import ZipFile
|
||||||
|
import re
|
||||||
|
import time
|
||||||
|
|
||||||
|
|
||||||
def test_backup(client, live_server):
|
def test_backup(client, live_server):
|
||||||
|
|
||||||
live_server_setup(live_server)
|
live_server_setup(live_server)
|
||||||
|
|
||||||
|
set_original_response()
|
||||||
|
|
||||||
# Give the endpoint time to spin up
|
# Give the endpoint time to spin up
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
data={"urls": url_for('test_endpoint', _external=True)},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
time.sleep(3)
|
||||||
|
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("get_backup"),
|
url_for("get_backup"),
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
@@ -20,6 +33,19 @@ def test_backup(client, live_server):
|
|||||||
|
|
||||||
# Should get the right zip content type
|
# Should get the right zip content type
|
||||||
assert res.content_type == "application/zip"
|
assert res.content_type == "application/zip"
|
||||||
|
|
||||||
# Should be PK/ZIP stream
|
# Should be PK/ZIP stream
|
||||||
assert res.data.count(b'PK') >= 2
|
assert res.data.count(b'PK') >= 2
|
||||||
|
|
||||||
|
# ZipFile from buffer seems non-obvious, just save it instead
|
||||||
|
with open("download.zip", 'wb') as f:
|
||||||
|
f.write(res.data)
|
||||||
|
|
||||||
|
zip = ZipFile('download.zip')
|
||||||
|
l = zip.namelist()
|
||||||
|
uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
|
||||||
|
newlist = list(filter(uuid4hex.match, l)) # Read Note below
|
||||||
|
|
||||||
|
# Should be two txt files in the archive (history and the snapshot)
|
||||||
|
assert len(newlist) == 2
|
||||||
|
|
||||||
|
|||||||
@@ -46,22 +46,23 @@ def set_modified_response():
|
|||||||
|
|
||||||
|
|
||||||
# Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
|
# Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
|
||||||
def test_css_filter_output():
|
def test_include_filters_output():
|
||||||
from changedetectionio import fetch_site_status
|
|
||||||
from inscriptis import get_text
|
from inscriptis import get_text
|
||||||
|
|
||||||
# Check text with sub-parts renders correctly
|
# Check text with sub-parts renders correctly
|
||||||
content = """<html> <body><div id="thingthing" > Some really <b>bold</b> text </div> </body> </html>"""
|
content = """<html> <body><div id="thingthing" > Some really <b>bold</b> text </div> </body> </html>"""
|
||||||
html_blob = css_filter(css_filter="#thingthing", html_content=content)
|
html_blob = include_filters(include_filters="#thingthing", html_content=content)
|
||||||
text = get_text(html_blob)
|
text = get_text(html_blob)
|
||||||
assert text == " Some really bold text"
|
assert text == " Some really bold text"
|
||||||
|
|
||||||
content = """<html> <body>
|
content = """<html> <body>
|
||||||
<p>foo bar blah</p>
|
<p>foo bar blah</p>
|
||||||
<div class="parts">Block A</div> <div class="parts">Block B</div></body>
|
<DIV class="parts">Block A</DiV> <div class="parts">Block B</DIV></body>
|
||||||
</html>
|
</html>
|
||||||
"""
|
"""
|
||||||
html_blob = css_filter(css_filter=".parts", html_content=content)
|
|
||||||
|
# in xPath this would be //*[@class='parts']
|
||||||
|
html_blob = include_filters(include_filters=".parts", html_content=content)
|
||||||
text = get_text(html_blob)
|
text = get_text(html_blob)
|
||||||
|
|
||||||
# Divs are converted to 4 whitespaces by inscriptis
|
# Divs are converted to 4 whitespaces by inscriptis
|
||||||
@@ -69,10 +70,10 @@ def test_css_filter_output():
|
|||||||
|
|
||||||
|
|
||||||
# Tests the whole stack works with the CSS Filter
|
# Tests the whole stack works with the CSS Filter
|
||||||
def test_check_markup_css_filter_restriction(client, live_server):
|
def test_check_markup_include_filters_restriction(client, live_server):
|
||||||
sleep_time_for_fetch_thread = 3
|
sleep_time_for_fetch_thread = 3
|
||||||
|
|
||||||
css_filter = "#sametext"
|
include_filters = "#sametext"
|
||||||
|
|
||||||
set_original_response()
|
set_original_response()
|
||||||
|
|
||||||
@@ -88,9 +89,6 @@ def test_check_markup_css_filter_restriction(client, live_server):
|
|||||||
)
|
)
|
||||||
assert b"1 Imported" in res.data
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
@@ -98,19 +96,16 @@ def test_check_markup_css_filter_restriction(client, live_server):
|
|||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"Updated watch." in res.data
|
assert b"Updated watch." in res.data
|
||||||
|
time.sleep(1)
|
||||||
# Check it saved
|
# Check it saved
|
||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
)
|
)
|
||||||
assert bytes(css_filter.encode('utf-8')) in res.data
|
assert bytes(include_filters.encode('utf-8')) in res.data
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
@@ -126,3 +121,58 @@ def test_check_markup_css_filter_restriction(client, live_server):
|
|||||||
# Because it should be looking at only that 'sametext' id
|
# Because it should be looking at only that 'sametext' id
|
||||||
res = client.get(url_for("index"))
|
res = client.get(url_for("index"))
|
||||||
assert b'unviewed' in res.data
|
assert b'unviewed' in res.data
|
||||||
|
|
||||||
|
|
||||||
|
# Tests that multiple include filters (a CSS selector and an xPath rule, one per line) are applied together
|
||||||
|
def test_check_multiple_filters(client, live_server):
|
||||||
|
sleep_time_for_fetch_thread = 3
|
||||||
|
|
||||||
|
include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]"
|
||||||
|
|
||||||
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
|
f.write("""<html><body>
|
||||||
|
<div id="blob-a">Blob A</div>
|
||||||
|
<div id="blob-b">Blob B</div>
|
||||||
|
<div id="blob-c">Blob C</div>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
""")
|
||||||
|
|
||||||
|
# Give the endpoint time to spin up
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
test_url = url_for('test_endpoint', _external=True)
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
data={"urls": test_url},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# Go to the edit page and set our include filters
|
||||||
|
# Post the filters to the watch's edit form
|
||||||
|
res = client.post(
|
||||||
|
url_for("edit_page", uuid="first"),
|
||||||
|
data={"include_filters": include_filters,
|
||||||
|
"url": test_url,
|
||||||
|
"tag": "",
|
||||||
|
"headers": "",
|
||||||
|
'fetch_backend': "html_requests"},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"Updated watch." in res.data
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
res = client.get(
|
||||||
|
url_for("preview_page", uuid="first"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# Only the two blobs should be here
|
||||||
|
assert b"Blob A" in res.data # CSS was ok
|
||||||
|
assert b"Blob B" in res.data # xPath was ok
|
||||||
|
assert b"Blob C" not in res.data # Should not be included
|
||||||
|
|||||||
@@ -70,9 +70,6 @@ def test_check_encoding_detection_missing_content_type_header(client, live_serve
|
|||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
|
|
||||||
|
|||||||
@@ -88,7 +88,7 @@ def test_check_filter_multiline(client, live_server):
|
|||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": '',
|
data={"include_filters": '',
|
||||||
'extract_text': '/something.+?6 billion.+?lines/si',
|
'extract_text': '/something.+?6 billion.+?lines/si',
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
"tag": "",
|
"tag": "",
|
||||||
@@ -116,7 +116,7 @@ def test_check_filter_multiline(client, live_server):
|
|||||||
|
|
||||||
def test_check_filter_and_regex_extract(client, live_server):
|
def test_check_filter_and_regex_extract(client, live_server):
|
||||||
sleep_time_for_fetch_thread = 3
|
sleep_time_for_fetch_thread = 3
|
||||||
css_filter = ".changetext"
|
include_filters = ".changetext"
|
||||||
|
|
||||||
set_original_response()
|
set_original_response()
|
||||||
|
|
||||||
@@ -143,7 +143,7 @@ def test_check_filter_and_regex_extract(client, live_server):
|
|||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": css_filter,
|
data={"include_filters": include_filters,
|
||||||
'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
|
'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
"tag": "",
|
"tag": "",
|
||||||
|
|||||||
@@ -92,7 +92,7 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
|
|||||||
"tag": "my tag",
|
"tag": "my tag",
|
||||||
"title": "my title",
|
"title": "my title",
|
||||||
"headers": "",
|
"headers": "",
|
||||||
"css_filter": '.ticket-available',
|
"include_filters": '.ticket-available',
|
||||||
"fetch_backend": "html_requests"})
|
"fetch_backend": "html_requests"})
|
||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
|
|||||||
@@ -76,7 +76,7 @@ def run_filter_test(client, content_filter):
|
|||||||
"title": "my title",
|
"title": "my title",
|
||||||
"headers": "",
|
"headers": "",
|
||||||
"filter_failure_notification_send": 'y',
|
"filter_failure_notification_send": 'y',
|
||||||
"css_filter": content_filter,
|
"include_filters": content_filter,
|
||||||
"fetch_backend": "html_requests"})
|
"fetch_backend": "html_requests"})
|
||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
@@ -95,7 +95,7 @@ def run_filter_test(client, content_filter):
|
|||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
|
|
||||||
# We should see something in the frontend
|
# We should see something in the frontend
|
||||||
assert b'Warning, filter' in res.data
|
assert b'Warning, no filters were found' in res.data
|
||||||
|
|
||||||
# Now it should exist and contain our "filter not found" alert
|
# Now it should exist and contain our "filter not found" alert
|
||||||
assert os.path.isfile("test-datastore/notification.txt")
|
assert os.path.isfile("test-datastore/notification.txt")
|
||||||
@@ -131,7 +131,7 @@ def run_filter_test(client, content_filter):
|
|||||||
def test_setup(live_server):
|
def test_setup(live_server):
|
||||||
live_server_setup(live_server)
|
live_server_setup(live_server)
|
||||||
|
|
||||||
def test_check_css_filter_failure_notification(client, live_server):
|
def test_check_include_filters_failure_notification(client, live_server):
|
||||||
set_original_response()
|
set_original_response()
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
run_filter_test(client, '#nope-doesnt-exist')
|
run_filter_test(client, '#nope-doesnt-exist')
|
||||||
|
|||||||
33
changedetectionio/tests/test_jinja2.py
Normal file
33
changedetectionio/tests/test_jinja2.py
Normal file
@@ -0,0 +1,33 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from .util import live_server_setup
|
||||||
|
|
||||||
|
|
||||||
|
# Jinja2 template expressions in a watch URL's query string should be rendered when the URL is fetched
|
||||||
|
def test_jinja2_in_url_query(client, live_server):
|
||||||
|
live_server_setup(live_server)
|
||||||
|
|
||||||
|
# Give the endpoint time to spin up
|
||||||
|
time.sleep(1)
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
test_url = url_for('test_return_query', _external=True)
|
||||||
|
|
||||||
|
# Build the query string by hand: url_for() would URL-encode the template, and we don't want that here
|
||||||
|
full_url = "{}?{}".format(test_url,
|
||||||
|
"date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}", )
|
||||||
|
res = client.post(
|
||||||
|
url_for("form_quick_watch_add"),
|
||||||
|
data={"url": full_url, "tag": "test"},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b"Watch added" in res.data
|
||||||
|
time.sleep(3)
|
||||||
|
# The preview should contain the rendered date (a year starting with '2'), not the raw template
|
||||||
|
res = client.get(
|
||||||
|
url_for("preview_page", uuid="first"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b'date=2' in res.data
|
||||||
@@ -2,10 +2,15 @@
|
|||||||
# coding=utf-8
|
# coding=utf-8
|
||||||
|
|
||||||
import time
|
import time
|
||||||
from flask import url_for
|
from flask import url_for, escape
|
||||||
from . util import live_server_setup
|
from . util import live_server_setup
|
||||||
import pytest
|
import pytest
|
||||||
|
jq_support = True
|
||||||
|
|
||||||
|
try:
|
||||||
|
import jq
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
jq_support = False
|
||||||
|
|
||||||
def test_setup(live_server):
|
def test_setup(live_server):
|
||||||
live_server_setup(live_server)
|
live_server_setup(live_server)
|
||||||
@@ -36,16 +41,28 @@ and it can also be repeated
|
|||||||
from .. import html_tools
|
from .. import html_tools
|
||||||
|
|
||||||
# See that we can find the second <script> one, which is not broken, and matches our filter
|
# See that we can find the second <script> one, which is not broken, and matches our filter
|
||||||
text = html_tools.extract_json_as_string(content, "$.offers.price")
|
text = html_tools.extract_json_as_string(content, "json:$.offers.price")
|
||||||
assert text == "23.5"
|
assert text == "23.5"
|
||||||
|
|
||||||
text = html_tools.extract_json_as_string('{"id":5}', "$.id")
|
# also check for jq
|
||||||
|
if jq_support:
|
||||||
|
text = html_tools.extract_json_as_string(content, "jq:.offers.price")
|
||||||
|
assert text == "23.5"
|
||||||
|
|
||||||
|
text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
|
||||||
|
assert text == "5"
|
||||||
|
|
||||||
|
text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
|
||||||
assert text == "5"
|
assert text == "5"
|
||||||
|
|
||||||
# When nothing at all is found, it should throw JSONNOTFound
|
# When nothing at all is found, it should throw JSONNOTFound
|
||||||
# Which is caught and shown to the user in the watch-overview table
|
# Which is caught and shown to the user in the watch-overview table
|
||||||
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
||||||
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "$.id")
|
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id")
|
||||||
|
|
||||||
|
if jq_support:
|
||||||
|
with pytest.raises(html_tools.JSONNotFound) as e_info:
|
||||||
|
html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")
|
||||||
|
|
||||||
def set_original_ext_response():
|
def set_original_ext_response():
|
||||||
data = """
|
data = """
|
||||||
@@ -66,6 +83,7 @@ def set_original_ext_response():
|
|||||||
|
|
||||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
f.write(data)
|
f.write(data)
|
||||||
|
return None
|
||||||
|
|
||||||
def set_modified_ext_response():
|
def set_modified_ext_response():
|
||||||
data = """
|
data = """
|
||||||
@@ -86,6 +104,7 @@ def set_modified_ext_response():
|
|||||||
|
|
||||||
with open("test-datastore/endpoint-content.txt", "w") as f:
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
f.write(data)
|
f.write(data)
|
||||||
|
return None
|
||||||
|
|
||||||
def set_original_response():
|
def set_original_response():
|
||||||
test_return_data = """
|
test_return_data = """
|
||||||
@@ -113,7 +132,7 @@ def set_original_response():
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
|
|
||||||
def set_response_with_html():
|
def set_json_response_with_html():
|
||||||
test_return_data = """
|
test_return_data = """
|
||||||
{
|
{
|
||||||
"test": [
|
"test": [
|
||||||
@@ -157,7 +176,7 @@ def set_modified_response():
|
|||||||
def test_check_json_without_filter(client, live_server):
|
def test_check_json_without_filter(client, live_server):
|
||||||
# Request a JSON document from a application/json source containing HTML
|
# Request a JSON document from a application/json source containing HTML
|
||||||
# and be sure it doesn't get chewed up by instriptis
|
# and be sure it doesn't get chewed up by instriptis
|
||||||
set_response_with_html()
|
set_json_response_with_html()
|
||||||
|
|
||||||
# Give the endpoint time to spin up
|
# Give the endpoint time to spin up
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
@@ -170,9 +189,6 @@ def test_check_json_without_filter(client, live_server):
|
|||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
|
|
||||||
@@ -181,13 +197,14 @@ def test_check_json_without_filter(client, live_server):
|
|||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
# Should still see '"html": "<b>"'
|
||||||
assert b'"<b>' in res.data
|
assert b'"<b>' in res.data
|
||||||
assert res.data.count(b'{\n') >= 2
|
assert res.data.count(b'{\n') >= 2
|
||||||
|
|
||||||
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
def test_check_json_filter(client, live_server):
|
def check_json_filter(json_filter, client, live_server):
|
||||||
json_filter = 'json:boss.name'
|
|
||||||
|
|
||||||
set_original_response()
|
set_original_response()
|
||||||
|
|
||||||
# Give the endpoint time to spin up
|
# Give the endpoint time to spin up
|
||||||
@@ -202,9 +219,6 @@ def test_check_json_filter(client, live_server):
|
|||||||
)
|
)
|
||||||
assert b"1 Imported" in res.data
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
|
|
||||||
@@ -212,7 +226,7 @@ def test_check_json_filter(client, live_server):
|
|||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": json_filter,
|
data={"include_filters": json_filter,
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
"tag": "",
|
"tag": "",
|
||||||
"headers": "",
|
"headers": "",
|
||||||
@@ -226,10 +240,7 @@ def test_check_json_filter(client, live_server):
|
|||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
)
|
)
|
||||||
assert bytes(json_filter.encode('utf-8')) in res.data
|
assert bytes(escape(json_filter).encode('utf-8')) in res.data
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
@@ -252,10 +263,17 @@ def test_check_json_filter(client, live_server):
|
|||||||
# And #462 - check we see the proper utf-8 string there
|
# And #462 - check we see the proper utf-8 string there
|
||||||
assert "Örnsköldsvik".encode('utf-8') in res.data
|
assert "Örnsköldsvik".encode('utf-8') in res.data
|
||||||
|
|
||||||
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
def test_check_json_filter_bool_val(client, live_server):
|
def test_check_jsonpath_filter(client, live_server):
|
||||||
json_filter = "json:$['available']"
|
check_json_filter('json:boss.name', client, live_server)
|
||||||
|
|
||||||
|
def test_check_jq_filter(client, live_server):
|
||||||
|
if jq_support:
|
||||||
|
check_json_filter('jq:.boss.name', client, live_server)
|
||||||
|
|
||||||
|
def check_json_filter_bool_val(json_filter, client, live_server):
|
||||||
set_original_response()
|
set_original_response()
|
||||||
|
|
||||||
# Give the endpoint time to spin up
|
# Give the endpoint time to spin up
|
||||||
@@ -275,7 +293,7 @@ def test_check_json_filter_bool_val(client, live_server):
|
|||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": json_filter,
|
data={"include_filters": json_filter,
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
"tag": "",
|
"tag": "",
|
||||||
"headers": "",
|
"headers": "",
|
||||||
@@ -285,11 +303,6 @@ def test_check_json_filter_bool_val(client, live_server):
|
|||||||
)
|
)
|
||||||
assert b"Updated watch." in res.data
|
assert b"Updated watch." in res.data
|
||||||
|
|
||||||
time.sleep(3)
|
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
# Make a change
|
# Make a change
|
||||||
@@ -304,14 +317,22 @@ def test_check_json_filter_bool_val(client, live_server):
|
|||||||
# But the change should be there, tho its hard to test the change was detected because it will show old and new versions
|
# But the change should be there, tho its hard to test the change was detected because it will show old and new versions
|
||||||
assert b'false' in res.data
|
assert b'false' in res.data
|
||||||
|
|
||||||
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
def test_check_jsonpath_filter_bool_val(client, live_server):
|
||||||
|
check_json_filter_bool_val("json:$['available']", client, live_server)
|
||||||
|
|
||||||
|
def test_check_jq_filter_bool_val(client, live_server):
|
||||||
|
if jq_support:
|
||||||
|
check_json_filter_bool_val("jq:.available", client, live_server)
|
||||||
|
|
||||||
# Re #265 - Extended JSON selector test
|
# Re #265 - Extended JSON selector test
|
||||||
# Stuff to consider here
|
# Stuff to consider here
|
||||||
# - Selector should be allowed to return empty when it doesnt match (people might wait for some condition)
|
# - Selector should be allowed to return empty when it doesnt match (people might wait for some condition)
|
||||||
# - The 'diff' tab could show the old and new content
|
# - The 'diff' tab could show the old and new content
|
||||||
# - Form should let us enter a selector that doesnt (yet) match anything
|
# - Form should let us enter a selector that doesnt (yet) match anything
|
||||||
def test_check_json_ext_filter(client, live_server):
|
def check_json_ext_filter(json_filter, client, live_server):
|
||||||
json_filter = 'json:$[?(@.status==Sold)]'
|
|
||||||
|
|
||||||
set_original_ext_response()
|
set_original_ext_response()
|
||||||
|
|
||||||
# Give the endpoint time to spin up
|
# Give the endpoint time to spin up
|
||||||
@@ -326,9 +347,6 @@ def test_check_json_ext_filter(client, live_server):
|
|||||||
)
|
)
|
||||||
assert b"1 Imported" in res.data
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
|
|
||||||
@@ -336,7 +354,7 @@ def test_check_json_ext_filter(client, live_server):
|
|||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": json_filter,
|
data={"include_filters": json_filter,
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
"tag": "",
|
"tag": "",
|
||||||
"headers": "",
|
"headers": "",
|
||||||
@@ -350,10 +368,7 @@ def test_check_json_ext_filter(client, live_server):
|
|||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
)
|
)
|
||||||
assert bytes(json_filter.encode('utf-8')) in res.data
|
assert bytes(escape(json_filter).encode('utf-8')) in res.data
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
@@ -376,3 +391,12 @@ def test_check_json_ext_filter(client, live_server):
|
|||||||
assert b'ForSale' not in res.data
|
assert b'ForSale' not in res.data
|
||||||
assert b'Sold' in res.data
|
assert b'Sold' in res.data
|
||||||
|
|
||||||
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
|
def test_check_jsonpath_ext_filter(client, live_server):
|
||||||
|
check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)
|
||||||
|
|
||||||
|
def test_check_jq_ext_filter(client, live_server):
|
||||||
|
if jq_support:
|
||||||
|
check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
|
||||||
@@ -3,7 +3,9 @@ import time
|
|||||||
import re
|
import re
|
||||||
from flask import url_for
|
from flask import url_for
|
||||||
from . util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup
|
from . util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup
|
||||||
|
from . util import extract_UUID_from_client
|
||||||
import logging
|
import logging
|
||||||
|
import base64
|
||||||
|
|
||||||
from changedetectionio.notification import (
|
from changedetectionio.notification import (
|
||||||
default_notification_body,
|
default_notification_body,
|
||||||
@@ -68,6 +70,14 @@ def test_check_notification(client, live_server):
|
|||||||
# Give the thread time to pick up the first version
|
# Give the thread time to pick up the first version
|
||||||
time.sleep(3)
|
time.sleep(3)
|
||||||
|
|
||||||
|
testimage = 'iVBORw0KGgoAAAANSUhEUgAAAAEAAAABCAQAAAC1HAwCAAAAC0lEQVR42mNkYAAAAAYAAjCB0C8AAAAASUVORK5CYII='
|
||||||
|
# Write the last screenshot png
|
||||||
|
|
||||||
|
uuid = extract_UUID_from_client(client)
|
||||||
|
datastore = 'test-datastore'
|
||||||
|
with open(os.path.join(datastore, str(uuid), 'last-screenshot.png'), 'wb') as f:
|
||||||
|
f.write(base64.b64decode(testimage))
|
||||||
|
|
||||||
# Goto the edit page, add our ignore text
|
# Goto the edit page, add our ignore text
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
|
|
||||||
@@ -86,6 +96,7 @@ def test_check_notification(client, live_server):
|
|||||||
"Diff: {diff}\n"
|
"Diff: {diff}\n"
|
||||||
"Diff Full: {diff_full}\n"
|
"Diff Full: {diff_full}\n"
|
||||||
":-)",
|
":-)",
|
||||||
|
"notification_screenshot": True,
|
||||||
"notification_format": "Text"}
|
"notification_format": "Text"}
|
||||||
|
|
||||||
notification_form_data.update({
|
notification_form_data.update({
|
||||||
@@ -142,6 +153,7 @@ def test_check_notification(client, live_server):
|
|||||||
assert "preview/" in notification_submission
|
assert "preview/" in notification_submission
|
||||||
assert ":-)" in notification_submission
|
assert ":-)" in notification_submission
|
||||||
assert "New ChangeDetection.io Notification - {}".format(test_url) in notification_submission
|
assert "New ChangeDetection.io Notification - {}".format(test_url) in notification_submission
|
||||||
|
assert testimage in notification_submission
|
||||||
|
|
||||||
if env_base_url:
|
if env_base_url:
|
||||||
# Re #65 - did we see our BASE_URL?
|
# Re #65 - did we see our BASE_URL?
|
||||||
|
|||||||
@@ -14,7 +14,7 @@ def test_share_watch(client, live_server):
|
|||||||
live_server_setup(live_server)
|
live_server_setup(live_server)
|
||||||
|
|
||||||
test_url = url_for('test_endpoint', _external=True)
|
test_url = url_for('test_endpoint', _external=True)
|
||||||
css_filter = ".nice-filter"
|
include_filters = ".nice-filter"
|
||||||
|
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
res = client.post(
|
res = client.post(
|
||||||
@@ -29,7 +29,7 @@ def test_share_watch(client, live_server):
|
|||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"Updated watch." in res.data
|
assert b"Updated watch." in res.data
|
||||||
@@ -37,7 +37,7 @@ def test_share_watch(client, live_server):
|
|||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
)
|
)
|
||||||
assert bytes(css_filter.encode('utf-8')) in res.data
|
assert bytes(include_filters.encode('utf-8')) in res.data
|
||||||
|
|
||||||
# click share the link
|
# click share the link
|
||||||
res = client.get(
|
res = client.get(
|
||||||
@@ -73,4 +73,8 @@ def test_share_watch(client, live_server):
|
|||||||
res = client.get(
|
res = client.get(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
)
|
)
|
||||||
assert bytes(css_filter.encode('utf-8')) in res.data
|
assert bytes(include_filters.encode('utf-8')) in res.data
|
||||||
|
|
||||||
|
# Check it saved the URL
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert bytes(test_url.encode('utf-8')) in res.data
|
||||||
|
|||||||
@@ -57,10 +57,9 @@ def test_check_basic_change_detection_functionality_source(client, live_server):
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
# `subtractive_selectors` should still work in `source:` type requests
|
||||||
def test_check_ignore_elements(client, live_server):
|
def test_check_ignore_elements(client, live_server):
|
||||||
set_original_response()
|
set_original_response()
|
||||||
|
|
||||||
time.sleep(2)
|
time.sleep(2)
|
||||||
test_url = 'source:'+url_for('test_endpoint', _external=True)
|
test_url = 'source:'+url_for('test_endpoint', _external=True)
|
||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
@@ -77,9 +76,9 @@ def test_check_ignore_elements(client, live_server):
|
|||||||
#####################
|
#####################
|
||||||
# We want <span> and <p> ONLY, but ignore span with .foobar-detection
|
# We want <span> and <p> ONLY, but ignore span with .foobar-detection
|
||||||
|
|
||||||
res = client.post(
|
client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
|
data={"include_filters": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -89,7 +88,6 @@ def test_check_ignore_elements(client, live_server):
|
|||||||
url_for("preview_page", uuid="first"),
|
url_for("preview_page", uuid="first"),
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
assert b'foobar-detection' not in res.data
|
assert b'foobar-detection' not in res.data
|
||||||
assert b'<br' not in res.data
|
assert b'<br' not in res.data
|
||||||
assert b'<p' in res.data
|
assert b'<p' in res.data
|
||||||
@@ -49,7 +49,7 @@ def test_trigger_regex_functionality_with_filter(client, live_server):
|
|||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"trigger_text": "/cool.stuff/",
|
data={"trigger_text": "/cool.stuff/",
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
"css_filter": '#in-here',
|
"include_filters": '#in-here',
|
||||||
"fetch_backend": "html_requests"},
|
"fetch_backend": "html_requests"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ def test_check_watch_field_storage(client, live_server):
|
|||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={ "notification_urls": "json://127.0.0.1:30000\r\njson://128.0.0.1\r\n",
|
data={ "notification_urls": "json://127.0.0.1:30000\r\njson://128.0.0.1\r\n",
|
||||||
"time_between_check-minutes": 126,
|
"time_between_check-minutes": 126,
|
||||||
"css_filter" : ".fooclass",
|
"include_filters" : ".fooclass",
|
||||||
"title" : "My title",
|
"title" : "My title",
|
||||||
"ignore_text" : "ignore this",
|
"ignore_text" : "ignore this",
|
||||||
"url": test_url,
|
"url": test_url,
|
||||||
|
|||||||
@@ -89,7 +89,7 @@ def test_check_xpath_filter_utf8(client, live_server):
|
|||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"Updated watch." in res.data
|
assert b"Updated watch." in res.data
|
||||||
@@ -143,7 +143,7 @@ def test_check_xpath_text_function_utf8(client, live_server):
|
|||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"Updated watch." in res.data
|
assert b"Updated watch." in res.data
|
||||||
@@ -182,9 +182,6 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
|
|||||||
)
|
)
|
||||||
assert b"1 Imported" in res.data
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
# Trigger a check
|
|
||||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
|
||||||
|
|
||||||
# Give the thread time to pick it up
|
# Give the thread time to pick it up
|
||||||
time.sleep(sleep_time_for_fetch_thread)
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
@@ -192,7 +189,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
|
|||||||
# Add our URL to the import page
|
# Add our URL to the import page
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
data={"include_filters": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"Updated watch." in res.data
|
assert b"Updated watch." in res.data
|
||||||
@@ -230,10 +227,11 @@ def test_xpath_validation(client, live_server):
|
|||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"1 Imported" in res.data
|
assert b"1 Imported" in res.data
|
||||||
|
time.sleep(2)
|
||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
data={"include_filters": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"is not a valid XPath expression" in res.data
|
assert b"is not a valid XPath expression" in res.data
|
||||||
@@ -242,7 +240,7 @@ def test_xpath_validation(client, live_server):
|
|||||||
|
|
||||||
|
|
||||||
# actually only really used by the distll.io importer, but could be handy too
|
# actually only really used by the distll.io importer, but could be handy too
|
||||||
def test_check_with_prefix_css_filter(client, live_server):
|
def test_check_with_prefix_include_filters(client, live_server):
|
||||||
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
|
||||||
assert b'Deleted' in res.data
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
@@ -263,7 +261,7 @@ def test_check_with_prefix_css_filter(client, live_server):
|
|||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first"),
|
url_for("edit_page", uuid="first"),
|
||||||
data={"css_filter": "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
data={"include_filters": "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|||||||
@@ -86,6 +86,7 @@ def extract_UUID_from_client(client):
|
|||||||
def wait_for_all_checks(client):
|
def wait_for_all_checks(client):
|
||||||
# Loop waiting until done..
|
# Loop waiting until done..
|
||||||
attempt=0
|
attempt=0
|
||||||
|
time.sleep(0.1)
|
||||||
while attempt < 60:
|
while attempt < 60:
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
res = client.get(url_for("index"))
|
res = client.get(url_for("index"))
|
||||||
@@ -159,5 +160,10 @@ def live_server_setup(live_server):
|
|||||||
ret = " ".join([auth.username, auth.password, auth.type])
|
ret = " ".join([auth.username, auth.password, auth.type])
|
||||||
return ret
|
return ret
|
||||||
|
|
||||||
|
# Just return some GET var
|
||||||
|
@live_server.app.route('/test-return-query', methods=['GET'])
|
||||||
|
def test_return_query():
|
||||||
|
return request.query_string
|
||||||
|
|
||||||
live_server.start()
|
live_server.start()
|
||||||
|
|
||||||
|
|||||||
@@ -13,9 +13,9 @@ def test_visual_selector_content_ready(client, live_server):
|
|||||||
live_server_setup(live_server)
|
live_server_setup(live_server)
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
|
|
||||||
# Add our URL to the import page, maybe better to use something we control?
|
# Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
|
||||||
# We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket
|
test_url = "https://changedetection.io/ci-test/test-runjs.html"
|
||||||
test_url = 'https://news.ycombinator.com'
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("form_quick_watch_add"),
|
url_for("form_quick_watch_add"),
|
||||||
data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
|
data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
|
||||||
@@ -25,13 +25,27 @@ def test_visual_selector_content_ready(client, live_server):
|
|||||||
|
|
||||||
res = client.post(
|
res = client.post(
|
||||||
url_for("edit_page", uuid="first", unpause_on_save=1),
|
url_for("edit_page", uuid="first", unpause_on_save=1),
|
||||||
data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"},
|
data={
|
||||||
|
"url": test_url,
|
||||||
|
"tag": "",
|
||||||
|
"headers": "",
|
||||||
|
'fetch_backend': "html_webdriver",
|
||||||
|
'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();'
|
||||||
|
},
|
||||||
follow_redirects=True
|
follow_redirects=True
|
||||||
)
|
)
|
||||||
assert b"unpaused" in res.data
|
assert b"unpaused" in res.data
|
||||||
time.sleep(1)
|
time.sleep(1)
|
||||||
wait_for_all_checks(client)
|
wait_for_all_checks(client)
|
||||||
uuid = extract_UUID_from_client(client)
|
uuid = extract_UUID_from_client(client)
|
||||||
|
|
||||||
|
# Check the JS execute code before extract worked
|
||||||
|
res = client.get(
|
||||||
|
url_for("preview_page", uuid="first"),
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
assert b'I smell JavaScript' in res.data
|
||||||
|
|
||||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
|
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
|
||||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
|
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
|
||||||
|
|
||||||
|
|||||||
@@ -4,7 +4,7 @@ import queue
|
|||||||
import time
|
import time
|
||||||
|
|
||||||
from changedetectionio import content_fetcher
|
from changedetectionio import content_fetcher
|
||||||
from changedetectionio.html_tools import FilterNotFoundInResponse
|
from changedetectionio.fetch_site_status import FilterNotFoundInResponse
|
||||||
|
|
||||||
# A single update worker
|
# A single update worker
|
||||||
#
|
#
|
||||||
@@ -74,6 +74,7 @@ class update_worker(threading.Thread):
|
|||||||
n_object.update({
|
n_object.update({
|
||||||
'watch_url': watch['url'],
|
'watch_url': watch['url'],
|
||||||
'uuid': watch_uuid,
|
'uuid': watch_uuid,
|
||||||
|
'screenshot': watch.get_screenshot() if watch.get('notification_screenshot') else False,
|
||||||
'current_snapshot': snapshot_contents.decode('utf-8'),
|
'current_snapshot': snapshot_contents.decode('utf-8'),
|
||||||
'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
|
'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
|
||||||
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
|
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
|
||||||
@@ -91,8 +92,8 @@ class update_worker(threading.Thread):
|
|||||||
return
|
return
|
||||||
|
|
||||||
n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
|
n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
|
||||||
'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
|
'notification_body': "Your configured CSS/xPath filters of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
|
||||||
watch['css_filter'],
|
", ".join(watch['include_filters']),
|
||||||
threshold),
|
threshold),
|
||||||
'notification_format': 'text'}
|
'notification_format': 'text'}
|
||||||
|
|
||||||
@@ -106,7 +107,8 @@ class update_worker(threading.Thread):
|
|||||||
if 'notification_urls' in n_object:
|
if 'notification_urls' in n_object:
|
||||||
n_object.update({
|
n_object.update({
|
||||||
'watch_url': watch['url'],
|
'watch_url': watch['url'],
|
||||||
'uuid': watch_uuid
|
'uuid': watch_uuid,
|
||||||
|
'screenshot': False
|
||||||
})
|
})
|
||||||
self.notification_q.put(n_object)
|
self.notification_q.put(n_object)
|
||||||
print("Sent filter not found notification for {}".format(watch_uuid))
|
print("Sent filter not found notification for {}".format(watch_uuid))
|
||||||
@@ -189,7 +191,7 @@ class update_worker(threading.Thread):
|
|||||||
if not self.datastore.data['watching'].get(uuid):
|
if not self.datastore.data['watching'].get(uuid):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
err_text = "Warning, filter '{}' not found".format(str(e))
|
err_text = "Warning, no filters were found, no change detection ran."
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||||
# So that we get a trigger when the content is added again
|
# So that we get a trigger when the content is added again
|
||||||
'previous_md5': ''})
|
'previous_md5': ''})
|
||||||
@@ -282,16 +284,19 @@ class update_worker(threading.Thread):
|
|||||||
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||||
|
|
||||||
|
if self.datastore.data['watching'].get(uuid):
|
||||||
|
# Always record that we atleast tried
|
||||||
|
count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1
|
||||||
|
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
|
||||||
|
'last_checked': round(time.time()),
|
||||||
|
'check_count': count
|
||||||
|
})
|
||||||
|
|
||||||
# Always record that we atleast tried
|
# Always save the screenshot if it's available
|
||||||
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
|
if update_handler.screenshot:
|
||||||
'last_checked': round(time.time())})
|
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
|
||||||
|
if update_handler.xpath_data:
|
||||||
# Always save the screenshot if it's available
|
self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
|
||||||
if update_handler.screenshot:
|
|
||||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
|
|
||||||
if update_handler.xpath_data:
|
|
||||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
|
|
||||||
|
|
||||||
|
|
||||||
self.current_uuid = None # Done
|
self.current_uuid = None # Done
|
||||||
|
|||||||
@@ -6,6 +6,8 @@ services:
|
|||||||
hostname: changedetection
|
hostname: changedetection
|
||||||
volumes:
|
volumes:
|
||||||
- changedetection-data:/datastore
|
- changedetection-data:/datastore
|
||||||
|
# Configurable proxy list support, see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#proxy-list-support
|
||||||
|
# - ./proxies.json:/datastore/proxies.json
|
||||||
|
|
||||||
# environment:
|
# environment:
|
||||||
# Default listening port, can also be changed with the -p option
|
# Default listening port, can also be changed with the -p option
|
||||||
@@ -43,6 +45,9 @@ services:
|
|||||||
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
|
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
|
||||||
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
|
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
|
||||||
# - USE_X_SETTINGS=1
|
# - USE_X_SETTINGS=1
|
||||||
|
#
|
||||||
|
# Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname.
|
||||||
|
# - HIDE_REFERER=true
|
||||||
|
|
||||||
# Comment out ports: when using behind a reverse proxy , enable networks: etc.
|
# Comment out ports: when using behind a reverse proxy , enable networks: etc.
|
||||||
ports:
|
ports:
|
||||||
|
|||||||
BIN
docs/proxy-example.jpg
Normal file
BIN
docs/proxy-example.jpg
Normal file
Binary file not shown.
|
After Width: | Height: | Size: 46 KiB |
@@ -1,31 +1,36 @@
|
|||||||
flask~= 2.0
|
flask~=2.0
|
||||||
flask_wtf
|
flask_wtf
|
||||||
eventlet>=0.31.0
|
eventlet>=0.31.0
|
||||||
validators
|
validators
|
||||||
timeago ~=1.0
|
timeago~=1.0
|
||||||
inscriptis ~= 2.2
|
inscriptis~=2.2
|
||||||
feedgen ~= 0.9
|
feedgen~=0.9
|
||||||
flask-login ~= 0.5
|
flask-login~=0.5
|
||||||
flask_restful
|
flask_restful
|
||||||
pytz
|
pytz
|
||||||
|
|
||||||
# Set these versions together to avoid a RequestsDependencyWarning
|
# Set these versions together to avoid a RequestsDependencyWarning
|
||||||
requests[socks] ~= 2.26
|
# >= 2.26 also adds Brotli support if brotli is installed
|
||||||
urllib3 > 1.26
|
brotli~=1.0
|
||||||
chardet > 2.3.0
|
requests[socks] ~=2.28
|
||||||
|
|
||||||
wtforms ~= 3.0
|
urllib3>1.26
|
||||||
jsonpath-ng ~= 1.5.3
|
chardet>2.3.0
|
||||||
|
|
||||||
|
wtforms~=3.0
|
||||||
|
jsonpath-ng~=1.5.3
|
||||||
|
|
||||||
|
# jq not available on Windows so must be installed manually
|
||||||
|
|
||||||
# Notification library
|
# Notification library
|
||||||
apprise ~= 1.0.0
|
apprise~=1.2.0
|
||||||
|
|
||||||
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
|
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
|
||||||
paho-mqtt
|
paho-mqtt
|
||||||
|
|
||||||
# Pinned version of cryptography otherwise
|
# Pinned version of cryptography otherwise
|
||||||
# ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly
|
# ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly
|
||||||
cryptography ~= 3.4
|
cryptography~=3.4
|
||||||
|
|
||||||
# Used for CSS filtering
|
# Used for CSS filtering
|
||||||
bs4
|
bs4
|
||||||
@@ -34,11 +39,20 @@ bs4
|
|||||||
lxml
|
lxml
|
||||||
|
|
||||||
# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
|
# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
|
||||||
selenium ~= 4.1.0
|
selenium~=4.1.0
|
||||||
|
|
||||||
# https://stackoverflow.com/questions/71652965/importerror-cannot-import-name-safe-str-cmp-from-werkzeug-security/71653849#71653849
|
# https://stackoverflow.com/questions/71652965/importerror-cannot-import-name-safe-str-cmp-from-werkzeug-security/71653849#71653849
|
||||||
# ImportError: cannot import name 'safe_str_cmp' from 'werkzeug.security'
|
# ImportError: cannot import name 'safe_str_cmp' from 'werkzeug.security'
|
||||||
# need to revisit flask login versions
|
# need to revisit flask login versions
|
||||||
werkzeug ~= 2.0.0
|
werkzeug~=2.0.0
|
||||||
|
|
||||||
|
# Templating, so far just in the URLs but in the future can be for the notifications also
|
||||||
|
jinja2~=3.1
|
||||||
|
jinja2-time
|
||||||
|
|
||||||
|
# https://peps.python.org/pep-0508/#environment-markers
|
||||||
|
# https://github.com/dgtlmoon/changedetection.io/pull/1009
|
||||||
|
jq~=1.3 ;python_version >= "3.8" and sys_platform == "linux"
|
||||||
|
|
||||||
# playwright is installed at Dockerfile build time because it's not available on all platforms
|
# playwright is installed at Dockerfile build time because it's not available on all platforms
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user