Mirror of https://github.com/dgtlmoon/changedetection.io.git
Synced 2025-11-04 08:34:57 +00:00
Compare commits: proxies-js...bugfix-han (58 commits)
| Author | SHA1 | Date |
|---|---|---|
|  | 982b431118 |  |
|  | b043d477dc |  |
|  | 06bcfb28e5 |  |
|  | ca3b351bae |  |
|  | b7e0f0a5e4 |  |
|  | 61f0ac2937 |  |
|  | fca66eb558 |  |
|  | 359fc48fb4 |  |
|  | d0efeb9770 |  |
|  | 3416532cd6 |  |
|  | defc7a340e |  |
|  | c197c062e1 |  |
|  | 77b59809ca |  |
|  | f90b170e68 |  |
|  | c93ca1841c |  |
|  | 57f604dff1 |  |
|  | 8499468749 |  |
|  | 7f6a13ea6c |  |
|  | 9874f0cbc7 |  |
|  | 72834a42fd |  |
|  | 724cb17224 |  |
|  | 4eb4b401a1 |  |
|  | 5d40e16c73 |  |
|  | 492bbce6b6 |  |
|  | 0394a56be5 |  |
|  | 7839551d6b |  |
|  | 9c5588c791 |  |
|  | 5a43a350de |  |
|  | 3c31f023ce |  |
|  | 4cbcc59461 |  |
|  | 4be0260381 |  |
|  | 957a3c1c16 |  |
|  | 85897e0bf9 |  |
|  | 63095f70ea |  |
|  | 8d5b0b5576 |  |
|  | 1b077abd93 |  |
|  | 32ea1a8721 |  |
|  | fff32cef0d |  |
|  | 8fb146f3e4 |  |
|  | 770b0faa45 |  |
|  | f6faa90340 |  |
|  | 669fd3ae0b |  |
|  | 17d37fb626 |  |
|  | dfa7fc3a81 |  |
|  | cd467df97a |  |
|  | 71bc2fed82 |  |
|  | 738fcfe01c |  |
|  | 3ebb2ab9ba |  |
|  | ac98bc9144 |  |
|  | 3705ce6681 |  |
|  | f7ea99412f |  |
|  | d4715e2bc8 |  |
|  | 8567a83c47 |  |
|  | 77fdf59ae3 |  |
|  | 0e194aa4b4 |  |
|  | 2ba55bb477 |  |
|  | 4c759490da |  |
|  | 58a52c1f60 |  |

.github/test/Dockerfile-alpine | 31 lines (vendored, new file)

@@ -0,0 +1,31 @@
# Taken from https://github.com/linuxserver/docker-changedetection.io/blob/main/Dockerfile
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
# Some packages wont install via pypi because they dont have a wheel available under this architecture.

FROM ghcr.io/linuxserver/baseimage-alpine:3.16
ENV PYTHONUNBUFFERED=1

COPY requirements.txt /requirements.txt

RUN \
  apk add --update --no-cache --virtual=build-dependencies \
    cargo \
    g++ \
    gcc \
    libc-dev \
    libffi-dev \
    libxslt-dev \
    make \
    openssl-dev \
    py3-wheel \
    python3-dev \
    zlib-dev && \
  apk add --update --no-cache \
    libxslt \
    python3 \
    py3-pip && \
  echo "**** pip3 install test of changedetection.io ****" && \
  pip3 install -U pip wheel setuptools && \
  pip3 install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.16/ -r /requirements.txt && \
  apk del --purge \
    build-dependencies

.github/workflows/test-container-build.yml | 66 lines (vendored, new file)

@@ -0,0 +1,66 @@
name: ChangeDetection.io Container Build Test

# Triggers the workflow on push or pull request events

# This line doesnt work, even tho it is the documented one
#on: [push, pull_request]

on:
  push:
    paths:
      - requirements.txt
      - Dockerfile

  pull_request:
    paths:
      - requirements.txt
      - Dockerfile

  # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
  # @todo: some kind of path filter for requirements.txt and Dockerfile
jobs:
  test-container-build:
    runs-on: ubuntu-latest
    steps:
        - uses: actions/checkout@v2
        - name: Set up Python 3.9
          uses: actions/setup-python@v2
          with:
            python-version: 3.9

        # Just test that the build works, some libraries won't compile on ARM/rPi etc
        - name: Set up QEMU
          uses: docker/setup-qemu-action@v1
          with:
            image: tonistiigi/binfmt:latest
            platforms: all

        - name: Set up Docker Buildx
          id: buildx
          uses: docker/setup-buildx-action@v1
          with:
            install: true
            version: latest
            driver-opts: image=moby/buildkit:master

        # https://github.com/dgtlmoon/changedetection.io/pull/1067
        # Check we can still build under alpine/musl
        - name: Test that the docker containers can build (musl via alpine check)
          id: docker_build_musl
          uses: docker/build-push-action@v2
          with:
            context: ./
            file: ./.github/test/Dockerfile-alpine
            platforms: linux/amd64,linux/arm64

        - name: Test that the docker containers can build
          id: docker_build
          uses: docker/build-push-action@v2
          # https://github.com/docker/build-push-action#customizing
          with:
            context: ./
            file: ./Dockerfile
            platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64,
            cache-from: type=local,src=/tmp/.buildx-cache
            cache-to: type=local,dest=/tmp/.buildx-cache

.github/workflows/test-only.yml | 12 lines changed (vendored)

@@ -1,28 +1,25 @@
-name: ChangeDetection.io Test
+name: ChangeDetection.io App Test

# Triggers the workflow on push or pull request events
on: [push, pull_request]

jobs:
-  test-build:
+  test-application:
    runs-on: ubuntu-latest
    steps:

      - uses: actions/checkout@v2
      - name: Set up Python 3.9
        uses: actions/setup-python@v2
        with:
          python-version: 3.9

      - name: Show env vars
        run: set

      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install flake8 pytest
          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi
          if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi

      - name: Lint with flake8
        run: |
          # stop the build if there are Python syntax errors or undefined names

@@ -39,7 +36,4 @@ jobs:
          # Each test is totally isolated and performs its own cleanup/reset
          cd changedetectionio; ./run_all_tests.sh

-      # https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ?
-      # https://github.com/docker/buildx/issues/59 ? Needs to be one platform?

      # https://github.com/docker/buildx/issues/495#issuecomment-918925854
@@ -6,7 +6,7 @@ Otherwise, it's always best to PR into the `dev` branch.

Please be sure that all new functionality has a matching test!

-Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notifications.py` for example
+Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example

```
pip3 install -r requirements-dev

Dockerfile | 13 lines changed

@@ -5,13 +5,14 @@ FROM python:3.8-slim as builder
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1

RUN apt-get update && apt-get install -y --no-install-recommends \
-    libssl-dev \
-    libffi-dev \
+    g++ \
    gcc \
    libc-dev \
+    libffi-dev \
+    libssl-dev \
    libxslt-dev \
-    zlib1g-dev \
-    g++
+    make \
+    zlib1g-dev

RUN mkdir /install
WORKDIR /install

@@ -22,7 +23,8 @@ RUN pip install --target=/dependencies -r /requirements.txt

# Playwright is an alternative to Selenium
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
-RUN pip install --target=/dependencies playwright~=1.24 \
+# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
+RUN pip install --target=/dependencies playwright~=1.26 \
    || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."

# Final image stage

@@ -58,6 +60,7 @@ EXPOSE 5000

# The actual flask app
COPY changedetectionio /app/changedetectionio

# The eventlet server wrapper
COPY changedetection.py /app/changedetection.py
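Because the Playwright install above is allowed to fail (the `|| echo "WARN..."` fallback), anything that uses it at runtime has to treat the package as optional. A common pattern for that, shown here as an editor's sketch rather than this project's exact code, is an import guard:

```
# Sketch of the optional-dependency pattern implied by the "|| echo WARN" above.
try:
    import playwright  # noqa: F401
    PLAYWRIGHT_AVAILABLE = True
except ImportError:
    PLAYWRIGHT_AVAILABLE = False

print("Playwright fetcher available:", PLAYWRIGHT_AVAILABLE)
```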
@@ -2,6 +2,7 @@ recursive-include changedetectionio/api *
recursive-include changedetectionio/templates *
recursive-include changedetectionio/static *
recursive-include changedetectionio/model *
recursive-include changedetectionio/tests *
include changedetection.py
global-exclude *.pyc
global-exclude node_modules
@@ -33,7 +33,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W

#### Key Features

- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
-- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
+- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
- Switch between fast non-JS and Chrome JS based "fetchers"
- Easily specify how often a site should be checked
- Execute JS before extracting text (Good for logging in, see examples in the UI!)

README.md | 44 lines changed

@@ -1,6 +1,7 @@
## Web Site Change Detection, Monitoring and Notification.

-Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
+_Live your data-life pro-actively, Detect website changes and perform meaningful actions, trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._


[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring"  title="Self-hosted web page change monitoring"  />](https://lemonade.changedetection.io/start?src=github)

@@ -8,15 +9,16 @@ Live your data-life pro-actively, track website content changes and receive noti

Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more

[**Don't have time? Let us host it for you! try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_

- Chrome browser included.
- Super fast, no registration needed setup.
- Start watching and receiving change notifications instantly.

- Automatic Updates, Automatic Backups, No Heroku "paused application", don't miss a change!
- Javascript browser included
- Unlimited checks and watches!
Easily see what changed, examine by word, line, or individual character.

<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference "  title="Self-hosted web page change monitoring context difference " />

#### Example use cases

@@ -44,22 +46,18 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W

#### Key Features

- Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions!
-- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules
+- Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq
- Switch between fast non-JS and Chrome JS based "fetchers"
- Easily specify how often a site should be checked
- Execute JS before extracting text (Good for logging in, see examples in the UI!)
- Override Request Headers, Specify `POST` or `GET` and other methods
- Use the "Visual Selector" to help target specific elements
- Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)

We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.

## Screenshots

### Examine differences in content.

Easily see what changed, examine by word, line, or individual character.

<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference "  title="Self-hosted web page change monitoring context difference " />

Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/

### Filter by elements using the Visual Selector tool.

@@ -122,8 +120,8 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io

## Filters
-XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
+XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
(We support LXML `re:test`, `re:math` and `re:replace`.)

## Notifications

@@ -152,7 +150,7 @@ Now you can also customise your notification content!

## JSON API Monitoring

-Detect changes and monitor data in JSON API's by using the built-in JSONPath selectors as a filter / selector.
+Detect changes and monitor data in JSON API's by using either JSONPath or jq to filter, parse, and restructure JSON as needed.

@@ -160,9 +158,17 @@ This will re-parse the JSON and apply formatting to the text, making it super ea

### JSONPath or jq?

For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more specifc information on jq.

One big advantage of `jq` is that you can use logic in your JSON filter, such as filters to only show items that have a value greater than/less than etc.

See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples

### Parse JSON embedded in HTML!

-When you enable a `json:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.
+When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.

```
<html>
@@ -172,11 +178,11 @@ When you enable a `json:` filter, you can even automatically extract and parse e
</script>
```

-`json:$.price` would give `23.50`, or you can extract the whole structure
+`json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure

-## Proxy configuration
+## Proxy Configuration

-See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration
+See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration , we also support using [BrightData proxy services where possible](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support)

## Raspberry Pi support?
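To make the `json:` vs `jq:` comparison in the README changes above concrete, here is a minimal standalone sketch (an editor's illustration, not code from this repository) of what each filter style evaluates to, assuming the `jq` and `jsonpath-ng` PyPI packages are installed:

```
# Minimal sketch (not project code): the README's price example under both
# filter styles. Assumes `pip install jq jsonpath-ng`.
import json

import jq                      # jq bindings; may need a compiler on Windows
from jsonpath_ng import parse  # JSONPath implementation

data = json.loads('{"@type": "Product", "price": 23.50}')

# Roughly what a `json:$.price` filter selects
print(parse('$.price').find(data)[0].value)      # 23.5

# Roughly what a `jq:.price` filter selects; jq also allows logic,
# e.g. jq.compile('.price | select(. > 20)')
print(jq.compile('.price').input(data).first())  # 23.5
```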
@@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1

-__version__ = '0.39.19.1'
+__version__ = '0.39.21.1'

datastore = None

@@ -194,7 +194,8 @@ def changedetection_app(config=None, datastore_o=None):
    watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>',
                           resource_class_kwargs={'datastore': datastore, 'update_q': update_q})

+    watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo',
+                           resource_class_kwargs={'datastore': datastore, 'update_q': update_q})

@@ -547,6 +548,7 @@ def changedetection_app(config=None, datastore_o=None):

        # Defaults for proxy choice
        if datastore.proxy_list is not None:  # When enabled
            # @todo
            # Radio needs '' not None, or incase that the chosen one no longer exists
            if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list):
                default['proxy'] = ''

@@ -560,7 +562,10 @@ def changedetection_app(config=None, datastore_o=None):
            # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
            del form.proxy
        else:
-            form.proxy.choices = [('', 'Default')] + datastore.proxy_list
+            form.proxy.choices = [('', 'Default')]
+            for p in datastore.proxy_list:
+                form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))

        if request.method == 'POST' and form.validate():
            extra_update_obj = {}

@@ -594,7 +599,7 @@ def changedetection_app(config=None, datastore_o=None):
                    extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

            # Reset the previous_md5 so we process a new snapshot including stripping ignore text.
-            if form.css_filter.data.strip() != datastore.data['watching'][uuid]['css_filter']:
+            if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []):
                if len(datastore.data['watching'][uuid].history):
                    extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid)

@@ -632,20 +637,27 @@ def changedetection_app(config=None, datastore_o=None):
            # Only works reliably with Playwright
            visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver'

+            # JQ is difficult to install on windows and must be manually added (outside requirements.txt)
+            jq_support = True
+            try:
+                import jq
+            except ModuleNotFoundError:
+                jq_support = False

            output = render_template("edit.html",
-                                     uuid=uuid,
-                                     watch=datastore.data['watching'][uuid],
-                                     form=form,
-                                     has_empty_checktime=using_default_check_time,
-                                     has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
-                                     using_global_webdriver_wait=default['webdriver_delay'] is None,
                                     current_base_url=datastore.data['settings']['application']['base_url'],
                                     emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
+                                     form=form,
+                                     has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False,
+                                     has_empty_checktime=using_default_check_time,
+                                     jq_support=jq_support,
+                                     playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
                                     settings_application=datastore.data['settings']['application'],
+                                     using_global_webdriver_wait=default['webdriver_delay'] is None,
+                                     uuid=uuid,
                                     visualselector_data_is_ready=visualselector_data_is_ready,
                                     visualselector_enabled=visualselector_enabled,
-                                     playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
+                                     watch=datastore.data['watching'][uuid],
                                     )

        return output

@@ -657,15 +669,16 @@ def changedetection_app(config=None, datastore_o=None):

        default = deepcopy(datastore.data['settings'])
        if datastore.proxy_list is not None:
+            available_proxies = list(datastore.proxy_list.keys())
            # When enabled
            system_proxy = datastore.data['settings']['requests']['proxy']
            # In the case it doesnt exist anymore
-            if not any([system_proxy in tup for tup in datastore.proxy_list]):
+            if not system_proxy in available_proxies:
                system_proxy = None

-            default['requests']['proxy'] = system_proxy if system_proxy is not None else datastore.proxy_list[0][0]
+            default['requests']['proxy'] = system_proxy if system_proxy is not None else available_proxies[0]
            # Used by the form handler to keep or remove the proxy settings
-            default['proxy_list'] = datastore.proxy_list
+            default['proxy_list'] = available_proxies[0]

        # Don't use form.data on POST so that it doesnt overrid the checkbox status from the POST status

@@ -680,7 +693,10 @@ def changedetection_app(config=None, datastore_o=None):
            # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
            del form.requests.form.proxy
        else:
-            form.requests.form.proxy.choices = datastore.proxy_list
+            form.requests.form.proxy.choices = []
+            for p in datastore.proxy_list:
+                form.requests.form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))

        if request.method == 'POST':
            # Password unset is a GET, but we can lock the session to a salted env password to always need the password

@@ -801,8 +817,10 @@ def changedetection_app(config=None, datastore_o=None):

        newest_file = history[dates[-1]]

+        # Read as binary and force decode as UTF-8
+        # Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
        try:
-            with open(newest_file, 'r') as f:
+            with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f:
                newest_version_file_contents = f.read()
        except Exception as e:
            newest_version_file_contents = "Unable to read {}.\n".format(newest_file)

@@ -815,7 +833,7 @@ def changedetection_app(config=None, datastore_o=None):
            previous_file = history[dates[-2]]

        try:
-            with open(previous_file, 'r') as f:
+            with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f:
                previous_version_file_contents = f.read()
        except Exception as e:
            previous_version_file_contents = "Unable to read {}.\n".format(previous_file)

@@ -892,7 +910,7 @@ def changedetection_app(config=None, datastore_o=None):
        timestamp = list(watch.history.keys())[-1]
        filename = watch.history[timestamp]
        try:
-            with open(filename, 'r') as f:
+            with open(filename, 'r', encoding='utf-8', errors='ignore') as f:
                tmp = f.readlines()

                # Get what needs to be highlighted
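The encoding change in the three hunks above matters mainly on Windows, where `open(path, 'r')` without an explicit encoding decodes with the locale code page rather than UTF-8. A short sketch of the safe pattern (the snapshot path here is hypothetical):

```
# Sketch of why the explicit encoding is needed. On Windows, open() without
# an encoding uses the locale code page (often cp1252), so reading a UTF-8
# snapshot file can raise UnicodeDecodeError.
with open("some-watch-snapshot.txt", "r", encoding="utf-8", errors="ignore") as f:
    contents = f.read()  # never raises UnicodeDecodeError; undecodable bytes are dropped
```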
@@ -967,9 +985,6 @@ def changedetection_app(config=None, datastore_o=None):

        # create a ZipFile object
        backupname = "changedetection-backup-{}.zip".format(int(time.time()))

-        # We only care about UUIDS from the current index file
-        uuids = list(datastore.data['watching'].keys())
        backup_filepath = os.path.join(datastore_o.datastore_path, backupname)

        with zipfile.ZipFile(backup_filepath, "w",

@@ -985,12 +1000,12 @@ def changedetection_app(config=None, datastore_o=None):
            # Add the flask app secret
            zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt")

-            # Add any snapshot data we find, use the full path to access the file, but make the file 'relative' in the Zip.
-            for txt_file_path in Path(datastore_o.datastore_path).rglob('*.txt'):
-                parent_p = txt_file_path.parent
-                if parent_p.name in uuids:
-                    zipObj.write(txt_file_path,
-                                 arcname=str(txt_file_path).replace(datastore_o.datastore_path, ''),
+            # Add any data in the watch data directory.
+            for uuid, w in datastore.data['watching'].items():
+                for f in Path(w.watch_data_dir).glob('*'):
+                    zipObj.write(f,
+                                 # Use the full path to access the file, but make the file 'relative' in the Zip.
+                                 arcname=os.path.join(f.parts[-2], f.parts[-1]),
                                 compress_type=zipfile.ZIP_DEFLATED,
                                 compresslevel=8)

@@ -1292,8 +1307,8 @@ def changedetection_app(config=None, datastore_o=None):

    threading.Thread(target=notification_runner).start()

-    # Check for new release version, but not when running in test/build
-    if not os.getenv("GITHUB_REF", False):
+    # Check for new release version, but not when running in test/build or pytest
+    if not os.getenv("GITHUB_REF", False) and not config.get('disable_checkver') == True:
        threading.Thread(target=check_for_new_version).start()

    return app

@@ -1368,6 +1383,8 @@ def ticker_thread_check_time_launch_checks():
    import random
    from changedetectionio import update_worker

+    proxy_last_called_time = {}

    recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
    print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)

@@ -1428,10 +1445,30 @@ def ticker_thread_check_time_launch_checks():
                if watch.jitter_seconds == 0:
                    watch.jitter_seconds = random.uniform(-abs(jitter), jitter)

            seconds_since_last_recheck = now - watch['last_checked']

            if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
                if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:

+                    # Proxies can be set to have a limit on seconds between which they can be called
+                    watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid)
+                    if watch_proxy and watch_proxy in list(datastore.proxy_list.keys()):
+                        # Proxy may also have some threshold minimum
+                        proxy_list_reuse_time_minimum = int(datastore.proxy_list.get(watch_proxy, {}).get('reuse_time_minimum', 0))
+                        if proxy_list_reuse_time_minimum:
+                            proxy_last_used_time = proxy_last_called_time.get(watch_proxy, 0)
+                            time_since_proxy_used = int(time.time() - proxy_last_used_time)
+                            if time_since_proxy_used < proxy_list_reuse_time_minimum:
+                                # Not enough time difference reached, skip this watch
+                                print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
+                                                                                                                         watch_proxy,
+                                                                                                                         time_since_proxy_used,
+                                                                                                                         proxy_list_reuse_time_minimum))
+                                continue
+                            else:
+                                # Record the last used time
+                                proxy_last_called_time[watch_proxy] = int(time.time())

                    # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
                    priority = int(time.time())
                    print(
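Based on the fields this hunk reads (`label`, `url`, `reuse_time_minimum`), a proxy_list entry that opts into the new per-proxy cooldown would look roughly like the sketch below; the field names come from the diff above, but the surrounding configuration layout and the endpoint value are assumptions:

```
# Hypothetical proxy_list entry; field names are taken from the diff above,
# the rest is illustrative only.
proxy_list = {
    "brightdata-residential": {
        "label": "BrightData residential",
        "url": "http://user:pass@proxy.example.com:24000",  # hypothetical endpoint
        "reuse_time_minimum": 30,  # at least 30s between requests through this proxy
    },
}
```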
@@ -122,3 +122,37 @@ class CreateWatch(Resource):
            return {'status': "OK"}, 200

        return list, 200

+class SystemInfo(Resource):
+    def __init__(self, **kwargs):
+        # datastore is a black box dependency
+        self.datastore = kwargs['datastore']
+        self.update_q = kwargs['update_q']
+
+    @auth.check_token
+    def get(self):
+        import time
+        overdue_watches = []
+
+        # Check all watches and report which have not been checked but should have been
+
+        for uuid, watch in self.datastore.data.get('watching', {}).items():
+            # see if now - last_checked is greater than the time that should have been
+            # this is not super accurate (maybe they just edited it) but better than nothing
+            t = watch.threshold_seconds()
+            if not t:
+                # Use the system wide default
+                t = self.datastore.threshold_seconds
+
+            time_since_check = time.time() - watch.get('last_checked')
+
+            # Allow 5 minutes of grace time before we decide it's overdue
+            if time_since_check - (5 * 60) > t:
+                overdue_watches.append(uuid)
+
+        return {
+                   'queue_size': self.update_q.qsize(),
+                   'overdue_watches': overdue_watches,
+                   'uptime': round(time.time() - self.datastore.start_time, 2),
+                   'watch_count': len(self.datastore.data.get('watching', {}))
+               }, 200
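A minimal client-side sketch of calling the new endpoint registered above. The host, port, and the `x-api-key` header name are assumptions here, not taken from this diff:

```
# Sketch: querying the new /api/v1/systeminfo endpoint with `requests`.
import requests

resp = requests.get(
    "http://localhost:5000/api/v1/systeminfo",   # assumed local instance
    headers={"x-api-key": "YOUR_API_KEY"},       # assumed auth header
    timeout=10,
)
resp.raise_for_status()
info = resp.json()

# Keys returned by the handler above:
print(info["queue_size"], info["uptime"], info["watch_count"], info["overdue_watches"])
```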
@@ -102,6 +102,14 @@ def main():
                    has_password=datastore.data['settings']['application']['password'] != False
                    )

+    # Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
+    # @Note: Incompatible with password login (and maybe other features) for now, submit a PR!
+    @app.after_request
+    def hide_referrer(response):
+        if os.getenv("HIDE_REFERER", False):
+            response.headers["Referrer-Policy"] = "no-referrer"
+        return response

    # Proxy sub-directory support
    # Set environment var USE_X_SETTINGS=1 on this script
    # And then in your proxy_pass settings
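The `after_request` hook added above is a standard Flask pattern; a self-contained sketch of the same idea on a minimal app (an editor's illustration, not this project's app object):

```
# Standalone sketch of the after_request pattern used above.
import os
from flask import Flask

app = Flask(__name__)

@app.after_request
def hide_referrer(response):
    # Attach the policy header to every response when HIDE_REFERER is set
    if os.getenv("HIDE_REFERER", False):
        response.headers["Referrer-Policy"] = "no-referrer"
    return response

@app.route("/")
def index():
    return "ok"

if __name__ == "__main__":
    os.environ["HIDE_REFERER"] = "1"
    with app.test_client() as client:
        print(client.get("/").headers.get("Referrer-Policy"))  # no-referrer
```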
@@ -164,16 +164,16 @@ class Fetcher():
                }

-                // inject the current one set in the css_filter, which may be a CSS rule
+                // inject the current one set in the include_filters, which may be a CSS rule
                // used for displaying the current one in VisualSelector, where its not one we generated.
-                if (css_filter.length) {
+                if (include_filters.length) {
                   q=false;
                   try {
                       // is it xpath?
-                       if (css_filter.startsWith('/') || css_filter.startsWith('xpath:')) {
-                         q=document.evaluate(css_filter.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
+                       if (include_filters.startsWith('/') || include_filters.startsWith('xpath:')) {
+                         q=document.evaluate(include_filters.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
                       } else {
-                         q=document.querySelector(css_filter);
+                         q=document.querySelector(include_filters);
                       }
                   } catch (e) {
                    // Maybe catch DOMException and alert?

@@ -186,7 +186,7 @@ class Fetcher():

                   if (bbox && bbox['width'] >0 && bbox['height']>0) {
                       size_pos.push({
-                           xpath: css_filter,
+                           xpath: include_filters,
                           width: bbox['width'],
                           height: bbox['height'],
                           left: bbox['left'],

@@ -220,7 +220,7 @@ class Fetcher():
            request_body,
            request_method,
            ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):
        # Should set self.error, self.status_code and self.content
        pass

@@ -310,12 +310,13 @@ class base_html_playwright(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):

        from playwright.sync_api import sync_playwright
        import playwright._impl._api_types
        from playwright._impl._api_types import Error, TimeoutError
        response = None

        with sync_playwright() as p:
            browser_type = getattr(p, self.browser_type)

@@ -373,8 +374,11 @@ class base_html_playwright(Fetcher):
                print("response object was none")
                raise EmptyReply(url=url, status_code=None)

-            # Bug 2(?) Set the viewport size AFTER loading the page
-            page.set_viewport_size({"width": 1280, "height": 1024})
+            # Removed browser-set-size, seemed to be needed to make screenshots work reliably in older playwright versions
+            # Was causing exceptions like 'waiting for page but content is changing' etc
+            # https://www.browserstack.com/docs/automate/playwright/change-browser-window-size 1280x720 should be the default

            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
            time.sleep(extra_wait)

@@ -398,14 +402,21 @@ class base_html_playwright(Fetcher):

                    raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url)

                else:
                    # JS eval was run, now we also wait some time if possible to let the page settle
                    if self.render_extract_delay:
                        page.wait_for_timeout(self.render_extract_delay * 1000)

            page.wait_for_timeout(500)

            self.content = page.content()
            self.status_code = response.status
            self.headers = response.all_headers()

-            if current_css_filter is not None:
-                page.evaluate("var css_filter={}".format(json.dumps(current_css_filter)))
+            if current_include_filters is not None:
+                page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
            else:
-                page.evaluate("var css_filter=''")
+                page.evaluate("var include_filters=''")

            self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}")

@@ -486,7 +497,7 @@ class base_html_webdriver(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):

        from selenium import webdriver
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities

@@ -514,8 +525,6 @@ class base_html_webdriver(Fetcher):
            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

-        self.screenshot = self.driver.get_screenshot_as_png()

        # @todo - how to check this? is it possible?
        self.status_code = 200
        # @todo somehow we should try to get this working for WebDriver

@@ -526,6 +535,8 @@ class base_html_webdriver(Fetcher):
        self.content = self.driver.page_source
        self.headers = {}

+        self.screenshot = self.driver.get_screenshot_as_png()

    # Does the connection to the webdriver work? run a test connection.
    def is_ready(self):
        from selenium import webdriver

@@ -562,7 +573,12 @@ class html_requests(Fetcher):
            request_body,
            request_method,
            ignore_status_codes=False,
-            current_css_filter=None):
+            current_include_filters=None):

        # Make requests use a more modern looking user-agent
        if not 'User-Agent' in request_headers:
            request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT",
                                                      'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')

        proxies = {}
@@ -10,6 +10,11 @@ from changedetectionio import content_fetcher, html_tools
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)

+class FilterNotFoundInResponse(ValueError):
+    def __init__(self, msg):
+        ValueError.__init__(self, msg)

# Some common stuff here that can be moved to a base class
# (set_proxy_from_list)
class perform_site_check():

@@ -20,34 +25,6 @@ class perform_site_check():
        super().__init__(*args, **kwargs)
        self.datastore = datastore

-    # If there was a proxy list enabled, figure out what proxy_args/which proxy to use
-    # if watch.proxy use that
-    # fetcher.proxy_override = watch.proxy or main config proxy
-    # Allows override the proxy on a per-request basis
-    # ALWAYS use the first one is nothing selected
-
-    def set_proxy_from_list(self, watch):
-        proxy_args = None
-        if self.datastore.proxy_list is None:
-            return None
-
-        # If its a valid one
-        if any([watch['proxy'] in p for p in self.datastore.proxy_list]):
-            proxy_args = watch['proxy']
-
-        # not valid (including None), try the system one
-        else:
-            system_proxy = self.datastore.data['settings']['requests']['proxy']
-            # Is not None and exists
-            if any([system_proxy in p for p in self.datastore.proxy_list]):
-                proxy_args = system_proxy
-
-        # Fallback - Did not resolve anything, use the first available
-        if proxy_args is None:
-            proxy_args = self.datastore.proxy_list[0][0]
-
-        return proxy_args

    # Doesn't look like python supports forward slash auto enclosure in re.findall
    # So convert it to inline flag "foobar(?i)" type configuration
    def forward_slash_enclosed_regex_to_options(self, regex):
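The comment above refers to converting a `/pattern/flags` style regex into Python's inline-flag form, since `re.findall` has no notion of slash-enclosed patterns. A standalone sketch of that idea (an editor's illustration with a hypothetical helper name; the hunk does not show the real function body):

```
import re

# Standalone illustration of the conversion the comment describes:
# "/foobar/i" means nothing to re.findall, so rewrite it with an
# inline flag group, e.g. "(?i)foobar".
def slash_enclosed_to_inline_flags(regex: str) -> str:  # hypothetical helper name
    match = re.search(r'^/(.*?)/(\w+)\s*$', regex)
    if match:
        return "(?{}){}".format(match.group(2), match.group(1))
    return regex

print(slash_enclosed_to_inline_flags("/foobar/i"))                      # (?i)foobar
print(re.findall(slash_enclosed_to_inline_flags("/foobar/i"), "FooBar"))  # ['FooBar']
```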
@@ -61,16 +38,20 @@ class perform_site_check():

        return regex

    def run(self, uuid):
+        from copy import deepcopy
        changed_detected = False
        screenshot = False  # as bytes
        stripped_text_from_html = ""

-        watch = self.datastore.data['watching'].get(uuid)
+        # DeepCopy so we can be sure we don't accidently change anything by reference
+        watch = deepcopy(self.datastore.data['watching'].get(uuid))

        if not watch:
            return

        # Protect against file:// access
-        if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
+        if re.search(r'^file', watch.get('url', ''), re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False):
            raise Exception(
                "file:// type access is denied for security reasons."
            )

@@ -78,10 +59,10 @@ class perform_site_check():
        # Unset any existing notification error
        update_obj = {'last_notification_error': False, 'last_error': False}

-        extra_headers =self.datastore.data['watching'][uuid].get('headers')
+        extra_headers = watch.get('headers', [])

        # Tweak the base config with the per-watch ones
-        request_headers = self.datastore.data['settings']['headers'].copy()
+        request_headers = deepcopy(self.datastore.data['settings']['headers'])
        request_headers.update(extra_headers)

        # https://github.com/psf/requests/issues/4525

@@ -90,8 +71,10 @@ class perform_site_check():
        if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']:
            request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '')

-        timeout = self.datastore.data['settings']['requests']['timeout']
-        url = watch.get('url')
+        timeout = self.datastore.data['settings']['requests'].get('timeout')
+
+        url = watch.link

        request_body = self.datastore.data['watching'][uuid].get('body')
        request_method = self.datastore.data['watching'][uuid].get('method')
        ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False)

@@ -103,28 +86,32 @@ class perform_site_check():
            is_source = True

        # Pluggable content fetcher
-        prefer_backend = watch['fetch_backend']
+        prefer_backend = watch.get('fetch_backend')
        if hasattr(content_fetcher, prefer_backend):
            klass = getattr(content_fetcher, prefer_backend)
        else:
            # If the klass doesnt exist, just use a default
            klass = getattr(content_fetcher, "html_requests")

+        proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid)
+        proxy_url = None
+        if proxy_id:
+            proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
+            print("UUID {} Using proxy {}".format(uuid, proxy_url))

-        proxy_args = self.set_proxy_from_list(watch)
-        fetcher = klass(proxy_override=proxy_args)
+        fetcher = klass(proxy_override=proxy_url)

        # Configurable per-watch or global extra delay before extracting text (for webDriver types)
        system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None)
        if watch['webdriver_delay'] is not None:
-            fetcher.render_extract_delay = watch['webdriver_delay']
+            fetcher.render_extract_delay = watch.get('webdriver_delay')
        elif system_webdriver_delay is not None:
            fetcher.render_extract_delay = system_webdriver_delay

-        if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
-            fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']
+        if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip():
+            fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code')

-        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
+        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'))
        fetcher.quit()

        self.screenshot = fetcher.screenshot

@@ -148,23 +135,26 @@ class perform_site_check():
            is_html = False
            is_json = False

-        css_filter_rule = watch['css_filter']
+        include_filters_rule = watch.get('include_filters', [])
+        # include_filters_rule = watch['include_filters']
        subtractive_selectors = watch.get(
            "subtractive_selectors", []
        ) + self.datastore.data["settings"]["application"].get(
            "global_subtractive_selectors", []
        )

-        has_filter_rule = css_filter_rule and len(css_filter_rule.strip())
+        has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip())
        has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip())

        if is_json and not has_filter_rule:
-            css_filter_rule = "json:$"
+            include_filters_rule.append("json:$")
            has_filter_rule = True

        if has_filter_rule:
-            if 'json:' in css_filter_rule:
-                stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule)
+            json_filter_prefixes = ['json:', 'jq:']
+            for filter in include_filters_rule:
+                if any(prefix in filter for prefix in json_filter_prefixes):
+                    stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter)
                    is_html = False

        if is_html or is_source:

@@ -180,33 +170,36 @@ class perform_site_check():
            else:
                # Then we assume HTML
                if has_filter_rule:
+                    html_content = ""
+                    for filter_rule in include_filters_rule:
                        # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
-                    if css_filter_rule[0] == '/' or css_filter_rule.startswith('xpath:'):
-                        html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''),
-                                                               html_content=fetcher.content)
+                        if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
+                            html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
+                                                                    html_content=fetcher.content,
+                                                                    append_pretty_line_formatting=not is_source)
 | 
			
		||||
                        else:
 | 
			
		||||
                            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
 | 
			
		||||
                        html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content)
 | 
			
		||||
                            html_content += html_tools.include_filters(include_filters=filter_rule,
 | 
			
		||||
                                                                       html_content=fetcher.content,
 | 
			
		||||
                                                                       append_pretty_line_formatting=not is_source)
 | 
			
		||||
 | 
			
		||||
                    if not html_content.strip():
 | 
			
		||||
                        raise FilterNotFoundInResponse(include_filters_rule)
 | 
			
		||||
 | 
			
		||||
                if has_subtractive_selectors:
 | 
			
		||||
                    html_content = html_tools.element_removal(subtractive_selectors, html_content)
 | 
			
		||||
 | 
			
		||||
                if not is_source:
 | 
			
		||||
                if is_source:
 | 
			
		||||
                    stripped_text_from_html = html_content
 | 
			
		||||
                else:
 | 
			
		||||
                    # extract text
 | 
			
		||||
                    do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
 | 
			
		||||
                    stripped_text_from_html = \
 | 
			
		||||
                        html_tools.html_to_text(
 | 
			
		||||
                            html_content,
 | 
			
		||||
                            render_anchor_tag_content=self.datastore.data["settings"][
 | 
			
		||||
                                "application"].get(
 | 
			
		||||
                                "render_anchor_tag_content", False)
 | 
			
		||||
                            render_anchor_tag_content=do_anchor
 | 
			
		||||
                        )
 | 
			
		||||
 | 
			
		||||
                elif is_source:
 | 
			
		||||
                    stripped_text_from_html = html_content
 | 
			
		||||
 | 
			
		||||
            # Re #340 - return the content before the 'ignore text' was applied
 | 
			
		||||
            text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
 | 
			
		||||
 | 
			
		||||
        # Re #340 - return the content before the 'ignore text' was applied
 | 
			
		||||
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
 | 
			
		||||
 | 
			
		||||
@@ -239,7 +232,7 @@ class perform_site_check():
 | 
			
		||||
 | 
			
		||||
                for l in result:
 | 
			
		||||
                    if type(l) is tuple:
 | 
			
		||||
                        #@todo - some formatter option default (between groups)
 | 
			
		||||
                        # @todo - some formatter option default (between groups)
 | 
			
		||||
                        regex_matched_output += list(l) + [b'\n']
 | 
			
		||||
                    else:
 | 
			
		||||
                        # @todo - some formatter option default (between each ungrouped result)
 | 
			
		||||
@@ -253,7 +246,6 @@ class perform_site_check():
 | 
			
		||||
                stripped_text_from_html = b''.join(regex_matched_output)
 | 
			
		||||
                text_content_before_ignored_filter = stripped_text_from_html
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
 | 
			
		||||
        if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
 | 
			
		||||
            fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
 | 
			
		||||
@@ -263,29 +255,30 @@ class perform_site_check():
 | 
			
		||||
        ############ Blocking rules, after checksum #################
 | 
			
		||||
        blocked = False
 | 
			
		||||
 | 
			
		||||
        if len(watch['trigger_text']):
 | 
			
		||||
        trigger_text = watch.get('trigger_text', [])
 | 
			
		||||
        if len(trigger_text):
 | 
			
		||||
            # Assume blocked
 | 
			
		||||
            blocked = True
 | 
			
		||||
            # Filter and trigger works the same, so reuse it
 | 
			
		||||
            # It should return the line numbers that match
 | 
			
		||||
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
 | 
			
		||||
                                                  wordlist=watch['trigger_text'],
 | 
			
		||||
                                                  wordlist=trigger_text,
 | 
			
		||||
                                                  mode="line numbers")
 | 
			
		||||
            # Unblock if the trigger was found
 | 
			
		||||
            if result:
 | 
			
		||||
                blocked = False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        if len(watch['text_should_not_be_present']):
 | 
			
		||||
        text_should_not_be_present = watch.get('text_should_not_be_present', [])
 | 
			
		||||
        if len(text_should_not_be_present):
 | 
			
		||||
            # If anything matched, then we should block a change from happening
 | 
			
		||||
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
 | 
			
		||||
                                                  wordlist=watch['text_should_not_be_present'],
 | 
			
		||||
                                                  wordlist=text_should_not_be_present,
 | 
			
		||||
                                                  mode="line numbers")
 | 
			
		||||
            if result:
 | 
			
		||||
                blocked = True
 | 
			
		||||
 | 
			
		||||
        # The main thing that all this at the moment comes down to :)
 | 
			
		||||
        if watch['previous_md5'] != fetched_md5:
 | 
			
		||||
        if watch.get('previous_md5') != fetched_md5:
 | 
			
		||||
            changed_detected = True
 | 
			
		||||
 | 
			
		||||
        # Looks like something changed, but did it match all the rules?
 | 
			
		||||
@@ -294,7 +287,7 @@ class perform_site_check():
 | 
			
		||||
 | 
			
		||||
        # Extract title as title
 | 
			
		||||
        if is_html:
 | 
			
		||||
            if self.datastore.data['settings']['application']['extract_title_as_title'] or watch['extract_title_as_title']:
 | 
			
		||||
            if self.datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']:
 | 
			
		||||
                if not watch['title'] or not len(watch['title']):
 | 
			
		||||
                    update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
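Note on the fetcher selection above: it is a small pluggable-backend pattern, resolving the class by name with getattr() and falling back to plain requests. A minimal standalone sketch of the same lookup (the two backend classes here are stand-ins, not the real content_fetcher module):

import types

# Hypothetical stand-in for the content_fetcher module and its backend classes
content_fetcher = types.SimpleNamespace(html_requests=type('html_requests', (), {}),
                                        html_webdriver=type('html_webdriver', (), {}))

def pick_fetcher_class(prefer_backend):
    # Use the requested backend when it exists, otherwise fall back to plain requests
    if prefer_backend and hasattr(content_fetcher, prefer_backend):
        return getattr(content_fetcher, prefer_backend)
    return getattr(content_fetcher, "html_requests")

print(pick_fetcher_class("html_webdriver").__name__)   # -> html_webdriver
print(pick_fetcher_class("does_not_exist").__name__)   # -> html_requests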
@@ -303,6 +303,25 @@ class ValidateCSSJSONXPATHInput(object):

                # Re #265 - maybe in the future fetch the page and offer a
                # warning/notice that its possible the rule doesnt yet match anything?
                if not self.allow_json:
                    raise ValidationError("jq not permitted in this field!")

            if 'jq:' in line:
                try:
                    import jq
                except ModuleNotFoundError:
                    # `jq` requires full compilation in windows and so isn't generally available
                    raise ValidationError("jq support not found")

                input = line.replace('jq:', '')

                try:
                    jq.compile(input)
                except (ValueError) as e:
                    message = field.gettext('\'%s\' is not a valid jq expression. (%s)')
                    raise ValidationError(message % (input, str(e)))
                except:
                    raise ValidationError("A system-error occurred when validating your jq expression")


class quickWatchForm(Form):
@@ -330,7 +349,7 @@ class watchForm(commonSettingsForm):

    time_between_check = FormField(TimeBetweenCheckForm)

    css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()], default='')
    include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')

    subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
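With the switch from css_filter to the include_filters list field above, the edit form now takes one rule per line; a hypothetical watch could mix the supported flavours like this (all selectors below are illustrative only):

#price .currency
xpath://body/div/span[contains(@class, 'example-class')]
json:$.products[0].price
jq:.products[].price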
@@ -1,32 +1,36 @@
import json
from typing import List

from bs4 import BeautifulSoup
from jsonpath_ng.ext import parse
import re
from inscriptis import get_text
from inscriptis.model.config import ParserConfig
from jsonpath_ng.ext import parse
from typing import List
import json
import re

class FilterNotFoundInResponse(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)
# HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis
TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>"

class JSONNotFound(ValueError):
    def __init__(self, msg):
        ValueError.__init__(self, msg)

# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
def css_filter(css_filter, html_content):
def include_filters(include_filters, html_content, append_pretty_line_formatting=False):
    soup = BeautifulSoup(html_content, "html.parser")
    html_block = ""
    r = soup.select(css_filter, separator="")
    if len(html_content) > 0 and len(r) == 0:
        raise FilterNotFoundInResponse(css_filter)
    for item in r:
        html_block += str(item)
    r = soup.select(include_filters, separator="")

    return html_block + "\n"
    for element in r:
        # When there's more than 1 match, then add the suffix to separate each line
        # And where the matched result doesn't include something that will cause Inscriptis to add a newline
        # (This way each 'match' reliably has a new-line in the diff)
        # Divs are converted to 4 whitespaces by inscriptis
        if append_pretty_line_formatting and len(html_block) and not element.name in (['br', 'hr', 'div', 'p']):
            html_block += TEXT_FILTER_LIST_LINE_SUFFIX

        html_block += str(element)

    return html_block
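A quick usage sketch of the rewritten helper: with append_pretty_line_formatting enabled, every match after the first gets the <br/> suffix so Inscriptis later renders one match per line (the markup and selector here are illustrative):

html = "<p><span class='item'>A</span><span class='item'>B</span></p>"
print(include_filters(include_filters=".item", html_content=html,
                      append_pretty_line_formatting=True))
# -> roughly: <span class="item">A</span><br/><span class="item">B</span>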

def subtractive_css_selector(css_selector, html_content):
    soup = BeautifulSoup(html_content, "html.parser")
@@ -42,25 +46,29 @@ def element_removal(selectors: List[str], html_content):


# Return str Utf-8 of matched rules
def xpath_filter(xpath_filter, html_content):
def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False):
    from lxml import etree, html

    tree = html.fromstring(bytes(html_content, encoding='utf-8'))
    html_block = ""

    r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
    if len(html_content) > 0 and len(r) == 0:
        raise FilterNotFoundInResponse(xpath_filter)

    #@note: //title/text() wont work where <title>CDATA..

    for element in r:
        # When there's more than 1 match, then add the suffix to separate each line
        # And where the matched result doesn't include something that will cause Inscriptis to add a newline
        # (This way each 'match' reliably has a new-line in the diff)
        # Divs are converted to 4 whitespaces by inscriptis
        if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])):
            html_block += TEXT_FILTER_LIST_LINE_SUFFIX

        if type(element) == etree._ElementStringResult:
            html_block += str(element) + "<br/>"
            html_block += str(element)
        elif type(element) == etree._ElementUnicodeResult:
            html_block += str(element) + "<br/>"
            html_block += str(element)
        else:
            html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>"
            html_block += etree.tostring(element, pretty_print=True).decode('utf-8')

    return html_block

@@ -79,19 +87,35 @@ def extract_element(find='title', html_content=''):
    return element_text

#
def _parse_json(json_data, jsonpath_filter):
    s=[]
    jsonpath_expression = parse(jsonpath_filter.replace('json:', ''))
def _parse_json(json_data, json_filter):
    if 'json:' in json_filter:
        jsonpath_expression = parse(json_filter.replace('json:', ''))
        match = jsonpath_expression.find(json_data)
        return _get_stripped_text_from_json_match(match)

    if 'jq:' in json_filter:

        try:
            import jq
        except ModuleNotFoundError:
            # `jq` requires full compilation in windows and so isn't generally available
            raise Exception("jq support not found")

        jq_expression = jq.compile(json_filter.replace('jq:', ''))
        match = jq_expression.input(json_data).all()

        return _get_stripped_text_from_json_match(match)

def _get_stripped_text_from_json_match(match):
    s = []
    # More than one result, we will return it as a JSON list.
    if len(match) > 1:
        for i in match:
            s.append(i.value)
            s.append(i.value if hasattr(i, 'value') else i)

    # Single value, use just the value, as it could be later used in a token in notifications.
    if len(match) == 1:
        s = match[0].value
        s = match[0].value if hasattr(match[0], 'value') else match[0]

    # Re #257 - Better handling where it does not exist, in the case the original 's' value was False..
    if not match:
@@ -103,16 +127,16 @@ def _parse_json(json_data, jsonpath_filter):

    return stripped_text_from_html

def extract_json_as_string(content, jsonpath_filter):
def extract_json_as_string(content, json_filter):

    stripped_text_from_html = False

    # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson>
    try:
        stripped_text_from_html = _parse_json(json.loads(content), jsonpath_filter)
        stripped_text_from_html = _parse_json(json.loads(content), json_filter)
    except json.JSONDecodeError:

        # Foreach <script json></script> blob.. just return the first that matches jsonpath_filter
        # Foreach <script json></script> blob.. just return the first that matches json_filter
        s = []
        soup = BeautifulSoup(content, 'html.parser')
        bs_result = soup.findAll('script')
@@ -131,7 +155,7 @@ def extract_json_as_string(content, jsonpath_filter):
                # Just skip it
                continue
            else:
                stripped_text_from_html = _parse_json(json_data, jsonpath_filter)
                stripped_text_from_html = _parse_json(json_data, json_filter)
                if stripped_text_from_html:
                    break
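Both prefixes now funnel into the same matching helper; a small sketch of the difference in output, assuming the optional jq package is present for the second call (the data is illustrative):

content = '{"items": [{"price": 10}, {"price": 12}]}'
print(extract_json_as_string(content, 'json:$.items[0].price'))  # -> 10
print(extract_json_as_string(content, 'jq:.items[].price'))      # -> [10, 12] (as a JSON list)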
@@ -103,12 +103,12 @@ class import_distill_io_json(Importer):
                    pass
                except IndexError:
                    pass

                extras['include_filters'] = []
                try:
                    extras['css_filter'] = d_config['selections'][0]['frames'][0]['includes'][0]['expr']
                    if d_config['selections'][0]['frames'][0]['includes'][0]['type'] == 'xpath':
                        extras['css_filter'] = 'xpath:' + extras['css_filter']

                        extras['include_filters'].append('xpath:' + d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
                    else:
                        extras['include_filters'].append(d_config['selections'][0]['frames'][0]['includes'][0]['expr'])
                except KeyError:
                    pass
                except IndexError:
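For context, the Distill.io export that this importer walks contains selection entries shaped roughly like the hypothetical fragment below; an 'xpath' type gains the xpath: prefix, anything else is taken as a CSS rule:

d_config = {
    "selections": [{
        "frames": [{
            "includes": [{"type": "xpath", "expr": "//div[@id='price']"}]
        }]
    }]
}
# With the code above this would yield:
# extras['include_filters'] == ["xpath://div[@id='price']"]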
@@ -13,10 +13,6 @@ class model(dict):
            'watching': {},
            'settings': {
                'headers': {
                    'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
                    'Accept-Encoding': 'gzip, deflate',  # No support for brotli in python requests yet.
                    'Accept-Language': 'en-GB,en-US;q=0.9,en;'
                },
                'requests': {
                    'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds
@@ -1,6 +1,8 @@
import os
import uuid as uuid_builder
from distutils.util import strtobool
import logging
import os
import time
import uuid

minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
@@ -14,42 +16,43 @@ class model(dict):
    __newest_history_key = None
    __history_n=0
    __base_config = {
            'url': None,
            'tag': None,
            'last_checked': 0,
            'paused': False,
            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
            #'newest_history_key': 0,
            'title': None,
            'previous_md5': False,
            'uuid': str(uuid_builder.uuid4()),
            'headers': {},  # Extra headers to send
            #'history': {},  # Dict of timestamp and output stripped filename (removed)
            #'newest_history_key': 0, (removed, taken from history.txt index)
            'body': None,
            'method': 'GET',
            #'history': {},  # Dict of timestamp and output stripped filename
            'check_unique_lines': False, # On change-detected, compare against all history if its something new
            'check_count': 0,
            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
            'extract_text': [],  # Extract text by regex after filters
            'extract_title_as_title': False,
            'fetch_backend': None,
            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
            'headers': {},  # Extra headers to send
            'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
            'include_filters': [],
            'last_checked': 0,
            'last_error': False,
            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
            'method': 'GET',
             # Custom notification content
            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
            'notification_title': None,
            'notification_body': None,
            'notification_format': default_notification_format_for_watch,
            'notification_muted': False,
            'css_filter': '',
            'last_error': False,
            'extract_text': [],  # Extract text by regex after filters
            'subtractive_selectors': [],
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
            'text_should_not_be_present': [], # Text that should not be present
            'fetch_backend': None,
            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
            'extract_title_as_title': False,
            'check_unique_lines': False, # On change-detected, compare against all history if its something new
            'notification_title': None,
            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
            'paused': False,
            'previous_md5': False,
            'proxy': None, # Preferred proxy connection
            'subtractive_selectors': [],
            'tag': None,
            'text_should_not_be_present': [], # Text that should not be present
            # Re #110, so then if this is set to None, we know to use the default value instead
            # Requires setting to None on submit if it's the same as the default
            # Should be all None by default, so we use the system default in this case.
            'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
            'title': None,
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
            'url': None,
            'uuid': str(uuid.uuid4()),
            'webdriver_delay': None,
            'webdriver_js_execute_code': None, # Run before change-detection
        }
@@ -60,7 +63,7 @@ class model(dict):
        self.update(self.__base_config)
        self.__datastore_path = kw['datastore_path']

        self['uuid'] = str(uuid_builder.uuid4())
        self['uuid'] = str(uuid.uuid4())

        del kw['datastore_path']

@@ -82,10 +85,19 @@ class model(dict):
        return False

    def ensure_data_dir_exists(self):
        target_path = os.path.join(self.__datastore_path, self['uuid'])
        if not os.path.isdir(target_path):
            print ("> Creating data dir {}".format(target_path))
            os.mkdir(target_path)
        if not os.path.isdir(self.watch_data_dir):
            print ("> Creating data dir {}".format(self.watch_data_dir))
            os.mkdir(self.watch_data_dir)

    @property
    def link(self):
        url = self.get('url', '')
        if '{%' in url or '{{' in url:
            from jinja2 import Environment
            # Jinja2 available in URLs along with https://pypi.org/project/jinja2-time/
            jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
            return str(jinja2_env.from_string(url).render())
        return url
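The new link property means a watch URL may carry Jinja2 markup (with the jinja2-time extension) that is rendered at fetch time; a hypothetical example:

from jinja2 import Environment

jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])
url = "https://example.com/report-{% now 'utc', '%Y-%m-%d' %}.html"
# Renders today's date into the URL, e.g. https://example.com/report-2022-08-14.html
print(jinja2_env.from_string(url).render())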

    @property
    def label(self):
@@ -109,16 +121,40 @@ class model(dict):

    @property
    def history(self):
        """History index is just a text file as a list
            {watch-uuid}/history.txt

            contains a list like

            {epoch-time},{filename}\n

            We read in this list as the history information

        """
        tmp_history = {}
        import logging
        import time

        # Read the history file as a dict
        fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
        fname = os.path.join(self.watch_data_dir, "history.txt")
        if os.path.isfile(fname):
            logging.debug("Reading history index " + str(time.time()))
            with open(fname, "r") as f:
                tmp_history = dict(i.strip().split(',', 2) for i in f.readlines())
                for i in f.readlines():
                    if ',' in i:
                        k, v = i.strip().split(',', 2)

                        # The index history could contain a relative path, so we need to make the fullpath
                        # so that python can read it
                        if not '/' in v and not '\'' in v:
                            v = os.path.join(self.watch_data_dir, v)
                        else:
                            # It's possible that they moved the datadir on older versions
                            # So the snapshot exists but is in a different path
                            snapshot_fname = v.split('/')[-1]
                            proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname)
                            if not os.path.exists(v) and os.path.exists(proposed_new_path):
                                v = proposed_new_path

                        tmp_history[k] = v

        if len(tmp_history):
            self.__newest_history_key = list(tmp_history.keys())[-1]
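The history index format described in the docstring is one {epoch},{snapshot filename} pair per line; a minimal sketch of the tolerant parse now used, where a bare filename is joined onto the watch's data dir (the values are hypothetical):

import os

watch_data_dir = "/datastore/example-watch-uuid"   # hypothetical watch dir
line = "1659000000,example-snapshot.txt\n"

k, v = line.strip().split(',', 2)
if '/' not in v:
    # relative snapshot name from a newer version - make it absolute
    v = os.path.join(watch_data_dir, v)
print(k, v)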
@@ -129,7 +165,7 @@ class model(dict):

    @property
    def has_history(self):
        fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
        fname = os.path.join(self.watch_data_dir, "history.txt")
        return os.path.isfile(fname)

    # Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0.
@@ -148,31 +184,33 @@ class model(dict):
    # Save some text file to the appropriate path and bump the history
    # result_obj from fetch_site_status.run()
    def save_history_text(self, contents, timestamp):
        import uuid
        import logging

        output_path = "{}/{}".format(self.__datastore_path, self['uuid'])

        self.ensure_data_dir_exists()

        snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4())
        logging.debug("Saving history text {}".format(snapshot_fname))
        # Small hack so that we sleep just enough to allow 1 second  between history snapshots
        # this is because history.txt indexes/keys snapshots by epoch seconds and we dont want dupe keys
        if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
            time.sleep(timestamp - self.__newest_history_key)

        with open(snapshot_fname, 'wb') as f:
        snapshot_fname = "{}.txt".format(str(uuid.uuid4()))

        # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
        # most sites are utf-8 and some are even broken utf-8
        with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f:
            f.write(contents)
            f.close()

        # Append to index
        # @todo check last char was \n
        index_fname = "{}/history.txt".format(output_path)
        index_fname = os.path.join(self.watch_data_dir, "history.txt")
        with open(index_fname, 'a') as f:
            f.write("{},{}\n".format(timestamp, snapshot_fname))
            f.close()

        self.__newest_history_key = timestamp
        self.__history_n+=1
        self.__history_n += 1

        #@todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
        # @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status
        return snapshot_fname

    @property
@@ -205,14 +243,14 @@ class model(dict):
        return not local_lines.issubset(existing_history)

    def get_screenshot(self):
        fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png")
        fname = os.path.join(self.watch_data_dir, "last-screenshot.png")
        if os.path.isfile(fname):
            return fname

        return False

    def __get_file_ctime(self, filename):
        fname = os.path.join(self.__datastore_path, self['uuid'], filename)
        fname = os.path.join(self.watch_data_dir, filename)
        if os.path.isfile(fname):
            return int(os.path.getmtime(fname))
        return False
@@ -237,9 +275,14 @@ class model(dict):
    def snapshot_error_screenshot_ctime(self):
        return self.__get_file_ctime('last-error-screenshot.png')

    @property
    def watch_data_dir(self):
        # The base dir of the watch data
        return os.path.join(self.__datastore_path, self['uuid'])

    def get_error_text(self):
        """Return the text saved from a previous request that resulted in a non-200 error"""
        fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt")
        fname = os.path.join(self.watch_data_dir, "last-error.txt")
        if os.path.isfile(fname):
            with open(fname, 'r') as f:
                return f.read()
@@ -247,7 +290,7 @@ class model(dict):

    def get_error_snapshot(self):
        """Return path to the screenshot that resulted in a non-200 error"""
        fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png")
        fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png")
        if os.path.isfile(fname):
            return fname
        return False
@@ -9,6 +9,8 @@
# exit when any command fails
set -e

SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )

find tests/test_*py -type f|while read test_name
do
  echo "TEST RUNNING $test_name"
@@ -22,7 +24,6 @@ echo "RUNNING WITH BASE_URL SET"
export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py


# Now for the selenium and playwright/browserless fetchers
# Note - this is not UI functional tests - just checking that each one can fetch the content

@@ -38,7 +39,9 @@ docker kill $$-test_selenium

echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
pip3 install playwright~=1.24
PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+")
echo "using $PLAYWRIGHT_VERSION"
pip3 install "$PLAYWRIGHT_VERSION"
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.53-chrome-stable
# takes a while to spin up
sleep 5
@@ -49,3 +52,47 @@ pytest tests/visualselector/test_fetch_data.py

unset PLAYWRIGHT_DRIVER_URL
docker kill $$-test_browserless

# Test proxy list handling, starting two squids on different ports
# Each squid adds a different header to the response, which is the main thing we test for.
docker run -d --name $$-squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3128:3128 ubuntu/squid:4.13-21.10_edge
docker run -d --name $$-squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3129:3128 ubuntu/squid:4.13-21.10_edge


# So, basic HTTP as env var test
export HTTP_PROXY=http://localhost:3128
export HTTPS_PROXY=http://localhost:3128
pytest tests/proxy_list/test_proxy.py
docker logs $$-squid-one 2>/dev/null|grep one.changedetection.io
if [ $? -ne 0 ]
then
  echo "Did not see a request to one.changedetection.io in the squid logs (while checking env vars HTTP_PROXY/HTTPS_PROXY)"
fi
unset HTTP_PROXY
unset HTTPS_PROXY


# 2nd test actually chooses the preferred proxy from proxies.json
cp tests/proxy_list/proxies.json-example ./test-datastore/proxies.json
# Makes a watch use a preferred proxy
pytest tests/proxy_list/test_multiple_proxy.py

# Should be a request in the default "first" squid
docker logs $$-squid-one 2>/dev/null|grep chosen.changedetection.io
if [ $? -ne 0 ]
then
  echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
fi

# And one in the 'second' squid (user selects this as preferred)
docker logs $$-squid-two 2>/dev/null|grep chosen.changedetection.io
if [ $? -ne 0 ]
then
  echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)"
fi

# @todo - test system override proxy selection and watch defaults, setup a 3rd squid?
docker kill $$-squid-one
docker kill $$-squid-two
@@ -50,7 +50,7 @@ $(document).ready(function() {
        state_clicked=false;
        ctx.clearRect(0, 0, c.width, c.height);
        xctx.clearRect(0, 0, c.width, c.height);
        $("#css_filter").val('');
        $("#include_filters").val('');
    });


@@ -68,7 +68,7 @@ $(document).ready(function() {
               xctx = c.getContext("2d");
                // redline highlight context
               ctx = c.getContext("2d");
               current_default_xpath =$("#css_filter").val();
               current_default_xpath =$("#include_filters").val();
               fetch_data();
               $('#selector-canvas').off("mousemove mousedown");
               // screenshot_url defined in the edit.html template
@@ -205,9 +205,9 @@ $(document).ready(function() {
        var sel = selector_data['size_pos'][current_selected_i];
        if (sel[0] == '/') {
        // @todo - not sure just checking / is right
            $("#css_filter").val('xpath:'+sel.xpath);
            $("#include_filters").val('xpath:'+sel.xpath);
        } else {
            $("#css_filter").val(sel.xpath);
            $("#include_filters").val(sel.xpath);
        }
        xctx.fillStyle = 'rgba(205,205,205,0.95)';
        xctx.strokeStyle = 'rgba(225,0,0,0.9)';
@@ -156,7 +156,7 @@ body:after, body:before {

.fetch-error {
  padding-top: 1em;
  font-size: 60%;
  font-size: 80%;
  max-width: 400px;
  display: block;
}
@@ -27,17 +27,18 @@ class ChangeDetectionStore:
    # For when we edit, we should write to disk
    needs_write_urgent = False

    __version_check = True

    def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"):
        # Should only be active for docker
        # logging.basicConfig(filename='/dev/stdout', level=logging.INFO)
        self.needs_write = False
        self.__data = App.model()
        self.datastore_path = datastore_path
        self.json_store_path = "{}/url-watches.json".format(self.datastore_path)
        self.needs_write = False
        self.proxy_list = None
        self.start_time = time.time()
        self.stop_thread = False

        self.__data = App.model()

        # Base definition for all watchers
        # deepcopy part of #569 - not sure why its needed exactly
        self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={}))
@@ -81,10 +82,13 @@ class ChangeDetectionStore:
        except (FileNotFoundError, json.decoder.JSONDecodeError):
            if include_default_watches:
                print("Creating JSON store at", self.datastore_path)
                self.add_watch(url='https://news.ycombinator.com/',
                               tag='Tech news',
                               extras={'fetch_backend': 'html_requests'})

                self.add_watch(url='http://www.quotationspage.com/random.php', tag='test')
                self.add_watch(url='https://news.ycombinator.com/', tag='Tech news')
                self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io')
                self.add_watch(url='https://changedetection.io/CHANGELOG.txt',
                               tag='changedetection.io',
                               extras={'fetch_backend': 'html_requests'})

        self.__data['version_tag'] = version_tag

@@ -113,9 +117,7 @@ class ChangeDetectionStore:
            self.__data['settings']['application']['api_access_token'] = secret

        # Proxy list support - available as a selection in settings when text file is imported
        # CSV list
        # "name, address", or just "name"
        proxy_list_file = "{}/proxies.txt".format(self.datastore_path)
        proxy_list_file = "{}/proxies.json".format(self.datastore_path)
        if path.isfile(proxy_list_file):
            self.import_proxy_list(proxy_list_file)

@@ -270,7 +272,7 @@ class ChangeDetectionStore:
            extras = {}
        # should always be str
        if tag is None or not tag:
            tag=''
            tag = ''

        # In case these are copied across, assume it's a reference and deepcopy()
        apply_extras = deepcopy(extras)
@@ -285,17 +287,31 @@ class ChangeDetectionStore:
                res = r.json()

                # List of permissible attributes we accept from the wild internet
                for k in ['url', 'tag',
                          'paused', 'title',
                          'previous_md5', 'headers',
                          'body', 'method',
                          'ignore_text', 'css_filter',
                          'subtractive_selectors', 'trigger_text',
                          'extract_title_as_title', 'extract_text',
                for k in [
                    'body',
                    'css_filter',
                    'extract_text',
                    'extract_title_as_title',
                    'headers',
                    'ignore_text',
                    'include_filters',
                    'method',
                    'paused',
                    'previous_md5',
                    'subtractive_selectors',
                    'tag',
                    'text_should_not_be_present',
                          'webdriver_js_execute_code']:
                    'title',
                    'trigger_text',
                    'webdriver_js_execute_code',
                    'url',
                ]:
                    if res.get(k):
                        if k != 'css_filter':
                            apply_extras[k] = res[k]
                        else:
                            # We renamed the field and made it a list
                            apply_extras['include_filters'] = [res['css_filter']]

            except Exception as e:
                logging.error("Error fetching metadata for shared watch link", url, str(e))
@@ -318,12 +334,13 @@ class ChangeDetectionStore:
                    del apply_extras[k]

            new_watch.update(apply_extras)
            self.__data['watching'][new_uuid]=new_watch
            self.__data['watching'][new_uuid] = new_watch

        self.__data['watching'][new_uuid].ensure_data_dir_exists()

        if write_to_disk_now:
            self.sync_to_json()

        return new_uuid

    def visualselector_data_is_ready(self, watch_uuid):
@@ -437,20 +454,42 @@ class ChangeDetectionStore:
                    unlink(item)

    def import_proxy_list(self, filename):
        import csv
        with open(filename, newline='') as f:
            reader = csv.reader(f, skipinitialspace=True)
            # @todo This loop could be improved
            l = []
            for row in reader:
                if len(row):
                    if len(row)>=2:
                        l.append(tuple(row[:2]))
                    else:
                        l.append(tuple([row[0], row[0]]))
            self.proxy_list = l if len(l) else None
        with open(filename) as f:
            self.proxy_list = json.load(f)
            print ("Registered proxy list", list(self.proxy_list.keys()))


    def get_preferred_proxy_for_watch(self, uuid):
        """
        Returns the preferred proxy by ID key
        :param uuid: UUID
        :return: proxy "key" id
        """

        proxy_id = None
        if self.proxy_list is None:
            return None

        # If its a valid one
        watch = self.data['watching'].get(uuid)

        if watch.get('proxy') and watch.get('proxy') in list(self.proxy_list.keys()):
            return watch.get('proxy')

        # not valid (including None), try the system one
        else:
            system_proxy_id = self.data['settings']['requests'].get('proxy')
            # Is not None and exists
            if self.proxy_list.get(system_proxy_id):
                return system_proxy_id

        # Fallback - Did not resolve anything, use the first available
        if system_proxy_id is None:
            first_default = list(self.proxy_list)[0]
            return first_default

        return None
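Judging by how proxy_list is consumed here and in the fetcher (proxy_list.get(proxy_id).get('url')), the new proxies.json presumably maps a proxy id to an object with at least a url key; a hypothetical minimal example:

{
    "squid-one": {"url": "http://localhost:3128"},
    "squid-two": {"url": "http://localhost:3129"}
}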

    # Run all updates
    # IMPORTANT - Each update could be run even when they have a new install and the schema is correct
    #             So therefore - each `update_n` should be very careful about checking if it needs to actually run
@@ -557,3 +596,22 @@ class ChangeDetectionStore:
                continue
        return


    # We incorrectly used common header overrides that should only apply to Requests
    # These are now handled in content_fetcher::html_requests and shouldnt be passed to Playwright/Selenium
    def update_7(self):
        # These were hard-coded in early versions
        for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']:
            if self.data['settings']['headers'].get(v):
                del self.data['settings']['headers'][v]

    # Convert filters to a list of filters css_filter -> include_filters
    def update_8(self):
        for uuid, watch in self.data['watching'].items():
            try:
                existing_filter = watch.get('css_filter', '')
                if existing_filter:
                    watch['include_filters'] = [existing_filter]
            except:
                continue
        return
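update_8 is a plain schema migration: the old scalar css_filter becomes a one-element include_filters list. The effect on a (hypothetical) watch dict:

watch = {'css_filter': '#price'}            # pre-migration shape
existing_filter = watch.get('css_filter', '')
if existing_filter:
    watch['include_filters'] = [existing_filter]
print(watch['include_filters'])             # -> ['#price']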
 | 
			
		||||
@@ -40,7 +40,8 @@
                <fieldset>
                    <div class="pure-control-group">
                        {{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
                        <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span>
                        <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br/>
                        <span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br/>
                    </div>
                    <div class="pure-control-group">
                        {{ render_field(form.title, class="m-d") }}
@@ -77,6 +78,7 @@
                        <span class="pure-form-message-inline">
                            <p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p>
                            <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
                            Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
                        </span>
                    </div>
                {% if form.proxy %}
@@ -172,19 +174,29 @@ User-Agent: wonderbra 1.0") }}
                        </div>
                    </fieldset>
                    <div class="pure-control-group">
                        {% set field = render_field(form.css_filter,
                            placeholder=".class-name or #some-id, or other CSS selector rule.",
                        {% set field = render_field(form.include_filters,
                            rows=5,
                            placeholder="#example
xpath://body/div/span[contains(@class, 'example-class')]",
                            class="m-d")
                        %}
                        {{ field }}
                        {% if '/text()' in  field %}
                          <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br/>
                        {% endif %}
                        <span class="pure-form-message-inline">
                        <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/>
                    <ul>
                        <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li>
                        <li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required,  <a
                                href="https://jsonpath.com/" target="new">test your JSONPath here</a></li>
                        <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed).
                            <ul>
                                <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required,  <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li>
                                {% if jq_support %}
                                <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li>
                                {% else %}
                                <li>jq support not installed</li>
                                {% endif %}
                            </ul>
                        </li>
                        <li>XPath - Limit text to this XPath rule, simply start with a forward-slash,
                            <ul>
                                <li>Example:  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a
@@ -193,7 +205,7 @@ User-Agent: wonderbra 1.0") }}
                            </ul>
                            </li>
                    </ul>
                    Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a
                    Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a
                                href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/>
                </span>
                    </div>

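Taken together, the help text above means the filter textarea accepts one rule per line and mixes the syntaxes freely; a plausible entry (all selectors here are illustrative) could be:

    #price .amount
    xpath://span[contains(@class, 'discount')]
    json:$.offers.price
    jq:.offers.price

Any line that matches contributes to the watched text, per the "any rules that matches will be used" note.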
@@ -99,6 +99,8 @@
                        <p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p>
                        <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p>
                    </span>
                    <br/>
                    Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a>
                </div>
                <fieldset class="pure-group" id="webdriver-override-options">
                    <div class="pure-form-message-inline">

@@ -87,7 +87,7 @@
                    <a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a>
                </td>
                <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
                    <a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a>
                    <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
                    <a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a>

                    {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %}

@@ -41,7 +41,7 @@ def app(request):

    cleanup(datastore_path)

    app_config = {'datastore_path': datastore_path}
    app_config = {'datastore_path': datastore_path, 'disable_checkver' : True}
    cleanup(app_config['datastore_path'])
    datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False)
    app = changedetection_app(app_config, datastore)

changedetectionio/tests/proxy_list/__init__.py  (new file, 2 lines)
@@ -0,0 +1,2 @@
"""Tests for the app."""

changedetectionio/tests/proxy_list/conftest.py  (new file, 14 lines)
@@ -0,0 +1,14 @@
#!/usr/bin/python3

from .. import conftest

#def pytest_addoption(parser):
#    parser.addoption("--url_suffix", action="store", default="identifier for request")


#def pytest_generate_tests(metafunc):
#    # This is called for every test. Only get/set command line arguments
#    # if the argument is specified in the list of test "fixturenames".
#    option_value = metafunc.config.option.url_suffix
#    if 'url_suffix' in metafunc.fixturenames and option_value is not None:
#        metafunc.parametrize("url_suffix", [option_value])
changedetectionio/tests/proxy_list/proxies.json-example  (new file, 10 lines)
@@ -0,0 +1,10 @@
{
  "proxy-one": {
    "label": "One",
    "url": "http://127.0.0.1:3128"
  },
  "proxy-two": {
    "label": "two",
    "url": "http://127.0.0.1:3129"
  }
}
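For context, the keys in this example file ('proxy-one', 'proxy-two') are what a watch stores in its 'proxy' field, and the datastore falls back to the first entry when nothing matches (see the fallback code at the top of this compare). A small sketch of that resolution, using only this example file:

    # Sketch of proxy-key resolution against the example file above.
    import json

    with open('proxies.json-example') as f:
        proxy_list = json.load(f)

    def resolve_proxy(preferred_key):
        if preferred_key in proxy_list:
            return proxy_list[preferred_key]['url']
        # Fallback - use the first available entry, as the datastore code does
        first_default = list(proxy_list)[0]
        return proxy_list[first_default]['url']

    print(resolve_proxy('proxy-two'))  # http://127.0.0.1:3129
    print(resolve_proxy('missing'))    # http://127.0.0.1:3128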
changedetectionio/tests/proxy_list/squid.conf  (new file, 41 lines)
@@ -0,0 +1,41 @@
acl localnet src 0.0.0.1-0.255.255.255  # RFC 1122 "this" network (LAN)
acl localnet src 10.0.0.0/8             # RFC 1918 local private network (LAN)
acl localnet src 100.64.0.0/10          # RFC 6598 shared address space (CGN)
acl localnet src 169.254.0.0/16         # RFC 3927 link-local (directly plugged) machines
acl localnet src 172.16.0.0/12          # RFC 1918 local private network (LAN)
acl localnet src 192.168.0.0/16         # RFC 1918 local private network (LAN)
acl localnet src fc00::/7               # RFC 4193 local private network range
acl localnet src fe80::/10              # RFC 4291 link-local (directly plugged) machines
acl localnet src 159.65.224.174
acl SSL_ports port 443
acl Safe_ports port 80          # http
acl Safe_ports port 21          # ftp
acl Safe_ports port 443         # https
acl Safe_ports port 70          # gopher
acl Safe_ports port 210         # wais
acl Safe_ports port 1025-65535  # unregistered ports
acl Safe_ports port 280         # http-mgmt
acl Safe_ports port 488         # gss-http
acl Safe_ports port 591         # filemaker
acl Safe_ports port 777         # multiling http
acl CONNECT method CONNECT

http_access deny !Safe_ports
http_access deny CONNECT !SSL_ports
http_access allow localhost manager
http_access deny manager
http_access allow localhost
http_access allow localnet
http_access deny all
http_port 3128
coredump_dir /var/spool/squid
refresh_pattern ^ftp:           1440    20%     10080
refresh_pattern ^gopher:        1440    0%      1440
refresh_pattern -i (/cgi-bin/|\?) 0     0%      0
refresh_pattern \/(Packages|Sources)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
refresh_pattern \/Release(|\.gpg)$ 0 0% 0 refresh-ims
refresh_pattern \/InRelease$ 0 0% 0 refresh-ims
refresh_pattern \/(Translation-.*)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims
refresh_pattern .               0       20%     4320
logfile_rotate 0

changedetectionio/tests/proxy_list/test_multiple_proxy.py  (new file, 38 lines)
@@ -0,0 +1,38 @@
#!/usr/bin/python3

import time
from flask import url_for
from ..util import live_server_setup

def test_preferred_proxy(client, live_server):
    time.sleep(1)
    live_server_setup(live_server)
    time.sleep(1)
    url = "http://chosen.changedetection.io"

    res = client.post(
        url_for("import_page"),
        # Because a URL won't show in squid/proxy logs due to it being SSL'd
        # Use plain HTTP or a specific domain-name here
        data={"urls": url},
        follow_redirects=True
    )

    assert b"1 Imported" in res.data

    time.sleep(2)
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={
                "include_filters": "",
                "fetch_backend": "html_requests",
                "headers": "",
                "proxy": "proxy-two",
                "tag": "",
                "url": url,
              },
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    time.sleep(2)
    # Now the request should appear in the second-squid logs
changedetectionio/tests/proxy_list/test_proxy.py  (new file, 19 lines)
@@ -0,0 +1,19 @@
#!/usr/bin/python3

import time
from flask import url_for
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client

# just make a request, we will grep in the docker logs to see it actually got called
def test_check_basic_change_detection_functionality(client, live_server):
    live_server_setup(live_server)
    res = client.post(
        url_for("import_page"),
        # Because a URL won't show in squid/proxy logs due to it being SSL'd
        # Use plain HTTP or a specific domain-name here
        data={"urls": "http://one.changedetection.io"},
        follow_redirects=True
    )

    assert b"1 Imported" in res.data
    time.sleep(3)
@@ -147,6 +147,16 @@ def test_api_simple(client, live_server):
    # @todo how to handle None/default global values?
    assert watch['history_n'] == 2, "Found replacement history section, which is in its own API"

    # basic systeminfo check
    res = client.get(
        url_for("systeminfo"),
        headers={'x-api-key': api_key},
    )
    info = json.loads(res.data)
    assert info.get('watch_count') == 1
    assert info.get('uptime') > 0.5


    # Finally delete the watch
    res = client.delete(
        url_for("watch", uuid=watch_uuid),

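Outside the test client, the same check can be made against a running instance with any HTTP client. A sketch, assuming a default local install on port 5000 and the usual /api/v1 prefix (both are assumptions; only the endpoint name and the x-api-key header are confirmed by the test above):

    # Sketch - base URL, port, and API key value are placeholders.
    import json
    import urllib.request

    req = urllib.request.Request(
        "http://localhost:5000/api/v1/systeminfo",
        headers={"x-api-key": "YOUR-API-KEY"},
    )
    with urllib.request.urlopen(req) as res:
        info = json.loads(res.read())
    print(info.get("watch_count"), info.get("uptime"))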
@@ -23,7 +23,7 @@ def test_basic_auth(client, live_server):
    # Check form validation
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        data={"include_filters": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data

@@ -3,7 +3,7 @@
import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks

sleep_time_for_fetch_thread = 3

@@ -36,7 +36,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)

        # Give the thread time to pick it up
        time.sleep(sleep_time_for_fetch_thread)
        wait_for_all_checks(client)

        # It should report nothing found (no new 'unviewed' class)
        res = client.get(url_for("index"))
@@ -69,7 +69,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
    res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert b'1 watches are queued for rechecking.' in res.data

    time.sleep(sleep_time_for_fetch_thread)
    wait_for_all_checks(client)

    # Now something should be ready, indicated by having a 'unviewed' class
    res = client.get(url_for("index"))
@@ -98,14 +98,14 @@ def test_check_basic_change_detection_functionality(client, live_server):
    assert b'which has this one new line' in res.data
    assert b'Which is across multiple lines' not in res.data

    time.sleep(2)
    wait_for_all_checks(client)

    # Do this a few times.. ensures we don't accidentally set the status
    for n in range(2):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)

        # Give the thread time to pick it up
        time.sleep(sleep_time_for_fetch_thread)
        wait_for_all_checks(client)

        # It should report nothing found (no new 'unviewed' class)
        res = client.get(url_for("index"))
@@ -125,7 +125,7 @@ def test_check_basic_change_detection_functionality(client, live_server):
    )

    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(sleep_time_for_fetch_thread)
    wait_for_all_checks(client)

    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

@@ -1,18 +1,31 @@
#!/usr/bin/python3

import time
from .util import set_original_response, set_modified_response, live_server_setup
from flask import url_for
from urllib.request import urlopen
from . util import set_original_response, set_modified_response, live_server_setup
from zipfile import ZipFile
import re
import time


def test_backup(client, live_server):

    live_server_setup(live_server)

    set_original_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Add our URL to the import page
    res = client.post(
        url_for("import_page"),
        data={"urls": url_for('test_endpoint', _external=True)},
        follow_redirects=True
    )

    assert b"1 Imported" in res.data
    time.sleep(3)

    res = client.get(
        url_for("get_backup"),
        follow_redirects=True
@@ -20,6 +33,19 @@ def test_backup(client, live_server):

    # Should get the right zip content type
    assert res.content_type == "application/zip"

    # Should be PK/ZIP stream
    assert res.data.count(b'PK') >= 2

    # ZipFile from buffer seems non-obvious, just save it instead
    with open("download.zip", 'wb') as f:
        f.write(res.data)

    zip = ZipFile('download.zip')
    l = zip.namelist()
    uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
    newlist = list(filter(uuid4hex.match, l))  # Read Note below

    # Should be two txt files in the archive (history and the snapshot)
    assert len(newlist) == 2

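As an aside to the "ZipFile from buffer seems non-obvious" comment above: the same assertions can be made without touching disk by wrapping the response bytes in io.BytesIO, which ZipFile accepts as a file-like object. A sketch of the equivalent check:

    # Sketch - same checks as above, but in memory; 'res' is the get_backup response.
    import io
    import re
    from zipfile import ZipFile

    backup = ZipFile(io.BytesIO(res.data))
    uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I)
    assert len([name for name in backup.namelist() if uuid4hex.match(name)]) == 2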
@@ -46,22 +46,23 @@ def set_modified_response():


# Test that the CSS extraction works how we expect, important here is the right placing of new lines \n's
def test_css_filter_output():
    from changedetectionio import fetch_site_status
def test_include_filters_output():
    from inscriptis import get_text

    # Check text with sub-parts renders correctly
    content = """<html> <body><div id="thingthing" >  Some really <b>bold</b> text  </div> </body> </html>"""
    html_blob = css_filter(css_filter="#thingthing", html_content=content)
    html_blob = include_filters(include_filters="#thingthing", html_content=content)
    text = get_text(html_blob)
    assert text == "  Some really bold text"

    content = """<html> <body>
    <p>foo bar blah</p>
    <div class="parts">Block A</div> <div class="parts">Block B</div></body> 
    <DIV class="parts">Block A</DiV> <div class="parts">Block B</DIV></body> 
    </html>
"""
    html_blob = css_filter(css_filter=".parts", html_content=content)

    # in xPath this would be //*[@class='parts']
    html_blob = include_filters(include_filters=".parts", html_content=content)
    text = get_text(html_blob)

    # Divs are converted to 4 whitespaces by inscriptis
@@ -69,10 +70,10 @@ def test_css_filter_output():


# Tests the whole stack works with the CSS Filter
def test_check_markup_css_filter_restriction(client, live_server):
def test_check_markup_include_filters_restriction(client, live_server):
    sleep_time_for_fetch_thread = 3

    css_filter = "#sametext"
    include_filters = "#sametext"

    set_original_response()

@@ -98,7 +99,7 @@ def test_check_markup_css_filter_restriction(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@@ -107,7 +108,7 @@ def test_check_markup_css_filter_restriction(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
    assert bytes(css_filter.encode('utf-8')) in res.data
    assert bytes(include_filters.encode('utf-8')) in res.data

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
@@ -126,3 +127,58 @@ def test_check_markup_css_filter_restriction(client, live_server):
    # Because it should be looking at only that 'sametext' id
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data


# Tests the whole stack works with the CSS Filter
def test_check_multiple_filters(client, live_server):
    sleep_time_for_fetch_thread = 3

    include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]"

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write("""<html><body>
     <div id="blob-a">Blob A</div>
     <div id="blob-b">Blob B</div>
     <div id="blob-c">Blob C</div>
     </body>
     </html>
    """)

    # Give the endpoint time to spin up
    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    time.sleep(1)

    # Goto the edit page, add our ignore text
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"include_filters": include_filters,
              "url": test_url,
              "tag": "",
              "headers": "",
              'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    # Only the two blobs should be here
    assert b"Blob A" in res.data # CSS was ok
    assert b"Blob B" in res.data # xPath was ok
    assert b"Blob C" not in res.data # Should not be included

@@ -88,7 +88,7 @@ def test_check_filter_multiline(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": '',
        data={"include_filters": '',
              'extract_text': '/something.+?6 billion.+?lines/si',
              "url": test_url,
              "tag": "",
@@ -116,7 +116,7 @@ def test_check_filter_multiline(client, live_server):

def test_check_filter_and_regex_extract(client, live_server):
    sleep_time_for_fetch_thread = 3
    css_filter = ".changetext"
    include_filters = ".changetext"

    set_original_response()

@@ -143,7 +143,7 @@ def test_check_filter_and_regex_extract(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": css_filter,
        data={"include_filters": include_filters,
              'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i',
              "url": test_url,
              "tag": "",

@@ -92,7 +92,7 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
        "tag": "my tag",
        "title": "my title",
        "headers": "",
        "css_filter": '.ticket-available',
        "include_filters": '.ticket-available',
        "fetch_backend": "html_requests"})

    res = client.post(

@@ -76,7 +76,7 @@ def run_filter_test(client, content_filter):
        "title": "my title",
        "headers": "",
        "filter_failure_notification_send": 'y',
        "css_filter": content_filter,
        "include_filters": content_filter,
        "fetch_backend": "html_requests"})

    res = client.post(
@@ -95,7 +95,7 @@ def run_filter_test(client, content_filter):
        time.sleep(3)

    # We should see something in the frontend
    assert b'Warning, filter' in res.data
    assert b'Warning, no filters were found' in res.data

    # Now it should exist and contain our "filter not found" alert
    assert os.path.isfile("test-datastore/notification.txt")
@@ -131,7 +131,7 @@ def run_filter_test(client, content_filter):
def test_setup(live_server):
    live_server_setup(live_server)

def test_check_css_filter_failure_notification(client, live_server):
def test_check_include_filters_failure_notification(client, live_server):
    set_original_response()
    time.sleep(1)
    run_filter_test(client, '#nope-doesnt-exist')

changedetectionio/tests/test_jinja2.py  (new file, 33 lines)
@@ -0,0 +1,33 @@
#!/usr/bin/python3

import time
from flask import url_for
from .util import live_server_setup


# If there was only a change in the whitespacing, then we shouldn't have a change detected
def test_jinja2_in_url_query(client, live_server):
    live_server_setup(live_server)

    # Give the endpoint time to spin up
    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_return_query', _external=True)

    # because url_for() will URL-encode the var, but we don't here
    full_url = "{}?{}".format(test_url,
                              "date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}", )
    res = client.post(
        url_for("form_quick_watch_add"),
        data={"url": full_url, "tag": "test"},
        follow_redirects=True
    )
    assert b"Watch added" in res.data
    time.sleep(3)
    # It should report nothing found (no new 'unviewed' class)
    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
    assert b'date=2' in res.data
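The test above exercises the URL-variable feature mentioned in the edit-form help earlier in this compare; a user-facing example of a watched URL in the same style (the domain and path are placeholders):

    # Illustrative watched URL - changedetection.io renders the Jinja2 'now'
    # expressions before each fetch, so on 1 July 2022 this would request
    # http://example.com/report?date=2022.07.01
    watch_url = (
        "http://example.com/report"
        "?date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}"
    )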
@@ -2,10 +2,15 @@
# coding=utf-8

import time
from flask import url_for
from flask import url_for, escape
from . util import live_server_setup
import pytest
jq_support = True

try:
    import jq
except ModuleNotFoundError:
    jq_support = False

def test_setup(live_server):
    live_server_setup(live_server)
@@ -36,16 +41,28 @@ and it can also be repeated
    from .. import html_tools

    # See that we can find the second <script> one, which is not broken, and matches our filter
    text = html_tools.extract_json_as_string(content, "$.offers.price")
    text = html_tools.extract_json_as_string(content, "json:$.offers.price")
    assert text == "23.5"

    text = html_tools.extract_json_as_string('{"id":5}', "$.id")
    # also check for jq
    if jq_support:
        text = html_tools.extract_json_as_string(content, "jq:.offers.price")
        assert text == "23.5"

        text = html_tools.extract_json_as_string('{"id":5}', "jq:.id")
        assert text == "5"

    text = html_tools.extract_json_as_string('{"id":5}', "json:$.id")
    assert text == "5"

    # When nothing at all is found, it should throw JSONNOTFound
    # Which is caught and shown to the user in the watch-overview table
    with pytest.raises(html_tools.JSONNotFound) as e_info:
        html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "$.id")
        html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id")

    if jq_support:
        with pytest.raises(html_tools.JSONNotFound) as e_info:
            html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id")

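For reference, the two prefixes tested above map onto the two libraries the edit-form help links to. A standalone sketch of the same query in both styles, on plain Python data (jsonpath-ng required, jq optional, mirroring the jq_support guard in this file):

    # Sketch - same nested query via JSONPath (json: prefix) and jq (jq: prefix).
    data = {"offers": {"price": 23.5}}

    from jsonpath_ng import parse
    assert [m.value for m in parse("$.offers.price").find(data)] == [23.5]

    try:
        import jq
        assert jq.compile(".offers.price").input(data).first() == 23.5
    except ModuleNotFoundError:
        pass  # jq not installed - the same situation the jq_support flag covers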
def set_original_ext_response():
    data = """
@@ -66,6 +83,7 @@ def set_original_ext_response():

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(data)
    return None

def set_modified_ext_response():
    data = """
@@ -86,6 +104,7 @@ def set_modified_ext_response():

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(data)
    return None

def set_original_response():
    test_return_data = """
@@ -113,7 +132,7 @@ def set_original_response():
    return None


def set_response_with_html():
def set_json_response_with_html():
    test_return_data = """
    {
      "test": [
@@ -157,7 +176,7 @@ def set_modified_response():
def test_check_json_without_filter(client, live_server):
    # Request a JSON document from an application/json source containing HTML
    # and be sure it doesn't get chewed up by inscriptis
    set_response_with_html()
    set_json_response_with_html()

    # Give the endpoint time to spin up
    time.sleep(1)
@@ -170,9 +189,6 @@ def test_check_json_without_filter(client, live_server):
        follow_redirects=True
    )

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(3)

@@ -181,13 +197,14 @@ def test_check_json_without_filter(client, live_server):
        follow_redirects=True
    )

    # Should still see '"html": "<b>"'
    assert b'"<b>' in res.data
    assert res.data.count(b'{\n') >= 2

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

def test_check_json_filter(client, live_server):
    json_filter = 'json:boss.name'

def check_json_filter(json_filter, client, live_server):
    set_original_response()

    # Give the endpoint time to spin up
@@ -202,9 +219,6 @@ def test_check_json_filter(client, live_server):
    )
    assert b"1 Imported" in res.data

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(3)

@@ -212,7 +226,7 @@ def test_check_json_filter(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": json_filter,
        data={"include_filters": json_filter,
              "url": test_url,
              "tag": "",
              "headers": "",
@@ -226,10 +240,7 @@ def test_check_json_filter(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
    assert bytes(json_filter.encode('utf-8')) in res.data

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert bytes(escape(json_filter).encode('utf-8')) in res.data

    # Give the thread time to pick it up
    time.sleep(3)
@@ -252,10 +263,17 @@ def test_check_json_filter(client, live_server):
    # And #462 - check we see the proper utf-8 string there
    assert "Örnsköldsvik".encode('utf-8') in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

def test_check_json_filter_bool_val(client, live_server):
    json_filter = "json:$['available']"
def test_check_jsonpath_filter(client, live_server):
    check_json_filter('json:boss.name', client, live_server)

def test_check_jq_filter(client, live_server):
    if jq_support:
        check_json_filter('jq:.boss.name', client, live_server)

def check_json_filter_bool_val(json_filter, client, live_server):
    set_original_response()

    # Give the endpoint time to spin up
@@ -275,7 +293,7 @@ def test_check_json_filter_bool_val(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": json_filter,
        data={"include_filters": json_filter,
              "url": test_url,
              "tag": "",
              "headers": "",
@@ -285,11 +303,6 @@ def test_check_json_filter_bool_val(client, live_server):
    )
    assert b"Updated watch." in res.data

    time.sleep(3)

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(3)
    #  Make a change
@@ -304,14 +317,22 @@ def test_check_json_filter_bool_val(client, live_server):
    # But the change should be there, though it's hard to test the change was detected because it will show old and new versions
    assert b'false' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

def test_check_jsonpath_filter_bool_val(client, live_server):
    check_json_filter_bool_val("json:$['available']", client, live_server)

def test_check_jq_filter_bool_val(client, live_server):
    if jq_support:
        check_json_filter_bool_val("jq:.available", client, live_server)

# Re #265 - Extended JSON selector test
# Stuff to consider here
# - Selector should be allowed to return empty when it doesn't match (people might wait for some condition)
# - The 'diff' tab could show the old and new content
# - Form should let us enter a selector that doesn't (yet) match anything
def test_check_json_ext_filter(client, live_server):
    json_filter = 'json:$[?(@.status==Sold)]'

def check_json_ext_filter(json_filter, client, live_server):
    set_original_ext_response()

    # Give the endpoint time to spin up
@@ -326,9 +347,6 @@ def test_check_json_ext_filter(client, live_server):
    )
    assert b"1 Imported" in res.data

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(3)

@@ -336,7 +354,7 @@ def test_check_json_ext_filter(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": json_filter,
        data={"include_filters": json_filter,
              "url": test_url,
              "tag": "",
              "headers": "",
@@ -350,10 +368,7 @@ def test_check_json_ext_filter(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
    assert bytes(json_filter.encode('utf-8')) in res.data

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    assert bytes(escape(json_filter).encode('utf-8')) in res.data

    # Give the thread time to pick it up
    time.sleep(3)
@@ -376,3 +391,12 @@ def test_check_json_ext_filter(client, live_server):
    assert b'ForSale' not in res.data
    assert b'Sold' in res.data

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

def test_check_jsonpath_ext_filter(client, live_server):
    check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server)

def test_check_jq_ext_filter(client, live_server):
    if jq_support:
        check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server)
@@ -14,7 +14,7 @@ def test_share_watch(client, live_server):
    live_server_setup(live_server)

    test_url = url_for('test_endpoint', _external=True)
    css_filter = ".nice-filter"
    include_filters = ".nice-filter"

    # Add our URL to the import page
    res = client.post(
@@ -29,7 +29,7 @@ def test_share_watch(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@@ -37,7 +37,7 @@ def test_share_watch(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
    assert bytes(css_filter.encode('utf-8')) in res.data
    assert bytes(include_filters.encode('utf-8')) in res.data

    # click share the link
    res = client.get(
@@ -73,4 +73,8 @@ def test_share_watch(client, live_server):
    res = client.get(
        url_for("edit_page", uuid="first"),
    )
    assert bytes(css_filter.encode('utf-8')) in res.data
    assert bytes(include_filters.encode('utf-8')) in res.data

    # Check it saved the URL
    res = client.get(url_for("index"))
    assert bytes(test_url.encode('utf-8')) in res.data

@@ -57,10 +57,9 @@ def test_check_basic_change_detection_functionality_source(client, live_server):




# `subtractive_selectors` should still work in `source:` type requests
def test_check_ignore_elements(client, live_server):
    set_original_response()

    time.sleep(2)
    test_url = 'source:'+url_for('test_endpoint', _external=True)
    # Add our URL to the import page
@@ -77,9 +76,9 @@ def test_check_ignore_elements(client, live_server):
    #####################
    # We want <span> and <p> ONLY, but ignore span with .foobar-detection

    res = client.post(
    client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
        data={"include_filters": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )

@@ -89,7 +88,6 @@ def test_check_ignore_elements(client, live_server):
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    assert b'foobar-detection' not in res.data
    assert b'<br' not in res.data
    assert b'<p' in res.data
@@ -49,7 +49,7 @@ def test_trigger_regex_functionality_with_filter(client, live_server):
        url_for("edit_page", uuid="first"),
        data={"trigger_text": "/cool.stuff/",
              "url": test_url,
              "css_filter": '#in-here',
              "include_filters": '#in-here',
              "fetch_backend": "html_requests"},
        follow_redirects=True
    )

@@ -22,7 +22,7 @@ def test_check_watch_field_storage(client, live_server):
        url_for("edit_page", uuid="first"),
        data={ "notification_urls": "json://127.0.0.1:30000\r\njson://128.0.0.1\r\n",
               "time_between_check-minutes": 126,
               "css_filter" : ".fooclass",
               "include_filters" : ".fooclass",
               "title" : "My title",
               "ignore_text" : "ignore this",
               "url": test_url,

@@ -89,7 +89,7 @@ def test_check_xpath_filter_utf8(client, live_server):
    time.sleep(1)
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@@ -143,7 +143,7 @@ def test_check_xpath_text_function_utf8(client, live_server):
    time.sleep(1)
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@@ -182,9 +182,6 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
    )
    assert b"1 Imported" in res.data

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

@@ -192,7 +189,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server):
    # Add our URL to the import page
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        data={"include_filters": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
@@ -230,10 +227,11 @@ def test_xpath_validation(client, live_server):
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    time.sleep(2)

    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        data={"include_filters": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"is not a valid XPath expression" in res.data
@@ -242,7 +240,7 @@ def test_xpath_validation(client, live_server):


# actually only really used by the distill.io importer, but could be handy too
def test_check_with_prefix_css_filter(client, live_server):
def test_check_with_prefix_include_filters(client, live_server):
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data

@@ -263,7 +261,7 @@ def test_check_with_prefix_css_filter(client, live_server):

    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter":  "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        data={"include_filters":  "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"},
        follow_redirects=True
    )


@@ -86,6 +86,7 @@ def extract_UUID_from_client(client):
def wait_for_all_checks(client):
    # Loop waiting until done..
    attempt=0
+    time.sleep(0.1)
    while attempt < 60:
        time.sleep(1)
        res = client.get(url_for("index"))
@@ -159,5 +160,10 @@ def live_server_setup(live_server):
        ret = " ".join([auth.username, auth.password, auth.type])
        return ret

+    # Just return some GET var
+    @live_server.app.route('/test-return-query', methods=['GET'])
+    def test_return_query():
+        return request.query_string
+
    live_server.start()

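The new /test-return-query route simply echoes the raw query string back as the response body (request.query_string is bytes, e.g. b'foo=bar'), giving tests a way to check that GET parameters survive a fetch round-trip. A hedged usage sketch; the import and preview steps are illustrative, not part of the patch:

    # Hypothetical: watch the echo endpoint, then confirm the query string came back
    test_url = url_for('test_return_query', _external=True) + '?foo=bar'
    client.post(url_for("import_page"), data={"urls": test_url}, follow_redirects=True)
    wait_for_all_checks(client)
    res = client.get(url_for("preview_page", uuid="first"), follow_redirects=True)
    assert b'foo=bar' in res.data
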
@@ -13,9 +13,9 @@ def test_visual_selector_content_ready(client, live_server):
    live_server_setup(live_server)
    time.sleep(1)

-    # Add our URL to the import page, maybe better to use something we control?
-    # We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket
-    test_url = 'https://news.ycombinator.com'
+    # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url
+    test_url = "https://changedetection.io/ci-test/test-runjs.html"

    res = client.post(
        url_for("form_quick_watch_add"),
        data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
@@ -25,13 +25,27 @@ def test_visual_selector_content_ready(client, live_server):

    res = client.post(
        url_for("edit_page", uuid="first", unpause_on_save=1),
-        data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"},
+        data={
+              "url": test_url,
+              "tag": "",
+              "headers": "",
+              'fetch_backend': "html_webdriver",
+              'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();'
+        },
        follow_redirects=True
    )
    assert b"unpaused" in res.data
-    time.sleep(1)
+    wait_for_all_checks(client)
    uuid = extract_UUID_from_client(client)

+    # Check the JS execute code before extract worked
+    res = client.get(
+        url_for("preview_page", uuid="first"),
+        follow_redirects=True
+    )
+    assert b'I smell JavaScript' in res.data

    assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
    assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"

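The new webdriver_js_execute_code value is user-supplied JavaScript that the browser fetcher runs against the page before content is extracted, which is why clicking button[name=test-button] on the CI test page can make 'I smell JavaScript' appear in the preview. A hedged sketch of the idea using Playwright's sync API; fetch_with_js and its arguments are illustrative, not the project's real fetcher:

    # Minimal sketch: run the stored JS snippet in-page, then read the DOM
    def fetch_with_js(page, watch):
        page.goto(watch['url'])
        js = watch.get('webdriver_js_execute_code')
        if js:
            # Execute the user-supplied JS before extraction,
            # e.g. clicking the button that reveals the text under test
            page.evaluate(js)
        return page.content()
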
@@ -4,7 +4,7 @@ import queue
import time

from changedetectionio import content_fetcher
-from changedetectionio.html_tools import FilterNotFoundInResponse
+from changedetectionio.fetch_site_status import FilterNotFoundInResponse

# A single update worker
#
@@ -91,8 +91,8 @@ class update_worker(threading.Thread):
            return

        n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
-                    'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
-                        watch['css_filter'],
+                    'notification_body': "Your configured CSS/xPath filters of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
+                        ", ".join(watch['include_filters']),
                        threshold),
                    'notification_format': 'text'}

@@ -189,7 +189,7 @@ class update_worker(threading.Thread):
                        if not self.datastore.data['watching'].get(uuid):
                            continue

-                        err_text = "Warning, filter '{}' not found".format(str(e))
+                        err_text = "Warning, no filters were found, no change detection ran."
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                           # So that we get a trigger when the content is added again
                                                                           'previous_md5': ''})
@@ -282,10 +282,13 @@ class update_worker(threading.Thread):
                            self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})


                    if self.datastore.data['watching'].get(uuid):
                        # Always record that we atleast tried
+                        count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1
                        self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
-                                                                       'last_checked': round(time.time())})
+                                                                           'last_checked': round(time.time()),
+                                                                           'check_count': count
+                                                                           })

                        # Always save the screenshot if it's available
                        if update_handler.screenshot:

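One detail worth noting in the notification hunk: the body mixes str.format() placeholders with notification tokens, and the doubled braces are what keep the two apart, since format() collapses '{{' into a literal '{'. A quick illustration with placeholder filter values:

    # '{}' is filled immediately; '{{watch_url}}' survives as the '{watch_url}' token
    body = "Your configured CSS/xPath filters of '{}' for {{watch_url}} did not appear".format(
        ", ".join(['#news', 'div.price']))
    print(body)
    # -> Your configured CSS/xPath filters of '#news, div.price' for {watch_url} did not appear
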
@@ -6,6 +6,8 @@ services:
      hostname: changedetection
      volumes:
        - changedetection-data:/datastore
+# Configurable proxy list support, see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#proxy-list-support
+#        - ./proxies.json:/datastore/proxies.json

  #    environment:
  #        Default listening port, can also be changed with the -p option
@@ -43,6 +45,9 @@ services:
  #        Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
  #        More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
  #      - USE_X_SETTINGS=1
+  #
+  #        Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname.
+  #      - HIDE_REFERER=true

      # Comment out ports: when using behind a reverse proxy , enable networks: etc.
      ports:
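
The newly referenced proxies.json drives the proxy-list feature described in the linked wiki page. A minimal sketch of generating one, assuming the documented shape of named entries each carrying a label and url; the proxy names, credentials and addresses below are placeholders:

    import json

    proxies = {
        "proxy-one": {"label": "Proxy one", "url": "http://user:pass@proxy-one.example.com:3128"},
        "socks-tunnel": {"label": "SOCKS via local tunnel", "url": "socks5://127.0.0.1:1080"},
    }

    # Write it where the commented-out volume mount above expects it
    with open("proxies.json", "w") as f:
        json.dump(proxies, f, indent=2)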

BIN docs/proxy-example.jpg (new file, binary not shown — 46 KiB)

@@ -1,31 +1,36 @@
-flask~= 2.0
+flask~=2.0
flask_wtf
eventlet>=0.31.0
validators
-timeago ~=1.0
-inscriptis ~= 2.2
-feedgen ~= 0.9
-flask-login ~= 0.5
+timeago~=1.0
+inscriptis~=2.2
+feedgen~=0.9
+flask-login~=0.5
flask_restful
pytz

# Set these versions together to avoid a RequestsDependencyWarning
-requests[socks] ~= 2.26
-urllib3 > 1.26
-chardet > 2.3.0
+# >= 2.26 also adds Brotli support if brotli is installed
+brotli~=1.0
+requests[socks] ~=2.28

-wtforms ~= 3.0
-jsonpath-ng ~= 1.5.3
+urllib3>1.26
+chardet>2.3.0

+wtforms~=3.0
+jsonpath-ng~=1.5.3

+# jq not available on Windows so must be installed manually

# Notification library
-apprise ~= 1.0.0
+apprise~=1.1.0

# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
paho-mqtt

# Pinned version of cryptography otherwise
# ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly
-cryptography ~= 3.4
+cryptography~=3.4

# Used for CSS filtering
bs4
@@ -34,11 +39,20 @@ bs4
lxml

# 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0
-selenium ~= 4.1.0
+selenium~=4.1.0

# https://stackoverflow.com/questions/71652965/importerror-cannot-import-name-safe-str-cmp-from-werkzeug-security/71653849#71653849
# ImportError: cannot import name 'safe_str_cmp' from 'werkzeug.security'
# need to revisit flask login versions
-werkzeug ~= 2.0.0
+werkzeug~=2.0.0

+# Templating, so far just in the URLs but in the future can be for the notifications also
+jinja2~=3.1
+jinja2-time

+# https://peps.python.org/pep-0508/#environment-markers
+# https://github.com/dgtlmoon/changedetection.io/pull/1009
+jq~=1.3 ;python_version >= "3.8" and sys_platform == "linux"

# playwright is installed at Dockerfile build time because it's not available on all platforms

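The jq line above uses a PEP 508 environment marker, so pip only installs it on Linux under Python 3.8+ and silently skips it elsewhere, which is why the comment says Windows users must install it manually. A small sketch of how such a marker evaluates, using the packaging library that pip itself vendors:

    from packaging.markers import Marker

    marker = Marker('python_version >= "3.8" and sys_platform == "linux"')
    # True on Linux with Python 3.8+, False elsewhere (e.g. Windows),
    # so pip skips the jq requirement instead of failing on it
    print(marker.evaluate())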