Compare commits
	
		
			165 Commits
		
	
	
		
			windows-mi
			...
			3241-brows
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | e9d0362d01 | ||
|   | abac660bac | ||
|   | 26de64d873 | ||
|   | 79d9a8ca28 | ||
|   | 5c391fbcad | ||
|   | d7e24f64a5 | ||
|   | d6427d823f | ||
|   | 47eb874f47 | ||
|   | 37019355fd | ||
|   | a8e7f8236e | ||
|   | 2414b61fcb | ||
|   | a63ffa89b1 | ||
|   | 59e93c29d0 | ||
|   | d7173bb96e | ||
|   | d544e11a20 | ||
|   | 7f0c19c61c | ||
|   | 30e84f1030 | ||
|   | d5af91d8f7 | ||
|   | 4b18c633ba | ||
|   | 08728d7d03 | ||
|   | 73f3beda00 | ||
|   | 7b8d335c43 | ||
|   | ba0b6071e6 | ||
|   | a6603d5ad6 | ||
|   | 26833781a7 | ||
|   | f3ed9bdbb5 | ||
|   | 0f65178190 | ||
|   | a58fc82575 | ||
|   | 2575c03ae0 | ||
|   | 9b7372fff0 | ||
|   | fcd6ebe0ee | ||
|   | c162ec9d52 | ||
|   | bb7f7f473b | ||
|   | a9ca511004 | ||
|   | 8df61f5eaa | ||
|   | 162f573967 | ||
|   | eada0ef08d | ||
|   | f57bc10973 | ||
|   | d2e8f822d6 | ||
|   | 5fd8200fd9 | ||
|   | d0da8c9825 | ||
|   | fd7574d21b | ||
|   | c70706a27b | ||
|   | 968c364999 | ||
|   | 031cb76b7d | ||
|   | af568d064c | ||
|   | a75f57de43 | ||
|   | 72a1c3dda1 | ||
|   | ffde79ecac | ||
|   | 66ad43b2df | ||
|   | 6b0e56ca80 | ||
|   | 5a2d84d8b4 | ||
|   | a941156f26 | ||
|   | a1fdeeaa29 | ||
|   | 40ea2604a7 | ||
|   | ceda526093 | ||
|   | 4197254c53 | ||
|   | a0b7efb436 | ||
|   | 5f5e8ede6c | ||
|   | 52ca855a29 | ||
|   | 079efd0a85 | ||
|   | 3a583a4e5d | ||
|   | cfb4decf67 | ||
|   | 8067d5170b | ||
|   | 5551acf67d | ||
|   | 45a030bac6 | ||
|   | 96dc49e229 | ||
|   | 5f43d988a3 | ||
|   | 4269079c54 | ||
|   | cdfb3f206c | ||
|   | 9f326783e5 | ||
|   | 4e6e680d79 | ||
|   | 1378b5b2ff | ||
|   | 456c6e3f58 | ||
|   | 61be7f68db | ||
|   | 0e38a3c881 | ||
|   | 2c630e9853 | ||
|   | 786e0d1fab | ||
|   | 78b7aee512 | ||
|   | 9d9d01863a | ||
|   | 108cdf84a5 | ||
|   | 8c6f6f1578 | ||
|   | df4ffaaff8 | ||
|   | d522c65e50 | ||
|   | c3b2a8b019 | ||
|   | 28d3151090 | ||
|   | 2a1c832f8d | ||
|   | 0170adb171 | ||
|   | cb62404b8c | ||
|   | 8f9c46bd3f | ||
|   | 97291ce6d0 | ||
|   | f689e5418e | ||
|   | f751f0b0ef | ||
|   | ea9ba3bb2e | ||
|   | c7ffebce2a | ||
|   | 54b7c070f7 | ||
|   | 6c1b687cd1 | ||
|   | e850540a91 | ||
|   | d4bc9dfc50 | ||
|   | f26ea55e9c | ||
|   | b53e1985ac | ||
|   | 302ef80d95 | ||
|   | 5b97c29714 | ||
|   | 64075c87ee | ||
|   | d58a71cffc | ||
|   | 036b006226 | ||
|   | f29f89d078 | ||
|   | 289f118581 | ||
|   | 10b2bbea83 | ||
|   | 32d110b92f | ||
|   | 860a5f5c1a | ||
|   | 70a18ee4b5 | ||
|   | 73189672c3 | ||
|   | 7e7d5dc383 | ||
|   | 1c2cfc37aa | ||
|   | 0634fe021d | ||
|   | 04934b6b3b | ||
|   | ff00417bc5 | ||
|   | 849c5b2293 | ||
|   | 4bf560256b | ||
|   | 7903b03a0c | ||
|   | 5e7c0880c1 | ||
|   | 957aef4ff3 | ||
|   | 8e9a83d8f4 | ||
|   | 5961838143 | ||
|   | 8cf4a8128b | ||
|   | 24c3bfe5ad | ||
|   | bdd9760f3c | ||
|   | e37467f649 | ||
|   | d42fdf0257 | ||
|   | 939fa86582 | ||
|   | b87c92b9e0 | ||
|   | 4d5535d72c | ||
|   | ad08219d03 | ||
|   | 82211eef82 | ||
|   | 5d9380609c | ||
|   | a8b3918fca | ||
|   | e83fb37fb6 | ||
|   | 6b99afe0f7 | ||
|   | 09ebc6ec63 | ||
|   | 6b1065502e | ||
|   | d4c470984a | ||
|   | 55da48f719 | ||
|   | dbd4adf23a | ||
|   | b1e700b3ff | ||
|   | 1c61b5a623 | ||
|   | e799a1cdcb | ||
|   | 938065db6f | ||
|   | 4f2d38ff49 | ||
|   | 8960f401b7 | ||
|   | 1c1f1c6f6b | ||
|   | a2a98811a5 | ||
|   | 5a0ef8fc01 | ||
|   | d90de0851d | ||
|   | 360b4f0d8b | ||
|   | 6fc04d7f1c | ||
|   | 66fb05527b | ||
|   | 202e47d728 | ||
|   | d67d396b88 | ||
|   | 05f54f0ce6 | ||
|   | 6adf10597e | ||
|   | 4419bc0e61 | ||
|   | f7e9846c9b | ||
|   | 5dea5e1def | ||
|   | 0fade0a473 | 
| @@ -29,3 +29,35 @@ venv/ | ||||
|  | ||||
| # Visual Studio | ||||
| .vscode/ | ||||
|  | ||||
| # Test and development files | ||||
| test-datastore/ | ||||
| tests/ | ||||
| docs/ | ||||
| *.md | ||||
| !README.md | ||||
|  | ||||
| # Temporary and log files | ||||
| *.log | ||||
| *.tmp | ||||
| tmp/ | ||||
| temp/ | ||||
|  | ||||
| # Training data and large files | ||||
| train-data/ | ||||
| works-data/ | ||||
|  | ||||
| # Container files | ||||
| Dockerfile* | ||||
| docker-compose*.yml | ||||
| .dockerignore | ||||
|  | ||||
| # Development certificates and keys | ||||
| *.pem | ||||
| *.key | ||||
| *.crt | ||||
| profile_output.prof | ||||
|  | ||||
| # Large binary files that shouldn't be in container | ||||
| *.pdf | ||||
| chrome.json | ||||
							
								
								
									
										23
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
								
							
							
						
						| @@ -2,32 +2,33 @@ | ||||
| # Test that we can still build on Alpine (musl modified libc https://musl.libc.org/) | ||||
| # Some packages wont install via pypi because they dont have a wheel available under this architecture. | ||||
|  | ||||
| FROM ghcr.io/linuxserver/baseimage-alpine:3.18 | ||||
| FROM ghcr.io/linuxserver/baseimage-alpine:3.21 | ||||
| ENV PYTHONUNBUFFERED=1 | ||||
|  | ||||
| COPY requirements.txt /requirements.txt | ||||
|  | ||||
| RUN \ | ||||
|   apk add --update --no-cache --virtual=build-dependencies \ | ||||
|  apk add --update --no-cache --virtual=build-dependencies \ | ||||
|     build-base \ | ||||
|     cargo \ | ||||
|     g++ \ | ||||
|     gcc \ | ||||
|     git \ | ||||
|     jpeg-dev \ | ||||
|     libc-dev \ | ||||
|     libffi-dev \ | ||||
|     libjpeg \ | ||||
|     libxslt-dev \ | ||||
|     make \ | ||||
|     openssl-dev \ | ||||
|     py3-wheel \ | ||||
|     python3-dev \ | ||||
|     zip \ | ||||
|     zlib-dev && \ | ||||
|   apk add --update --no-cache \ | ||||
|     libjpeg \ | ||||
|     libxslt \ | ||||
|     python3 \ | ||||
|     py3-pip && \ | ||||
|     nodejs \ | ||||
|     poppler-utils \ | ||||
|     python3 && \ | ||||
|   echo "**** pip3 install test of changedetection.io ****" && \ | ||||
|   pip3 install -U pip wheel setuptools && \ | ||||
|   pip3 install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.18/ -r /requirements.txt && \ | ||||
|   python3 -m venv /lsiopy  && \ | ||||
|   pip install -U pip wheel setuptools && \ | ||||
|   pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \ | ||||
|   apk del --purge \ | ||||
|     build-dependencies | ||||
|   | ||||
							
								
								
									
										27
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						| @@ -103,6 +103,27 @@ jobs: | ||||
| #          provenance: false | ||||
|  | ||||
|       # A new tagged release is required, which builds :tag and :latest | ||||
|       - name: Debug release info | ||||
|         if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') | ||||
|         run: | | ||||
|           echo "Release tag: ${{ github.event.release.tag_name }}" | ||||
|           echo "Github ref: ${{ github.ref }}" | ||||
|           echo "Github ref name: ${{ github.ref_name }}" | ||||
|            | ||||
|       - name: Docker meta :tag | ||||
|         if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') | ||||
|         uses: docker/metadata-action@v5 | ||||
|         id: meta | ||||
|         with: | ||||
|             images: | | ||||
|                 ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io | ||||
|                 ghcr.io/dgtlmoon/changedetection.io | ||||
|             tags: | | ||||
|                 type=semver,pattern={{version}},value=${{ github.event.release.tag_name }} | ||||
|                 type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }} | ||||
|                 type=semver,pattern={{major}},value=${{ github.event.release.tag_name }} | ||||
|                 type=raw,value=latest | ||||
|  | ||||
|       - name: Build and push :tag | ||||
|         id: docker_build_tag_release | ||||
|         if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') | ||||
| @@ -111,11 +132,7 @@ jobs: | ||||
|           context: ./ | ||||
|           file: ./Dockerfile | ||||
|           push: true | ||||
|           tags: | | ||||
|             ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }} | ||||
|             ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }} | ||||
|             ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest | ||||
|             ghcr.io/dgtlmoon/changedetection.io:latest | ||||
|           tags: ${{ steps.meta.outputs.tags }} | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 | ||||
|           cache-from: type=gha | ||||
|           cache-to: type=gha,mode=max | ||||
|   | ||||
							
								
								
									
										7
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						| @@ -45,9 +45,12 @@ jobs: | ||||
|     - name: Test that the basic pip built package runs without error | ||||
|       run: | | ||||
|         set -ex | ||||
|         sudo pip3 install --upgrade pip  | ||||
|         pip3 install dist/changedetection.io*.whl | ||||
|         ls -alR  | ||||
|          | ||||
|         # Find and install the first .whl file | ||||
|         find dist -type f -name "*.whl" -exec pip3 install {} \; -quit | ||||
|         changedetection.io -d /tmp -p 10000 & | ||||
|          | ||||
|         sleep 3 | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null | ||||
|   | ||||
							
								
								
									
										42
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						| @@ -23,8 +23,28 @@ on: | ||||
|   # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing | ||||
|   # @todo: some kind of path filter for requirements.txt and Dockerfile | ||||
| jobs: | ||||
|   test-container-build: | ||||
|   builder: | ||||
|     name: Build ${{ matrix.platform }} (${{ matrix.dockerfile == './Dockerfile' && 'main' || 'alpine' }}) | ||||
|     runs-on: ubuntu-latest | ||||
|     strategy: | ||||
|       matrix: | ||||
|         include: | ||||
|           # Main Dockerfile platforms | ||||
|           - platform: linux/amd64 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm64 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm/v7 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm/v8 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm64/v8 | ||||
|             dockerfile: ./Dockerfile | ||||
|           # Alpine Dockerfile platforms (musl via alpine check) | ||||
|           - platform: linux/amd64 | ||||
|             dockerfile: ./.github/test/Dockerfile-alpine | ||||
|           - platform: linux/arm64 | ||||
|             dockerfile: ./.github/test/Dockerfile-alpine | ||||
|     steps: | ||||
|         - uses: actions/checkout@v4 | ||||
|         - name: Set up Python 3.11 | ||||
| @@ -47,24 +67,14 @@ jobs: | ||||
|             version: latest | ||||
|             driver-opts: image=moby/buildkit:master | ||||
|  | ||||
|         # https://github.com/dgtlmoon/changedetection.io/pull/1067 | ||||
|         # Check we can still build under alpine/musl | ||||
|         - name: Test that the docker containers can build (musl via alpine check) | ||||
|           id: docker_build_musl | ||||
|           uses: docker/build-push-action@v6 | ||||
|           with: | ||||
|             context: ./ | ||||
|             file: ./.github/test/Dockerfile-alpine | ||||
|             platforms: linux/amd64,linux/arm64 | ||||
|  | ||||
|         - name: Test that the docker containers can build | ||||
|         - name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }}) | ||||
|           id: docker_build | ||||
|           uses: docker/build-push-action@v6 | ||||
|           # https://github.com/docker/build-push-action#customizing | ||||
|           with: | ||||
|             context: ./ | ||||
|             file: ./Dockerfile | ||||
|             platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 | ||||
|             cache-from: type=local,src=/tmp/.buildx-cache | ||||
|             cache-to: type=local,dest=/tmp/.buildx-cache | ||||
|             file: ${{ matrix.dockerfile }} | ||||
|             platforms: ${{ matrix.platform }} | ||||
|             cache-from: type=gha | ||||
|             cache-to: type=gha,mode=max | ||||
|  | ||||
|   | ||||
							
								
								
									
										16
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						| @@ -8,13 +8,13 @@ jobs: | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - name: Lint with flake8 | ||||
|       - name: Lint with Ruff | ||||
|         run: | | ||||
|           pip3 install flake8 | ||||
|           # stop the build if there are Python syntax errors or undefined names | ||||
|           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics | ||||
|           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide | ||||
|           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics | ||||
|           pip install ruff | ||||
|           # Check for syntax errors and undefined names | ||||
|           ruff check . --select E9,F63,F7,F82 | ||||
|           # Complete check with errors treated as warnings | ||||
|           ruff check . --exit-zero | ||||
|  | ||||
|   test-application-3-10: | ||||
|     needs: lint-code | ||||
| @@ -28,7 +28,6 @@ jobs: | ||||
|     uses: ./.github/workflows/test-stack-reusable-workflow.yml | ||||
|     with: | ||||
|       python-version: '3.11' | ||||
|       skip-pypuppeteer: true | ||||
|  | ||||
|   test-application-3-12: | ||||
|     needs: lint-code | ||||
| @@ -42,5 +41,4 @@ jobs: | ||||
|     uses: ./.github/workflows/test-stack-reusable-workflow.yml | ||||
|     with: | ||||
|       python-version: '3.13' | ||||
|       skip-pypuppeteer: true | ||||
|        | ||||
|       skip-pypuppeteer: true | ||||
| @@ -7,7 +7,7 @@ on: | ||||
|         description: 'Python version to use' | ||||
|         required: true | ||||
|         type: string | ||||
|         default: '3.10' | ||||
|         default: '3.11' | ||||
|       skip-pypuppeteer: | ||||
|         description: 'Skip PyPuppeteer (not supported in 3.11/3.12)' | ||||
|         required: false | ||||
| @@ -64,14 +64,16 @@ jobs: | ||||
|           echo "Running processes in docker..." | ||||
|           docker ps | ||||
|  | ||||
|       - name: Test built container with Pytest (generally as requests/plaintext fetching) | ||||
|       - name: Run Unit Tests | ||||
|         run: | | ||||
|           # Unit tests | ||||
|           echo "run test with unittest" | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff' | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model' | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security' | ||||
|            | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver' | ||||
|  | ||||
|       - name: Test built container with Pytest (generally as requests/plaintext fetching) | ||||
|         run: | | ||||
|           # All tests | ||||
|           echo "run test with pytest" | ||||
|           # The default pytest logger_level is TRACE | ||||
| @@ -84,10 +86,10 @@ jobs: | ||||
|         run: | | ||||
|           # Playwright via Sockpuppetbrowser fetch | ||||
|           # tests/visualselector/test_fetch_data.py will do browser steps   | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|  | ||||
|  | ||||
|       - name: Playwright and SocketPuppetBrowser - Headers and requests | ||||
| @@ -170,8 +172,8 @@ jobs: | ||||
|           curl --retry-connrefused --retry 6  -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|  | ||||
|           # Check whether TRACE log is enabled. | ||||
|           # Also, check whether TRACE is came from STDERR | ||||
|           docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1 | ||||
|           # Also, check whether TRACE came from STDOUT | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1 | ||||
|           # Check whether DEBUG is came from STDOUT | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 | ||||
|  | ||||
|   | ||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						| @@ -16,6 +16,7 @@ dist/ | ||||
| .env | ||||
| .venv/ | ||||
| venv/ | ||||
| .python-version | ||||
|  | ||||
| # IDEs | ||||
| .idea | ||||
|   | ||||
							
								
								
									
										9
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,9 @@ | ||||
| repos: | ||||
|   - repo: https://github.com/astral-sh/ruff-pre-commit | ||||
|     rev: v0.11.2 | ||||
|     hooks: | ||||
|       # Lint (and apply safe fixes) | ||||
|       - id: ruff | ||||
|         args: [--fix] | ||||
|       # Fomrat | ||||
|       - id: ruff-format | ||||
							
								
								
									
										48
									
								
								.ruff.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,48 @@ | ||||
| # Minimum supported version | ||||
| target-version = "py310" | ||||
|  | ||||
| # Formatting options | ||||
| line-length = 100 | ||||
| indent-width = 4 | ||||
|  | ||||
| exclude = [ | ||||
|     "__pycache__", | ||||
|     ".eggs", | ||||
|     ".git", | ||||
|     ".tox", | ||||
|     ".venv", | ||||
|     "*.egg-info", | ||||
|     "*.pyc", | ||||
| ] | ||||
|  | ||||
| [lint] | ||||
| # https://docs.astral.sh/ruff/rules/ | ||||
| select = [ | ||||
|     "B", # flake8-bugbear | ||||
|     "B9", | ||||
|     "C",  | ||||
|     "E", # pycodestyle | ||||
|     "F", # Pyflakes | ||||
|     "I", # isort | ||||
|     "N", # pep8-naming | ||||
|     "UP", # pyupgrade | ||||
|     "W", # pycodestyle | ||||
| ] | ||||
| ignore = [ | ||||
|     "B007", # unused-loop-control-variable | ||||
|     "B909", # loop-iterator-mutation | ||||
|     "E203", # whitespace-before-punctuation | ||||
|     "E266", # multiple-leading-hashes-for-block-comment | ||||
|     "E501", # redundant-backslash | ||||
|     "F403", # undefined-local-with-import-star | ||||
|     "N802", # invalid-function-name | ||||
|     "N806", # non-lowercase-variable-in-function | ||||
|     "N815", # mixed-case-variable-in-class-scope | ||||
| ] | ||||
|  | ||||
| [lint.mccabe] | ||||
| max-complexity = 12 | ||||
|  | ||||
| [format] | ||||
| indent-style = "space" | ||||
| quote-style = "preserve" | ||||
							
								
								
									
										22
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						| @@ -1,8 +1,5 @@ | ||||
| # pip dependencies install stage | ||||
|  | ||||
| # @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py | ||||
| #        If you know how to fix it, please do! and test it for both 3.10 and 3.11 | ||||
|  | ||||
| ARG PYTHON_VERSION=3.11 | ||||
|  | ||||
| FROM python:${PYTHON_VERSION}-slim-bookworm AS builder | ||||
| @@ -26,13 +23,24 @@ WORKDIR /install | ||||
|  | ||||
| COPY requirements.txt /requirements.txt | ||||
|  | ||||
| # --extra-index-url https://www.piwheels.org/simple  is for cryptography module to be prebuilt (or rustc etc needs to be installed) | ||||
| RUN pip install --extra-index-url https://www.piwheels.org/simple  --target=/dependencies -r /requirements.txt | ||||
| # Use cache mounts and multiple wheel sources for faster ARM builds | ||||
| ENV PIP_CACHE_DIR=/tmp/pip-cache | ||||
| RUN --mount=type=cache,target=/tmp/pip-cache \ | ||||
|     pip install \ | ||||
|     --extra-index-url https://www.piwheels.org/simple \ | ||||
|     --extra-index-url https://pypi.anaconda.org/ARM-software/simple \ | ||||
|     --cache-dir=/tmp/pip-cache \ | ||||
|     --target=/dependencies \ | ||||
|     -r /requirements.txt | ||||
|  | ||||
| # Playwright is an alternative to Selenium | ||||
| # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported) | ||||
| RUN pip install --target=/dependencies playwright~=1.48.0 \ | ||||
| RUN --mount=type=cache,target=/tmp/pip-cache \ | ||||
|     pip install \ | ||||
|     --cache-dir=/tmp/pip-cache \ | ||||
|     --target=/dependencies \ | ||||
|     playwright~=1.48.0 \ | ||||
|     || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." | ||||
|  | ||||
| # Final image stage | ||||
| @@ -71,7 +79,7 @@ COPY changedetection.py /app/changedetection.py | ||||
| # Github Action test purpose(test-only.yml). | ||||
| # On production, it is effectively LOGGER_LEVEL=''. | ||||
| ARG LOGGER_LEVEL='' | ||||
| ENV LOGGER_LEVEL "$LOGGER_LEVEL" | ||||
| ENV LOGGER_LEVEL="$LOGGER_LEVEL" | ||||
|  | ||||
| WORKDIR /app | ||||
| CMD ["python", "./changedetection.py", "-d", "/datastore"] | ||||
|   | ||||
| @@ -1,9 +1,11 @@ | ||||
| recursive-include changedetectionio/api * | ||||
| recursive-include changedetectionio/apprise_plugin * | ||||
| recursive-include changedetectionio/blueprint * | ||||
| recursive-include changedetectionio/content_fetchers * | ||||
| recursive-include changedetectionio/conditions * | ||||
| recursive-include changedetectionio/model * | ||||
| recursive-include changedetectionio/notification * | ||||
| recursive-include changedetectionio/processors * | ||||
| recursive-include changedetectionio/realtime * | ||||
| recursive-include changedetectionio/static * | ||||
| recursive-include changedetectionio/templates * | ||||
| recursive-include changedetectionio/tests * | ||||
|   | ||||
							
								
								
									
										10
									
								
								README.md
									
									
									
									
									
								
							
							
						
						| @@ -89,7 +89,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W | ||||
| #### Key Features | ||||
|  | ||||
| - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions! | ||||
| - Target elements with xPath(1.0) and CSS Selectors, Easily monitor complex JSON with JSONPath or jq | ||||
| - Target elements with xPath 1 and xPath 2, CSS Selectors, Easily monitor complex JSON with JSONPath or jq | ||||
| - Switch between fast non-JS and Chrome JS based "fetchers" | ||||
| - Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums) | ||||
| - Easily specify how often a site should be checked | ||||
| @@ -105,6 +105,12 @@ We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) glob | ||||
|  | ||||
| Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/ | ||||
|  | ||||
| ### Conditional web page changes | ||||
|  | ||||
| Easily [configure conditional actions](https://changedetection.io/tutorial/conditional-actions-web-page-changes), for example, only trigger when a price is above or below a preset amount, or [when a web page includes (or does not include) a keyword](https://changedetection.io/tutorial/how-monitor-keywords-any-website) | ||||
|  | ||||
| <img src="./docs/web-page-change-conditions.png" style="max-width:80%;" alt="Conditional web page changes"  title="Conditional web page changes"  /> | ||||
|  | ||||
| ### Schedule web page watches in any timezone, limit by day of week and time. | ||||
|  | ||||
| Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours. | ||||
| @@ -120,7 +126,7 @@ Easily add the current web page to your changedetection.io tool, simply install | ||||
|  | ||||
| [<img src="./docs/chrome-extension-screenshot.png" style="max-width:80%;" alt="Chrome Extension to easily add the current web-page to detect a change."  title="Chrome Extension to easily add the current web-page to detect a change."  />](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop) | ||||
|  | ||||
| [Goto the Chrome Webstore to download the extension.](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop) | ||||
| [Goto the Chrome Webstore to download the extension.](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop) ( Or check out the [GitHub repo](https://github.com/dgtlmoon/changedetection.io-browser-extension) )  | ||||
|  | ||||
| ## Installation | ||||
|  | ||||
|   | ||||
| @@ -3,4 +3,6 @@ | ||||
| # Only exists for direct CLI usage | ||||
|  | ||||
| import changedetectionio | ||||
| changedetectionio.main() | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     changedetectionio.main() | ||||
|   | ||||
							
								
								
									
										98
									
								
								changedetectionio/PLUGIN_README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,98 @@ | ||||
| # Creating Plugins for changedetection.io | ||||
|  | ||||
| This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways. | ||||
|  | ||||
| ## Plugin Types | ||||
|  | ||||
| ### UI Stats Tab Plugins | ||||
|  | ||||
| These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch. | ||||
|  | ||||
| #### Creating a UI Stats Tab Plugin | ||||
|  | ||||
| 1. Create a Python file in a directory that will be loaded by the plugin system. | ||||
|  | ||||
| 2. Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook: | ||||
|  | ||||
| ```python | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add custom content to the stats tab""" | ||||
|     # Calculate or retrieve your stats | ||||
|     my_stat = calculate_something(watch) | ||||
|      | ||||
|     # Return HTML content as a string | ||||
|     html = f""" | ||||
|     <div class="my-plugin-stats"> | ||||
|         <h4>My Plugin Statistics</h4> | ||||
|         <p>My statistic: {my_stat}</p> | ||||
|     </div> | ||||
|     """ | ||||
|     return html | ||||
| ``` | ||||
|  | ||||
| 3. The HTML you return will be included in the Stats tab. | ||||
|  | ||||
| ## Plugin Loading | ||||
|  | ||||
| Plugins can be loaded from: | ||||
|  | ||||
| 1. Built-in plugin directories in the codebase | ||||
| 2. External packages using setuptools entry points | ||||
|  | ||||
| To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`. | ||||
|  | ||||
| ## Example Plugin | ||||
|  | ||||
| Here's a simple example of a plugin that adds a word count statistic to the Stats tab: | ||||
|  | ||||
| ```python | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| def count_words_in_history(watch): | ||||
|     """Count words in the latest snapshot""" | ||||
|     try: | ||||
|         if not watch.history.keys(): | ||||
|             return 0 | ||||
|              | ||||
|         latest_key = list(watch.history.keys())[-1] | ||||
|         latest_content = watch.get_history_snapshot(latest_key) | ||||
|         return len(latest_content.split()) | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error counting words: {str(e)}") | ||||
|         return 0 | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add word count to the Stats tab""" | ||||
|     word_count = count_words_in_history(watch) | ||||
|      | ||||
|     html = f""" | ||||
|     <div class="word-count-stats"> | ||||
|         <h4>Content Analysis</h4> | ||||
|         <table class="pure-table"> | ||||
|             <tbody> | ||||
|                 <tr> | ||||
|                     <td>Word count (latest snapshot)</td> | ||||
|                     <td>{word_count}</td> | ||||
|                 </tr> | ||||
|             </tbody> | ||||
|         </table> | ||||
|     </div> | ||||
|     """ | ||||
|     return html | ||||
| ``` | ||||
|  | ||||
| ## Testing Your Plugin | ||||
|  | ||||
| 1. Place your plugin in one of the directories scanned by the plugin system | ||||
| 2. Restart changedetection.io | ||||
| 3. Go to the Edit page of a watch and check the Stats tab to see your content | ||||
| @@ -2,40 +2,61 @@ | ||||
|  | ||||
| # Read more https://github.com/dgtlmoon/changedetection.io/wiki | ||||
|  | ||||
| __version__ = '0.48.03' | ||||
| __version__ = '0.50.2' | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from json.decoder import JSONDecodeError | ||||
| import os | ||||
| os.environ['EVENTLET_NO_GREENDNS'] = 'yes' | ||||
| import eventlet | ||||
| import eventlet.wsgi | ||||
| import getopt | ||||
| import platform | ||||
| import signal | ||||
| import socket | ||||
|  | ||||
| import sys | ||||
|  | ||||
| # Eventlet completely removed - using threading mode for SocketIO | ||||
| # This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts | ||||
| from changedetectionio import store | ||||
| from changedetectionio.flask_app import changedetection_app | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
| # Only global so we can access it in the signal handler | ||||
| app = None | ||||
| datastore = None | ||||
|  | ||||
| def get_version(): | ||||
|     return __version__ | ||||
|  | ||||
| # Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown | ||||
| def sigshutdown_handler(_signo, _stack_frame): | ||||
|     global app | ||||
|     global datastore | ||||
|     name = signal.Signals(_signo).name | ||||
|     logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown') | ||||
|     datastore.sync_to_json() | ||||
|     logger.success('Sync JSON to disk complete.') | ||||
|     # This will throw a SystemExit exception, because eventlet.wsgi.server doesn't know how to deal with it. | ||||
|     # Solution: move to gevent or other server in the future (#2014) | ||||
|     datastore.stop_thread = True | ||||
|     logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated') | ||||
|      | ||||
|     # Set exit flag immediately to stop all loops | ||||
|     app.config.exit.set() | ||||
|     datastore.stop_thread = True | ||||
|      | ||||
|     # Shutdown workers immediately | ||||
|     try: | ||||
|         from changedetectionio import worker_handler | ||||
|         worker_handler.shutdown_workers() | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error shutting down workers: {str(e)}") | ||||
|      | ||||
|     # Shutdown socketio server fast | ||||
|     from changedetectionio.flask_app import socketio_server | ||||
|     if socketio_server and hasattr(socketio_server, 'shutdown'): | ||||
|         try: | ||||
|             socketio_server.shutdown() | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error shutting down Socket.IO server: {str(e)}") | ||||
|      | ||||
|     # Save data quickly | ||||
|     try: | ||||
|         datastore.sync_to_json() | ||||
|         logger.success('Fast sync to disk complete.') | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error syncing to disk: {str(e)}") | ||||
|      | ||||
|     sys.exit() | ||||
|  | ||||
| def main(): | ||||
| @@ -44,9 +65,9 @@ def main(): | ||||
|  | ||||
|     datastore_path = None | ||||
|     do_cleanup = False | ||||
|     host = '' | ||||
|     host = "0.0.0.0" | ||||
|     ipv6_enabled = False | ||||
|     port = os.environ.get('PORT') or 5000 | ||||
|     port = int(os.environ.get('PORT', 5000)) | ||||
|     ssl_mode = False | ||||
|  | ||||
|     # On Windows, create and use a default path. | ||||
| @@ -105,7 +126,7 @@ def main(): | ||||
|     # Without this, a logger will be duplicated | ||||
|     logger.remove() | ||||
|     try: | ||||
|         log_level_for_stdout = { 'DEBUG', 'SUCCESS' } | ||||
|         log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' } | ||||
|         logger.configure(handlers=[ | ||||
|             {"sink": sys.stdout, "level": logger_level, | ||||
|              "filter" : lambda record: record['level'].name in log_level_for_stdout}, | ||||
| @@ -142,8 +163,26 @@ def main(): | ||||
|  | ||||
|     app = changedetection_app(app_config, datastore) | ||||
|  | ||||
|     # Get the SocketIO instance from the Flask app (created in flask_app.py) | ||||
|     from changedetectionio.flask_app import socketio_server | ||||
|     global socketio | ||||
|     socketio = socketio_server | ||||
|  | ||||
|     signal.signal(signal.SIGTERM, sigshutdown_handler) | ||||
|     signal.signal(signal.SIGINT, sigshutdown_handler) | ||||
|      | ||||
|     # Custom signal handler for memory cleanup | ||||
|     def sigusr_clean_handler(_signo, _stack_frame): | ||||
|         from changedetectionio.gc_cleanup import memory_cleanup | ||||
|         logger.info('SIGUSR1 received: Running memory cleanup') | ||||
|         return memory_cleanup(app) | ||||
|  | ||||
|     # Register the SIGUSR1 signal handler | ||||
|     # Only register the signal handler if running on Linux | ||||
|     if platform.system() == "Linux": | ||||
|         signal.signal(signal.SIGUSR1, sigusr_clean_handler) | ||||
|     else: | ||||
|         logger.info("SIGUSR1 handler only registered on Linux, skipped.") | ||||
|  | ||||
|     # Go into cleanup mode | ||||
|     if do_cleanup: | ||||
| @@ -153,10 +192,11 @@ def main(): | ||||
|  | ||||
|  | ||||
|     @app.context_processor | ||||
|     def inject_version(): | ||||
|     def inject_template_globals(): | ||||
|         return dict(right_sticky="v{}".format(datastore.data['version_tag']), | ||||
|                     new_version_available=app.config['NEW_VERSION_AVAILABLE'], | ||||
|                     has_password=datastore.data['settings']['application']['password'] != False | ||||
|                     has_password=datastore.data['settings']['application']['password'] != False, | ||||
|                     socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True) | ||||
|                     ) | ||||
|  | ||||
|     # Monitored websites will not receive a Referer header when a user clicks on an outgoing link. | ||||
| @@ -180,15 +220,21 @@ def main(): | ||||
|         from werkzeug.middleware.proxy_fix import ProxyFix | ||||
|         app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1) | ||||
|  | ||||
|     s_type = socket.AF_INET6 if ipv6_enabled else socket.AF_INET | ||||
|  | ||||
|     if ssl_mode: | ||||
|         # @todo finalise SSL config, but this should get you in the right direction if you need it. | ||||
|         eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), s_type), | ||||
|                                                certfile='cert.pem', | ||||
|                                                keyfile='privkey.pem', | ||||
|                                                server_side=True), app) | ||||
|     # SocketIO instance is already initialized in flask_app.py | ||||
|  | ||||
|     # Launch using SocketIO run method for proper integration (if enabled) | ||||
|     if socketio_server: | ||||
|         if ssl_mode: | ||||
|             socketio.run(app, host=host, port=int(port), debug=False,  | ||||
|                         certfile='cert.pem', keyfile='privkey.pem', allow_unsafe_werkzeug=True) | ||||
|         else: | ||||
|             socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True) | ||||
|     else: | ||||
|         eventlet.wsgi.server(eventlet.listen((host, int(port)), s_type), app) | ||||
|  | ||||
|         # Run Flask app without Socket.IO if disabled | ||||
|         logger.info("Starting Flask app without Socket.IO server") | ||||
|         if ssl_mode: | ||||
|             app.run(host=host, port=int(port), debug=False,  | ||||
|                    ssl_context=('cert.pem', 'privkey.pem')) | ||||
|         else: | ||||
|             app.run(host=host, port=int(port), debug=False) | ||||
|   | ||||
							
								
								
									
										62
									
								
								changedetectionio/api/Import.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,62 @@ | ||||
| import os | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request | ||||
| import validators | ||||
| from . import auth | ||||
|  | ||||
|  | ||||
class Import(Resource):
    """REST resource: bulk-import a newline-separated list of URLs as new watches."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    def post(self):
        """
        @api {post} /api/v1/import Import a list of watched URLs
        @apiDescription Accepts a line-feed separated list of URLs to import, additionally with ?tag_uuids=(tag  id), ?tag=(name), ?proxy={key}, ?dedupe=true (default true) one URL per line.
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/import --data-binary @list-of-sites.txt -H"x-api-key:8a111a21bc2f8f1dd9b9353bbd46049a"
        @apiName Import
        @apiGroup Watch
        @apiSuccess (200) {List} OK List of watch UUIDs added
        @apiSuccess (500) {String} ERR Some other error
        """

        extras = {}

        # Validate the optional ?proxy= choice against the configured proxy list
        # (read the query arg once instead of three times).
        proxy_key = request.args.get('proxy')
        if proxy_key:
            plist = self.datastore.proxy_list
            if proxy_key not in plist:
                return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
            extras['proxy'] = proxy_key

        # Deduplication against existing watches is on unless ?dedupe=false
        dedupe = strtobool(request.args.get('dedupe', 'true'))

        tags = request.args.get('tag')
        tag_uuids = request.args.get('tag_uuids')

        if tag_uuids:
            tag_uuids = tag_uuids.split(',')

        # Request body is plain text: one URL per line.
        urls = request.get_data().decode('utf8').splitlines()
        added = []
        # If hosts that only contain alphanumerics are allowed ("localhost" for example)
        allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
        for url in urls:
            url = url.strip()
            if not len(url):
                continue

            # Abort the whole import on the first invalid URL so the caller can fix the list.
            if not validators.url(url, simple_host=allow_simplehost):
                return f"Invalid or unsupported URL - {url}", 400

            # Skip URLs that are already being watched, unless dedupe was disabled.
            if dedupe and self.datastore.url_exists(url):
                continue

            new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids)
            added.append(new_uuid)

        return added
							
								
								
									
										145
									
								
								changedetectionio/api/Notifications.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,145 @@ | ||||
| from flask_expects_json import expects_json | ||||
| from flask_restful import Resource | ||||
| from . import auth | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request | ||||
| from . import auth | ||||
| from . import schema_create_notification_urls, schema_delete_notification_urls | ||||
|  | ||||
class Notifications(Resource):
    """REST resource: manage the global (application-wide) Apprise notification URL list."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    def get(self):
        """
        @api {get} /api/v1/notifications Return Notification URL List
        @apiDescription Return the Notification URL List from the configuration
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            HTTP/1.0 200
            {
                'notification_urls': ["notification-urls-list"]
            }
        @apiName Get
        @apiGroup Notifications
        """

        # Defensive .get() chain so a partially-initialised settings dict still returns [].
        notification_urls = self.datastore.data.get('settings', {}).get('application', {}).get('notification_urls', [])

        return {
                'notification_urls': notification_urls,
               }, 200

    @auth.check_token
    @expects_json(schema_create_notification_urls)
    def post(self):
        """
        @api {post} /api/v1/notifications Create Notification URLs
        @apiDescription Add one or more notification URLs from the configuration
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/notifications/batch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
        @apiName CreateBatch
        @apiGroup Notifications
        @apiSuccess (201) {Object[]} notification_urls List of added notification URLs
        @apiError (400) {String} Invalid input
        """

        json_data = request.get_json()
        notification_urls = json_data.get("notification_urls", [])

        from wtforms import ValidationError
        try:
            validate_notification_urls(notification_urls)
        except ValidationError as e:
            return str(e), 400

        added_urls = []

        for url in notification_urls:
            clean_url = url.strip()
            # add_notification_url() returns the stored URL, or a falsy value when rejected.
            added_url = self.datastore.add_notification_url(clean_url)
            if added_url:
                added_urls.append(added_url)

        if not added_urls:
            return "No valid notification URLs were added", 400

        return {'notification_urls': added_urls}, 201

    @auth.check_token
    @expects_json(schema_create_notification_urls)
    def put(self):
        """
        @api {put} /api/v1/notifications Replace Notification URLs
        @apiDescription Replace all notification URLs with the provided list (can be empty)
        @apiExample {curl} Example usage:
            curl -X PUT http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
        @apiName Replace
        @apiGroup Notifications
        @apiSuccess (200) {Object[]} notification_urls List of current notification URLs
        @apiError (400) {String} Invalid input
        """
        json_data = request.get_json()
        notification_urls = json_data.get("notification_urls", [])

        # Check the payload shape BEFORE per-URL validation — previously a non-list
        # payload was handed straight to validate_notification_urls().
        if not isinstance(notification_urls, list):
            return "Invalid input format", 400

        from wtforms import ValidationError
        try:
            validate_notification_urls(notification_urls)
        except ValidationError as e:
            return str(e), 400

        clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)]
        self.datastore.data['settings']['application']['notification_urls'] = clean_urls
        self.datastore.needs_write = True

        return {'notification_urls': clean_urls}, 200

    @auth.check_token
    @expects_json(schema_delete_notification_urls)
    def delete(self):
        """
        @api {delete} /api/v1/notifications Delete Notification URLs
        @apiDescription Deletes one or more notification URLs from the configuration
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/notifications -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
        @apiParam {String[]} notification_urls The notification URLs to delete.
        @apiName Delete
        @apiGroup Notifications
        @apiSuccess (204) {String} OK Deleted
        @apiError (400) {String} No matching notification URLs found.
        """

        json_data = request.get_json()
        urls_to_delete = json_data.get("notification_urls", [])
        if not isinstance(urls_to_delete, list):
            abort(400, message="Expected a list of notification URLs.")

        notification_urls = self.datastore.data['settings']['application'].get('notification_urls', [])
        deleted = []

        for url in urls_to_delete:
            clean_url = url.strip()
            if clean_url in notification_urls:
                notification_urls.remove(clean_url)
                deleted.append(clean_url)

        if not deleted:
            abort(400, message="No matching notification URLs found.")

        self.datastore.data['settings']['application']['notification_urls'] = notification_urls
        self.datastore.needs_write = True

        return 'OK', 204
|      | ||||
def validate_notification_urls(notification_urls):
    """Validate a list of Apprise notification URLs.

    Raises wtforms.ValidationError (from the project validator) when any URL
    is not accepted; returns None on success.
    """
    from changedetectionio.forms import ValidateAppRiseServers

    # The validator has a wtforms (form, field) call signature, so build
    # minimal stand-ins that carry only the attributes it reads.
    class DummyForm: pass
    stub_field = type("Field", (object,), {"data": notification_urls, "gettext": lambda self, x: x})()
    ValidateAppRiseServers()(DummyForm(), stub_field)
							
								
								
									
										51
									
								
								changedetectionio/api/Search.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,51 @@ | ||||
| from flask_restful import Resource, abort | ||||
| from flask import request | ||||
| from . import auth | ||||
|  | ||||
class Search(Resource):
    """REST resource: search watches by URL/title text, optionally limited by tag."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    def get(self):
        """
        @api {get} /api/v1/search Search for watches
        @apiDescription Search watches by URL or title text
        @apiExample {curl} Example usage:
            curl "http://localhost:5000/api/v1/search?q=https://example.com/page1" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            curl "http://localhost:5000/api/v1/search?q=https://example.com/page1?tag=Favourites" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            curl "http://localhost:5000/api/v1/search?q=https://example.com?partial=true" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiName Search
        @apiGroup Watch Management
        @apiQuery {String} q Search query to match against watch URLs and titles
        @apiQuery {String} [tag] Optional name of tag to limit results (name not UUID)
        @apiQuery {String} [partial] Allow partial matching of URL query
        @apiSuccess (200) {Object} JSON Object containing matched watches
        """
        from changedetectionio.strtobool import strtobool

        query = request.args.get('q', '').strip()
        tag_limit = request.args.get('tag', '').strip()

        # ?partial= is only honoured when the parameter is actually present.
        if 'partial' in request.args:
            partial = bool(strtobool(request.args.get('partial', '0')))
        else:
            partial = False

        # A search query is mandatory.
        if not query:
            abort(400, message="Search query 'q' parameter is required")

        matching_uuids = self.datastore.search_watches_for_url(query=query, tag_limit=tag_limit, partial=partial)

        # Shape one watch into its API summary dict.
        def summarise(watch):
            return {
                'last_changed': watch.last_changed,
                'last_checked': watch['last_checked'],
                'last_error': watch['last_error'],
                'title': watch['title'],
                'url': watch['url'],
                'viewed': watch.viewed
            }

        return {uuid: summarise(self.datastore.data['watching'].get(uuid))
                for uuid in matching_uuids}, 200
							
								
								
									
										54
									
								
								changedetectionio/api/SystemInfo.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,54 @@ | ||||
| from flask_restful import Resource | ||||
| from . import auth | ||||
|  | ||||
|  | ||||
class SystemInfo(Resource):
    """REST resource: report queue size, overdue watches, uptime, watch count and version."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']
        self.update_q = kwargs['update_q']

    @auth.check_token
    def get(self):
        """
        @api {get} /api/v1/systeminfo Return system info
        @apiDescription Return some info about the current system state
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            HTTP/1.0 200
            {
                'queue_size': 10 ,
                'overdue_watches': ["watch-uuid-list"],
                'uptime': 38344.55,
                'watch_count': 800,
                'version': "0.40.1"
            }
        @apiName Get Info
        @apiGroup System Information
        """
        import time
        overdue_watches = []

        # Check all watches and report which have not been checked but should have been

        for uuid, watch in self.datastore.data.get('watching', {}).items():
            # see if now - last_checked is greater than the time that should have been
            # this is not super accurate (maybe they just edited it) but better than nothing
            t = watch.threshold_seconds()
            if not t:
                # Use the system wide default
                t = self.datastore.threshold_seconds

            # Guard against a missing/None 'last_checked' (never-checked watch)
            # which would otherwise raise a TypeError in the subtraction.
            time_since_check = time.time() - (watch.get('last_checked') or 0)

            # Allow 5 minutes of grace time before we decide it's overdue
            if time_since_check - (5 * 60) > t:
                overdue_watches.append(uuid)
        from changedetectionio import __version__ as main_version
        return {
                   'queue_size': self.update_q.qsize(),
                   'overdue_watches': overdue_watches,
                   'uptime': round(time.time() - self.datastore.start_time, 2),
                   'watch_count': len(self.datastore.data.get('watching', {})),
                   'version': main_version
               }, 200
							
								
								
									
										156
									
								
								changedetectionio/api/Tags.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,156 @@ | ||||
| from flask_expects_json import expects_json | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request | ||||
| from . import auth | ||||
|  | ||||
| # Import schemas from __init__.py | ||||
| from . import schema_tag, schema_create_tag, schema_update_tag | ||||
|  | ||||
|  | ||||
class Tag(Resource):
    """REST resource: operations on a single tag (get/mute, create, update, delete)."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    # Get information about a single tag
    # curl http://localhost:5000/api/v1/tag/<string:uuid>
    @auth.check_token
    def get(self, uuid):
        """
        @api {get} /api/v1/tag/:uuid Single tag - get data or toggle notification muting.
        @apiDescription Retrieve tag information and set notification_muted status
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            curl "http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=muted" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiName Tag
        @apiGroup Tag
        @apiParam {uuid} uuid Tag unique ID.
        @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state
        @apiSuccess (200) {String} OK When muted operation OR full JSON object of the tag
        @apiSuccess (200) {JSON} TagJSON JSON Full JSON object of the tag
        """
        from copy import deepcopy
        # deepcopy so the caller's returned dict can't mutate live settings.
        tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
        if not tag:
            abort(404, message=f'No tag exists with the UUID of {uuid}')

        # NOTE(review): mute toggles are not flagged via needs_write here, unlike
        # put() — persistence presumably relies on the periodic sync; confirm intended.
        if request.args.get('muted', '') == 'muted':
            self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
            return "OK", 200
        elif request.args.get('muted', '') == 'unmuted':
            self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = False
            return "OK", 200

        return tag

    @auth.check_token
    def delete(self, uuid):
        """
        @api {delete} /api/v1/tag/:uuid Delete a tag and remove it from all watches
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
        @apiParam {uuid} uuid Tag unique ID.
        @apiName DeleteTag
        @apiGroup Tag
        @apiSuccess (204) {String} OK Was deleted
        """
        if not self.datastore.data['settings']['application']['tags'].get(uuid):
            abort(400, message='No tag exists with the UUID of {}'.format(uuid))

        # Delete the tag, and any tag reference
        del self.datastore.data['settings']['application']['tags'][uuid]

        # Remove tag from all watches
        for watch_uuid, watch in self.datastore.data['watching'].items():
            if watch.get('tags') and uuid in watch['tags']:
                watch['tags'].remove(uuid)

        return 'OK', 204

    @auth.check_token
    @expects_json(schema_update_tag)
    def put(self, uuid):
        """
        @api {put} /api/v1/tag/:uuid Update tag information
        @apiExample {curl} Example usage:
            Update (PUT)
            curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"title": "New Tag Title"}'

        @apiDescription Updates an existing tag using JSON
        @apiParam {uuid} uuid Tag unique ID.
        @apiName UpdateTag
        @apiGroup Tag
        @apiSuccess (200) {String} OK Was updated
        @apiSuccess (500) {String} ERR Some other error
        """
        tag = self.datastore.data['settings']['application']['tags'].get(uuid)
        if not tag:
            abort(404, message='No tag exists with the UUID of {}'.format(uuid))

        tag.update(request.json)
        self.datastore.needs_write_urgent = True

        return "OK", 200


    @auth.check_token
    # Only cares for {'title': 'xxxx'}
    def post(self):
        """
        @api {post} /api/v1/tag Create a single tag
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/tag -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"title": "Work related"}'
        @apiName Create
        @apiGroup Tag
        @apiSuccess (201) {String} OK Was created
        @apiSuccess (500) {String} ERR Some other error
        """

        json_data = request.get_json()
        title = json_data.get("title",'').strip()


        new_uuid = self.datastore.add_tag(title=title)
        if new_uuid:
            return {'uuid': new_uuid}, 201
        else:
            return "Invalid or unsupported tag", 400
|  | ||||
class Tags(Resource):
    """REST resource: list every configured tag with its basic metadata."""

    def __init__(self, **kwargs):
        # datastore is a black box dependency
        self.datastore = kwargs['datastore']

    @auth.check_token
    def get(self):
        """
        @api {get} /api/v1/tags List tags
        @apiDescription Return list of available tags
        @apiExample {curl} Example usage:
            curl http://localhost:5000/api/v1/tags -H"x-api-key:813031b16330fe25e3780cf0325daa45"
            {
                "cc0cfffa-f449-477b-83ea-0caafd1dc091": {
                    "title": "Tech News",
                    "notification_muted": false,
                    "date_created": 1677103794
                },
                "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
                    "title": "Shopping",
                    "notification_muted": true,
                    "date_created": 1676662819
                }
            }
        @apiName ListTags
        @apiGroup Tag Management
        @apiSuccess (200) {String} OK JSON dict
        """
        all_tags = self.datastore.data['settings']['application']['tags']

        # Project each tag down to the documented summary fields, with defaults
        # for anything missing from older records.
        return {
            uuid: {
                'date_created': tag.get('date_created', 0),
                'notification_muted': tag.get('notification_muted', False),
                'title': tag.get('title', ''),
                'uuid': tag.get('uuid')
            }
            for uuid, tag in all_tags.items()
        }, 200
| @@ -3,26 +3,16 @@ from changedetectionio.strtobool import strtobool | ||||
| 
 | ||||
| from flask_expects_json import expects_json | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio import worker_handler | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request, make_response | ||||
| import validators | ||||
| from . import auth | ||||
| import copy | ||||
| 
 | ||||
| # See docs/README.md for rebuilding the docs/apidoc information | ||||
| # Import schemas from __init__.py | ||||
| from . import schema, schema_create_watch, schema_update_watch | ||||
| 
 | ||||
| from . import api_schema | ||||
| from ..model import watch_base | ||||
| 
 | ||||
| # Build a JSON Schema atleast partially based on our Watch model | ||||
| watch_base_config = watch_base() | ||||
| schema = api_schema.build_watch_json_schema(watch_base_config) | ||||
| 
 | ||||
| schema_create_watch = copy.deepcopy(schema) | ||||
| schema_create_watch['required'] = ['url'] | ||||
| 
 | ||||
| schema_update_watch = copy.deepcopy(schema) | ||||
| schema_update_watch['additionalProperties'] = False | ||||
| 
 | ||||
| class Watch(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
| @@ -58,7 +48,7 @@ class Watch(Resource): | ||||
|             abort(404, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
| 
 | ||||
|         if request.args.get('recheck'): | ||||
|             self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             return "OK", 200 | ||||
|         if request.args.get('paused', '') == 'paused': | ||||
|             self.datastore.data['watching'].get(uuid).pause() | ||||
| @@ -76,6 +66,7 @@ class Watch(Resource): | ||||
|         # Return without history, get that via another API call | ||||
|         # Properties are not returned as a JSON, so add the required props manually | ||||
|         watch['history_n'] = watch.history_n | ||||
|         # attr .last_changed will check for the last written text snapshot on change | ||||
|         watch['last_changed'] = watch.last_changed | ||||
|         watch['viewed'] = watch.viewed | ||||
|         return watch | ||||
| @@ -246,7 +237,7 @@ class CreateWatch(Resource): | ||||
| 
 | ||||
|         new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags) | ||||
|         if new_uuid: | ||||
|             self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) | ||||
|             worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) | ||||
|             return {'uuid': new_uuid}, 201 | ||||
|         else: | ||||
|             return "Invalid or unsupported URL", 400 | ||||
| @@ -284,8 +275,6 @@ class CreateWatch(Resource): | ||||
|         list = {} | ||||
| 
 | ||||
|         tag_limit = request.args.get('tag', '').lower() | ||||
| 
 | ||||
| 
 | ||||
|         for uuid, watch in self.datastore.data['watching'].items(): | ||||
|             # Watch tags by name (replace the other calls?) | ||||
|             tags = self.datastore.get_all_tags_for_watch(uuid=uuid) | ||||
| @@ -303,113 +292,7 @@ class CreateWatch(Resource): | ||||
| 
 | ||||
|         if request.args.get('recheck_all'): | ||||
|             for uuid in self.datastore.data['watching'].keys(): | ||||
|                 self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|                 worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             return {'status': "OK"}, 200 | ||||
| 
 | ||||
|         return list, 200 | ||||
| 
 | ||||
| class Import(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
|         # datastore is a black box dependency | ||||
|         self.datastore = kwargs['datastore'] | ||||
| 
 | ||||
|     @auth.check_token | ||||
|     def post(self): | ||||
|         """ | ||||
|         @api {post} /api/v1/import Import a list of watched URLs | ||||
|         @apiDescription Accepts a line-feed separated list of URLs to import, additionally with ?tag_uuids=(tag  id), ?tag=(name), ?proxy={key}, ?dedupe=true (default true) one URL per line. | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/import --data-binary @list-of-sites.txt -H"x-api-key:8a111a21bc2f8f1dd9b9353bbd46049a" | ||||
|         @apiName Import | ||||
|         @apiGroup Watch | ||||
|         @apiSuccess (200) {List} OK List of watch UUIDs added | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
| 
 | ||||
|         extras = {} | ||||
| 
 | ||||
|         if request.args.get('proxy'): | ||||
|             plist = self.datastore.proxy_list | ||||
|             if not request.args.get('proxy') in plist: | ||||
|                 return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400 | ||||
|             else: | ||||
|                 extras['proxy'] = request.args.get('proxy') | ||||
| 
 | ||||
|         dedupe = strtobool(request.args.get('dedupe', 'true')) | ||||
| 
 | ||||
|         tags = request.args.get('tag') | ||||
|         tag_uuids = request.args.get('tag_uuids') | ||||
| 
 | ||||
|         if tag_uuids: | ||||
|             tag_uuids = tag_uuids.split(',') | ||||
| 
 | ||||
|         urls = request.get_data().decode('utf8').splitlines() | ||||
|         added = [] | ||||
|         allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
|         for url in urls: | ||||
|             url = url.strip() | ||||
|             if not len(url): | ||||
|                 continue | ||||
| 
 | ||||
|             # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|             if not validators.url(url, simple_host=allow_simplehost): | ||||
|                 return f"Invalid or unsupported URL - {url}", 400 | ||||
| 
 | ||||
|             if dedupe and self.datastore.url_exists(url): | ||||
|                 continue | ||||
| 
 | ||||
|             new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags, tag_uuids=tag_uuids) | ||||
|             added.append(new_uuid) | ||||
| 
 | ||||
|         return added | ||||
| 
 | ||||
| class SystemInfo(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
|         # datastore is a black box dependency | ||||
|         self.datastore = kwargs['datastore'] | ||||
|         self.update_q = kwargs['update_q'] | ||||
| 
 | ||||
|     @auth.check_token | ||||
|     def get(self): | ||||
|         """ | ||||
|         @api {get} /api/v1/systeminfo Return system info | ||||
|         @apiDescription Return some info about the current system state | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             HTTP/1.0 200 | ||||
|             { | ||||
|                 'queue_size': 10 , | ||||
|                 'overdue_watches': ["watch-uuid-list"], | ||||
|                 'uptime': 38344.55, | ||||
|                 'watch_count': 800, | ||||
|                 'version': "0.40.1" | ||||
|             } | ||||
|         @apiName Get Info | ||||
|         @apiGroup System Information | ||||
|         """ | ||||
|         import time | ||||
|         overdue_watches = [] | ||||
| 
 | ||||
|         # Check all watches and report which have not been checked but should have been | ||||
| 
 | ||||
|         for uuid, watch in self.datastore.data.get('watching', {}).items(): | ||||
|             # see if now - last_checked is greater than the time that should have been | ||||
|             # this is not super accurate (maybe they just edited it) but better than nothing | ||||
|             t = watch.threshold_seconds() | ||||
|             if not t: | ||||
|                 # Use the system wide default | ||||
|                 t = self.datastore.threshold_seconds | ||||
| 
 | ||||
|             time_since_check = time.time() - watch.get('last_checked') | ||||
| 
 | ||||
|             # Allow 5 minutes of grace time before we decide it's overdue | ||||
|             if time_since_check - (5 * 60) > t: | ||||
|                 overdue_watches.append(uuid) | ||||
|         from changedetectionio import __version__ as main_version | ||||
|         return { | ||||
|                    'queue_size': self.update_q.qsize(), | ||||
|                    'overdue_watches': overdue_watches, | ||||
|                    'uptime': round(time.time() - self.datastore.start_time, 2), | ||||
|                    'watch_count': len(self.datastore.data.get('watching', {})), | ||||
|                    'version': main_version | ||||
|                }, 200 | ||||
|         return list, 200 | ||||
| @@ -0,0 +1,33 @@ | ||||
| import copy | ||||
| from . import api_schema | ||||
| from ..model import watch_base | ||||
|  | ||||
| # Build a JSON Schema atleast partially based on our Watch model | ||||
| watch_base_config = watch_base() | ||||
| schema = api_schema.build_watch_json_schema(watch_base_config) | ||||
|  | ||||
| schema_create_watch = copy.deepcopy(schema) | ||||
| schema_create_watch['required'] = ['url'] | ||||
|  | ||||
| schema_update_watch = copy.deepcopy(schema) | ||||
| schema_update_watch['additionalProperties'] = False | ||||
|  | ||||
| # Tag schema is also based on watch_base since Tag inherits from it | ||||
| schema_tag = copy.deepcopy(schema) | ||||
| schema_create_tag = copy.deepcopy(schema_tag) | ||||
| schema_create_tag['required'] = ['title'] | ||||
| schema_update_tag = copy.deepcopy(schema_tag) | ||||
| schema_update_tag['additionalProperties'] = False | ||||
|  | ||||
| schema_notification_urls = copy.deepcopy(schema) | ||||
| schema_create_notification_urls = copy.deepcopy(schema_notification_urls) | ||||
| schema_create_notification_urls['required'] = ['notification_urls'] | ||||
| schema_delete_notification_urls = copy.deepcopy(schema_notification_urls) | ||||
| schema_delete_notification_urls['required'] = ['notification_urls'] | ||||
|  | ||||
| # Import all API resources | ||||
| from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch | ||||
| from .Tags import Tags, Tag | ||||
| from .Import import Import | ||||
| from .SystemInfo import SystemInfo | ||||
| from .Notifications import Notifications | ||||
|   | ||||
| @@ -1,5 +1,7 @@ | ||||
| # Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API | ||||
| # Probably other ways to solve this when the backend switches to some ORM | ||||
| from changedetectionio.notification import valid_notification_formats | ||||
|  | ||||
|  | ||||
| def build_time_between_check_json_schema(): | ||||
|     # Setup time between check schema | ||||
| @@ -98,8 +100,6 @@ def build_watch_json_schema(d): | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     from changedetectionio.notification import valid_notification_formats | ||||
|  | ||||
|     schema['properties']['notification_format'] = {'type': 'string', | ||||
|                                                    'enum': list(valid_notification_formats.keys()) | ||||
|                                                    } | ||||
| @@ -112,6 +112,35 @@ def build_watch_json_schema(d): | ||||
|  | ||||
|     schema['properties']['time_between_check'] = build_time_between_check_json_schema() | ||||
|  | ||||
|     schema['properties']['browser_steps'] = { | ||||
|         "anyOf": [ | ||||
|             { | ||||
|                 "type": "array", | ||||
|                 "items": { | ||||
|                     "type": "object", | ||||
|                     "properties": { | ||||
|                         "operation": { | ||||
|                             "type": ["string", "null"], | ||||
|                             "maxLength": 5000  # Allows null and any string up to 5000 chars (including "") | ||||
|                         }, | ||||
|                         "selector": { | ||||
|                             "type": ["string", "null"], | ||||
|                             "maxLength": 5000 | ||||
|                         }, | ||||
|                         "optional_value": { | ||||
|                             "type": ["string", "null"], | ||||
|                             "maxLength": 5000 | ||||
|                         } | ||||
|                     }, | ||||
|                     "required": ["operation", "selector", "optional_value"], | ||||
|                     "additionalProperties": False  # No extra keys allowed | ||||
|                 } | ||||
|             }, | ||||
|             {"type": "null"},  # Allows null for `browser_steps` | ||||
|             {"type": "array", "maxItems": 0}  # Allows empty array [] | ||||
|         ] | ||||
|     } | ||||
|  | ||||
|     # headers ? | ||||
|     return schema | ||||
|  | ||||
|   | ||||
| @@ -11,22 +11,14 @@ def check_token(f): | ||||
|         datastore = args[0].datastore | ||||
|  | ||||
|         config_api_token_enabled = datastore.data['settings']['application'].get('api_access_token_enabled') | ||||
|         if not config_api_token_enabled: | ||||
|             return | ||||
|  | ||||
|         try: | ||||
|             api_key_header = request.headers['x-api-key'] | ||||
|         except KeyError: | ||||
|             return make_response( | ||||
|                 jsonify("No authorization x-api-key header."), 403 | ||||
|             ) | ||||
|  | ||||
|         config_api_token = datastore.data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|         if api_key_header != config_api_token: | ||||
|             return make_response( | ||||
|                 jsonify("Invalid access - API key invalid."), 403 | ||||
|             ) | ||||
|         # config_api_token_enabled - a UI option in settings if access should obey the key or not | ||||
|         if config_api_token_enabled: | ||||
|             if request.headers.get('x-api-key') != config_api_token: | ||||
|                 return make_response( | ||||
|                     jsonify("Invalid access - API key invalid."), 403 | ||||
|                 ) | ||||
|  | ||||
|         return f(*args, **kwargs) | ||||
|  | ||||
|   | ||||
| @@ -1,11 +0,0 @@ | ||||
| import apprise | ||||
|  | ||||
| # Create our AppriseAsset and populate it with some of our new values: | ||||
| # https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object | ||||
| asset = apprise.AppriseAsset( | ||||
|    image_url_logo='https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png' | ||||
| ) | ||||
|  | ||||
| asset.app_id = "changedetection.io" | ||||
| asset.app_desc = "ChangeDetection.io best and simplest website monitoring and change detection" | ||||
| asset.app_url = "https://changedetection.io" | ||||
| @@ -1,82 +0,0 @@ | ||||
| # include the decorator | ||||
| from apprise.decorators import notify | ||||
| from loguru import logger | ||||
|  | ||||
@notify(on="delete")
@notify(on="deletes")
@notify(on="get")
@notify(on="gets")
@notify(on="post")
@notify(on="posts")
@notify(on="put")
@notify(on="puts")
def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
    """Apprise custom-scheme handler that issues a plain HTTP(S) request.

    Maps the pseudo schemes get://, post://, put://, delete:// (and their
    "s" TLS variants) onto the matching `requests` verb, converting
    Apprise-style URL extras (`+header=value` query args, user:password)
    into request headers, query params and basic auth.
    """
    import requests
    import json
    from urllib.parse import unquote_plus
    from apprise.utils import parse_url as apprise_parse_url

    url = kwargs['meta'].get('url')

    if url.startswith('post'):
        r = requests.post
    elif url.startswith('get'):
        r = requests.get
    elif url.startswith('put'):
        r = requests.put
    elif url.startswith('delete'):
        r = requests.delete
    else:
        # Should be unreachable: the @notify registrations above restrict the schemes
        raise ValueError(f"Unsupported scheme for custom notification URL: {url}")

    # Translate the pseudo scheme back to a real HTTP(S) scheme.
    # (The "s" variants are checked first; only one prefix can match.)
    for pseudo, real in (('posts://', 'https://'), ('post://', 'http://'),
                         ('gets://', 'https://'), ('get://', 'http://'),
                         ('puts://', 'https://'), ('put://', 'http://'),
                         ('deletes://', 'https://'), ('delete://', 'http://')):
        if url.startswith(pseudo):
            url = url.replace(pseudo, real, 1)
            break

    headers = {}
    params = {}
    auth = None

    # Convert /foobar?+some-header=hello to proper header dictionary
    results = apprise_parse_url(url)
    if results:
        # Add our headers that the user can potentially over-ride if they wish
        # to to our returned result set and tidy entries by unquoting them
        headers = {unquote_plus(x): unquote_plus(y)
                   for x, y in results['qsd+'].items()}

        # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
        # In Apprise, it relies on prefixing each request arg with "-", because it uses say &method=update as a flag for apprise
        # but here we are making straight requests, so we need todo convert this against apprise's logic
        for k, v in results['qsd'].items():
            if not k.strip('+-') in results['qsd+'].keys():
                params[unquote_plus(k)] = unquote_plus(v)

        # Determine Authentication
        auth = ''
        if results.get('user') and results.get('password'):
            # BUGFIX: the password field was previously populated with the
            # username, so basic-auth was always sent as (user, user)
            auth = (unquote_plus(results.get('user')), unquote_plus(results.get('password')))
        elif results.get('user'):
            # NOTE(review): this passes a bare string, not a (user, '') tuple —
            # presumably intentional for the user-only case; confirm `requests`
            # accepts it before tightening
            auth = (unquote_plus(results.get('user')))

    # Try to auto-guess if it's JSON
    h = 'application/json; charset=utf-8'
    try:
        json.loads(body)
        headers['Content-Type'] = h
    except ValueError as e:
        logger.warning(f"Could not automatically add '{h}' header to the {kwargs['meta'].get('schema')}:// notification because the document failed to parse as JSON: {e}")
        pass

    r(results.get('url'),
      auth=auth,
      data=body.encode('utf-8') if type(body) is str else body,
      headers=headers,
      params=params
      )
							
								
								
									
										449
									
								
								changedetectionio/async_update_worker.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,449 @@ | ||||
| from .processors.exceptions import ProcessorException | ||||
| import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions | ||||
| from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse | ||||
| from changedetectionio import html_tools | ||||
| from changedetectionio.flask_app import watch_check_update | ||||
|  | ||||
| import asyncio | ||||
| import importlib | ||||
| import os | ||||
| import time | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| # Async version of update_worker | ||||
| # Processes jobs from AsyncSignalPriorityQueue instead of threaded queue | ||||
|  | ||||
| async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|     """ | ||||
|     Async worker function that processes watch check jobs from the queue. | ||||
|      | ||||
|     Args: | ||||
|         worker_id: Unique identifier for this worker | ||||
|         q: AsyncSignalPriorityQueue containing jobs to process | ||||
|         notification_q: Standard queue for notifications | ||||
|         app: Flask application instance | ||||
|         datastore: Application datastore | ||||
|     """ | ||||
|     # Set a descriptive name for this task | ||||
|     task = asyncio.current_task() | ||||
|     if task: | ||||
|         task.set_name(f"async-worker-{worker_id}") | ||||
|      | ||||
|     logger.info(f"Starting async worker {worker_id}") | ||||
|      | ||||
|     while not app.config.exit.is_set(): | ||||
|         update_handler = None | ||||
|         watch = None | ||||
|  | ||||
|         try: | ||||
|             # Use asyncio wait_for to make queue.get() cancellable | ||||
|             queued_item_data = await asyncio.wait_for(q.get(), timeout=1.0) | ||||
|         except asyncio.TimeoutError: | ||||
|             # No jobs available, continue loop | ||||
|             continue | ||||
|         except Exception as e: | ||||
|             logger.error(f"Worker {worker_id} error getting queue item: {e}") | ||||
|             await asyncio.sleep(0.1) | ||||
|             continue | ||||
|          | ||||
|         uuid = queued_item_data.item.get('uuid') | ||||
|         fetch_start_time = round(time.time()) | ||||
|          | ||||
|         # Mark this UUID as being processed | ||||
|         from changedetectionio import worker_handler | ||||
|         worker_handler.set_uuid_processing(uuid, processing=True) | ||||
|          | ||||
|         try: | ||||
|             if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'): | ||||
|                 changed_detected = False | ||||
|                 contents = b'' | ||||
|                 process_changedetection_results = True | ||||
|                 update_obj = {} | ||||
|  | ||||
|                 # Clear last errors | ||||
|                 datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None | ||||
|                 datastore.data['watching'][uuid]['last_checked'] = fetch_start_time | ||||
|  | ||||
|                 watch = datastore.data['watching'].get(uuid) | ||||
|  | ||||
|                 logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}") | ||||
|  | ||||
|                 try: | ||||
|                     watch_check_update.send(watch_uuid=uuid) | ||||
|  | ||||
|                     # Processor is what we are using for detecting the "Change" | ||||
|                     processor = watch.get('processor', 'text_json_diff') | ||||
|  | ||||
|                     # Init a new 'difference_detection_processor' | ||||
|                     processor_module_name = f"changedetectionio.processors.{processor}.processor" | ||||
|                     try: | ||||
|                         processor_module = importlib.import_module(processor_module_name) | ||||
|                     except ModuleNotFoundError as e: | ||||
|                         print(f"Processor module '{processor}' not found.") | ||||
|                         raise e | ||||
|  | ||||
|                     update_handler = processor_module.perform_site_check(datastore=datastore, | ||||
|                                                                          watch_uuid=uuid) | ||||
|  | ||||
|                     # All fetchers are now async, so call directly | ||||
|                     await update_handler.call_browser() | ||||
|  | ||||
|                     # Run change detection (this is synchronous) | ||||
|                     changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch) | ||||
|  | ||||
|                 except PermissionError as e: | ||||
|                     logger.critical(f"File permission error updating file, watch: {uuid}") | ||||
|                     logger.critical(str(e)) | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except ProcessorException as e: | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot) | ||||
|                     if e.xpath_data: | ||||
|                         watch.save_xpath_data(data=e.xpath_data) | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message}) | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except content_fetchers_exceptions.ReplyWithContentButNoText as e: | ||||
|                     extra_help = "" | ||||
|                     if e.has_filters: | ||||
|                         has_img = html_tools.include_filters(include_filters='img', | ||||
|                                                              html_content=e.html_content) | ||||
|                         if has_img: | ||||
|                             extra_help = ", it's possible that the filters you have give an empty result or contain only an image." | ||||
|                         else: | ||||
|                             extra_help = ", it's possible that the filters were found, but contained no usable text." | ||||
|  | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={ | ||||
|                         'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}" | ||||
|                     }) | ||||
|  | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot, as_error=True) | ||||
|  | ||||
|                     if e.xpath_data: | ||||
|                         watch.save_xpath_data(data=e.xpath_data) | ||||
|                          | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except content_fetchers_exceptions.Non200ErrorCodeReceived as e: | ||||
|                     if e.status_code == 403: | ||||
|                         err_text = "Error - 403 (Access denied) received" | ||||
|                     elif e.status_code == 404: | ||||
|                         err_text = "Error - 404 (Page not found) received" | ||||
|                     elif e.status_code == 407: | ||||
|                         err_text = "Error - 407 (Proxy authentication required) received, did you need a username and password for the proxy?" | ||||
|                     elif e.status_code == 500: | ||||
|                         err_text = "Error - 500 (Internal server error) received from the web site" | ||||
|                     else: | ||||
|                         extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else '' | ||||
|                         err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}" | ||||
|  | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot, as_error=True) | ||||
|                     if e.xpath_data: | ||||
|                         watch.save_xpath_data(data=e.xpath_data, as_error=True) | ||||
|                     if e.page_text: | ||||
|                         watch.save_error_text(contents=e.page_text) | ||||
|  | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except FilterNotFoundInResponse as e: | ||||
|                     if not datastore.data['watching'].get(uuid): | ||||
|                         continue | ||||
|  | ||||
|                     err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary." | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) | ||||
|  | ||||
|                     # Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot) | ||||
|  | ||||
|                     if e.xpath_data: | ||||
|                         watch.save_xpath_data(data=e.xpath_data) | ||||
|  | ||||
|                     # Only when enabled, send the notification | ||||
|                     if watch.get('filter_failure_notification_send', False): | ||||
|                         c = watch.get('consecutive_filter_failures', 0) | ||||
|                         c += 1 | ||||
|                         # Send notification if we reached the threshold? | ||||
|                         threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) | ||||
|                         logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}") | ||||
|                         if c >= threshold: | ||||
|                             if not watch.get('notification_muted'): | ||||
|                                 logger.debug(f"Sending filter failed notification for {uuid}") | ||||
|                                 await send_filter_failure_notification(uuid, notification_q, datastore) | ||||
|                             c = 0 | ||||
|                             logger.debug(f"Reset filter failure count back to zero") | ||||
|  | ||||
|                         datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c}) | ||||
|                     else: | ||||
|                         logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping") | ||||
|  | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e: | ||||
|                     # Yes fine, so nothing todo, don't continue to process. | ||||
|                     process_changedetection_results = False | ||||
|                     changed_detected = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.BrowserConnectError as e: | ||||
|                     datastore.update_watch(uuid=uuid, | ||||
|                                          update_obj={'last_error': e.msg}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.BrowserFetchTimedOut as e: | ||||
|                     datastore.update_watch(uuid=uuid, | ||||
|                                          update_obj={'last_error': e.msg}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.BrowserStepsStepException as e: | ||||
|                     if not datastore.data['watching'].get(uuid): | ||||
|                         continue | ||||
|  | ||||
|                     error_step = e.step_n + 1 | ||||
|                     from playwright._impl._errors import TimeoutError, Error | ||||
|  | ||||
|                     # Generally enough info for TimeoutError (couldnt locate the element after default seconds) | ||||
|                     err_text = f"Browser step at position {error_step} could not run, check the watch, add a delay if necessary, view Browser Steps to see screenshot at that step." | ||||
|  | ||||
|                     if e.original_e.name == "TimeoutError": | ||||
|                         # Just the first line is enough, the rest is the stack trace | ||||
|                         err_text += " Could not find the target." | ||||
|                     else: | ||||
|                         # Other Error, more info is good. | ||||
|                         err_text += " " + str(e.original_e).splitlines()[0] | ||||
|  | ||||
|                     logger.debug(f"BrowserSteps exception at step {error_step} {str(e.original_e)}") | ||||
|  | ||||
|                     datastore.update_watch(uuid=uuid, | ||||
|                                          update_obj={'last_error': err_text, | ||||
|                                                    'browser_steps_last_error_step': error_step}) | ||||
|  | ||||
|                     if watch.get('filter_failure_notification_send', False): | ||||
|                         c = watch.get('consecutive_filter_failures', 0) | ||||
|                         c += 1 | ||||
|                         # Send notification if we reached the threshold? | ||||
|                         threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) | ||||
|                         logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}") | ||||
|                         if threshold > 0 and c >= threshold: | ||||
|                             if not watch.get('notification_muted'): | ||||
|                                 await send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n, notification_q=notification_q, datastore=datastore) | ||||
|                             c = 0 | ||||
|  | ||||
|                         datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c}) | ||||
|  | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except content_fetchers_exceptions.EmptyReply as e: | ||||
|                     # Some kind of custom to-str handler in the exception handler that does this? | ||||
|                     err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code) | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                 'last_check_status': e.status_code}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.ScreenshotUnavailable as e: | ||||
|                     err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'" | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                 'last_check_status': e.status_code}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.JSActionExceptions as e: | ||||
|                     err_text = "Error running JS Actions - Page request - "+e.message | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot, as_error=True) | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                 'last_check_status': e.status_code}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.PageUnloadable as e: | ||||
|                     err_text = "Page request from server didnt respond correctly" | ||||
|                     if e.message: | ||||
|                         err_text = "{} - {}".format(err_text, e.message) | ||||
|  | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot, as_error=True) | ||||
|  | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                 'last_check_status': e.status_code, | ||||
|                                                                 'has_ldjson_price_data': None}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e: | ||||
|                     err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher." | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) | ||||
|                     process_changedetection_results = False | ||||
|                     logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}") | ||||
|  | ||||
|                 except Exception as e: | ||||
|                     logger.error(f"Worker {worker_id} exception processing watch UUID: {uuid}") | ||||
|                     logger.error(str(e)) | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)}) | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 else: | ||||
|                     if not datastore.data['watching'].get(uuid): | ||||
|                         continue | ||||
|  | ||||
|                     update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|  | ||||
|                     if not watch.get('ignore_status_codes'): | ||||
|                         update_obj['consecutive_filter_failures'] = 0 | ||||
|  | ||||
|                     update_obj['last_error'] = False | ||||
|                     cleanup_error_artifacts(uuid, datastore) | ||||
|  | ||||
|                 if not datastore.data['watching'].get(uuid): | ||||
|                     continue | ||||
|  | ||||
|                 if process_changedetection_results: | ||||
|                     # Extract title if needed | ||||
|                     if datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']: | ||||
|                         if not watch['title'] or not len(watch['title']): | ||||
|                             try: | ||||
|                                 update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content) | ||||
|                                 logger.info(f"UUID: {uuid} Extract <title> updated title to '{update_obj['title']}") | ||||
|                             except Exception as e: | ||||
|                                 logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.") | ||||
|  | ||||
|                     try: | ||||
|                         datastore.update_watch(uuid=uuid, update_obj=update_obj) | ||||
|  | ||||
|                         if changed_detected or not watch.history_n: | ||||
|                             if update_handler.screenshot: | ||||
|                                 watch.save_screenshot(screenshot=update_handler.screenshot) | ||||
|  | ||||
|                             if update_handler.xpath_data: | ||||
|                                 watch.save_xpath_data(data=update_handler.xpath_data) | ||||
|  | ||||
|                             # Ensure unique timestamp for history | ||||
|                             if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key): | ||||
|                                 logger.warning(f"Timestamp {fetch_start_time} already exists, waiting 1 seconds") | ||||
|                                 fetch_start_time += 1 | ||||
|                                 await asyncio.sleep(1) | ||||
|  | ||||
|                             watch.save_history_text(contents=contents, | ||||
|                                                     timestamp=int(fetch_start_time), | ||||
|                                                     snapshot_id=update_obj.get('previous_md5', 'none')) | ||||
|  | ||||
|                             empty_pages_are_a_change = datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|                             if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change): | ||||
|                                 watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time)) | ||||
|  | ||||
|                             # Send notifications on second+ check | ||||
|                             if watch.history_n >= 2: | ||||
|                                 logger.info(f"Change detected in UUID {uuid} - {watch['url']}") | ||||
|                                 if not watch.get('notification_muted'): | ||||
|                                     await send_content_changed_notification(uuid, notification_q, datastore) | ||||
|  | ||||
|                     except Exception as e: | ||||
|                         logger.critical(f"Worker {worker_id} exception in process_changedetection_results") | ||||
|                         logger.critical(str(e)) | ||||
|                         datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) | ||||
|  | ||||
|                 # Always record attempt count | ||||
|                 count = watch.get('check_count', 0) + 1 | ||||
|  | ||||
|                 # Record server header | ||||
|                 try: | ||||
|                     server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255] | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header}) | ||||
|                 except Exception as e: | ||||
|                     pass | ||||
|  | ||||
|                 datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3), | ||||
|                                                                'check_count': count}) | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}") | ||||
|             logger.error(f"Worker {worker_id} traceback:", exc_info=True) | ||||
|              | ||||
|             # Also update the watch with error information | ||||
|             if datastore and uuid in datastore.data['watching']: | ||||
|                 datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Worker error: {str(e)}"}) | ||||
|          | ||||
|         finally: | ||||
|             # Always cleanup - this runs whether there was an exception or not | ||||
|             if uuid: | ||||
|                 try: | ||||
|                     # Mark UUID as no longer being processed | ||||
|                     worker_handler.set_uuid_processing(uuid, processing=False) | ||||
|                      | ||||
|                     # Send completion signal | ||||
|                     if watch: | ||||
|                         #logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}") | ||||
|                         watch_check_update.send(watch_uuid=watch['uuid']) | ||||
|  | ||||
|                     update_handler = None | ||||
|                     logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s") | ||||
|                 except Exception as cleanup_error: | ||||
|                     logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}") | ||||
|              | ||||
|             # Brief pause before continuing to avoid tight error loops (only on error) | ||||
|             if 'e' in locals(): | ||||
|                 await asyncio.sleep(1.0) | ||||
|             else: | ||||
|                 # Small yield for normal completion | ||||
|                 await asyncio.sleep(0.01) | ||||
|  | ||||
|         # Check if we should exit | ||||
|         if app.config.exit.is_set(): | ||||
|             break | ||||
|  | ||||
|     # Check if we're in pytest environment - if so, be more gentle with logging | ||||
|     import sys | ||||
|     in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ | ||||
|      | ||||
|     if not in_pytest: | ||||
|         logger.info(f"Worker {worker_id} shutting down") | ||||
|  | ||||
|  | ||||
def cleanup_error_artifacts(uuid, datastore):
    """Delete any leftover error artifacts (screenshot/text) for a watch.

    Called after a successful check so stale error files from a previous
    failed run don't linger in the watch's data directory.
    """
    watch_dir = os.path.join(datastore.datastore_path, uuid)
    for artifact in ("last-error-screenshot.png", "last-error.txt"):
        candidate = os.path.join(watch_dir, artifact)
        if os.path.isfile(candidate):
            os.unlink(candidate)
|  | ||||
|  | ||||
|  | ||||
async def send_content_changed_notification(watch_uuid, notification_q, datastore):
    """Queue a 'content changed' notification for a watch.

    Delegates to the notification service; any failure is logged rather
    than propagated so a notification error never aborts the check loop.
    """
    try:
        from changedetectionio.notification_service import create_notification_service

        # Bind a service instance to this datastore/queue, then hand off the work
        service = create_notification_service(datastore, notification_q)
        service.send_content_changed_notification(watch_uuid)
    except Exception as e:
        logger.error(f"Error sending notification for {watch_uuid}: {e}")
|  | ||||
|  | ||||
async def send_filter_failure_notification(watch_uuid, notification_q, datastore):
    """Queue a 'filter failed' notification for a watch.

    Delegates to the notification service; any failure is logged rather
    than propagated so a notification error never aborts the check loop.
    """
    try:
        from changedetectionio.notification_service import create_notification_service

        # Bind a service instance to this datastore/queue, then hand off the work
        service = create_notification_service(datastore, notification_q)
        service.send_filter_failure_notification(watch_uuid)
    except Exception as e:
        logger.error(f"Error sending filter failure notification for {watch_uuid}: {e}")
|  | ||||
|  | ||||
async def send_step_failure_notification(watch_uuid, step_n, notification_q, datastore):
    """Queue a 'browser step failed' notification for a watch.

    step_n is the zero-based index of the failing browser step. Delegates to
    the notification service; any failure is logged rather than propagated.
    """
    try:
        from changedetectionio.notification_service import create_notification_service

        # Bind a service instance to this datastore/queue, then hand off the work
        service = create_notification_service(datastore, notification_q)
        service.send_step_failure_notification(watch_uuid, step_n)
    except Exception as e:
        logger.error(f"Error sending step failure notification for {watch_uuid}: {e}")
							
								
								
									
										33
									
								
								changedetectionio/auth_decorator.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,33 @@ | ||||
| import os | ||||
| from functools import wraps | ||||
| from flask import current_app, redirect, request | ||||
| from loguru import logger | ||||
|  | ||||
def login_optionally_required(func):
    """
    If password authentication is enabled, verify the user is logged in.
    To be used as a decorator for routes that should optionally require login.
    This version is blueprint-friendly as it uses current_app instead of directly accessing app.

    Access is allowed WITHOUT authentication when any of these hold:
      * the endpoint is a diff history page and 'shared_diff_access' is enabled,
      * the HTTP method is in flask_login's EXEMPT_METHODS (e.g. OPTIONS),
      * the app is running with LOGIN_DISABLED set.
    Otherwise, if a password is configured (datastore setting or SALTED_PASS
    env var) and the user is not authenticated, the request is handed to
    flask-login's unauthorized handler (normally a redirect to the login page).
    """
    @wraps(func)
    def decorated_view(*args, **kwargs):
        # Note: current_app is already imported at module level; re-importing
        # it here (as earlier revisions did) is redundant.
        import flask_login
        from flask_login import current_user

        # Access datastore through the app config
        datastore = current_app.config['DATASTORE']
        has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)

        # Shared diff pages may be viewed without login when explicitly enabled
        if request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'):
            return func(*args, **kwargs)
        elif request.method in flask_login.config.EXEMPT_METHODS:
            return func(*args, **kwargs)
        elif current_app.config.get('LOGIN_DISABLED'):
            return func(*args, **kwargs)
        elif has_password_enabled and not current_user.is_authenticated:
            # Defer to the configured login manager (usually redirects to login)
            return current_app.login_manager.unauthorized()

        return func(*args, **kwargs)
    return decorated_view
| @@ -138,7 +138,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|         return send_from_directory(os.path.abspath(datastore.datastore_path), filename, as_attachment=True) | ||||
|  | ||||
|     @login_optionally_required | ||||
|     @backups_blueprint.route("/", methods=['GET']) | ||||
|     @backups_blueprint.route("", methods=['GET']) | ||||
|     def index(): | ||||
|         backups = find_backups() | ||||
|         output = render_template("overview.html", | ||||
|   | ||||
| @@ -22,45 +22,56 @@ from loguru import logger | ||||
|  | ||||
| browsersteps_sessions = {} | ||||
| io_interface_context = None | ||||
| import json | ||||
| import hashlib | ||||
| from flask import Response | ||||
| import asyncio | ||||
| import threading | ||||
|  | ||||
def run_async_in_browser_loop(coro):
    """Run async coroutine using the existing async worker event loop.

    Blocks the calling (request-handling) thread until the coroutine
    finishes and returns its result; exceptions raised inside the
    coroutine propagate to the caller.
    """
    from changedetectionio import worker_handler
    
    # Use the existing async worker event loop instead of creating a new one
    if worker_handler.USE_ASYNC_WORKERS and worker_handler.async_loop and not worker_handler.async_loop.is_closed():
        logger.debug("Browser steps using existing async worker event loop")
        # Schedule the coroutine onto the worker loop from this (foreign)
        # thread and wait synchronously for completion.
        future = asyncio.run_coroutine_threadsafe(coro, worker_handler.async_loop)
        return future.result()
    else:
        # Fallback: create a new event loop (for sync workers or if async loop not available)
        logger.debug("Browser steps creating temporary event loop")
        loop = asyncio.new_event_loop()
        asyncio.set_event_loop(loop)
        try:
            return loop.run_until_complete(coro)
        finally:
            # NOTE(review): after close(), the closed loop remains installed as
            # this thread's current event loop — confirm this is acceptable for
            # subsequent calls on the same thread.
            loop.close()
|  | ||||
| def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates") | ||||
|  | ||||
|     def start_browsersteps_session(watch_uuid): | ||||
|         from . import nonContext | ||||
|     async def start_browsersteps_session(watch_uuid): | ||||
|         from . import browser_steps | ||||
|         import time | ||||
|         global browsersteps_sessions | ||||
|         global io_interface_context | ||||
|  | ||||
|         from playwright.async_api import async_playwright | ||||
|  | ||||
|         # We keep the playwright session open for many minutes | ||||
|         keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60 | ||||
|  | ||||
|         browsersteps_start_session = {'start_time': time.time()} | ||||
|  | ||||
|         # You can only have one of these running | ||||
|         # This should be very fine to leave running for the life of the application | ||||
|         # @idea - Make it global so the pool of watch fetchers can use it also | ||||
|         if not io_interface_context: | ||||
|             io_interface_context = nonContext.c_sync_playwright() | ||||
|             # Start the Playwright context, which is actually a nodejs sub-process and communicates over STDIN/STDOUT pipes | ||||
|             io_interface_context = io_interface_context.start() | ||||
|         # Create a new async playwright instance for browser steps | ||||
|         playwright_instance = async_playwright() | ||||
|         playwright_context = await playwright_instance.start() | ||||
|  | ||||
|         keepalive_ms = ((keepalive_seconds + 3) * 1000) | ||||
|         base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"') | ||||
|         a = "?" if not '?' in base_url else '&' | ||||
|         base_url += a + f"timeout={keepalive_ms}" | ||||
|  | ||||
|         try: | ||||
|             browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url) | ||||
|         except Exception as e: | ||||
|             if 'ECONNREFUSED' in str(e): | ||||
|                 return make_response('Unable to start the Playwright Browser session, is it running?', 401) | ||||
|             else: | ||||
|                 # Other errors, bad URL syntax, bad reply etc | ||||
|                 return make_response(str(e), 401) | ||||
|         browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms) | ||||
|         browsersteps_start_session['browser'] = browser | ||||
|         browsersteps_start_session['playwright_context'] = playwright_context | ||||
|  | ||||
|         proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid) | ||||
|         proxy = None | ||||
| @@ -82,15 +93,20 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                 logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}") | ||||
|  | ||||
|         # Tell Playwright to connect to Chrome and setup a new session via our stepper interface | ||||
|         browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui( | ||||
|             playwright_browser=browsersteps_start_session['browser'], | ||||
|         browserstepper = browser_steps.browsersteps_live_ui( | ||||
|             playwright_browser=browser, | ||||
|             proxy=proxy, | ||||
|             start_url=datastore.data['watching'][watch_uuid].get('url'), | ||||
|             start_url=datastore.data['watching'][watch_uuid].link, | ||||
|             headers=datastore.data['watching'][watch_uuid].get('headers') | ||||
|         ) | ||||
|          | ||||
|         # Initialize the async connection | ||||
|         await browserstepper.connect(proxy=proxy) | ||||
|          | ||||
|         browsersteps_start_session['browserstepper'] = browserstepper | ||||
|  | ||||
|         # For test | ||||
|         #browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time())) | ||||
|         #await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time())) | ||||
|  | ||||
|         return browsersteps_start_session | ||||
|  | ||||
| @@ -99,10 +115,8 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     @browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET']) | ||||
|     def browsersteps_start_session(): | ||||
|         # A new session was requested, return sessionID | ||||
|  | ||||
|         import asyncio | ||||
|         import uuid | ||||
|         global browsersteps_sessions | ||||
|  | ||||
|         browsersteps_session_id = str(uuid.uuid4()) | ||||
|         watch_uuid = request.args.get('uuid') | ||||
|  | ||||
| @@ -111,7 +125,19 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|  | ||||
|         logger.debug("Starting connection with playwright") | ||||
|         logger.debug("browser_steps.py connecting") | ||||
|         browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid) | ||||
|  | ||||
|         try: | ||||
|             # Run the async function in the dedicated browser steps event loop | ||||
|             browsersteps_sessions[browsersteps_session_id] = run_async_in_browser_loop( | ||||
|                 start_browsersteps_session(watch_uuid) | ||||
|             ) | ||||
|         except Exception as e: | ||||
|             if 'ECONNREFUSED' in str(e): | ||||
|                 return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401) | ||||
|             else: | ||||
|                 # Other errors, bad URL syntax, bad reply etc | ||||
|                 return make_response(str(e), 401) | ||||
|  | ||||
|         logger.debug("Starting connection with playwright - done") | ||||
|         return {'browsersteps_session_id': browsersteps_session_id} | ||||
|  | ||||
| @@ -146,7 +172,6 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     def browsersteps_ui_update(): | ||||
|         import base64 | ||||
|         import playwright._impl._errors | ||||
|         global browsersteps_sessions | ||||
|         from changedetectionio.blueprint.browser_steps import browser_steps | ||||
|  | ||||
|         remaining =0 | ||||
| @@ -160,38 +185,30 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|         if not browsersteps_sessions.get(browsersteps_session_id): | ||||
|             return make_response('No session exists under that ID', 500) | ||||
|  | ||||
|  | ||||
|         is_last_step = False | ||||
|         # Actions - step/apply/etc, do the thing and return state | ||||
|         if request.method == 'POST': | ||||
|             # @todo - should always be an existing session | ||||
|             step_operation = request.form.get('operation') | ||||
|             step_selector = request.form.get('selector') | ||||
|             step_optional_value = request.form.get('optional_value') | ||||
|             step_n = int(request.form.get('step_n')) | ||||
|             is_last_step = strtobool(request.form.get('is_last_step')) | ||||
|  | ||||
|             # @todo try.. accept.. nice errors not popups.. | ||||
|             try: | ||||
|  | ||||
|                 browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation, | ||||
|                                          selector=step_selector, | ||||
|                                          optional_value=step_optional_value) | ||||
|                 # Run the async call_action method in the dedicated browser steps event loop | ||||
|                 run_async_in_browser_loop( | ||||
|                     browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action( | ||||
|                         action_name=step_operation, | ||||
|                         selector=step_selector, | ||||
|                         optional_value=step_optional_value | ||||
|                     ) | ||||
|                 ) | ||||
|  | ||||
|             except Exception as e: | ||||
|                 logger.error(f"Exception when calling step operation {step_operation} {str(e)}") | ||||
|                 # Try to find something of value to give back to the user | ||||
|                 return make_response(str(e).splitlines()[0], 401) | ||||
|  | ||||
|             # Get visual selector ready/update its data (also use the current filter info from the page?) | ||||
|             # When the last 'apply' button was pressed | ||||
|             # @todo this adds overhead because the xpath selection is happening twice | ||||
|             u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url | ||||
|             if is_last_step and u: | ||||
|                 (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].request_visualselector_data() | ||||
|                 watch = datastore.data['watching'].get(uuid) | ||||
|                 if watch: | ||||
|                     watch.save_screenshot(screenshot=screenshot) | ||||
|                     watch.save_xpath_data(data=xpath_data) | ||||
|  | ||||
| #        if not this_session.page: | ||||
| #            cleanup_playwright_session() | ||||
| @@ -199,31 +216,36 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|  | ||||
|         # Screenshots and other info only needed on requesting a step (POST) | ||||
|         try: | ||||
|             state = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state() | ||||
|         except playwright._impl._api_types.Error as e: | ||||
|             return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401) | ||||
|             # Run the async get_current_state method in the dedicated browser steps event loop | ||||
|             (screenshot, xpath_data) = run_async_in_browser_loop( | ||||
|                 browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state() | ||||
|             ) | ||||
|                  | ||||
|             if is_last_step: | ||||
|                 watch = datastore.data['watching'].get(uuid) | ||||
|                 u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url | ||||
|                 if watch and u: | ||||
|                     watch.save_screenshot(screenshot=screenshot) | ||||
|                     watch.save_xpath_data(data=xpath_data) | ||||
|  | ||||
|         # Use send_file() which is way faster than read/write loop on bytes | ||||
|         import json | ||||
|         from tempfile import mkstemp | ||||
|         from flask import send_file | ||||
|         tmp_fd, tmp_file = mkstemp(text=True, suffix=".json", prefix="changedetectionio-") | ||||
|         except Exception as e: | ||||
|             return make_response(f"Error fetching screenshot and element data - {str(e)}", 401) | ||||
|  | ||||
|         output = json.dumps({'screenshot': "data:image/jpeg;base64,{}".format( | ||||
|             base64.b64encode(state[0]).decode('ascii')), | ||||
|             'xpath_data': state[1], | ||||
|             'session_age_start': browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start, | ||||
|             'browser_time_remaining': round(remaining) | ||||
|         }) | ||||
|         # SEND THIS BACK TO THE BROWSER | ||||
|         output = { | ||||
|             "screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}", | ||||
|             "xpath_data": xpath_data, | ||||
|             "session_age_start": browsersteps_sessions[browsersteps_session_id]['browserstepper'].age_start, | ||||
|             "browser_time_remaining": round(remaining) | ||||
|         } | ||||
|         json_data = json.dumps(output) | ||||
|  | ||||
|         with os.fdopen(tmp_fd, 'w') as f: | ||||
|             f.write(output) | ||||
|         # Generate an ETag (hash of the response body) | ||||
|         etag_hash = hashlib.md5(json_data.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         response = make_response(send_file(path_or_file=tmp_file, | ||||
|                                            mimetype='application/json; charset=UTF-8', | ||||
|                                            etag=True)) | ||||
|         # No longer needed | ||||
|         os.unlink(tmp_file) | ||||
|         # Create the response with ETag | ||||
|         response = Response(json_data, mimetype="application/json; charset=UTF-8") | ||||
|         response.set_etag(etag_hash) | ||||
|  | ||||
|         return response | ||||
|  | ||||
|   | ||||
| @@ -1,14 +1,15 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import os | ||||
| import time | ||||
| import re | ||||
| from random import randint | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT | ||||
| from changedetectionio.content_fetchers.base import manage_user_agent | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
|  | ||||
|  | ||||
|  | ||||
| # Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end | ||||
| # 0- off, 1- on | ||||
| browser_step_ui_config = {'Choose one': '0 0', | ||||
| @@ -31,13 +32,16 @@ browser_step_ui_config = {'Choose one': '0 0', | ||||
| #                          'Extract text and use as filter': '1 0', | ||||
|                           'Goto site': '0 0', | ||||
|                           'Goto URL': '0 1', | ||||
|                           'Make all child elements visible': '1 0', | ||||
|                           'Press Enter': '0 0', | ||||
|                           'Select by label': '1 1', | ||||
|                           '<select> by option text': '1 1', | ||||
|                           'Scroll down': '0 0', | ||||
|                           'Uncheck checkbox': '1 0', | ||||
|                           'Wait for seconds': '0 1', | ||||
|                           'Wait for text': '0 1', | ||||
|                           'Wait for text in element': '1 1', | ||||
|                           'Remove elements': '1 0', | ||||
|                           #                          'Press Page Down': '0 0', | ||||
|                           #                          'Press Page Up': '0 0', | ||||
|                           # weird bug, come back to it later | ||||
| @@ -51,12 +55,17 @@ browser_step_ui_config = {'Choose one': '0 0', | ||||
| class steppable_browser_interface(): | ||||
|     page = None | ||||
|     start_url = None | ||||
|     action_timeout = 10 * 1000 | ||||
|  | ||||
|     def __init__(self, start_url): | ||||
|         self.start_url = start_url | ||||
|  | ||||
|     # Convert and perform "Click Button" for example | ||||
|     def call_action(self, action_name, selector=None, optional_value=None): | ||||
|     async def call_action(self, action_name, selector=None, optional_value=None): | ||||
|         if self.page is None: | ||||
|             logger.warning("Cannot call action on None page object") | ||||
|             return | ||||
|              | ||||
|         now = time.time() | ||||
|         call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower()) | ||||
|         if call_action_name == 'choose_one': | ||||
| @@ -67,129 +76,227 @@ class steppable_browser_interface(): | ||||
|         if selector and selector.startswith('/') and not selector.startswith('//'): | ||||
|             selector = "xpath=" + selector | ||||
|  | ||||
|         # Check if action handler exists | ||||
|         if not hasattr(self, "action_" + call_action_name): | ||||
|             logger.warning(f"Action handler for '{call_action_name}' not found") | ||||
|             return | ||||
|              | ||||
|         action_handler = getattr(self, "action_" + call_action_name) | ||||
|  | ||||
|         # Support for Jinja2 variables in the value and selector | ||||
|  | ||||
|         if selector and ('{%' in selector or '{{' in selector): | ||||
|             selector = jinja_render(template_str=selector) | ||||
|  | ||||
|         if optional_value and ('{%' in optional_value or '{{' in optional_value): | ||||
|             optional_value = jinja_render(template_str=optional_value) | ||||
|  | ||||
|         action_handler(selector, optional_value) | ||||
|         self.page.wait_for_timeout(1.5 * 1000) | ||||
|         # Trigger click and cautiously handle potential navigation | ||||
|         # This means the page redirects/reloads/changes JS etc etc | ||||
|         if call_action_name.startswith('click_'): | ||||
|             try: | ||||
|                 # Set up navigation expectation before the click (like sync version) | ||||
|                 async with self.page.expect_event("framenavigated", timeout=3000) as navigation_info: | ||||
|                     await action_handler(selector, optional_value) | ||||
|                  | ||||
|                 # Check if navigation actually occurred | ||||
|                 try: | ||||
|                     await navigation_info.value  # This waits for the navigation promise | ||||
|                     logger.debug(f"Navigation occurred on {call_action_name}.") | ||||
|                 except Exception: | ||||
|                     logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.") | ||||
|                      | ||||
|             except Exception as e: | ||||
|                 # If expect_event itself times out, that means no navigation occurred - that's OK | ||||
|                 if "framenavigated" in str(e) and "exceeded" in str(e): | ||||
|                     logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.") | ||||
|                 else: | ||||
|                     raise e | ||||
|         else: | ||||
|             # Some other action that probably a navigation is not expected | ||||
|             await action_handler(selector, optional_value) | ||||
|  | ||||
|  | ||||
|         # Safely wait for timeout | ||||
|         await self.page.wait_for_timeout(1.5 * 1000) | ||||
|         logger.debug(f"Call action done in {time.time()-now:.2f}s") | ||||
|  | ||||
|     def action_goto_url(self, selector=None, value=None): | ||||
|         # self.page.set_viewport_size({"width": 1280, "height": 5000}) | ||||
|     async def action_goto_url(self, selector=None, value=None): | ||||
|         if not value: | ||||
|             logger.warning("No URL provided for goto_url action") | ||||
|             return None | ||||
|              | ||||
|         now = time.time() | ||||
|         response = self.page.goto(value, timeout=0, wait_until='load') | ||||
|         # Should be the same as the puppeteer_fetch.js methods, means, load with no timeout set (skip timeout) | ||||
|         #and also wait for seconds ? | ||||
|         #await page.waitForTimeout(1000); | ||||
|         #await page.waitForTimeout(extra_wait_ms); | ||||
|         response = await self.page.goto(value, timeout=0, wait_until='load') | ||||
|         logger.debug(f"Time to goto URL {time.time()-now:.2f}s") | ||||
|         return response | ||||
|  | ||||
|     # Incase they request to go back to the start | ||||
|     def action_goto_site(self, selector=None, value=None): | ||||
|         return self.action_goto_url(value=self.start_url) | ||||
|     async def action_goto_site(self, selector=None, value=None): | ||||
|         return await self.action_goto_url(value=re.sub(r'^source:', '', self.start_url, flags=re.IGNORECASE)) | ||||
|  | ||||
|     def action_click_element_containing_text(self, selector=None, value=''): | ||||
|     async def action_click_element_containing_text(self, selector=None, value=''): | ||||
|         logger.debug("Clicking element containing text") | ||||
|         if not len(value.strip()): | ||||
|         if not value or not len(value.strip()): | ||||
|             return | ||||
|              | ||||
|         elem = self.page.get_by_text(value) | ||||
|         if elem.count(): | ||||
|             elem.first.click(delay=randint(200, 500), timeout=3000) | ||||
|         if await elem.count(): | ||||
|             await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|  | ||||
|     def action_click_element_containing_text_if_exists(self, selector=None, value=''): | ||||
|  | ||||
|     async def action_click_element_containing_text_if_exists(self, selector=None, value=''): | ||||
|         logger.debug("Clicking element containing text if exists") | ||||
|         if not len(value.strip()): | ||||
|         if not value or not len(value.strip()): | ||||
|             return | ||||
|              | ||||
|         elem = self.page.get_by_text(value) | ||||
|         logger.debug(f"Clicking element containing text - {elem.count()} elements found") | ||||
|         if elem.count(): | ||||
|             elem.first.click(delay=randint(200, 500), timeout=3000) | ||||
|         else: | ||||
|         count = await elem.count() | ||||
|         logger.debug(f"Clicking element containing text - {count} elements found") | ||||
|         if count: | ||||
|             await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|                  | ||||
|  | ||||
|     async def action_enter_text_in_field(self, selector, value): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|     def action_enter_text_in_field(self, selector, value): | ||||
|         if not len(selector.strip()): | ||||
|             return | ||||
|         await self.page.fill(selector, value, timeout=self.action_timeout) | ||||
|  | ||||
|         self.page.fill(selector, value, timeout=10 * 1000) | ||||
|     async def action_execute_js(self, selector, value): | ||||
|         if not value: | ||||
|             return None | ||||
|              | ||||
|         return await self.page.evaluate(value) | ||||
|  | ||||
|     def action_execute_js(self, selector, value): | ||||
|         response = self.page.evaluate(value) | ||||
|         return response | ||||
|  | ||||
|     def action_click_element(self, selector, value): | ||||
|     async def action_click_element(self, selector, value): | ||||
|         logger.debug("Clicking element") | ||||
|         if not len(selector.strip()): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|         self.page.click(selector=selector, timeout=30 * 1000, delay=randint(200, 500)) | ||||
|         await self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500)) | ||||
|  | ||||
|     def action_click_element_if_exists(self, selector, value): | ||||
|     async def action_click_element_if_exists(self, selector, value): | ||||
|         import playwright._impl._errors as _api_types | ||||
|         logger.debug("Clicking element if exists") | ||||
|         if not len(selector.strip()): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|              | ||||
|         try: | ||||
|             self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500)) | ||||
|         except _api_types.TimeoutError as e: | ||||
|             await self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500)) | ||||
|         except _api_types.TimeoutError: | ||||
|             return | ||||
|         except _api_types.Error as e: | ||||
|         except _api_types.Error: | ||||
|             # Element was there, but page redrew and now its long long gone | ||||
|             return | ||||
|                  | ||||
|  | ||||
|     def action_click_x_y(self, selector, value): | ||||
|         if not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value): | ||||
|             raise Exception("'Click X,Y' step should be in the format of '100 , 90'") | ||||
|     async def action_click_x_y(self, selector, value): | ||||
|         if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value): | ||||
|             logger.warning("'Click X,Y' step should be in the format of '100 , 90'") | ||||
|             return | ||||
|  | ||||
|         x, y = value.strip().split(',') | ||||
|         x = int(float(x.strip())) | ||||
|         y = int(float(y.strip())) | ||||
|         self.page.mouse.click(x=x, y=y, delay=randint(200, 500)) | ||||
|         try: | ||||
|             x, y = value.strip().split(',') | ||||
|             x = int(float(x.strip())) | ||||
|             y = int(float(y.strip())) | ||||
|              | ||||
|             await self.page.mouse.click(x=x, y=y, delay=randint(200, 500)) | ||||
|                  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error parsing x,y coordinates: {str(e)}") | ||||
|  | ||||
|     def action_scroll_down(self, selector, value): | ||||
|     async def action__select_by_option_text(self, selector, value): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|         await self.page.select_option(selector, label=value, timeout=self.action_timeout) | ||||
|  | ||||
|     async def action_scroll_down(self, selector, value): | ||||
|         # Some sites this doesnt work on for some reason | ||||
|         self.page.mouse.wheel(0, 600) | ||||
|         self.page.wait_for_timeout(1000) | ||||
|         await self.page.mouse.wheel(0, 600) | ||||
|         await self.page.wait_for_timeout(1000) | ||||
|  | ||||
|     def action_wait_for_seconds(self, selector, value): | ||||
|         self.page.wait_for_timeout(float(value.strip()) * 1000) | ||||
|     async def action_wait_for_seconds(self, selector, value): | ||||
|         try: | ||||
|             seconds = float(value.strip()) if value else 1.0 | ||||
|             await self.page.wait_for_timeout(seconds * 1000) | ||||
|         except (ValueError, TypeError) as e: | ||||
|             logger.error(f"Invalid value for wait_for_seconds: {str(e)}") | ||||
|  | ||||
|     def action_wait_for_text(self, selector, value): | ||||
|     async def action_wait_for_text(self, selector, value): | ||||
|         if not value: | ||||
|             return | ||||
|              | ||||
|         import json | ||||
|         v = json.dumps(value) | ||||
|         self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000) | ||||
|         await self.page.wait_for_function( | ||||
|             f'document.querySelector("body").innerText.includes({v});', | ||||
|             timeout=30000 | ||||
|         ) | ||||
|              | ||||
|  | ||||
|     def action_wait_for_text_in_element(self, selector, value): | ||||
|     async def action_wait_for_text_in_element(self, selector, value): | ||||
|         if not selector or not value: | ||||
|             return | ||||
|              | ||||
|         import json | ||||
|         s = json.dumps(selector) | ||||
|         v = json.dumps(value) | ||||
|         self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000) | ||||
|          | ||||
|         await self.page.wait_for_function( | ||||
|             f'document.querySelector({s}).innerText.includes({v});', | ||||
|             timeout=30000 | ||||
|         ) | ||||
|  | ||||
|     # @todo - in the future make some popout interface to capture what needs to be set | ||||
|     # https://playwright.dev/python/docs/api/class-keyboard | ||||
|     def action_press_enter(self, selector, value): | ||||
|         self.page.keyboard.press("Enter", delay=randint(200, 500)) | ||||
|     async def action_press_enter(self, selector, value): | ||||
|         await self.page.keyboard.press("Enter", delay=randint(200, 500)) | ||||
|              | ||||
|  | ||||
|     def action_press_page_up(self, selector, value): | ||||
|         self.page.keyboard.press("PageUp", delay=randint(200, 500)) | ||||
|     async def action_press_page_up(self, selector, value): | ||||
|         await self.page.keyboard.press("PageUp", delay=randint(200, 500)) | ||||
|  | ||||
|     def action_press_page_down(self, selector, value): | ||||
|         self.page.keyboard.press("PageDown", delay=randint(200, 500)) | ||||
|     async def action_press_page_down(self, selector, value): | ||||
|         await self.page.keyboard.press("PageDown", delay=randint(200, 500)) | ||||
|  | ||||
|     def action_check_checkbox(self, selector, value): | ||||
|         self.page.locator(selector).check(timeout=1000) | ||||
|     async def action_check_checkbox(self, selector, value): | ||||
|         if not selector: | ||||
|             return | ||||
|  | ||||
|     def action_uncheck_checkbox(self, selector, value): | ||||
|         self.page.locator(selector, timeout=1000).uncheck(timeout=1000) | ||||
|         await self.page.locator(selector).check(timeout=self.action_timeout) | ||||
|  | ||||
|     async def action_uncheck_checkbox(self, selector, value): | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         await self.page.locator(selector).uncheck(timeout=self.action_timeout) | ||||
|              | ||||
|  | ||||
|     async def action_remove_elements(self, selector, value): | ||||
|         """Removes all elements matching the given selector from the DOM.""" | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         await self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())") | ||||
|  | ||||
|     async def action_make_all_child_elements_visible(self, selector, value): | ||||
|         """Recursively makes all child elements inside the given selector fully visible.""" | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         await self.page.locator(selector).locator("*").evaluate_all(""" | ||||
|             els => els.forEach(el => { | ||||
|                 el.style.display = 'block';   // Forces it to be displayed | ||||
|                 el.style.visibility = 'visible';   // Ensures it's not hidden | ||||
|                 el.style.opacity = '1';   // Fully opaque | ||||
|                 el.style.position = 'relative';   // Avoids 'absolute' hiding | ||||
|                 el.style.height = 'auto';   // Expands collapsed elements | ||||
|                 el.style.width = 'auto';   // Ensures full visibility | ||||
|                 el.removeAttribute('hidden');   // Removes hidden attribute | ||||
|                 el.classList.remove('hidden', 'd-none');  // Removes common CSS hidden classes | ||||
|             }) | ||||
|         """) | ||||
|  | ||||
| # Responsible for maintaining a live 'context' with the chrome CDP | ||||
| # @todo - how long do contexts live for anyway? | ||||
| @@ -201,7 +308,9 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|     # bump and kill this if idle after X sec | ||||
|     age_start = 0 | ||||
|     headers = {} | ||||
|  | ||||
|     # Track if resources are properly cleaned up | ||||
|     _is_cleaned_up = False | ||||
|      | ||||
|     # use a special driver, maybe locally etc | ||||
|     command_executor = os.getenv( | ||||
|         "PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL" | ||||
| @@ -220,17 +329,23 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|         self.age_start = time.time() | ||||
|         self.playwright_browser = playwright_browser | ||||
|         self.start_url = start_url | ||||
|         if self.context is None: | ||||
|             self.connect(proxy=proxy) | ||||
|         self._is_cleaned_up = False | ||||
|         self.proxy = proxy | ||||
|         # Note: connect() is now async and must be called separately | ||||
|  | ||||
|     def __del__(self): | ||||
|         # Ensure cleanup happens if object is garbage collected | ||||
|         # Note: cleanup is now async, so we can only mark as cleaned up here | ||||
|         self._is_cleaned_up = True | ||||
|  | ||||
|     # Connect and setup a new context | ||||
|     def connect(self, proxy=None): | ||||
|     async def connect(self, proxy=None): | ||||
|         # Should only get called once - test that | ||||
|         keep_open = 1000 * 60 * 5 | ||||
|         now = time.time() | ||||
|  | ||||
|         # @todo handle multiple contexts, bind a unique id from the browser on each req? | ||||
|         self.context = self.playwright_browser.new_context( | ||||
|         self.context = await self.playwright_browser.new_context( | ||||
|             accept_downloads=False,  # Should never be needed | ||||
|             bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others | ||||
|             extra_http_headers=self.headers, | ||||
| @@ -241,72 +356,142 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|             user_agent=manage_user_agent(headers=self.headers), | ||||
|         ) | ||||
|  | ||||
|  | ||||
|         self.page = self.context.new_page() | ||||
|         self.page = await self.context.new_page() | ||||
|  | ||||
|         # self.page.set_default_navigation_timeout(keep_open) | ||||
|         self.page.set_default_timeout(keep_open) | ||||
|         # @todo probably this doesnt work | ||||
|         self.page.on( | ||||
|             "close", | ||||
|             self.mark_as_closed, | ||||
|         ) | ||||
|         # Set event handlers | ||||
|         self.page.on("close", self.mark_as_closed) | ||||
|         # Listen for all console events and handle errors | ||||
|         self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|         logger.debug(f"Time to browser setup {time.time()-now:.2f}s") | ||||
|         self.page.wait_for_timeout(1 * 1000) | ||||
|         await self.page.wait_for_timeout(1 * 1000) | ||||
|  | ||||
|     def mark_as_closed(self): | ||||
|         logger.debug("Page closed, cleaning up..") | ||||
|         # Note: This is called from a sync context (event handler) | ||||
|         # so we'll just mark as cleaned up and let __del__ handle the rest | ||||
|         self._is_cleaned_up = True | ||||
|  | ||||
|     async def cleanup(self): | ||||
|         """Properly clean up all resources to prevent memory leaks""" | ||||
|         if self._is_cleaned_up: | ||||
|             return | ||||
|              | ||||
|         logger.debug("Cleaning up browser steps resources") | ||||
|          | ||||
|         # Clean up page | ||||
|         if hasattr(self, 'page') and self.page is not None: | ||||
|             try: | ||||
|                 # Force garbage collection before closing | ||||
|                 await self.page.request_gc() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error during page garbage collection: {str(e)}") | ||||
|                  | ||||
|             try: | ||||
|                 # Remove event listeners before closing | ||||
|                 self.page.remove_listener("close", self.mark_as_closed) | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error removing event listeners: {str(e)}") | ||||
|                  | ||||
|             try: | ||||
|                 await self.page.close() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error closing page: {str(e)}") | ||||
|              | ||||
|             self.page = None | ||||
|  | ||||
|         # Clean up context | ||||
|         if hasattr(self, 'context') and self.context is not None: | ||||
|             try: | ||||
|                 await self.context.close() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error closing context: {str(e)}") | ||||
|              | ||||
|             self.context = None | ||||
|              | ||||
|         self._is_cleaned_up = True | ||||
|         logger.debug("Browser steps resources cleanup complete") | ||||
|  | ||||
|     @property | ||||
|     def has_expired(self): | ||||
|         if not self.page: | ||||
|         if not self.page or self._is_cleaned_up: | ||||
|             return True | ||||
|          | ||||
|         # Check if session has expired based on age | ||||
|         max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10))  # Default 10 minutes | ||||
|         if (time.time() - self.age_start) > max_age_seconds: | ||||
|             logger.debug(f"Browser steps session expired after {max_age_seconds} seconds") | ||||
|             return True | ||||
|              | ||||
|         return False | ||||
|  | ||||
|  | ||||
|     def get_current_state(self): | ||||
|     async def get_current_state(self): | ||||
|         """Return the screenshot and interactive elements mapping, generally always called after action_()""" | ||||
|         import importlib.resources | ||||
|         import json | ||||
|         # because we for now only run browser steps in playwright mode (not puppeteer mode) | ||||
|         from changedetectionio.content_fetchers.playwright import capture_full_page_async | ||||
|  | ||||
|         # Safety check - don't proceed if resources are cleaned up | ||||
|         if self._is_cleaned_up or self.page is None: | ||||
|             logger.warning("Attempted to get current state after cleanup") | ||||
|             return (None, None) | ||||
|  | ||||
|         xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() | ||||
|  | ||||
|         now = time.time() | ||||
|         self.page.wait_for_timeout(1 * 1000) | ||||
|         await self.page.wait_for_timeout(1 * 1000) | ||||
|  | ||||
|         # The actual screenshot | ||||
|         screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40) | ||||
|         screenshot = None | ||||
|         xpath_data = None | ||||
|          | ||||
|         try: | ||||
|             # Get screenshot first | ||||
|             screenshot = await capture_full_page_async(page=self.page) | ||||
|             if not screenshot: | ||||
|                 logger.error("No screenshot was retrieved :((") | ||||
|  | ||||
|         self.page.evaluate("var include_filters=''") | ||||
|         # Go find the interactive elements | ||||
|         # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers? | ||||
|         elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' | ||||
|         xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements) | ||||
|         xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") | ||||
|         # So the JS will find the smallest one first | ||||
|         xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) | ||||
|         logger.debug(f"Time to complete get_current_state of browser {time.time()-now:.2f}s") | ||||
|         # except | ||||
|         # playwright._impl._api_types.Error: Browser closed. | ||||
|         # @todo show some countdown timer? | ||||
|             logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s") | ||||
|  | ||||
|             # Then get interactive elements | ||||
|             now = time.time() | ||||
|             await self.page.evaluate("var include_filters=''") | ||||
|             await self.page.request_gc() | ||||
|  | ||||
|             scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' | ||||
|  | ||||
|             MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|             xpath_data = json.loads(await self.page.evaluate(xpath_element_js, { | ||||
|                 "visualselector_xpath_selectors": scan_elements, | ||||
|                 "max_height": MAX_TOTAL_HEIGHT | ||||
|             })) | ||||
|             await self.page.request_gc() | ||||
|  | ||||
|             # Sort elements by size | ||||
|             xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) | ||||
|             logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s") | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error getting current state: {str(e)}") | ||||
|             # If the page has navigated (common with logins) then the context is destroyed on navigation, continue | ||||
|             # I'm not sure that this is required anymore because we have the "expect navigation wrapper" at the top | ||||
|             if "Execution context was destroyed" in str(e): | ||||
|                 logger.debug("Execution context was destroyed, most likely because of navigation, continuing...") | ||||
|             pass | ||||
|  | ||||
|             # Attempt recovery - force garbage collection | ||||
|             try: | ||||
|                 await self.page.request_gc() | ||||
|             except: | ||||
|                 pass | ||||
|          | ||||
|         # Request garbage collection one final time | ||||
|         try: | ||||
|             await self.page.request_gc() | ||||
|         except: | ||||
|             pass | ||||
|              | ||||
|         return (screenshot, xpath_data) | ||||
|  | ||||
|     def request_visualselector_data(self): | ||||
|         """ | ||||
|         Does the same that the playwright operation in content_fetcher does | ||||
|         This is used to just bump the VisualSelector data so it' ready to go if they click on the tab | ||||
|         @todo refactor and remove duplicate code, add include_filters | ||||
|         :param xpath_data: | ||||
|         :param screenshot: | ||||
|         :param current_include_filters: | ||||
|         :return: | ||||
|         """ | ||||
|         import importlib.resources | ||||
|         self.page.evaluate("var include_filters=''") | ||||
|         xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() | ||||
|         from changedetectionio.content_fetchers import visualselector_xpath_selectors | ||||
|         xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) | ||||
|         xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") | ||||
|         screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|  | ||||
|         return (screenshot, xpath_data) | ||||
|   | ||||
| @@ -1,17 +0,0 @@ | ||||
from playwright.sync_api import PlaywrightContextManager

# Playwright is designed to run as a context manager, but we do something horrible and hacky:
# we hold the session open for as long as possible, then shut it down and open a new one.
# That means we don't get to use PlaywrightContextManager's __enter__/__exit__ protocol directly.
# To work around this, goodbye() acts the same as __exit__().
#
# But actually I think this is because the context is opened correctly with __enter__() but we
# time out the connection, then there's some lock condition where we can't destroy it without it hanging.

class c_PlaywrightContextManager(PlaywrightContextManager):

    def goodbye(self) -> None:
        # Explicit teardown hook - equivalent to leaving the `with` block normally.
        self.__exit__()

# Drop-in replacement for playwright's sync_playwright() returning our subclass,
# so callers get the extra goodbye() teardown method.
def c_sync_playwright() -> PlaywrightContextManager:
    return c_PlaywrightContextManager()
							
								
								
									
										75
									
								
								changedetectionio/blueprint/imports/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,75 @@ | ||||
| from flask import Blueprint, request, redirect, url_for, flash, render_template | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
| from changedetectionio import worker_handler | ||||
| from changedetectionio.blueprint.imports.importer import ( | ||||
|     import_url_list,  | ||||
|     import_distill_io_json,  | ||||
|     import_xlsx_wachete,  | ||||
|     import_xlsx_custom | ||||
| ) | ||||
|  | ||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
    """Build the 'imports' blueprint which handles bulk import of watches.

    On POST, three import sources are supported: a plain URL list, a
    Distill.io JSON export, and an .xlsx spreadsheet (either the Wachete
    layout or a user-defined column mapping). Every successfully imported
    watch UUID is pushed onto the update queue for an immediate recheck.

    :param datastore: the application's ChangeDetectionStore
    :param update_q: priority queue feeding the update workers
    :param queuedWatchMetaData: module providing PrioritizedItem
    :return: the configured Flask Blueprint
    """
    import_blueprint = Blueprint('imports', __name__, template_folder="templates")

    @import_blueprint.route("/import", methods=['GET', 'POST'])
    @login_optionally_required
    def import_page():
        remaining_urls = []
        from changedetectionio import forms

        if request.method == 'POST':
            # URL List import
            if request.values.get('urls') and request.values.get('urls').strip():
                # Import and push into the queue for immediate update check
                importer_handler = import_url_list()
                importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
                for uuid in importer_handler.new_uuids:
                    worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

                # URLs that failed validation are handed back to the textarea for correction
                if len(importer_handler.remaining_data) == 0:
                    return redirect(url_for('watchlist.index'))
                else:
                    remaining_urls = importer_handler.remaining_data

            # Distill.io import
            if request.values.get('distill-io') and request.values.get('distill-io').strip():
                # Import and push into the queue for immediate update check
                d_importer = import_distill_io_json()
                d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
                for uuid in d_importer.new_uuids:
                    worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

            # XLSX importer
            if request.files and request.files.get('xlsx_file'):
                file = request.files['xlsx_file']

                if request.values.get('file_mapping') == 'wachete':
                    w_importer = import_xlsx_wachete()
                    w_importer.run(data=file, flash=flash, datastore=datastore)
                else:
                    w_importer = import_xlsx_custom()
                    # Build a mapping of spreadsheet column number -> column type,
                    # from the custom_xlsx[col_N] / custom_xlsx[col_type_N] form fields.
                    # (renamed from `map` to avoid shadowing the builtin)
                    col_mapping = {}
                    for i in range(10):
                        c = request.values.get(f"custom_xlsx[col_{i}]")
                        v = request.values.get(f"custom_xlsx[col_type_{i}]")
                        if c and v:
                            col_mapping[int(c)] = v

                    w_importer.import_profile = col_mapping
                    w_importer.run(data=file, flash=flash, datastore=datastore)

                for uuid in w_importer.new_uuids:
                    worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))

        # Could be some remaining, or we could be on GET
        form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
        output = render_template("import.html",
                                form=form,
                                import_url_list_remaining="\n".join(remaining_urls),
                                original_distill_json=''
                                )
        return output

    return import_blueprint
| @@ -1,6 +1,5 @@ | ||||
| from abc import ABC, abstractmethod | ||||
| from abc import abstractmethod | ||||
| import time | ||||
| import validators | ||||
| from wtforms import ValidationError | ||||
| from loguru import logger | ||||
| 
 | ||||
| @@ -241,7 +240,7 @@ class import_xlsx_custom(Importer): | ||||
|             return | ||||
| 
 | ||||
|         # @todo check at least 2 rows, same in other method | ||||
|         from .forms import validate_url | ||||
|         from changedetectionio.forms import validate_url | ||||
|         row_i = 1 | ||||
| 
 | ||||
|         try: | ||||
| @@ -300,4 +299,4 @@ class import_xlsx_custom(Importer): | ||||
|             row_i += 1 | ||||
| 
 | ||||
|         flash( | ||||
|             "{} imported from custom .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now)) | ||||
|             "{} imported from custom .xlsx in {:.2f}s".format(len(self.new_uuids), time.time() - now)) | ||||
| @@ -13,29 +13,27 @@ | ||||
|     </div> | ||||
| 
 | ||||
|     <div class="box-wrap inner"> | ||||
|         <form class="pure-form" action="{{url_for('import_page')}}" method="POST" enctype="multipart/form-data"> | ||||
|         <form class="pure-form" action="{{url_for('imports.import_page')}}" method="POST" enctype="multipart/form-data"> | ||||
|             <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"> | ||||
|             <div class="tab-pane-inner" id="url-list"> | ||||
|                     <legend> | ||||
|                 <div class="pure-control-group"> | ||||
|                         Enter one URL per line, and optionally add tags for each URL after a space, delineated by comma | ||||
|                         (,): | ||||
|                         <br> | ||||
|                         <code>https://example.com tag1, tag2, last tag</code> | ||||
|                         <br> | ||||
|                         <p><strong>Example:  </strong><code>https://example.com tag1, tag2, last tag</code></p> | ||||
|                         URLs which do not pass validation will stay in the textarea. | ||||
|                     </legend> | ||||
|                 </div> | ||||
|                 {{ render_field(form.processor, class="processor") }} | ||||
| 
 | ||||
|                  | ||||
|                 <div class="pure-control-group"> | ||||
|                     <textarea name="urls" class="pure-input-1-2" placeholder="https://" | ||||
|                               style="width: 100%; | ||||
|                                 font-family:monospace; | ||||
|                                 white-space: pre; | ||||
|                                 overflow-wrap: normal; | ||||
|                                 overflow-x: scroll;" rows="25">{{ import_url_list_remaining }}</textarea> | ||||
| 
 | ||||
| <div id="quick-watch-processor-type"> | ||||
| 
 | ||||
|                     </div> | ||||
|                  </div> | ||||
|                  <div id="quick-watch-processor-type"></div> | ||||
| 
 | ||||
|             </div> | ||||
| 
 | ||||
| @@ -43,7 +41,7 @@ | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
|                     <legend> | ||||
|                     <div class="pure-control-group"> | ||||
|                         Copy and Paste your Distill.io watch 'export' file, this should be a JSON file.<br> | ||||
|                         This is <i>experimental</i>, supported fields are <code>name</code>, <code>uri</code>, <code>tags</code>, <code>config:selections</code>, the rest (including <code>schedule</code>) are ignored. | ||||
|                         <br> | ||||
| @@ -51,7 +49,7 @@ | ||||
|                         How to export? <a href="https://distill.io/docs/web-monitor/how-export-and-import-monitors/">https://distill.io/docs/web-monitor/how-export-and-import-monitors/</a><br> | ||||
|                         Be sure to set your default fetcher to Chrome if required.<br> | ||||
|                         </p> | ||||
|                     </legend> | ||||
|                     </div> | ||||
| 
 | ||||
| 
 | ||||
|                     <textarea name="distill-io" class="pure-input-1-2" style="width: 100%; | ||||
| @@ -122,4 +120,4 @@ | ||||
|     </div> | ||||
| </div> | ||||
| 
 | ||||
| {% endblock %} | ||||
| {% endblock %} | ||||
| @@ -4,6 +4,7 @@ from flask import Blueprint, flash, redirect, url_for | ||||
| from flask_login import login_required | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio import worker_handler | ||||
| from queue import PriorityQueue | ||||
|  | ||||
| PRICE_DATA_TRACK_ACCEPT = 'accepted' | ||||
| @@ -19,14 +20,14 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue | ||||
|         datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT | ||||
|         datastore.data['watching'][uuid]['processor'] = 'restock_diff' | ||||
|         datastore.data['watching'][uuid].clear_watch() | ||||
|         update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|         return redirect(url_for("index")) | ||||
|         worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|         return redirect(url_for("watchlist.index")) | ||||
|  | ||||
|     @login_required | ||||
|     @price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET']) | ||||
|     def reject(uuid): | ||||
|         datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT | ||||
|         return redirect(url_for("index")) | ||||
|         return redirect(url_for("watchlist.index")) | ||||
|  | ||||
|  | ||||
|     return price_data_follower_blueprint | ||||
|   | ||||
							
								
								
									
										1
									
								
								changedetectionio/blueprint/rss/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1 @@ | ||||
# (value, label) choices for the RSS content-format selector shown in settings
RSS_FORMAT_TYPES = [('plaintext', 'Plain text'), ('html', 'HTML Color')]
							
								
								
									
										147
									
								
								changedetectionio/blueprint/rss/blueprint.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,147 @@ | ||||
|  | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from feedgen.feed import FeedGenerator | ||||
| from flask import Blueprint, make_response, request, url_for, redirect | ||||
| from loguru import logger | ||||
| import datetime | ||||
| import pytz | ||||
| import re | ||||
| import time | ||||
|  | ||||
|  | ||||
| BAD_CHARS_REGEX=r'[\x00-\x08\x0B\x0C\x0E-\x1F]' | ||||
|  | ||||
| # Anything that is not text/UTF-8 should be stripped before it breaks feedgen (such as binary data etc) | ||||
def scan_invalid_chars_in_rss(content):
    """Return True if `content` contains a control character that would break feedgen.

    Logs (via loguru) the first offending character with ~20 chars of
    surrounding context; one hit is enough to know the content needs cleaning,
    so the original finditer-loop is replaced with a plain re.search.

    :param content: string destined for an RSS entry
    :return: True when an invalid character was found, else False
    """
    match = re.search(BAD_CHARS_REGEX, content)
    if not match:
        return False

    i = match.start()
    bad_char = content[i]
    hex_value = f"0x{ord(bad_char):02x}"
    # Grab context either side so the log shows where the bad byte sits
    start = max(0, i - 20)
    end = min(len(content), i + 21)
    context = content[start:end].replace('\n', '\\n').replace('\r', '\\r')
    logger.warning(f"Invalid char {hex_value} at pos {i}: ...{context}...")
    return True
|  | ||||
|  | ||||
def clean_entry_content(content):
    """Strip every character matching BAD_CHARS_REGEX so feedgen accepts the text."""
    return re.sub(BAD_CHARS_REGEX, '', content)
|  | ||||
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the 'rss' blueprint serving the change-detection RSS feed.

    The feed is token-protected (rss_access_token), can be filtered by tag,
    and renders each unviewed change as an HTML diff entry.
    """
    rss_blueprint = Blueprint('rss', __name__)

    # Some RSS reader situations ended up with rss/ (forward slash after RSS) due
    # to some earlier blueprint rerouting work, it should goto feed.
    @rss_blueprint.route("/", methods=['GET'])
    def extraslash():
        return redirect(url_for('rss.feed'))

    # Import the login decorator if needed
    # from changedetectionio.auth_decorator import login_optionally_required
    @rss_blueprint.route("", methods=['GET'])
    def feed():
        now = time.time()
        # Always requires token set - the feed is public-URL accessible, the token is the only auth
        app_rss_token = datastore.data['settings']['application'].get('rss_access_token')
        rss_url_token = request.args.get('token')
        if rss_url_token != app_rss_token:
            return "Access denied, bad token", 403

        from changedetectionio import diff
        limit_tag = request.args.get('tag', '').lower().strip()
        # Be sure limit_tag is a uuid - the query arg may be a tag *title*, map it to its uuid
        for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
            if limit_tag == tag.get('title', '').lower().strip():
                limit_tag = uuid

        # Sort by last_changed and add the uuid which is usually the key..
        sorted_watches = []

        # @todo needs a .itemsWithTag() or something - then we can use that in Jinja2 and throw this away
        for uuid, watch in datastore.data['watching'].items():
            # @todo tag notification_muted skip also (improve Watch model)
            if datastore.data['settings']['application'].get('rss_hide_muted_watches') and watch.get('notification_muted'):
                continue
            if limit_tag and not limit_tag in watch['tags']:
                continue
            watch['uuid'] = uuid
            sorted_watches.append(watch)

        sorted_watches.sort(key=lambda x: x.last_changed, reverse=False)

        fg = FeedGenerator()
        fg.title('changedetection.io')
        fg.description('Feed description')
        fg.link(href='https://changedetection.io')

        # 'html' format means the diff is rendered with coloured markup instead of plain text
        html_colour_enable = False
        if datastore.data['settings']['application'].get('rss_content_format') == 'html':
            html_colour_enable = True

        for watch in sorted_watches:

            dates = list(watch.history.keys())
            # Re #521 - Don't bother processing this one if there's less than 2 snapshots, means we never had a change detected.
            if len(dates) < 2:
                continue

            if not watch.viewed:
                # Re #239 - GUID needs to be individual for each event
                # @todo In the future make this a configurable link back (see work on BASE_URL https://github.com/dgtlmoon/changedetection.io/pull/228)
                guid = "{}/{}".format(watch['uuid'], watch.last_changed)
                fe = fg.add_entry()

                # Include a link to the diff page, they will have to login here to see if password protection is enabled.
                # Description is the page you watch, link takes you to the diff JS UI page
                # Dict val base_url will get overriden with the env var if it is set.
                ext_base_url = datastore.data['settings']['application'].get('active_base_url')
                # @todo fix

                # Because we are called via whatever web server, flask should figure out the right path (
                diff_link = {'href': url_for('ui.ui_views.diff_history_page', uuid=watch['uuid'], _external=True)}

                fe.link(link=diff_link)

                # @todo watch should be a getter - watch.get('title') (internally if URL else..)

                watch_title = watch.get('title') if watch.get('title') else watch.get('url')
                fe.title(title=watch_title)
                try:
                    # Diff of the two most recent snapshots; equal lines excluded to keep entries small
                    html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
                                                 newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
                                                 include_equal=False,
                                                 line_feed_sep="<br>",
                                                 html_colour=html_colour_enable
                                                 )
                except FileNotFoundError as e:
                    # Snapshot file vanished from disk - emit a placeholder entry rather than failing the whole feed
                    html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."

                # @todo Make this configurable and also consider html-colored markup
                # @todo User could decide if <link> goes to the diff page, or to the watch link
                rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"

                content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link)

                # Out of range chars could also break feedgen
                if scan_invalid_chars_in_rss(content):
                    content = clean_entry_content(content)

                fe.content(content=content, type='CDATA')
                fe.guid(guid, permalink=False)
                dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
                dt = dt.replace(tzinfo=pytz.UTC)
                fe.pubDate(dt)

        response = make_response(fg.rss_str())
        response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
        logger.trace(f"RSS generated in {time.time() - now:.3f}s")
        return response

    return rss_blueprint
							
								
								
									
										145
									
								
								changedetectionio/blueprint/settings/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,145 @@ | ||||
| import os | ||||
| from copy import deepcopy | ||||
| from datetime import datetime | ||||
| from zoneinfo import ZoneInfo, available_timezones | ||||
| import secrets | ||||
| import flask_login | ||||
| from flask import Blueprint, render_template, request, redirect, url_for, flash | ||||
|  | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
|  | ||||
|  | ||||
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the 'settings' blueprint: global settings page, API-key reset and notification logs."""
    settings_blueprint = Blueprint('settings', __name__, template_folder="templates")

    @settings_blueprint.route("", methods=['GET', "POST"])
    @login_optionally_required
    def settings_page():
        from changedetectionio import forms

        # Work on a copy so form pre-population never mutates the live settings
        default = deepcopy(datastore.data['settings'])
        if datastore.proxy_list is not None:
            available_proxies = list(datastore.proxy_list.keys())
            # When enabled
            system_proxy = datastore.data['settings']['requests']['proxy']
            # In the case it doesn't exist anymore
            if not system_proxy in available_proxies:
                system_proxy = None

            default['requests']['proxy'] = system_proxy if system_proxy is not None else available_proxies[0]
            # Used by the form handler to keep or remove the proxy settings
            default['proxy_list'] = available_proxies[0]

        # Don't use form.data on POST so that it doesn't override the checkbox status from the POST status
        form = forms.globalSettingsForm(formdata=request.form if request.method == 'POST' else None,
                                        data=default,
                                        extra_notification_tokens=datastore.get_unique_notification_tokens_available()
                                        )

        # Remove the last option 'System default'
        form.application.form.notification_format.choices.pop()

        if datastore.proxy_list is None:
            # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead
            del form.requests.form.proxy
        else:
            form.requests.form.proxy.choices = []
            for p in datastore.proxy_list:
                form.requests.form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label'])))

        if request.method == 'POST':
            # Password unset is a GET, but we can lock the session to a salted env password to always need the password
            if form.application.form.data.get('removepassword_button', False):
                # SALTED_PASS means the password is "locked" to what we set in the Env var
                if not os.getenv("SALTED_PASS", False):
                    datastore.remove_password()
                    flash("Password protection removed.", 'notice')
                    flask_login.logout_user()
                    return redirect(url_for('settings.settings_page'))

            if form.validate():
                # Don't set password to False when a password is set - should be only removed with the `removepassword` button
                app_update = dict(deepcopy(form.data['application']))

                # Never update password with '' or False (Added by wtforms when not in submission)
                if 'password' in app_update and not app_update['password']:
                    del (app_update['password'])

                datastore.data['settings']['application'].update(app_update)

                # Handle dynamic worker count adjustment - compare old vs new before updating
                old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
                new_worker_count = form.data['requests'].get('workers', 1)

                datastore.data['settings']['requests'].update(form.data['requests'])

                # Adjust worker count if it changed
                if new_worker_count != old_worker_count:
                    # Imported here to avoid a circular import at module load time
                    from changedetectionio import worker_handler
                    from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds

                    result = worker_handler.adjust_async_worker_count(
                        new_count=new_worker_count,
                        update_q=update_q,
                        notification_q=notification_q,
                        app=app,
                        datastore=ds
                    )

                    if result['status'] == 'success':
                        flash(f"Worker count adjusted: {result['message']}", 'notice')
                    elif result['status'] == 'not_supported':
                        flash("Dynamic worker adjustment not supported for sync workers", 'warning')
                    elif result['status'] == 'error':
                        flash(f"Error adjusting workers: {result['message']}", 'error')

                # Setting a new password forces a logout + redirect so the user must re-authenticate
                if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
                    datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
                    datastore.needs_write_urgent = True
                    flash("Password protection enabled.", 'notice')
                    flask_login.logout_user()
                    return redirect(url_for('watchlist.index'))

                datastore.needs_write_urgent = True
                flash("Settings updated.")

            else:
                flash("An error occurred, please see below.", "error")

        # Convert to ISO 8601 format, all date/time relative events stored as UTC time
        utc_time = datetime.now(ZoneInfo("UTC")).isoformat()

        output = render_template("settings.html",
                                api_key=datastore.data['settings']['application'].get('api_access_token'),
                                available_timezones=sorted(available_timezones()),
                                emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
                                extra_notification_token_placeholder_info=datastore.get_unique_notification_token_placeholders_available(),
                                form=form,
                                hide_remove_pass=os.getenv("SALTED_PASS", False),
                                min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
                                settings_application=datastore.data['settings']['application'],
                                timezone_default_config=datastore.data['settings']['application'].get('timezone'),
                                utc_time=utc_time,
                                )

        return output

    @settings_blueprint.route("/reset-api-key", methods=['GET'])
    @login_optionally_required
    def settings_reset_api_key():
        # Regenerate the API access token and bounce back to the API tab
        secret = secrets.token_hex(16)
        datastore.data['settings']['application']['api_access_token'] = secret
        datastore.needs_write_urgent = True
        flash("API Key was regenerated.")
        return redirect(url_for('settings.settings_page')+'#api')

    @settings_blueprint.route("/notification-logs", methods=['GET'])
    @login_optionally_required
    def notification_logs():
        from changedetectionio.flask_app import notification_debug_log
        output = render_template("notification-log.html",
                               logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])
        return output

    return settings_blueprint
| @@ -4,7 +4,7 @@ | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script> | ||||
|     const notification_base_url="{{url_for('ajax_callback_send_notification_test', mode="global-settings")}}"; | ||||
|     const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}"; | ||||
| {% if emailprefix %} | ||||
|     const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}'); | ||||
| {% endif %} | ||||
| @@ -22,13 +22,14 @@ | ||||
|             <li class="tab"><a href="#notifications">Notifications</a></li> | ||||
|             <li class="tab"><a href="#fetching">Fetching</a></li> | ||||
|             <li class="tab"><a href="#filters">Global Filters</a></li> | ||||
|             <li class="tab"><a href="#ui-options">UI Options</a></li> | ||||
|             <li class="tab"><a href="#api">API</a></li> | ||||
|             <li class="tab"><a href="#timedate">Time & Date</a></li> | ||||
|             <li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li> | ||||
|         </ul> | ||||
|     </div> | ||||
|     <div class="box-wrap inner"> | ||||
|         <form class="pure-form pure-form-stacked settings" action="{{url_for('settings_page')}}" method="POST"> | ||||
|         <form class="pure-form pure-form-stacked settings" action="{{url_for('settings.settings_page')}}" method="POST"> | ||||
|             <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" > | ||||
|             <div class="tab-pane-inner" id="general"> | ||||
|                 <fieldset> | ||||
| @@ -78,7 +79,10 @@ | ||||
|                         {{ render_field(form.application.form.pager_size) }} | ||||
|                         <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span> | ||||
|                     </div> | ||||
| 
 | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.application.form.rss_content_format) }} | ||||
|                         <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.extract_title_as_title) }} | ||||
|                         <span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span> | ||||
| @@ -131,6 +135,12 @@ | ||||
|                         {{ render_field(form.application.form.webdriver_delay) }} | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_field(form.requests.form.workers) }} | ||||
|                     {% set worker_info = get_worker_status_info() %} | ||||
|                     <span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br> | ||||
|                     Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group inline-radio"> | ||||
|                     {{ render_field(form.requests.form.default_ua) }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
| @@ -203,7 +213,7 @@ nav | ||||
|                     </div> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     <a href="{{url_for('settings_reset_api_key')}}" class="pure-button button-small button-cancel">Regenerate API key</a> | ||||
|                     <a href="{{url_for('settings.settings_reset_api_key')}}" class="pure-button button-small button-cancel">Regenerate API key</a> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     <h4>Chrome Extension</h4> | ||||
| @@ -214,7 +224,7 @@ nav | ||||
|                         <a id="chrome-extension-link" | ||||
|                            title="Try our new Chrome Extension!" | ||||
|                            href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop"> | ||||
|                             <img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome"> | ||||
|                             <img src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" alt="Chrome store icon"> | ||||
|                             Chrome Webstore | ||||
|                         </a> | ||||
|                     </p> | ||||
| @@ -237,6 +247,16 @@ nav | ||||
|                     </p> | ||||
|                 </div> | ||||
|             </div> | ||||
|             <div class="tab-pane-inner" id="ui-options"> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }} | ||||
|                     <span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }} | ||||
|                     <span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span> | ||||
|                 </div> | ||||
|             </div> | ||||
|             <div class="tab-pane-inner" id="proxies"> | ||||
|                 <div id="recommended-proxy"> | ||||
|                     <div> | ||||
| @@ -280,9 +300,7 @@ nav | ||||
|                          | ||||
|                     </div> | ||||
|                 </div> | ||||
|                 <p> | ||||
|                     Your proxy provider may need to whitelist our IP of <code>204.15.192.195</code> | ||||
|                 </p> | ||||
| 
 | ||||
|                <p><strong>Tip</strong>: "Residential" and "Mobile" proxy types can be more successful than "Data Center" for blocked websites. | ||||
| 
 | ||||
|                 <div class="pure-control-group" id="extra-proxies-setting"> | ||||
| @@ -301,8 +319,8 @@ nav | ||||
|             <div id="actions"> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_button(form.save_button) }} | ||||
|                     <a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a> | ||||
|                     <a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a> | ||||
|                     <a href="{{url_for('watchlist.index')}}" class="pure-button button-small button-cancel">Back</a> | ||||
|                     <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </form> | ||||
| @@ -104,6 +104,9 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|             uuid = list(datastore.data['settings']['application']['tags'].keys()).pop() | ||||
|  | ||||
|         default = datastore.data['settings']['application']['tags'].get(uuid) | ||||
|         if not default: | ||||
|             flash("Tag not found", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         form = group_restock_settings_form( | ||||
|                                        formdata=request.form if request.method == 'POST' else None, | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script> | ||||
|     const notification_base_url="{{url_for('ajax_callback_send_notification_test', mode="group-settings")}}"; | ||||
|     const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="group-settings")}}"; | ||||
| </script> | ||||
|  | ||||
| <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> | ||||
| @@ -13,6 +13,7 @@ | ||||
|     /*const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');*/ | ||||
| /*{% endif %}*/ | ||||
|  | ||||
| {% set has_tag_filters_extra='' %} | ||||
|  | ||||
| </script> | ||||
|  | ||||
| @@ -46,59 +47,12 @@ | ||||
|             </div> | ||||
|  | ||||
|             <div class="tab-pane-inner" id="filters-and-triggers"> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% set field = render_field(form.include_filters, | ||||
|                             rows=5, | ||||
|                             placeholder="#example | ||||
| xpath://body/div/span[contains(@class, 'example-class')]", | ||||
|                             class="m-d") | ||||
|                         %} | ||||
|                         {{ field }} | ||||
|                         {% if '/text()' in  field %} | ||||
|                           <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br> | ||||
|                         {% endif %} | ||||
|                         <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br> | ||||
|                     <div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div> | ||||
|                     <ul id="advanced-help-selectors"> | ||||
|                         <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> | ||||
|                         <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed). | ||||
|                             <ul> | ||||
|                                 <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required,  <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li> | ||||
|                                 {% if jq_support %} | ||||
|                                 <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li> | ||||
|                                 {% else %} | ||||
|                                 <li>jq support not installed</li> | ||||
|                                 {% endif %} | ||||
|                             </ul> | ||||
|                         </li> | ||||
|                         <li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code> | ||||
|                             <ul> | ||||
|                                 <li>Example:  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a | ||||
|                                 href="http://xpather.com/" target="new">test your XPath here</a></li> | ||||
|                                 <li>Example: Get all titles from an RSS feed <code>//title/text()</code></li> | ||||
|                                 <li>To use XPath1.0: Prefix with <code>xpath1:</code></li> | ||||
|                             </ul> | ||||
|                             </li> | ||||
|                     </ul> | ||||
|                     Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a | ||||
|                                 href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br> | ||||
|                 </span> | ||||
|                     </div> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_field(form.subtractive_selectors, rows=5, placeholder="header | ||||
| footer | ||||
| nav | ||||
| .stockticker | ||||
| //*[contains(text(), 'Advertisement')]") }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                           <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li> | ||||
|                           <li> Don't paste HTML here, use only CSS and XPath selectors </li> | ||||
|                           <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li> | ||||
|                         </ul> | ||||
|                       </span> | ||||
|                 </fieldset> | ||||
|  | ||||
|                 <p>These settings are <strong><i>added</i></strong> to any existing watch configurations.</p> | ||||
|                 {% include "edit/include_subtract.html" %} | ||||
|                 <div class="text-filtering border-fieldset"> | ||||
|                     <h3>Text filtering</h3> | ||||
|                     {% include "edit/text-options.html" %} | ||||
|                 </div> | ||||
|             </div> | ||||
|  | ||||
|         {# rendered sub Template #} | ||||
| @@ -112,7 +66,7 @@ nav | ||||
|                     <div  class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_muted) }} | ||||
|                     </div> | ||||
|                     {% if is_html_webdriver %} | ||||
|                     {% if 1 %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_screenshot) }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
| @@ -124,7 +78,7 @@ nav | ||||
|                         {% if has_default_notification_urls %} | ||||
|                         <div class="inline-warning"> | ||||
|                             <img class="inline-warning-icon" src="{{url_for('static_content', group='images', filename='notice.svg')}}" alt="Look out!" title="Lookout!" > | ||||
|                             There are <a href="{{ url_for('settings_page')}}#notifications">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only ‐ an empty Notification URL list here will still send notifications. | ||||
|                             There are <a href="{{ url_for('settings.settings_page')}}#notifications">system-wide notification URLs enabled</a>, this form will override notification settings for this watch only ‐ an empty Notification URL list here will still send notifications. | ||||
|                         </div> | ||||
|                         {% endif %} | ||||
|                         <a href="#notifications" id="notification-setting-reset-to-default" class="pure-button button-xsmall" style="right: 20px; top: 20px; position: absolute; background-color: #5f42dd; border-radius: 4px; font-size: 70%; color: #fff">Use system defaults</a> | ||||
|   | ||||
| @@ -47,7 +47,7 @@ | ||||
|                     <a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a> | ||||
|                 </td> | ||||
|                 <td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td> | ||||
|                 <td class="title-col inline"> <a href="{{url_for('index', tag=uuid) }}">{{ tag.title }}</a></td> | ||||
|                 <td class="title-col inline"> <a href="{{url_for('watchlist.index', tag=uuid) }}">{{ tag.title }}</a></td> | ||||
|                 <td> | ||||
|                     <a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">Edit</a>  | ||||
|                     <a class="pure-button pure-button-primary" href="{{ url_for('tags.delete', uuid=uuid) }}" title="Deletes and removes tag">Delete</a> | ||||
|   | ||||
							
								
								
									
										317
									
								
								changedetectionio/blueprint/ui/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,317 @@ | ||||
| import time | ||||
| from flask import Blueprint, request, redirect, url_for, flash, render_template, session | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint | ||||
| from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint | ||||
| from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint | ||||
|  | ||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
    """Apply a bulk operation from the watch-overview checkbox form.

    Args:
        op: Operation keyword: 'delete', 'pause', 'unpause', 'mark-viewed',
            'mute', 'unmute', 'recheck', 'clear-errors', 'clear-history',
            'notification-default' or 'assign-tag'. Unknown values are a
            no-op (the per-watch change signal is still sent).
        uuids: List of watch UUID strings; UUIDs not present in the datastore
            are skipped silently.
        datastore: ChangeDetectionStore holding the watches.
        worker_handler: Used to queue watches for rechecking ('recheck').
        update_q: Queue handed to worker_handler for queued items.
        queuedWatchMetaData: Supplies PrioritizedItem for queue entries.
        watch_check_update: Signal fired once per UUID after the operation so
            listeners (e.g. the realtime UI) can refresh.
        extra_data: Operation-specific payload; for 'assign-tag' this is the
            tag title to create/assign.
        emit_flash: When True, flash a one-line summary of what was done.
    """
    # Imported locally; this helper is only ever called inside a request context.
    from flask import flash

    # Only operate on UUIDs that actually exist in the datastore.
    existing = [uuid for uuid in uuids if datastore.data['watching'].get(uuid)]

    if op == 'delete':
        for uuid in existing:
            datastore.delete(uuid)
        if emit_flash:
            flash(f"{len(uuids)} watches deleted")

    elif op == 'pause':
        for uuid in existing:
            datastore.data['watching'][uuid]['paused'] = True
        if emit_flash:
            flash(f"{len(uuids)} watches paused")

    elif op == 'unpause':
        for uuid in existing:
            # Fixed: previously indexed with uuid.strip() while the existence
            # check used the raw uuid — the lookup key must match the check.
            datastore.data['watching'][uuid]['paused'] = False
        if emit_flash:
            flash(f"{len(uuids)} watches unpaused")

    elif op == 'mark-viewed':
        # Hoisted: one timestamp for the whole batch.
        now = int(time.time())
        for uuid in existing:
            datastore.set_last_viewed(uuid, now)
        if emit_flash:
            flash(f"{len(uuids)} watches updated")

    elif op == 'mute':
        for uuid in existing:
            datastore.data['watching'][uuid]['notification_muted'] = True
        if emit_flash:
            flash(f"{len(uuids)} watches muted")

    elif op == 'unmute':
        for uuid in existing:
            datastore.data['watching'][uuid]['notification_muted'] = False
        if emit_flash:
            flash(f"{len(uuids)} watches un-muted")

    elif op == 'recheck':
        for uuid in existing:
            # Recheck and require a full reprocessing
            worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
        if emit_flash:
            flash(f"{len(uuids)} watches queued for rechecking")

    elif op == 'clear-errors':
        for uuid in existing:
            datastore.data['watching'][uuid]["last_error"] = False
        if emit_flash:
            flash(f"{len(uuids)} watches errors cleared")

    elif op == 'clear-history':
        for uuid in existing:
            datastore.clear_watch_history(uuid)
        if emit_flash:
            flash(f"{len(uuids)} watches cleared/reset.")

    elif op == 'notification-default':
        from changedetectionio.notification import (
            default_notification_format_for_watch
        )
        for uuid in existing:
            watch = datastore.data['watching'][uuid]
            watch['notification_title'] = None
            watch['notification_body'] = None
            watch['notification_urls'] = []
            watch['notification_format'] = default_notification_format_for_watch
        if emit_flash:
            flash(f"{len(uuids)} watches set to use default notification settings")

    elif op == 'assign-tag':
        tag_title = (extra_data or '').strip()
        tag_uuid = datastore.add_tag(title=tag_title) if tag_title else None
        if tag_uuid:
            for uuid in existing:
                # Bug in old versions caused by bad edit page/tag handler
                if isinstance(datastore.data['watching'][uuid]['tags'], str):
                    datastore.data['watching'][uuid]['tags'] = []
                datastore.data['watching'][uuid]['tags'].append(tag_uuid)
        # Fixed: only report success when a tag was actually created and
        # assigned; previously this flashed even with no tag title supplied.
        if emit_flash and tag_uuid:
            flash(f"{len(uuids)} watches were tagged")

    # Notify listeners that each requested watch may have changed state.
    for uuid in uuids:
        watch_check_update.send(watch_uuid=uuid)
|  | ||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update):
    """Build the 'ui' Flask blueprint.

    Registers the edit/notification/views sub-blueprints and defines the
    watch-list action routes: clearing history, mark-all-viewed, delete,
    clone, recheck, bulk checkbox operations and share-link creation.

    Args:
        datastore: ChangeDetectionStore holding all watches and settings.
        update_q: Queue used to schedule watches for rechecking.
        worker_handler: Queues recheck work and reports currently-running UUIDs.
        queuedWatchMetaData: Supplies PrioritizedItem for queue entries.
        watch_check_update: Per-watch change signal, passed to the views blueprint.

    Returns:
        The configured Blueprint instance.
    """
    ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
    
    # Register the edit blueprint
    edit_blueprint = construct_edit_blueprint(datastore, update_q, queuedWatchMetaData)
    ui_blueprint.register_blueprint(edit_blueprint)
    
    # Register the notification blueprint
    notification_blueprint = construct_notification_blueprint(datastore)
    ui_blueprint.register_blueprint(notification_blueprint)
    
    # Register the views blueprint
    views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData, watch_check_update)
    ui_blueprint.register_blueprint(views_blueprint)

    # Import the login decorator
    from changedetectionio.auth_decorator import login_optionally_required

    @ui_blueprint.route("/clear_history/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def clear_watch_history(uuid):
        # Delete the snapshot history of one watch, then return to the list.
        try:
            datastore.clear_watch_history(uuid)
        except KeyError:
            flash('Watch not found', 'error')
        else:
            flash("Cleared snapshot history for watch {}".format(uuid))
        return redirect(url_for('watchlist.index'))

    @ui_blueprint.route("/clear_history", methods=['GET', 'POST'])
    @login_optionally_required
    def clear_all_history():
        # GET renders a confirmation page; POST requires the literal text
        # 'clear' before wiping the history of every watch.
        if request.method == 'POST':
            confirmtext = request.form.get('confirmtext')

            if confirmtext == 'clear':
                for uuid in datastore.data['watching'].keys():
                    datastore.clear_watch_history(uuid)
                flash("Cleared snapshot history for all watches")
            else:
                flash('Incorrect confirmation text.', 'error')

            return redirect(url_for('watchlist.index'))

        output = render_template("clear_all_history.html")
        return output

    # Clear all statuses, so we do not see the 'unviewed' class
    @ui_blueprint.route("/form/mark-all-viewed", methods=['GET'])
    @login_optionally_required
    def mark_all_viewed():
        # Save the current newest history as the most recently viewed
        # ?with_errors=1 limits the update to watches that have a last_error set.
        with_errors = request.args.get('with_errors') == "1"
        for watch_uuid, watch in datastore.data['watching'].items():
            if with_errors and not watch.get('last_error'):
                continue
            datastore.set_last_viewed(watch_uuid, int(time.time()))

        return redirect(url_for('watchlist.index'))

    @ui_blueprint.route("/delete", methods=['GET'])
    @login_optionally_required
    def form_delete():
        # Delete a single watch by ?uuid=…; 'first' picks an arbitrary watch
        # (used by tests); any other unknown uuid flashes an error.
        uuid = request.args.get('uuid')

        if uuid != 'all' and not uuid in datastore.data['watching'].keys():
            flash('The watch by UUID {} does not exist.'.format(uuid), 'error')
            return redirect(url_for('watchlist.index'))

        # More for testing, possible to return the first/only
        if uuid == 'first':
            uuid = list(datastore.data['watching'].keys()).pop()
        datastore.delete(uuid)
        flash('Deleted.')

        return redirect(url_for('watchlist.index'))

    @ui_blueprint.route("/clone", methods=['GET'])
    @login_optionally_required
    def form_clone():
        # Duplicate a watch and jump straight to editing the copy.
        uuid = request.args.get('uuid')
        # More for testing, possible to return the first/only
        if uuid == 'first':
            uuid = list(datastore.data['watching'].keys()).pop()

        new_uuid = datastore.clone(uuid)

        # NOTE(review): .get(uuid) can return None for an unknown uuid, which
        # would raise on .get('paused') — confirm callers always pass a valid uuid.
        # Queue the clone for an immediate check unless the source was paused.
        if not datastore.data['watching'].get(uuid).get('paused'):
            worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))

        flash('Cloned, you are editing the new watch.')

        return redirect(url_for("ui.ui_edit.edit_page", uuid=new_uuid))

    @ui_blueprint.route("/checknow", methods=['GET'])
    @login_optionally_required
    def form_watch_checknow():
        # Queue one watch (?uuid=…) or all non-paused watches for rechecking,
        # skipping any that a worker is already processing.
        # Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True})))
        tag = request.args.get('tag')
        uuid = request.args.get('uuid')
        with_errors = request.args.get('with_errors') == "1"

        # Count of watches actually queued, used for the flash message below.
        i = 0

        running_uuids = worker_handler.get_running_uuids()

        if uuid:
            if uuid not in running_uuids:
                worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
                i += 1

        else:
            # Recheck all, including muted
            # Get most overdue first
            for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
                watch_uuid = k[0]
                watch = k[1]
                if not watch['paused']:
                    if watch_uuid not in running_uuids:
                        if with_errors and not watch.get('last_error'):
                            continue

                        # ?tag=… restricts the recheck to watches carrying that tag.
                        if tag != None and tag not in watch['tags']:
                            continue

                        worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
                        i += 1

        if i == 1:
            flash("Queued 1 watch for rechecking.")
        if i > 1:
            flash(f"Queued {i} watches for rechecking.")
        if i == 0:
            flash("No watches available to recheck.")

        return redirect(url_for('watchlist.index'))

    @ui_blueprint.route("/form/checkbox-operations", methods=['POST'])
    @login_optionally_required
    def form_watch_list_checkbox_operations():
        # Bulk action from the watch-overview checkboxes; the actual work is
        # delegated to the module-level _handle_operations() helper.
        op = request.form['op']
        uuids = [u.strip() for u in request.form.getlist('uuids') if u]
        extra_data = request.form.get('op_extradata', '').strip()
        _handle_operations(
            datastore=datastore,
            extra_data=extra_data,
            queuedWatchMetaData=queuedWatchMetaData,
            uuids=uuids,
            worker_handler=worker_handler,
            update_q=update_q,
            watch_check_update=watch_check_update,
            op=op,
        )

        return redirect(url_for('watchlist.index'))


    @ui_blueprint.route("/share-url/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def form_share_put_watch(uuid):
        """Given a watch UUID, upload the info and return a share-link
           the share-link can be imported/added"""
        import requests
        import json
        from copy import deepcopy

        # more for testing
        if uuid == 'first':
            uuid = list(datastore.data['watching'].keys()).pop()

        # copy it to memory as trim off what we dont need (history)
        watch = deepcopy(datastore.data['watching'].get(uuid))
        # For older versions that are not a @property
        if (watch.get('history')):
            del (watch['history'])

        # for safety/privacy
        # Strip all notification_* keys before uploading.
        for k in list(watch.keys()):
            if k.startswith('notification_'):
                del watch[k]

        # Drop identifying/stateful fields that are meaningless to an importer.
        for r in['uuid', 'last_checked', 'last_changed']:
            if watch.get(r):
                del (watch[r])

        # Add the global stuff which may have an impact
        watch['ignore_text'] += datastore.data['settings']['application']['global_ignore_text']
        watch['subtractive_selectors'] += datastore.data['settings']['application']['global_subtractive_selectors']

        watch_json = json.dumps(watch)

        try:
            # POST the stripped watch to the public changedetection.io share
            # service; App-Guid identifies this installation.
            r = requests.request(method="POST",
                                 data={'watch': watch_json},
                                 url="https://changedetection.io/share/share",
                                 headers={'App-Guid': datastore.data['app_guid']})
            res = r.json()

            # Add to the flask session
            session['share-link'] = f"https://changedetection.io/share/{res['share_key']}"


        except Exception as e:
            logger.error(f"Error sharing -{str(e)}")
            flash(f"Could not share, something went wrong while communicating with the share server - {str(e)}", 'error')

        return redirect(url_for('watchlist.index'))

    return ui_blueprint
							
								
								
									
										339
									
								
								changedetectionio/blueprint/ui/edit.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,339 @@ | ||||
| import time | ||||
| from copy import deepcopy | ||||
| import os | ||||
| import importlib.resources | ||||
| from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort | ||||
| from loguru import logger | ||||
| from jinja2 import Environment, FileSystemLoader | ||||
|  | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
| from changedetectionio.time_handler import is_within_schedule | ||||
| from changedetectionio import worker_handler | ||||
|  | ||||
| def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData): | ||||
|     edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates") | ||||
|      | ||||
|     def _watch_has_tag_options_set(watch): | ||||
|         """This should be fixed better so that Tag is some proper Model, a tag is just a Watch also""" | ||||
|         for tag_uuid, tag in datastore.data['settings']['application'].get('tags', {}).items(): | ||||
|             if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')): | ||||
|                 return True | ||||
|  | ||||
|     @edit_blueprint.route("/edit/<string:uuid>", methods=['GET', 'POST']) | ||||
|     @login_optionally_required | ||||
|     # https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists | ||||
|     # https://wtforms.readthedocs.io/en/3.0.x/forms/#wtforms.form.Form.populate_obj ? | ||||
|     def edit_page(uuid): | ||||
|         from changedetectionio import forms | ||||
|         from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config | ||||
|         from changedetectionio import processors | ||||
|         import importlib | ||||
|  | ||||
|         # More for testing, possible to return the first/only | ||||
|         if not datastore.data['watching'].keys(): | ||||
|             flash("No watches to edit", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         if uuid == 'first': | ||||
|             uuid = list(datastore.data['watching'].keys()).pop() | ||||
|  | ||||
|         if not uuid in datastore.data['watching']: | ||||
|             flash("No watch with the UUID %s found." % (uuid), "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         switch_processor = request.args.get('switch_processor') | ||||
|         if switch_processor: | ||||
|             for p in processors.available_processors(): | ||||
|                 if p[0] == switch_processor: | ||||
|                     datastore.data['watching'][uuid]['processor'] = switch_processor | ||||
|                     flash(f"Switched to mode - {p[1]}.") | ||||
|                     datastore.clear_watch_history(uuid) | ||||
|                     redirect(url_for('ui_edit.edit_page', uuid=uuid)) | ||||
|  | ||||
|         # be sure we update with a copy instead of accidently editing the live object by reference | ||||
|         default = deepcopy(datastore.data['watching'][uuid]) | ||||
|  | ||||
|         # Defaults for proxy choice | ||||
|         if datastore.proxy_list is not None:  # When enabled | ||||
|             # @todo | ||||
|             # Radio needs '' not None, or incase that the chosen one no longer exists | ||||
|             if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list): | ||||
|                 default['proxy'] = '' | ||||
|         # proxy_override set to the json/text list of the items | ||||
|  | ||||
|         # Does it use some custom form? does one exist? | ||||
|         processor_name = datastore.data['watching'][uuid].get('processor', '') | ||||
|         processor_classes = next((tpl for tpl in processors.find_processors() if tpl[1] == processor_name), None) | ||||
|         if not processor_classes: | ||||
|             flash(f"Cannot load the edit form for processor/plugin '{processor_classes[1]}', plugin missing?", 'error') | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         parent_module = processors.get_parent_module(processor_classes[0]) | ||||
|  | ||||
|         try: | ||||
|             # Get the parent of the "processor.py" go up one, get the form (kinda spaghetti but its reusing existing code) | ||||
|             forms_module = importlib.import_module(f"{parent_module.__name__}.forms") | ||||
|             # Access the 'processor_settings_form' class from the 'forms' module | ||||
|             form_class = getattr(forms_module, 'processor_settings_form') | ||||
|         except ModuleNotFoundError as e: | ||||
|             # .forms didnt exist | ||||
|             form_class = forms.processor_text_json_diff_form | ||||
|         except AttributeError as e: | ||||
|             # .forms exists but no useful form | ||||
|             form_class = forms.processor_text_json_diff_form | ||||
|  | ||||
|         form = form_class(formdata=request.form if request.method == 'POST' else None, | ||||
|                           data=default, | ||||
|                           extra_notification_tokens=default.extra_notification_token_values(), | ||||
|                           default_system_settings=datastore.data['settings'] | ||||
|                           ) | ||||
|  | ||||
|         # For the form widget tag UUID back to "string name" for the field | ||||
|         form.tags.datastore = datastore | ||||
|  | ||||
|         # Used by some forms that need to dig deeper | ||||
|         form.datastore = datastore | ||||
|         form.watch = default | ||||
|  | ||||
|         for p in datastore.extra_browsers: | ||||
|             form.fetch_backend.choices.append(p) | ||||
|  | ||||
|         form.fetch_backend.choices.append(("system", 'System settings default')) | ||||
|  | ||||
|         # form.browser_steps[0] can be assumed that we 'goto url' first | ||||
|  | ||||
|         if datastore.proxy_list is None: | ||||
|             # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead | ||||
|             del form.proxy | ||||
|         else: | ||||
|             form.proxy.choices = [('', 'Default')] | ||||
|             for p in datastore.proxy_list: | ||||
|                 form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label']))) | ||||
|  | ||||
|  | ||||
|         if request.method == 'POST' and form.validate(): | ||||
|  | ||||
|             # If they changed processor, it makes sense to reset it. | ||||
|             if datastore.data['watching'][uuid].get('processor') != form.data.get('processor'): | ||||
|                 datastore.data['watching'][uuid].clear_watch() | ||||
|                 flash("Reset watch history due to change of processor") | ||||
|  | ||||
|             extra_update_obj = { | ||||
|                 'consecutive_filter_failures': 0, | ||||
|                 'last_error' : False | ||||
|             } | ||||
|  | ||||
|             if request.args.get('unpause_on_save'): | ||||
|                 extra_update_obj['paused'] = False | ||||
|  | ||||
|             extra_update_obj['time_between_check'] = form.time_between_check.data | ||||
|  | ||||
|              # Ignore text | ||||
|             form_ignore_text = form.ignore_text.data | ||||
|             datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text | ||||
|  | ||||
|             # Be sure proxy value is None | ||||
|             if datastore.proxy_list is not None and form.data['proxy'] == '': | ||||
|                 extra_update_obj['proxy'] = None | ||||
|  | ||||
|             # Unsetting all filter_text methods should make it go back to default | ||||
|             # This particularly affects tests running | ||||
|             if 'filter_text_added' in form.data and not form.data.get('filter_text_added') \ | ||||
|                     and 'filter_text_replaced' in form.data and not form.data.get('filter_text_replaced') \ | ||||
|                     and 'filter_text_removed' in form.data and not form.data.get('filter_text_removed'): | ||||
|                 extra_update_obj['filter_text_added'] = True | ||||
|                 extra_update_obj['filter_text_replaced'] = True | ||||
|                 extra_update_obj['filter_text_removed'] = True | ||||
|  | ||||
|             # Because wtforms doesn't support accessing other data in process_ , but we convert the CSV list of tags back to a list of UUIDs | ||||
|             tag_uuids = [] | ||||
|             if form.data.get('tags'): | ||||
|                 # Sometimes in testing this can be list, dont know why | ||||
|                 if type(form.data.get('tags')) == list: | ||||
|                     extra_update_obj['tags'] = form.data.get('tags') | ||||
|                 else: | ||||
|                     for t in form.data.get('tags').split(','): | ||||
|                         tag_uuids.append(datastore.add_tag(title=t)) | ||||
|                     extra_update_obj['tags'] = tag_uuids | ||||
|  | ||||
|             datastore.data['watching'][uuid].update(form.data) | ||||
|             datastore.data['watching'][uuid].update(extra_update_obj) | ||||
|  | ||||
|             if not datastore.data['watching'][uuid].get('tags'): | ||||
|                 # Force it to be a list, because form.data['tags'] will be string if nothing found | ||||
|                 # And del(form.data['tags'] ) wont work either for some reason | ||||
|                 datastore.data['watching'][uuid]['tags'] = [] | ||||
|  | ||||
|             # Recast it if need be to right data Watch handler | ||||
|             watch_class = processors.get_custom_watch_obj_for_processor(form.data.get('processor')) | ||||
|             datastore.data['watching'][uuid] = watch_class(datastore_path=datastore.datastore_path, default=datastore.data['watching'][uuid]) | ||||
|             flash("Updated watch - unpaused!" if request.args.get('unpause_on_save') else "Updated watch.") | ||||
|  | ||||
|             # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds | ||||
|             # But in the case something is added we should save straight away | ||||
|             datastore.needs_write_urgent = True | ||||
|  | ||||
|             # Do not queue on edit if its not within the time range | ||||
|  | ||||
|             # @todo maybe it should never queue anyway on edit... | ||||
|             is_in_schedule = True | ||||
|             watch = datastore.data['watching'].get(uuid) | ||||
|  | ||||
|             if watch.get('time_between_check_use_default'): | ||||
|                 time_schedule_limit = datastore.data['settings']['requests'].get('time_schedule_limit', {}) | ||||
|             else: | ||||
|                 time_schedule_limit = watch.get('time_schedule_limit') | ||||
|  | ||||
|             tz_name = time_schedule_limit.get('timezone') | ||||
|             if not tz_name: | ||||
|                 tz_name = datastore.data['settings']['application'].get('timezone', 'UTC') | ||||
|  | ||||
|             if time_schedule_limit and time_schedule_limit.get('enabled'): | ||||
|                 try: | ||||
|                     is_in_schedule = is_within_schedule(time_schedule_limit=time_schedule_limit, | ||||
|                                                       default_tz=tz_name | ||||
|                                                       ) | ||||
|                 except Exception as e: | ||||
|                     logger.error( | ||||
|                         f"{uuid} - Recheck scheduler, error handling timezone, check skipped - TZ name '{tz_name}' - {str(e)}") | ||||
|                     return False | ||||
|  | ||||
|             ############################# | ||||
|             if not datastore.data['watching'][uuid].get('paused') and is_in_schedule: | ||||
|                 # Queue the watch for immediate recheck, with a higher priority | ||||
|                 worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|  | ||||
|             # Diff page [edit] link should go back to diff page | ||||
|             if request.args.get("next") and request.args.get("next") == 'diff': | ||||
|                 return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid)) | ||||
|  | ||||
|             return redirect(url_for('watchlist.index', tag=request.args.get("tag",''))) | ||||
|  | ||||
|         else: | ||||
|             if request.method == 'POST' and not form.validate(): | ||||
|                 flash("An error occurred, please see below.", "error") | ||||
|  | ||||
|             # JQ is difficult to install on windows and must be manually added (outside requirements.txt) | ||||
|             jq_support = True | ||||
|             try: | ||||
|                 import jq | ||||
|             except ModuleNotFoundError: | ||||
|                 jq_support = False | ||||
|  | ||||
|             watch = datastore.data['watching'].get(uuid) | ||||
|  | ||||
|             # if system or watch is configured to need a chrome type browser | ||||
|             system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' | ||||
|             watch_needs_selenium_or_playwright = False | ||||
|             if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): | ||||
|                 watch_needs_selenium_or_playwright = True | ||||
|  | ||||
|  | ||||
|             from zoneinfo import available_timezones | ||||
|  | ||||
|             # Only works reliably with Playwright | ||||
|  | ||||
|             # Import the global plugin system | ||||
|             from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras | ||||
|              | ||||
|             template_args = { | ||||
|                 'available_processors': processors.available_processors(), | ||||
|                 'available_timezones': sorted(available_timezones()), | ||||
|                 'browser_steps_config': browser_step_ui_config, | ||||
|                 'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), | ||||
|                 'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(), | ||||
|                 'extra_processor_config': form.extra_tab_content(), | ||||
|                 'extra_title': f" - Edit - {watch.label}", | ||||
|                 'form': form, | ||||
|                 'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False, | ||||
|                 'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0, | ||||
|                 'has_special_tag_options': _watch_has_tag_options_set(watch=watch), | ||||
|                 'jq_support': jq_support, | ||||
|                 'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), | ||||
|                 'settings_application': datastore.data['settings']['application'], | ||||
|                 'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'), | ||||
|                 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'), | ||||
|                 'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch), | ||||
|                 'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid), | ||||
|                 'timezone_default_config': datastore.data['settings']['application'].get('timezone'), | ||||
|                 'using_global_webdriver_wait': not default['webdriver_delay'], | ||||
|                 'uuid': uuid, | ||||
|                 'watch': watch, | ||||
|                 'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright, | ||||
|             } | ||||
|  | ||||
|             included_content = None | ||||
|             if form.extra_form_content(): | ||||
|                 # So that the extra panels can access _helpers.html etc, we set the environment to load from templates/ | ||||
|                 # And then render the code from the module | ||||
|                 templates_dir = str(importlib.resources.files("changedetectionio").joinpath('templates')) | ||||
|                 env = Environment(loader=FileSystemLoader(templates_dir)) | ||||
|                 template = env.from_string(form.extra_form_content()) | ||||
|                 included_content = template.render(**template_args) | ||||
|  | ||||
|             output = render_template("edit.html", | ||||
|                                      extra_tab_content=form.extra_tab_content() if form.extra_tab_content() else None, | ||||
|                                      extra_form_content=included_content, | ||||
|                                      **template_args | ||||
|                                      ) | ||||
|  | ||||
|         return output | ||||
|  | ||||
|     @edit_blueprint.route("/edit/<string:uuid>/get-html", methods=['GET']) | ||||
|     @login_optionally_required | ||||
|     def watch_get_latest_html(uuid): | ||||
|         from io import BytesIO | ||||
|         from flask import send_file | ||||
|         import brotli | ||||
|  | ||||
|         watch = datastore.data['watching'].get(uuid) | ||||
|         if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir): | ||||
|             latest_filename = list(watch.history.keys())[-1] | ||||
|             html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br") | ||||
|             with open(html_fname, 'rb') as f: | ||||
|                 if html_fname.endswith('.br'): | ||||
|                     # Read and decompress the Brotli file | ||||
|                     decompressed_data = brotli.decompress(f.read()) | ||||
|                 else: | ||||
|                     decompressed_data = f.read() | ||||
|  | ||||
|             buffer = BytesIO(decompressed_data) | ||||
|  | ||||
|             return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html') | ||||
|  | ||||
|         # Return a 500 error | ||||
|         abort(500) | ||||
|  | ||||
|     # Ajax callback | ||||
|     @edit_blueprint.route("/edit/<string:uuid>/preview-rendered", methods=['POST']) | ||||
|     @login_optionally_required | ||||
|     def watch_get_preview_rendered(uuid): | ||||
|         '''For when viewing the "preview" of the rendered text from inside of Edit''' | ||||
|         from flask import jsonify | ||||
|         from changedetectionio.processors.text_json_diff import prepare_filter_prevew | ||||
|         result = prepare_filter_prevew(watch_uuid=uuid, form_data=request.form, datastore=datastore) | ||||
|         return jsonify(result) | ||||
|  | ||||
|     @edit_blueprint.route("/highlight_submit_ignore_url", methods=['POST']) | ||||
|     @login_optionally_required | ||||
|     def highlight_submit_ignore_url(): | ||||
|         import re | ||||
|         mode = request.form.get('mode') | ||||
|         selection = request.form.get('selection') | ||||
|  | ||||
|         uuid = request.args.get('uuid','') | ||||
|         if datastore.data["watching"].get(uuid): | ||||
|             if mode == 'exact': | ||||
|                 for l in selection.splitlines(): | ||||
|                     datastore.data["watching"][uuid]['ignore_text'].append(l.strip()) | ||||
|             elif mode == 'digit-regex': | ||||
|                 for l in selection.splitlines(): | ||||
|                     # Replace any series of numbers with a regex | ||||
|                     s = re.escape(l.strip()) | ||||
|                     s = re.sub(r'[0-9]+', r'\\d+', s) | ||||
|                     datastore.data["watching"][uuid]['ignore_text'].append('/' + s + '/') | ||||
|  | ||||
|         return f"<a href={url_for('ui.ui_views.preview_page', uuid=uuid)}>Click to preview</a>" | ||||
|      | ||||
|     return edit_blueprint | ||||
							
								
								
									
										108
									
								
								changedetectionio/blueprint/ui/notification.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,108 @@ | ||||
| from flask import Blueprint, request, make_response | ||||
| import random | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
|  | ||||
def construct_blueprint(datastore: ChangeDetectionStore):
    """Build the 'ui_notification' blueprint (AJAX test-notification endpoint)."""
    notification_blueprint = Blueprint('ui_notification', __name__, template_folder="../ui/templates")

    # AJAX endpoint for sending a test
    @notification_blueprint.route("/notification/send-test/<string:watch_uuid>", methods=['POST'])
    @notification_blueprint.route("/notification/send-test", methods=['POST'])
    @notification_blueprint.route("/notification/send-test/", methods=['POST'])
    @login_optionally_required
    def ajax_callback_send_notification_test(watch_uuid=None):
        """Send a test notification using the URLs/body/title currently in the form.

        Notification URLs are resolved in order: the submitted form field, then
        any group/tag notification URLs, then the global application settings.
        Returns plain text on success, or a 400 response with the error detail.
        """
        # Watch_uuid could be unset in the case it`s used in tag editor, global settings
        import apprise
        from changedetectionio.notification.handler import process_notification
        from changedetectionio.notification.apprise_plugin.assets import apprise_asset

        from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler

        apobj = apprise.Apprise(asset=apprise_asset)

        is_global_settings_form = request.args.get('mode', '') == 'global-settings'
        is_group_settings_form = request.args.get('mode', '') == 'group-settings'

        # Use an existing random one on the global/main settings form
        if not watch_uuid and (is_global_settings_form or is_group_settings_form) \
                and datastore.data.get('watching'):
            logger.debug(f"Send test notification - Choosing random Watch {watch_uuid}")
            watch_uuid = random.choice(list(datastore.data['watching'].keys()))

        if not watch_uuid:
            return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400)

        watch = datastore.data['watching'].get(watch_uuid)
        # Fix: an unknown watch_uuid previously crashed later with AttributeError
        # when calling watch.extra_notification_token_values() - fail cleanly instead.
        if not watch:
            return make_response(f"Error: Watch with UUID {watch_uuid} not found", 400)

        notification_urls = None

        if request.form.get('notification_urls'):
            notification_urls = request.form['notification_urls'].strip().splitlines()

        if not notification_urls:
            logger.debug("Test notification - Trying by group/tag in the edit form if available")
            # On an edit page, we should also fire off to the tags if they have notifications
            if request.form.get('tags') and request.form['tags'].strip():
                for k in request.form['tags'].split(','):
                    tag = datastore.tag_exists_by_name(k.strip())
                    notification_urls = tag.get('notifications_urls') if tag and tag.get('notifications_urls') else None

        if not notification_urls and not is_global_settings_form and not is_group_settings_form:
            # In the global settings, use only what is typed currently in the text box
            logger.debug("Test notification - Trying by global system settings notifications")
            if datastore.data['settings']['application'].get('notification_urls'):
                notification_urls = datastore.data['settings']['application']['notification_urls']

        if not notification_urls:
            return 'Error: No Notification URLs set/found'

        # Validate every non-blank URL with Apprise before attempting to send
        for n_url in notification_urls:
            if len(n_url.strip()):
                if not apobj.add(n_url):
                    return f'Error:  {n_url} is not a valid AppRise URL.'

        try:
            # use the same as when it is triggered, but then override it with the form test values
            n_object = {
                'watch_url': request.form.get('window_url', "https://changedetection.io"),
                'notification_urls': notification_urls
            }

            # Only use if present, if not set in n_object it should use the default system value
            if 'notification_format' in request.form and request.form['notification_format'].strip():
                n_object['notification_format'] = request.form.get('notification_format', '').strip()

            if 'notification_title' in request.form and request.form['notification_title'].strip():
                n_object['notification_title'] = request.form.get('notification_title', '').strip()
            elif datastore.data['settings']['application'].get('notification_title'):
                n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
            else:
                n_object['notification_title'] = "Test title"

            if 'notification_body' in request.form and request.form['notification_body'].strip():
                n_object['notification_body'] = request.form.get('notification_body', '').strip()
            elif datastore.data['settings']['application'].get('notification_body'):
                n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
            else:
                n_object['notification_body'] = "Test body"

            # Send synchronously so errors surface in this request
            n_object['as_async'] = False
            n_object.update(watch.extra_notification_token_values())
            sent_obj = process_notification(n_object, datastore)

        except Exception as e:
            e_str = str(e)
            # Remove this text which is not important and floods the container
            e_str = e_str.replace(
                "DEBUG - <class 'apprise.decorators.base.CustomNotifyPlugin.instantiate_plugin.<locals>.CustomNotifyPluginWrapper'>",
                '')

            return make_response(e_str, 400)

        return 'OK - Sent test notifications'

    return notification_blueprint
| @@ -3,7 +3,7 @@ | ||||
|   <div class="box-wrap inner"> | ||||
|     <form | ||||
|       class="pure-form pure-form-stacked" | ||||
|       action="{{url_for('clear_all_history')}}" | ||||
|       action="{{url_for('ui.clear_all_history')}}" | ||||
|       method="POST" | ||||
|     > | ||||
|       <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" > | ||||
| @@ -37,7 +37,7 @@ | ||||
|         </div> | ||||
|         <br /> | ||||
|         <div class="pure-control-group"> | ||||
|           <a href="{{url_for('index')}}" class="pure-button button-cancel" | ||||
|           <a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel" | ||||
|             >Cancel</a | ||||
|           > | ||||
|         </div> | ||||
							
								
								
									
										221
									
								
								changedetectionio/blueprint/ui/views.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,221 @@ | ||||
| from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort | ||||
| from flask_login import current_user | ||||
| import os | ||||
| import time | ||||
| from copy import deepcopy | ||||
|  | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
| from changedetectionio import html_tools | ||||
| from changedetectionio import worker_handler | ||||
|  | ||||
| def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update): | ||||
|     views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates") | ||||
|      | ||||
    @views_blueprint.route("/preview/<string:uuid>", methods=['GET'])
    @login_optionally_required
    def preview_page(uuid):
        """Render the 'preview' page showing one historical text snapshot of a watch.

        :param uuid: Watch UUID, or the literal 'first' to select any existing watch
                     (mostly used by the test suite).

        The snapshot version can be chosen via the 'version' query argument;
        otherwise the newest snapshot is shown.  Lines matching the watch's
        'trigger_text' are highlighted via their line numbers.
        """
        # NOTE(review): on success `content` is whatever get_history_snapshot()
        # returns, but on a read error a dict is appended to this list - the
        # template apparently accepts both shapes; confirm before changing.
        content = []
        versions = []
        timestamp = None

        # More for testing, possible to return the first/only
        if uuid == 'first':
            uuid = list(datastore.data['watching'].keys()).pop()

        try:
            watch = datastore.data['watching'][uuid]
        except KeyError:
            flash("No history found for the specified link, bad link?", "error")
            return redirect(url_for('watchlist.index'))

        # Does the system-wide default fetcher require a real browser?
        system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
        extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]

        # True when this watch is fetched via webdriver/Playwright (directly,
        # via the system default, or via an 'extra_browser_*' backend)
        is_html_webdriver = False
        if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
            is_html_webdriver = True
        triggered_line_numbers = []
        if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
            # No snapshot yet and an error was recorded - nothing to preview
            flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
        else:
            # So prepare the latest preview or not
            preferred_version = request.args.get('version')
            versions = list(watch.history.keys())
            # Default to the newest snapshot unless a known version was requested
            timestamp = versions[-1]
            if preferred_version and preferred_version in versions:
                timestamp = preferred_version

            try:
                versions = list(watch.history.keys())
                content = watch.get_history_snapshot(timestamp)

                # Line numbers of content matching 'trigger_text', used for highlighting
                triggered_line_numbers = html_tools.strip_ignore_text(content=content,
                                                                      wordlist=watch['trigger_text'],
                                                                      mode='line numbers'
                                                                      )

            except Exception as e:
                # Snapshot file missing/unreadable - show a placeholder line instead
                content.append({'line': f"File doesnt exist or unable to read timestamp {timestamp}", 'classes': ''})

        output = render_template("preview.html",
                                 content=content,
                                 current_version=timestamp,
                                 history_n=watch.history_n,
                                 extra_stylesheets=extra_stylesheets,
                                 extra_title=f" - Diff - {watch.label} @ {timestamp}",
                                 triggered_line_numbers=triggered_line_numbers,
                                 current_diff_url=watch['url'],
                                 screenshot=watch.get_screenshot(),
                                 watch=watch,
                                 uuid=uuid,
                                 is_html_webdriver=is_html_webdriver,
                                 last_error=watch['last_error'],
                                 last_error_text=watch.get_error_text(),
                                 last_error_screenshot=watch.get_error_snapshot(),
                                 versions=versions
                                )

        return output
|  | ||||
|     @views_blueprint.route("/diff/<string:uuid>", methods=['GET', 'POST']) | ||||
|     @login_optionally_required | ||||
|     def diff_history_page(uuid): | ||||
|         from changedetectionio import forms | ||||
|  | ||||
|         # More for testing, possible to return the first/only | ||||
|         if uuid == 'first': | ||||
|             uuid = list(datastore.data['watching'].keys()).pop() | ||||
|  | ||||
|         extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] | ||||
|         try: | ||||
|             watch = datastore.data['watching'][uuid] | ||||
|         except KeyError: | ||||
|             flash("No history found for the specified link, bad link?", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         # For submission of requesting an extract | ||||
|         extract_form = forms.extractDataForm(request.form) | ||||
|         if request.method == 'POST': | ||||
|             if not extract_form.validate(): | ||||
|                 flash("An error occurred, please see below.", "error") | ||||
|  | ||||
|             else: | ||||
|                 extract_regex = request.form.get('extract_regex').strip() | ||||
|                 output = watch.extract_regex_from_all_history(extract_regex) | ||||
|                 if output: | ||||
|                     watch_dir = os.path.join(datastore.datastore_path, uuid) | ||||
|                     response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True)) | ||||
|                     response.headers['Content-type'] = 'text/csv' | ||||
|                     response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' | ||||
|                     response.headers['Pragma'] = 'no-cache' | ||||
|                     response.headers['Expires'] = 0 | ||||
|                     return response | ||||
|  | ||||
|                 flash('Nothing matches that RegEx', 'error') | ||||
|                 redirect(url_for('ui_views.diff_history_page', uuid=uuid)+'#extract') | ||||
|  | ||||
|         history = watch.history | ||||
|         dates = list(history.keys()) | ||||
|  | ||||
|         if len(dates) < 2: | ||||
|             flash("Not enough saved change detection snapshots to produce a report.", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         # Save the current newest history as the most recently viewed | ||||
|         datastore.set_last_viewed(uuid, time.time()) | ||||
|  | ||||
|         # Read as binary and force decode as UTF-8 | ||||
|         # Windows may fail decode in python if we just use 'r' mode (chardet decode exception) | ||||
|         from_version = request.args.get('from_version') | ||||
|         from_version_index = -2  # second newest | ||||
|         if from_version and from_version in dates: | ||||
|             from_version_index = dates.index(from_version) | ||||
|         else: | ||||
|             from_version = dates[from_version_index] | ||||
|  | ||||
|         try: | ||||
|             from_version_file_contents = watch.get_history_snapshot(dates[from_version_index]) | ||||
|         except Exception as e: | ||||
|             from_version_file_contents = f"Unable to read to-version at index {dates[from_version_index]}.\n" | ||||
|  | ||||
|         to_version = request.args.get('to_version') | ||||
|         to_version_index = -1 | ||||
|         if to_version and to_version in dates: | ||||
|             to_version_index = dates.index(to_version) | ||||
|         else: | ||||
|             to_version = dates[to_version_index] | ||||
|  | ||||
|         try: | ||||
|             to_version_file_contents = watch.get_history_snapshot(dates[to_version_index]) | ||||
|         except Exception as e: | ||||
|             to_version_file_contents = "Unable to read to-version at index{}.\n".format(dates[to_version_index]) | ||||
|  | ||||
|         screenshot_url = watch.get_screenshot() | ||||
|  | ||||
|         system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' | ||||
|  | ||||
|         is_html_webdriver = False | ||||
|         if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): | ||||
|             is_html_webdriver = True | ||||
|  | ||||
|         password_enabled_and_share_is_off = False | ||||
|         if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False): | ||||
|             password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access') | ||||
|  | ||||
|         output = render_template("diff.html", | ||||
|                                  current_diff_url=watch['url'], | ||||
|                                  from_version=str(from_version), | ||||
|                                  to_version=str(to_version), | ||||
|                                  extra_stylesheets=extra_stylesheets, | ||||
|                                  extra_title=f" - Diff - {watch.label}", | ||||
|                                  extract_form=extract_form, | ||||
|                                  is_html_webdriver=is_html_webdriver, | ||||
|                                  last_error=watch['last_error'], | ||||
|                                  last_error_screenshot=watch.get_error_snapshot(), | ||||
|                                  last_error_text=watch.get_error_text(), | ||||
|                                  left_sticky=True, | ||||
|                                  newest=to_version_file_contents, | ||||
|                                  newest_version_timestamp=dates[-1], | ||||
|                                  password_enabled_and_share_is_off=password_enabled_and_share_is_off, | ||||
|                                  from_version_file_contents=from_version_file_contents, | ||||
|                                  to_version_file_contents=to_version_file_contents, | ||||
|                                  screenshot=screenshot_url, | ||||
|                                  uuid=uuid, | ||||
|                                  versions=dates, # All except current/last | ||||
|                                  watch_a=watch | ||||
|                                  ) | ||||
|  | ||||
|         return output | ||||
|  | ||||
|     @views_blueprint.route("/form/add/quickwatch", methods=['POST']) | ||||
|     @login_optionally_required | ||||
|     def form_quick_watch_add(): | ||||
|         from changedetectionio import forms | ||||
|         form = forms.quickWatchForm(request.form) | ||||
|  | ||||
|         if not form.validate(): | ||||
|             for widget, l in form.errors.items(): | ||||
|                 flash(','.join(l), 'error') | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         url = request.form.get('url').strip() | ||||
|         if datastore.url_exists(url): | ||||
|             flash(f'Warning, URL {url} already exists', "notice") | ||||
|  | ||||
|         add_paused = request.form.get('edit_and_watch_submit_button') != None | ||||
|         processor = request.form.get('processor', 'text_json_diff') | ||||
|         new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor}) | ||||
|  | ||||
|         if new_uuid: | ||||
|             if add_paused: | ||||
|                 flash('Watch added in Paused state, saving will unpause.') | ||||
|                 return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag'))) | ||||
|             else: | ||||
|                 # Straight into the queue. | ||||
|                 worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) | ||||
|                 flash("Watch added.") | ||||
|  | ||||
|         return redirect(url_for('watchlist.index', tag=request.args.get('tag',''))) | ||||
|  | ||||
|     return views_blueprint | ||||
							
								
								
									
										112
									
								
								changedetectionio/blueprint/watchlist/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,112 @@ | ||||
| import os | ||||
| import time | ||||
|  | ||||
| from flask import Blueprint, request, make_response, render_template, redirect, url_for, flash, session | ||||
| from flask_login import current_user | ||||
| from flask_paginate import Pagination, get_page_parameter | ||||
|  | ||||
| from changedetectionio import forms | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
|  | ||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
    """Build the 'watchlist' blueprint serving the main watch-overview page.

    :param datastore: application datastore (watches, settings, tags)
    :param update_q: priority queue of watches waiting to be (re)checked
    :param queuedWatchMetaData: module providing PrioritizedItem for update_q
    :return: the configured Flask Blueprint
    """
    watchlist_blueprint = Blueprint('watchlist', __name__, template_folder="templates")

    @watchlist_blueprint.route("/", methods=['GET'])
    @login_optionally_required
    def index():
        """Render the watch overview: tag filter, search, quick ops, pagination."""
        active_tag_req = request.args.get('tag', '').lower().strip()
        active_tag_uuid = active_tag = None

        # Be sure limit_tag is a uuid - the ?tag= arg may be a tag title or its uuid
        if active_tag_req:
            for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items():
                if active_tag_req == tag.get('title', '').lower().strip() or active_tag_req == uuid:
                    active_tag = tag
                    active_tag_uuid = uuid
                    break

        # Redirect for the old rss path which used the /?rss=true
        if request.args.get('rss'):
            return redirect(url_for('rss.feed', tag=active_tag_uuid))

        # Quick per-watch operations (pause/mute) triggered from the overview
        op = request.args.get('op')
        if op:
            uuid = request.args.get('uuid')
            # Robustness fix: a stale or invalid uuid previously raised KeyError;
            # ignore it and just redirect back to the list
            watch = datastore.data['watching'].get(uuid)
            if watch:
                if op == 'pause':
                    watch.toggle_pause()
                elif op == 'mute':
                    watch.toggle_mute()
                datastore.needs_write = True
            return redirect(url_for('watchlist.index', tag = active_tag_uuid))

        # Sort by last_changed and add the uuid which is usually the key..
        sorted_watches = []
        with_errors = request.args.get('with_errors') == "1"
        errored_count = 0
        search_q = request.args.get('q').strip().lower() if request.args.get('q') else False
        for uuid, watch in datastore.data['watching'].items():
            # 'With errors' filter only shows watches that currently have an error
            if with_errors and not watch.get('last_error'):
                continue

            if active_tag_uuid and not active_tag_uuid in watch['tags']:
                continue

            if watch.get('last_error'):
                errored_count += 1

            if search_q:
                # Search matches the title, the URL, or the last error text
                if (watch.get('title') and search_q in watch.get('title').lower()) or search_q in watch.get('url', '').lower():
                    sorted_watches.append(watch)
                elif watch.get('last_error') and search_q in watch.get('last_error').lower():
                    sorted_watches.append(watch)
            else:
                sorted_watches.append(watch)

        form = forms.quickWatchForm(request.form)
        page = request.args.get(get_page_parameter(), type=int, default=1)
        total_count = len(sorted_watches)

        pagination = Pagination(page=page,
                                total=total_count,
                                per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")

        # Consistency/robustness fix: default to {} (as done above) so a missing
        # 'tags' key cannot crash .items()
        sorted_tags = sorted(datastore.data['settings']['application'].get('tags', {}).items(), key=lambda x: x[1]['title'])

        output = render_template(
            "watch-overview.html",
            active_tag=active_tag,
            active_tag_uuid=active_tag_uuid,
            app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
            datastore=datastore,
            errored_count=errored_count,
            form=form,
            guid=datastore.data['app_guid'],
            has_proxies=datastore.proxy_list,
            has_unviewed=datastore.has_unviewed,
            hosted_sticky=os.getenv("SALTED_PASS", False) == False,
            now_time_server=round(time.time()),
            pagination=pagination,
            queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
            search_q=request.args.get('q', '').strip(),
            sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
            sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
            system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
            tags=sorted_tags,
            watches=sorted_watches
        )

        if session.get('share-link'):
            del (session['share-link'])

        resp = make_response(output)

        # The template can run on cookie or url query info
        if request.args.get('sort'):
            resp.set_cookie('sort', request.args.get('sort'))
        if request.args.get('order'):
            resp.set_cookie('order', request.args.get('order'))

        return resp

    return watchlist_blueprint
| @@ -0,0 +1,227 @@ | ||||
| {%- extends 'base.html' -%} | ||||
| {%- block content -%} | ||||
| {%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%} | ||||
| <script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script> | ||||
| <script>let nowtimeserver={{ now_time_server }};</script> | ||||
| <script> | ||||
| // Initialize Feather icons after the page loads | ||||
| document.addEventListener('DOMContentLoaded', function() { | ||||
|     feather.replace(); | ||||
| }); | ||||
| </script> | ||||
| <style> | ||||
| .checking-now .last-checked { | ||||
|     background-image: linear-gradient(to bottom, transparent 0%, rgba(0,0,0,0.05) 40%, rgba(0,0,0,0.1) 100%); | ||||
|     background-size: 0 100%; | ||||
|     background-repeat: no-repeat; | ||||
|     transition: background-size 0.9s ease | ||||
| } | ||||
| </style> | ||||
| <div class="box"> | ||||
|  | ||||
|     <form class="pure-form" action="{{ url_for('ui.ui_views.form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form"> | ||||
|         <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" > | ||||
|         <fieldset> | ||||
|             <legend>Add a new change detection watch</legend> | ||||
|             <div id="watch-add-wrapper-zone"> | ||||
|  | ||||
|                     {{ render_nolabel_field(form.url, placeholder="https://...", required=true) }} | ||||
|                     {{ render_nolabel_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="watch label / tag") }} | ||||
|                     {{ render_nolabel_field(form.watch_submit_button, title="Watch this URL!" ) }} | ||||
|                     {{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }} | ||||
|             </div> | ||||
|             <div id="quick-watch-processor-type"> | ||||
|                 {{ render_simple_field(form.processor) }} | ||||
|             </div> | ||||
|  | ||||
|         </fieldset> | ||||
|         <span style="color:#eee; font-size: 80%;"><img alt="Create a shareable link" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></span> | ||||
|     </form> | ||||
|  | ||||
|     <form class="pure-form" action="{{ url_for('ui.form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form"> | ||||
|     <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" > | ||||
|     <input type="hidden" id="op_extradata" name="op_extradata" value="" > | ||||
|     <div id="checkbox-operations"> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="pause"><i data-feather="pause" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Pause</button> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="unpause"><i data-feather="play" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnPause</button> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="mute"><i data-feather="volume-x" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mute</button> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="unmute"><i data-feather="volume-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnMute</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="recheck"><i data-feather="refresh-cw" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Recheck</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag"><i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Tag</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed"><i data-feather="eye" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mark viewed</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="notification-default"><i data-feather="bell" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Use default notification</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors"><i data-feather="x-circle" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear errors</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear/reset history</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Delete</button> | ||||
|     </div> | ||||
|     {%- if watches|length >= pagination.per_page -%} | ||||
|         {{ pagination.info }} | ||||
|     {%- endif -%} | ||||
|     {%- if search_q -%}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%} | ||||
|     <div> | ||||
|         <a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a> | ||||
|  | ||||
|     <!-- tag list --> | ||||
|     {%- for uuid, tag in tags -%} | ||||
|         {%- if tag != "" -%} | ||||
|             <a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a> | ||||
|         {%- endif -%} | ||||
|     {%- endfor -%} | ||||
|     </div> | ||||
|  | ||||
|     {%- set sort_order = sort_order or 'asc' -%} | ||||
|     {%- set sort_attribute = sort_attribute or 'last_changed'  -%} | ||||
|     {%- set pagination_page = request.args.get('page', 0) -%} | ||||
|     {%- set cols_required = 6 -%} | ||||
|     {%- set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") -%} | ||||
|     {%- if any_has_restock_price_processor -%} | ||||
|         {%- set cols_required = cols_required + 1 -%} | ||||
|     {%- endif -%} | ||||
|  | ||||
|     <div id="watch-table-wrapper"> | ||||
|  | ||||
|         <table class="pure-table pure-table-striped watch-table"> | ||||
|             <thead> | ||||
|             <tr> | ||||
|                 {%- set link_order = "desc" if sort_order  == 'asc' else "asc" -%} | ||||
|                 {%- set arrow_span = "" -%} | ||||
|                 <th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}"  href="{{url_for('watchlist.index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th> | ||||
|                 <th class="empty-cell"></th> | ||||
|                 <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th> | ||||
|              {%- if any_has_restock_price_processor -%} | ||||
|                 <th>Restock & Price</th> | ||||
|              {%- endif -%} | ||||
|                 <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th> | ||||
|                 <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th> | ||||
|                 <th class="empty-cell"></th> | ||||
|             </tr> | ||||
|             </thead> | ||||
|             <tbody> | ||||
|             {%- if not watches|length -%} | ||||
|             <tr> | ||||
|                 <td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td> | ||||
|             </tr> | ||||
|             {%- endif -%} | ||||
|             {%- for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) -%} | ||||
|                 {%- set checking_now = is_checking_now(watch) -%} | ||||
|                 {%- set history_n = watch.history_n -%} | ||||
|                 {#  Mirror in changedetectionio/static/js/realtime.js for the frontend #} | ||||
|                 {%- set row_classes = [ | ||||
|                     loop.cycle('pure-table-odd', 'pure-table-even'), | ||||
|                     'processor-' ~ watch['processor'], | ||||
|                     'has-error' if watch.compile_error_texts()|length > 2 else '', | ||||
|                     'paused' if watch.paused is defined and watch.paused != False else '', | ||||
|                     'unviewed' if watch.has_unviewed else '', | ||||
|                     'has-restock-info' if watch.has_restock_info else 'no-restock-info', | ||||
|                     'in-stock' if watch.has_restock_info and watch['restock']['in_stock'] else '', | ||||
|                     'not-in-stock' if watch.has_restock_info and not watch['restock']['in_stock'] else '', | ||||
|                     'queued' if watch.uuid in queued_uuids else '', | ||||
|                     'checking-now' if checking_now else '', | ||||
|                     'notification_muted' if watch.notification_muted else '', | ||||
|                     'single-history' if history_n == 1 else '', | ||||
|                     'multiple-history' if history_n >= 2 else '' | ||||
|                 ] -%} | ||||
|             <tr id="{{ watch.uuid }}" data-watch-uuid="{{ watch.uuid }}" class="{{ row_classes | reject('equalto', '') | join(' ') }}"> | ||||
|                 <td class="inline checkbox-uuid" ><input name="uuids"  type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td> | ||||
|                 <td class="inline watch-controls"> | ||||
|                     <a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a> | ||||
|                     <a class="ajax-op state-on pause-toggle"  data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a> | ||||
|                     <a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a> | ||||
|                     <a class="ajax-op state-on mute-toggle" data-op="mute"  style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a> | ||||
|                 </td> | ||||
|                 <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}} | ||||
|                     <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a> | ||||
|                     <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a> | ||||
|  | ||||
|                     {%- if watch.get_fetch_backend == "html_webdriver" | ||||
|                          or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver'  ) | ||||
|                          or "extra_browser_" in watch.get_fetch_backend | ||||
|                     -%} | ||||
|                     <img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" > | ||||
|                     {%- endif -%} | ||||
|  | ||||
|                     {%- if watch.is_pdf  -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%} | ||||
|                     {%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%} | ||||
|  | ||||
|                     <div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list)|safe }}</div> | ||||
|  | ||||
|                     {%- if watch['processor'] == 'text_json_diff'  -%} | ||||
|                         {%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  -%} | ||||
|                         <div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> | ||||
|                         {%- endif -%} | ||||
|                     {%- endif -%} | ||||
|                     {%- if watch['processor'] == 'restock_diff' -%} | ||||
|                         <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon" > Price</span> | ||||
|                     {%- endif -%} | ||||
|                     {%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%} | ||||
|                       <span class="watch-tag-list">{{ watch_tag.title }}</span> | ||||
|                     {%- endfor -%} | ||||
|                 </td> | ||||
| {%- if any_has_restock_price_processor -%} | ||||
|                 <td class="restock-and-price"> | ||||
|                     {%- if watch['processor'] == 'restock_diff'  -%} | ||||
|                         {%- if watch.has_restock_info -%} | ||||
|                             <span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price"> | ||||
|                                 <!-- maybe some object watch['processor'][restock_diff] or.. --> | ||||
|                                  {%- if watch['restock']['in_stock']-%}  In stock {%- else-%}  Not in stock {%- endif -%} | ||||
|                             </span> | ||||
|                         {%- endif -%} | ||||
|  | ||||
|                         {%- if watch.get('restock') and watch['restock']['price'] != None -%} | ||||
|                             {%- if watch['restock']['price'] != None -%} | ||||
|                                 <span class="restock-label price" title="Price"> | ||||
|                                 {{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }} | ||||
|                                 </span> | ||||
|                             {%- endif -%} | ||||
|                         {%- elif not watch.has_restock_info -%} | ||||
|                             <span class="restock-label error">No information</span> | ||||
|                         {%- endif -%} | ||||
|                     {%- endif -%} | ||||
|                 </td> | ||||
| {%- endif -%} | ||||
|             {#last_checked becomes fetch-start-time#} | ||||
|                 <td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" > | ||||
|                     <div class="spinner-wrapper" style="display:none;" > | ||||
|                         <span class="spinner"></span><span> Checking now</span> | ||||
|                     </div> | ||||
|                     <span class="innertext">{{watch|format_last_checked_time|safe}}</span> | ||||
|                 </td> | ||||
|                 <td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%} | ||||
|                     {{watch.last_changed|format_timestamp_timeago}} | ||||
|                     {%- else -%} | ||||
|                     Not yet | ||||
|                     {%- endif -%} | ||||
|                 </td> | ||||
|                 <td> | ||||
|                     {%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%} | ||||
|                     <a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a> | ||||
|                     <a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a> | ||||
|                     <a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a> | ||||
|                     <a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a> | ||||
|                     <a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a> | ||||
|                 </td> | ||||
|             </tr> | ||||
|             {%- endfor -%} | ||||
|             </tbody> | ||||
|         </table> | ||||
|         <ul id="post-list-buttons"> | ||||
|             <li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a> | ||||
|             </li> | ||||
|             <li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" > | ||||
|                 <a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a> | ||||
|             </li> | ||||
|             <li> | ||||
|                <a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck | ||||
|                 all {%- if active_tag_uuid-%}  in "{{active_tag.title}}"{%endif%}</a> | ||||
|             </li> | ||||
|             <li> | ||||
|                 <a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a> | ||||
|             </li> | ||||
|         </ul> | ||||
|         {{ pagination.links }} | ||||
|     </div> | ||||
|     </form> | ||||
| </div> | ||||
| {%- endblock -%} | ||||
							
								
								
									
										170
									
								
								changedetectionio/conditions/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,170 @@ | ||||
| from flask import Blueprint | ||||
|  | ||||
| from json_logic.builtins import BUILTINS | ||||
|  | ||||
| from .exceptions import EmptyConditionRuleRowNotUsable | ||||
| from .pluggy_interface import plugin_manager  # Import the pluggy plugin manager | ||||
| from . import default_plugin | ||||
| from loguru import logger | ||||
# List of all supported JSON Logic operators, as (value, label) pairs for the UI dropdown.
# Plugins may extend this list at import time (see the plugin loop at the bottom of this module).
operator_choices = [
    (None, "Choose one - Operator"),
    (">", "Greater Than"),
    ("<", "Less Than"),
    (">=", "Greater Than or Equal To"),
    ("<=", "Less Than or Equal To"),
    ("==", "Equals"),
    ("!=", "Not Equals"),
    ("in", "Contains"),
    ("!in", "Does Not Contain"),
]

# Fields available in the rules, as (value, label) pairs; populated by plugins at import time.
field_choices = [
    (None, "Choose one - Field"),
]

# The data we will feed the JSON Rules to see if it passes the test/conditions or not
# NOTE(review): execute_ruleset_against_all_plugins() builds its own dict of the same
# name, so this module-level value appears unused at runtime - confirm before removing.
EXECUTE_DATA = {}


# Define the extended operations dictionary passed to jsonLogic()
CUSTOM_OPERATIONS = {
    **BUILTINS,  # Include all standard operators
}
|  | ||||
def filter_complete_rules(ruleset):
    """Return only the rows of `ruleset` where operator, field and value are all filled in.

    A part counts as missing when it is one of "", False, the string "None", or None.
    """
    incomplete_markers = ("", False, "None", None)

    def _is_complete(rule):
        return (rule["operator"] not in incomplete_markers
                and rule["field"] not in incomplete_markers
                and rule["value"] not in incomplete_markers)

    return [rule for rule in ruleset if _is_complete(rule)]
|  | ||||
def convert_to_jsonlogic(logic_operator: str, rule_dict: list):
    """
    Convert a structured rule dict into a JSON Logic rule.

    :param logic_operator: "and" / "or" - how multiple conditions are combined.
    :param rule_dict: List of condition dicts, each with "operator", "field" and "value".
    :return: JSON Logic rule as a dictionary.
    :raises EmptyConditionRuleRowNotUsable: if a row is missing operator, field or value.
    """
    json_logic_conditions = []

    for condition in rule_dict:
        operator = condition["operator"]
        field = condition["field"]
        value = condition["value"]

        if not operator or operator == 'None' or not value or not field:
            raise EmptyConditionRuleRowNotUsable()

        # Convert value to int/float if possible.
        # Bugfix: the original wrote `str != "None"` which compares the *type* `str`
        # against a string (always True); the intent was to test the value itself.
        try:
            if isinstance(value, str) and "." in value and value != "None":
                value = float(value)
            else:
                value = int(value)
        except (ValueError, TypeError):
            pass  # Keep as a string if conversion fails

        # Handle different JSON Logic operators properly
        if operator == "in":
            # JSON Logic "in" expects the needle first: {"in": [needle, haystack]}
            json_logic_conditions.append({"in": [value, {"var": field}]})
        elif operator in ("!", "!!", "-"):
            json_logic_conditions.append({operator: [{"var": field}]})  # Unary operators
        elif operator in ("min", "max", "cat"):
            json_logic_conditions.append({operator: value})  # Multi-argument operators
        else:
            json_logic_conditions.append({operator: [{"var": field}, value]})  # Standard binary operators

    # A single condition does not need an and/or wrapper
    return {logic_operator: json_logic_conditions} if len(json_logic_conditions) > 1 else json_logic_conditions[0]
|  | ||||
|  | ||||
def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_datastruct, ephemeral_data=None):
    """
    Build our data and options by calling our plugins, then pass it to jsonlogic
    and see if the conditions pass.

    :param current_watch_uuid: UUID of the watch whose conditions are evaluated.
    :param application_datastruct: App data structure containing the 'watching' dict.
    :param ephemeral_data: Optional extra data (e.g. freshly filtered text) handed to plugins.
    :return: dict with 'executed_data' (the values the rules ran against) and 'result' (bool).
    """
    import concurrent.futures
    from json_logic import jsonLogic

    # Bugfix: the previous mutable default argument ({}) was shared between calls.
    if ephemeral_data is None:
        ephemeral_data = {}

    execute_data = {}
    result = True

    watch = application_datastruct['watching'].get(current_watch_uuid)

    if watch and watch.get("conditions"):
        logic_operator = "and" if watch.get("conditions_match_logic", "ALL") == "ALL" else "or"
        complete_rules = filter_complete_rules(watch['conditions'])
        if complete_rules:
            # Give all plugins a chance to update the data dict again (that we will test the conditions against)
            for plugin in plugin_manager.get_plugins():
                try:
                    with concurrent.futures.ThreadPoolExecutor() as executor:
                        future = executor.submit(
                            plugin.add_data,
                            current_watch_uuid=current_watch_uuid,
                            application_datastruct=application_datastruct,
                            ephemeral_data=ephemeral_data
                        )
                        logger.debug(f"Trying plugin {plugin}....")

                        # Abort this plugin if it takes more than 10 seconds
                        try:
                            new_execute_data = future.result(timeout=10)
                            if new_execute_data and isinstance(new_execute_data, dict):
                                execute_data.update(new_execute_data)
                        except concurrent.futures.TimeoutError:
                            # The plugin took too long, abort processing for this watch
                            raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.")
                except Exception as e:
                    # Log the error but continue with the next plugin
                    # (consistency fix: use the module's loguru logger instead of stdlib logging)
                    logger.error(f"Error executing plugin {plugin.__class__.__name__}: {str(e)}")
                    continue

            # Create the ruleset
            ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules)

            # Pass the custom operations dictionary to jsonLogic
            if not jsonLogic(logic=ruleset, data=execute_data, operations=CUSTOM_OPERATIONS):
                result = False

    return {'executed_data': execute_data, 'result': result}
|  | ||||
# Load plugins dynamically: merge each plugin's custom JSON Logic operators and its
# extra dropdown choices into the module-level registries at import time.
for plugin in plugin_manager.get_plugins():
    new_ops = plugin.register_operators()
    if isinstance(new_ops, dict):
        CUSTOM_OPERATIONS.update(new_ops)

    # Extra (value, label) pairs for the "Operator" dropdown
    new_operator_choices = plugin.register_operator_choices()
    if isinstance(new_operator_choices, list):
        operator_choices.extend(new_operator_choices)

    # Extra (value, label) pairs for the "Field" dropdown
    new_field_choices = plugin.register_field_choices()
    if isinstance(new_field_choices, list):
        field_choices.extend(new_field_choices)
|  | ||||
def collect_ui_edit_stats_extras(watch):
    """Collect and join the HTML fragments from every plugin implementing ui_edit_stats_extras."""
    fragments = []

    for plugin in plugin_manager.get_plugins():
        try:
            fragment = plugin.ui_edit_stats_extras(watch=watch)
        except Exception:
            # Best-effort: skip plugins that don't implement the hook or raise errors
            continue
        if fragment:
            fragments.append(fragment)

    return "\n".join(fragments) if fragments else ""
|  | ||||
							
								
								
									
										81
									
								
								changedetectionio/conditions/blueprint.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,81 @@ | ||||
| # Flask Blueprint Definition | ||||
| import json | ||||
|  | ||||
| from flask import Blueprint | ||||
|  | ||||
| from changedetectionio.conditions import execute_ruleset_against_all_plugins | ||||
|  | ||||
|  | ||||
def construct_blueprint(datastore):
    """Build the 'conditions' Flask blueprint.

    :param datastore: Application datastore holding all watch configuration.
    :return: Blueprint exposing the single-rule condition verification endpoint.
    """
    from changedetectionio.flask_app import login_optionally_required

    conditions_blueprint = Blueprint('conditions', __name__, template_folder="templates")

    @conditions_blueprint.route("/<string:watch_uuid>/verify-condition-single-rule", methods=['POST'])
    @login_optionally_required
    def verify_condition_single_rule(watch_uuid):
        """Verify a single condition rule against the current snapshot"""
        # NOTE: 'prepare_filter_prevew' (sic) is the actual project function name
        from changedetectionio.processors.text_json_diff import prepare_filter_prevew
        from flask import request, jsonify
        from copy import deepcopy

        ephemeral_data = {}

        # Get the watch data
        watch = datastore.data['watching'].get(watch_uuid)
        if not watch:
            return jsonify({'status': 'error', 'message': 'Watch not found'}), 404

        try:
            # Apply all current (possibly unsaved) form filters so the condition runs
            # against the final output text, exactly as a real check would.
            result = prepare_filter_prevew(datastore=datastore,
                                           form_data=request.form,
                                           watch_uuid=watch_uuid)

            ephemeral_data['text'] = result.get('after_filter', '')

            # Create a temporary watch data structure with this single rule
            tmp_watch_data = deepcopy(datastore.data['watching'].get(watch_uuid))

            # Override the conditions in the temporary watch
            rule_json = request.args.get("rule")
            rule = json.loads(rule_json) if rule_json else None

            # Should be key/value of field, operator, value
            tmp_watch_data['conditions'] = [rule]
            tmp_watch_data['conditions_match_logic'] = "ALL"  # Single rule, so use ALL

            # Create a temporary application data structure for the rule check
            temp_app_data = {
                'watching': {
                    watch_uuid: tmp_watch_data
                }
            }

            # Execute the rule against the current snapshot with form data
            result = execute_ruleset_against_all_plugins(
                current_watch_uuid=watch_uuid,
                application_datastruct=temp_app_data,
                ephemeral_data=ephemeral_data
            )

            # Bugfix: 'result' is a dict and therefore always truthy; the message must
            # be derived from the actual boolean outcome in result['result'].
            passed = result.get('result')
            return jsonify({
                'status': 'success',
                'result': passed,
                'data': result.get('executed_data'),
                'message': 'Condition passes' if passed else 'Condition does not pass'
            })

        except Exception as e:
            return jsonify({
                'status': 'error',
                'message': f'Error verifying condition: {str(e)}'
            }), 500

    return conditions_blueprint
							
								
								
									
										78
									
								
								changedetectionio/conditions/default_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,78 @@ | ||||
| import re | ||||
|  | ||||
| import pluggy | ||||
| from price_parser import Price | ||||
| from loguru import logger | ||||
|  | ||||
| hookimpl = pluggy.HookimplMarker("changedetectionio_conditions") | ||||
|  | ||||
|  | ||||
@hookimpl
def register_operators():
    """Register the default custom JSON Logic operators.

    Each operator receives the json-logic context as its (unused) first argument,
    then the field text and the user-supplied comparison value.
    """
    def starts_with(_, text, prefix):
        # Case-insensitive, whitespace-trimmed prefix test
        return text.lower().strip().startswith(str(prefix).strip().lower())

    def ends_with(_, text, suffix):
        # Case-insensitive, whitespace-trimmed suffix test
        return text.lower().strip().endswith(str(suffix).strip().lower())

    def length_min(_, text, strlen):
        # True when the text is at least `strlen` characters long
        return len(text) >= int(strlen)

    def length_max(_, text, strlen):
        # True when the text is at most `strlen` characters long
        return len(text) <= int(strlen)

    def contains_regex(_, text, pattern):
        """Returns True if `text` contains `pattern` (case-insensitive regex match)."""
        return bool(re.search(pattern, str(text), re.IGNORECASE))

    def not_contains_regex(_, text, pattern):
        """Returns True if `text` does NOT contain `pattern` (case-insensitive regex match)."""
        return not bool(re.search(pattern, str(text), re.IGNORECASE))

    operators = {
        "!contains_regex": not_contains_regex,
        "contains_regex": contains_regex,
        "ends_with": ends_with,
        "length_max": length_max,
        "length_min": length_min,
        "starts_with": starts_with,
    }
    return operators
|  | ||||
@hookimpl
def register_operator_choices():
    """(value, label) pairs added to the Operator dropdown for the default operators."""
    choices = [
        ("starts_with", "Text Starts With"),
        ("ends_with", "Text Ends With"),
        ("length_min", "Length minimum"),
        ("length_max", "Length maximum"),
        ("contains_regex", "Text Matches Regex"),
        ("!contains_regex", "Text Does NOT Match Regex"),
    ]
    return choices
|  | ||||
@hookimpl
def register_field_choices():
    """(value, label) pairs added to the Field dropdown for the default data sources."""
    choices = [
        ("extracted_number", "Extracted number after 'Filters & Triggers'"),
        # ("meta_description", "Meta Description"),
        # ("meta_keywords", "Meta Keywords"),
        ("page_filtered_text", "Page text after 'Filters & Triggers'"),
        # ("page_title", "Page <title>"),  # actual page title <title>
    ]
    return choices
|  | ||||
@hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    """Expose the filtered page text, and any price/number found in it, to the rule engine.

    :return: dict possibly containing 'page_filtered_text' and 'extracted_number'.
    """
    res = {}
    if 'text' in ephemeral_data:
        res['page_filtered_text'] = ephemeral_data['text']

        # Better to not wrap this in try/except so that the UI can see any errors
        price = Price.fromstring(ephemeral_data.get('text'))
        # Idiom fix: None comparison uses 'is not None' rather than '!= None'
        if price and price.amount is not None:
            # This is slightly misleading, it's extracting a PRICE not a Number..
            res['extracted_number'] = float(price.amount)
            logger.debug(f"Extracted number result: '{price}' - returning float({res['extracted_number']})")

    return res
							
								
								
									
										6
									
								
								changedetectionio/conditions/exceptions.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,6 @@ | ||||
class EmptyConditionRuleRowNotUsable(Exception):
    """Raised when a 'conditions' ruleset row is missing its operator, field or value."""

    # Single fixed message; the rule row itself carries no extra detail.
    _MESSAGE = "One of the 'conditions' rulesets is incomplete, cannot run."

    def __init__(self):
        super().__init__(self._MESSAGE)

    def __str__(self):
        return self.args[0]
							
								
								
									
										44
									
								
								changedetectionio/conditions/form.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,44 @@ | ||||
| # Condition Rule Form (for each rule row) | ||||
| from wtforms import Form, SelectField, StringField, validators | ||||
| from wtforms import validators | ||||
|  | ||||
class ConditionFormRow(Form):
    """A single (field, operator, value) row in the watch 'Conditions' editor."""

    # ✅ Ensure Plugins Are Loaded BEFORE Importing Choices
    from changedetectionio.conditions import plugin_manager
    from changedetectionio.conditions import operator_choices, field_choices

    # Data source to evaluate; plugins extend the available choices
    field = SelectField(
        "Field",
        choices=field_choices,
        validators=[validators.Optional()]
    )

    # Comparison operator; plugins extend the available choices
    operator = SelectField(
        "Operator",
        choices=operator_choices,
        validators=[validators.Optional()]
    )

    # Free-text comparison value
    value = StringField("Value", validators=[validators.Optional()], render_kw={"placeholder": "A value"})

    def validate(self, extra_validators=None):
        """Validate the row: either all three parts are empty, or all three must be set."""
        # First, run the default validators
        if not super().validate(extra_validators):
            return False

        # Custom validation logic
        # If any of the operator/field/value is set, then they must be all set
        if any(value not in ("", False, "None", None) for value in [self.operator.data, self.field.data, self.value.data]):
            # The placeholder choice submits the string "None", so test for it explicitly
            if not self.operator.data or self.operator.data == 'None':
                self.operator.errors.append("Operator is required.")
                return False

            if not self.field.data or self.field.data == 'None':
                self.field.errors.append("Field is required.")
                return False

            if not self.value.data:
                self.value.errors.append("Value is required.")
                return False

        return True  # Only return True if all conditions pass
							
								
								
									
										74
									
								
								changedetectionio/conditions/pluggy_interface.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,74 @@ | ||||
| import pluggy | ||||
| import os | ||||
| import importlib | ||||
| import sys | ||||
| from . import default_plugin | ||||
|  | ||||
| # ✅ Ensure that the namespace in HookspecMarker matches PluginManager | ||||
| PLUGIN_NAMESPACE = "changedetectionio_conditions" | ||||
|  | ||||
| hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE) | ||||
| hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE) | ||||
|  | ||||
|  | ||||
class ConditionsSpec:
    """Hook specifications for extending JSON Logic conditions."""

    @hookspec
    def register_operators():
        """Return a dictionary of new JSON Logic operators (name -> callable)."""
        pass

    @hookspec
    def register_operator_choices():
        """Return a list of new (value, label) operator choices for the UI dropdown."""
        pass

    @hookspec
    def register_field_choices():
        """Return a list of new (value, label) field choices for the UI dropdown."""
        pass

    @hookspec
    def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
        """Return a dict of extra values to merge into the data the rules are tested against."""
        pass

    @hookspec
    def ui_edit_stats_extras(watch):
        """Return HTML content to add to the stats tab in the edit view"""
        pass
|  | ||||
# Set up the Pluggy plugin manager for the conditions namespace
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)

# Register the hook specifications so implementations can be detected and validated
plugin_manager.add_hookspecs(ConditionsSpec)

# Register the built-in default plugin manually
plugin_manager.register(default_plugin, "default_plugin")
|  | ||||
| # ✅ Load plugins from the plugins directory | ||||
def load_plugins_from_directory():
    """Import every module in the sibling 'plugins' directory and register it with pluggy.

    Files are loaded in sorted order so registration is deterministic across platforms
    (os.listdir order is arbitrary). A broken plugin is reported and skipped rather
    than aborting application startup.
    """
    plugins_dir = os.path.join(os.path.dirname(__file__), 'plugins')
    if not os.path.exists(plugins_dir):
        return

    # Get all Python files (excluding __init__.py), in deterministic order
    for filename in sorted(os.listdir(plugins_dir)):
        if filename.endswith(".py") and filename != "__init__.py":
            module_name = filename[:-3]  # Remove .py extension
            module_path = f"changedetectionio.conditions.plugins.{module_name}"

            try:
                module = importlib.import_module(module_path)
                # Register the plugin with pluggy
                plugin_manager.register(module, module_name)
            except Exception as e:
                # Robustness fix: catch any failure (not only Import/AttributeError)
                # so one bad plugin cannot crash startup.
                print(f"Error loading plugin {module_name}: {e}")
|  | ||||
| # Load plugins from the plugins directory | ||||
| load_plugins_from_directory() | ||||
|  | ||||
| # ✅ Discover installed plugins from external packages (if any) | ||||
| plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) | ||||
							
								
								
									
										1
									
								
								changedetectionio/conditions/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1 @@ | ||||
| # Import plugins package to make them discoverable | ||||
							
								
								
									
										107
									
								
								changedetectionio/conditions/plugins/levenshtein_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,107 @@ | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| # Support both plugin systems | ||||
| conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions") | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
def levenshtein_ratio_recent_history(watch, incoming_text=None):
    """Compare the latest snapshot against `incoming_text`, or against the previous snapshot.

    :param watch: Watch object exposing `history` (dict) and `get_history_snapshot()`.
    :param incoming_text: Optional freshly-filtered text to compare with the latest snapshot.
    :return: dict with 'distance', 'ratio' and 'percent_similar', or '' when not computable.
    """
    try:
        from Levenshtein import ratio, distance
        k = list(watch.history.keys())
        a = None
        b = None

        if incoming_text is not None:
            # Compare the latest saved snapshot against the incoming (unsaved) text.
            # Needs at least one snapshot to compare against.
            if len(k) >= 1:
                a = watch.get_history_snapshot(timestamp=k[-1])  # Latest saved snapshot
                b = incoming_text
        elif len(k) >= 2:
            # Called without incoming text (e.g. from ui_edit_stats_extras):
            # compare the two most recent snapshots.
            # (Bugfix: the previous fallback used the raw timestamp key k[-2] as the
            # comparison text instead of loading that snapshot's content, and did not
            # guard against a single-snapshot history.)
            a = watch.get_history_snapshot(timestamp=k[-1])  # Latest snapshot
            b = watch.get_history_snapshot(timestamp=k[-2])  # Previous snapshot

        if a and b:
            distance_value = distance(a, b)
            ratio_value = ratio(a, b)
            return {
                'distance': distance_value,
                'ratio': ratio_value,
                'percent_similar': round(ratio_value * 100, 2)
            }
    except Exception as e:
        logger.warning(f"Unable to calc similarity: {str(e)}")

    return ''
|  | ||||
@conditions_hookimpl
def register_operators():
    # This plugin adds no custom JSON Logic operators
    pass

@conditions_hookimpl
def register_operator_choices():
    # This plugin adds no operator dropdown choices
    pass
|  | ||||
|  | ||||
@conditions_hookimpl
def register_field_choices():
    """Expose the Levenshtein metrics as selectable condition fields."""
    choices = [
        ("levenshtein_ratio", "Levenshtein - Text similarity ratio"),
        ("levenshtein_distance", "Levenshtein - Text change distance"),
    ]
    return choices
|  | ||||
@conditions_hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    """Add Levenshtein similarity metrics (latest snapshot vs incoming text) to the rule data."""
    res = {}
    watch = application_datastruct['watching'].get(current_watch_uuid)

    # ephemeral_data['text'] is the current text after filters - the user may have
    # edited filters without saving them yet.
    if watch and 'text' in ephemeral_data:
        lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text', ''))
        if isinstance(lev_data, dict):
            # Map the computed metrics onto the condition-field names
            metric_map = {
                'levenshtein_ratio': 'ratio',
                'levenshtein_similarity': 'percent_similar',
                'levenshtein_distance': 'distance',
            }
            for out_key, src_key in metric_map.items():
                res[out_key] = lev_data.get(src_key, 0)

    return res
|  | ||||
@global_hookimpl
def ui_edit_stats_extras(watch):
    """Render a Levenshtein similarity summary for the watch edit 'Stats' tab.

    (Fix: the original had a second bare string statement directly after the
    docstring; merged into this single docstring.)
    """
    # Need at least two snapshots to compare
    if len(watch.history.keys()) < 2:
        return "<p>Not enough history to calculate Levenshtein metrics</p>"

    try:
        lev_data = levenshtein_ratio_recent_history(watch)
        if not lev_data or not isinstance(lev_data, dict):
            return "<p>Unable to calculate Levenshtein metrics</p>"

        html = f"""
        <div class="levenshtein-stats">
            <h4>Levenshtein Text Similarity Details</h4>
            <table class="pure-table">
                <tbody>
                    <tr>
                        <td>Raw distance (edits needed)</td>
                        <td>{lev_data['distance']}</td>
                    </tr>
                    <tr>
                        <td>Similarity ratio</td>
                        <td>{lev_data['ratio']:.4f}</td>
                    </tr>
                    <tr>
                        <td>Percent similar</td>
                        <td>{lev_data['percent_similar']}%</td>
                    </tr>
                </tbody>
            </table>
            <p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
        </div>
        """
        return html
    except Exception as e:
        logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
        return "<p>Error calculating Levenshtein metrics</p>"
							
								
								
									
										82
									
								
								changedetectionio/conditions/plugins/wordcount_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,82 @@ | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| # Support both plugin systems | ||||
| conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions") | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
def count_words_in_history(watch, incoming_text=None):
    """Count whitespace-separated words in `incoming_text`, or in the newest snapshot.

    Returns 0 when there is nothing to count or on any error.
    """
    try:
        if incoming_text is not None:
            # Called from add_data with the freshly filtered text
            return len(incoming_text.split())

        snapshot_keys = watch.history.keys()
        if snapshot_keys:
            # Called from the UI extras: count words in the latest snapshot
            newest_key = list(snapshot_keys)[-1]
            newest_content = watch.get_history_snapshot(newest_key)
            return len(newest_content.split())

        return 0
    except Exception as e:
        logger.error(f"Error counting words: {str(e)}")
        return 0
|  | ||||
| # Implement condition plugin hooks | ||||
@conditions_hookimpl
def register_operators():
    # No custom operators needed
    return {}

@conditions_hookimpl
def register_operator_choices():
    # No custom operator choices needed
    return []

@conditions_hookimpl
def register_field_choices():
    # Add a field that will be available in conditions
    return [
        ("word_count", "Word count of content"),
    ]
|  | ||||
@conditions_hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    """Supply the word_count value used during condition evaluation.

    Only populated when the watch exists and fresh snapshot text was passed
    in via ephemeral_data['text'].
    """
    data = {}
    watch = application_datastruct['watching'].get(current_watch_uuid)

    if watch and 'text' in ephemeral_data:
        data['word_count'] = count_words_in_history(watch, ephemeral_data['text'])

    return data
|  | ||||
def _generate_stats_html(watch):
    """Render the word-count table shown on the watch edit 'Stats' tab."""
    # Counts the latest stored snapshot (no incoming text on this path).
    count = count_words_in_history(watch)

    return f"""
    <div class="word-count-stats">
        <h4>Content Analysis</h4>
        <table class="pure-table">
            <tbody>
                <tr>
                    <td>Word count (latest snapshot)</td>
                    <td>{count}</td>
                </tr>
            </tbody>
        </table>
        <p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p>
    </div>
    """
|  | ||||
# BUGFIX: this module previously defined two functions both named
# ui_edit_stats_extras - one per plugin system.  In Python the second `def`
# rebinds the module-level name, so the first (conditions-plugin) registration
# was silently discarded and only the global hook survived.  Stacking both
# hookimpl markers on a single function keeps both registrations alive, since
# each pluggy marker records its metadata under a distinct attribute on the
# function object.
@global_hookimpl
@conditions_hookimpl
def ui_edit_stats_extras(watch):
    """Add word count stats to the watch edit UI (both plugin systems)."""
    return _generate_stats_html(watch)
| @@ -7,11 +7,29 @@ import os | ||||
| # Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>. | ||||
| visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button' | ||||
|  | ||||
| SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000 | ||||
| SCREENSHOT_DEFAULT_QUALITY = 40 | ||||
|  | ||||
| # Maximum total height for the final image (When in stitch mode). | ||||
| # We limit this to 16000px due to the huge amount of RAM that was being used | ||||
| # Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc) | ||||
| SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|  | ||||
| # The size at which we will switch to stitching method, when below this (and | ||||
| # MAX_TOTAL_HEIGHT which can be set by a user) we will use the default | ||||
| # screenshot method. | ||||
| SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000 | ||||
|  | ||||
| # available_fetchers() will scan this implementation looking for anything starting with html_ | ||||
| # this information is used in the form selections | ||||
| from changedetectionio.content_fetchers.requests import fetcher as html_requests | ||||
|  | ||||
|  | ||||
| import importlib.resources | ||||
| XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8') | ||||
| INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8') | ||||
|  | ||||
|  | ||||
| def available_fetchers(): | ||||
|     # See the if statement at the bottom of this file for how we switch between playwright and webdriver | ||||
|     import inspect | ||||
|   | ||||
| @@ -63,17 +63,12 @@ class Fetcher(): | ||||
|     # Time ONTOP of the system defined env minimum time | ||||
|     render_extract_delay = 0 | ||||
|  | ||||
|     def __init__(self): | ||||
|         import importlib.resources | ||||
|         self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8') | ||||
|         self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8') | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_error(self): | ||||
|         return self.error | ||||
|  | ||||
|     @abstractmethod | ||||
|     def run(self, | ||||
|     async def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
| @@ -87,7 +82,7 @@ class Fetcher(): | ||||
|         pass | ||||
|  | ||||
|     @abstractmethod | ||||
|     def quit(self): | ||||
|     def quit(self, watch=None): | ||||
|         return | ||||
|  | ||||
|     @abstractmethod | ||||
| @@ -127,7 +122,7 @@ class Fetcher(): | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def iterate_browser_steps(self, start_url=None): | ||||
|     async def iterate_browser_steps(self, start_url=None): | ||||
|         from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|         from playwright._impl._errors import TimeoutError, Error | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
| @@ -141,8 +136,9 @@ class Fetcher(): | ||||
|             for step in valid_steps: | ||||
|                 step_n += 1 | ||||
|                 logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...") | ||||
|                 self.screenshot_step("before-" + str(step_n)) | ||||
|                 self.save_step_html("before-" + str(step_n)) | ||||
|                 await self.screenshot_step("before-" + str(step_n)) | ||||
|                 await self.save_step_html("before-" + str(step_n)) | ||||
|  | ||||
|                 try: | ||||
|                     optional_value = step['optional_value'] | ||||
|                     selector = step['selector'] | ||||
| @@ -152,11 +148,11 @@ class Fetcher(): | ||||
|                     if '{%' in step['selector'] or '{{' in step['selector']: | ||||
|                         selector = jinja_render(template_str=step['selector']) | ||||
|  | ||||
|                     getattr(interface, "call_action")(action_name=step['operation'], | ||||
|                     await getattr(interface, "call_action")(action_name=step['operation'], | ||||
|                                                       selector=selector, | ||||
|                                                       optional_value=optional_value) | ||||
|                     self.screenshot_step(step_n) | ||||
|                     self.save_step_html(step_n) | ||||
|                     await self.screenshot_step(step_n) | ||||
|                     await self.save_step_html(step_n) | ||||
|                 except (Error, TimeoutError) as e: | ||||
|                     logger.debug(str(e)) | ||||
|                     # Stop processing here | ||||
|   | ||||
| @@ -4,9 +4,75 @@ from urllib.parse import urlparse | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \ | ||||
|     SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS | ||||
| from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent | ||||
| from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable | ||||
|  | ||||
async def capture_full_page_async(page):
    """Capture a full-page JPEG screenshot of an async Playwright page.

    Scrolls the page in viewport-sized chunks (to avoid GPU/RAM blowups on
    very tall pages), screenshots each chunk, then stitches the chunks
    together in a separate process.  Returns the JPEG bytes.

    NOTE(review): if page_height evaluates to 0 the while-loop never runs and
    screenshot_chunks[0] at the bottom would raise IndexError - presumably a
    real page always reports a positive scrollHeight; verify.
    """
    import os
    import time
    from multiprocessing import Process, Pipe

    start = time.time()

    # Document dimensions as reported by the browser's DOM.
    page_height = await page.evaluate("document.documentElement.scrollHeight")
    page_width = await page.evaluate("document.documentElement.scrollWidth")
    # Remembered so it can be restored after chunked capture.
    original_viewport = page.viewport_size

    logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")

    # Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks
    step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow
    screenshot_chunks = []
    y = 0

    if page_height > page.viewport_size['height']:
        if page_height < step_size:
            step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
        logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
        # Set viewport to a larger size to capture more content at once
        await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})

    # Capture screenshots in chunks up to the max total height.
    # request_gc() is called before and after each heavy operation to keep the
    # browser's memory footprint down between captures.
    while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
        await page.request_gc()
        await page.evaluate(f"window.scrollTo(0, {y})")
        await page.request_gc()
        screenshot_chunks.append(await page.screenshot(
            type="jpeg",
            full_page=False,
            quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
        ))
        y += step_size
        await page.request_gc()

    # Restore original viewport size
    await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})

    # If we have multiple chunks, stitch them together.
    # Stitching is done in a child process so PIL's large intermediate buffers
    # are reclaimed by the OS when the process exits.
    if len(screenshot_chunks) > 1:
        from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
        logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
        parent_conn, child_conn = Pipe()
        p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
        p.start()
        screenshot = parent_conn.recv_bytes()
        p.join()
        logger.debug(
            f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
        # Explicit cleanup
        del screenshot_chunks
        del p
        del parent_conn, child_conn
        screenshot_chunks = None
        return screenshot

    logger.debug(
        f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")

    # Single chunk: the page fit in one capture, return it directly.
    return screenshot_chunks[0]
|  | ||||
| class fetcher(Fetcher): | ||||
|     fetcher_description = "Playwright {}/Javascript".format( | ||||
|         os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize() | ||||
| @@ -57,9 +123,10 @@ class fetcher(Fetcher): | ||||
|                 self.proxy['username'] = parsed.username | ||||
|                 self.proxy['password'] = parsed.password | ||||
|  | ||||
|     def screenshot_step(self, step_n=''): | ||||
|     async def screenshot_step(self, step_n=''): | ||||
|         super().screenshot_step(step_n=step_n) | ||||
|         screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|         screenshot = await capture_full_page_async(page=self.page) | ||||
|  | ||||
|  | ||||
|         if self.browser_steps_screenshot_path is not None: | ||||
|             destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n)) | ||||
| @@ -67,15 +134,15 @@ class fetcher(Fetcher): | ||||
|             with open(destination, 'wb') as f: | ||||
|                 f.write(screenshot) | ||||
|  | ||||
|     def save_step_html(self, step_n): | ||||
|     async def save_step_html(self, step_n): | ||||
|         super().save_step_html(step_n=step_n) | ||||
|         content = self.page.content() | ||||
|         content = await self.page.content() | ||||
|         destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n)) | ||||
|         logger.debug(f"Saving step HTML to {destination}") | ||||
|         with open(destination, 'w') as f: | ||||
|             f.write(content) | ||||
|  | ||||
|     def run(self, | ||||
|     async def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
| @@ -86,26 +153,26 @@ class fetcher(Fetcher): | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|  | ||||
|         from playwright.sync_api import sync_playwright | ||||
|         from playwright.async_api import async_playwright | ||||
|         import playwright._impl._errors | ||||
|         from changedetectionio.content_fetchers import visualselector_xpath_selectors | ||||
|         import time | ||||
|         self.delete_browser_steps_screenshots() | ||||
|         response = None | ||||
|  | ||||
|         with sync_playwright() as p: | ||||
|         async with async_playwright() as p: | ||||
|             browser_type = getattr(p, self.browser_type) | ||||
|  | ||||
|             # Seemed to cause a connection Exception even tho I can see it connect | ||||
|             # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000) | ||||
|             # 60,000 connection timeout only | ||||
|             browser = browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000) | ||||
|             browser = await browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000) | ||||
|  | ||||
|             # SOCKS5 with authentication is not supported (yet) | ||||
|             # https://github.com/microsoft/playwright/issues/10567 | ||||
|  | ||||
|             # Set user agent to prevent Cloudflare from blocking the browser | ||||
|             # Use the default one configured in the App.py model that's passed from fetch_site_status.py | ||||
|             context = browser.new_context( | ||||
|             context = await browser.new_context( | ||||
|                 accept_downloads=False,  # Should never be needed | ||||
|                 bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others | ||||
|                 extra_http_headers=request_headers, | ||||
| @@ -115,41 +182,47 @@ class fetcher(Fetcher): | ||||
|                 user_agent=manage_user_agent(headers=request_headers), | ||||
|             ) | ||||
|  | ||||
|             self.page = context.new_page() | ||||
|             self.page = await context.new_page() | ||||
|  | ||||
|             # Listen for all console events and handle errors | ||||
|             self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) | ||||
|             self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|             # Re-use as much code from browser steps as possible so its the same | ||||
|             from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|             browsersteps_interface = steppable_browser_interface(start_url=url) | ||||
|             browsersteps_interface.page = self.page | ||||
|  | ||||
|             response = browsersteps_interface.action_goto_url(value=url) | ||||
|             self.headers = response.all_headers() | ||||
|             response = await browsersteps_interface.action_goto_url(value=url) | ||||
|  | ||||
|             if response is None: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 await context.close() | ||||
|                 await browser.close() | ||||
|                 logger.debug("Content Fetcher > Response object from the browser communication was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             # In async_playwright, all_headers() returns a coroutine | ||||
|             try: | ||||
|                 self.headers = await response.all_headers() | ||||
|             except TypeError: | ||||
|                 # Fallback for sync version | ||||
|                 self.headers = response.all_headers() | ||||
|  | ||||
|             try: | ||||
|                 if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code): | ||||
|                     browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None) | ||||
|                     await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None) | ||||
|             except playwright._impl._errors.TimeoutError as e: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 await context.close() | ||||
|                 await browser.close() | ||||
|                 # This can be ok, we will try to grab what we could retrieve | ||||
|                 pass | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}") | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 await context.close() | ||||
|                 await browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|             self.page.wait_for_timeout(extra_wait * 1000) | ||||
|             await self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|             try: | ||||
|                 self.status_code = response.status | ||||
| @@ -157,39 +230,50 @@ class fetcher(Fetcher): | ||||
|                 # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962 | ||||
|                 logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.") | ||||
|                 logger.critical(response) | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 await context.close() | ||||
|                 await browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             if self.status_code != 200 and not ignore_status_codes: | ||||
|                 screenshot = self.page.screenshot(type='jpeg', full_page=True, | ||||
|                                                   quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|  | ||||
|                 screenshot = await capture_full_page_async(self.page) | ||||
|                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) | ||||
|  | ||||
|             if not empty_pages_are_a_change and len(self.page.content().strip()) == 0: | ||||
|             if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0: | ||||
|                 logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False") | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 await context.close() | ||||
|                 await browser.close() | ||||
|                 raise EmptyReply(url=url, status_code=response.status) | ||||
|  | ||||
|             # Run Browser Steps here | ||||
|             if self.browser_steps_get_valid_steps(): | ||||
|                 self.iterate_browser_steps(start_url=url) | ||||
|                 await self.iterate_browser_steps(start_url=url) | ||||
|  | ||||
|             self.page.wait_for_timeout(extra_wait * 1000) | ||||
|             await self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|             now = time.time() | ||||
|             # So we can find an element on the page where its selector was entered manually (maybe not xPath etc) | ||||
|             if current_include_filters is not None: | ||||
|                 self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters))) | ||||
|                 await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters))) | ||||
|             else: | ||||
|                 self.page.evaluate("var include_filters=''") | ||||
|                 await self.page.evaluate("var include_filters=''") | ||||
|             await self.page.request_gc() | ||||
|  | ||||
|             self.xpath_data = self.page.evaluate( | ||||
|                 "async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}") | ||||
|             self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}") | ||||
|             # request_gc before and after evaluate to free up memory | ||||
|             # @todo browsersteps etc | ||||
|             MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|             self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, { | ||||
|                 "visualselector_xpath_selectors": visualselector_xpath_selectors, | ||||
|                 "max_height": MAX_TOTAL_HEIGHT | ||||
|             }) | ||||
|             await self.page.request_gc() | ||||
|  | ||||
|             self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS) | ||||
|             await self.page.request_gc() | ||||
|  | ||||
|             self.content = await self.page.content() | ||||
|             await self.page.request_gc() | ||||
|             logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s") | ||||
|  | ||||
|             self.content = self.page.content() | ||||
|             # Bug 3 in Playwright screenshot handling | ||||
|             # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|             # JPEG is better here because the screenshots can be very very large | ||||
| @@ -199,13 +283,41 @@ class fetcher(Fetcher): | ||||
|             # acceptable screenshot quality here | ||||
|             try: | ||||
|                 # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage | ||||
|                 self.screenshot = self.page.screenshot(type='jpeg', | ||||
|                                                        full_page=True, | ||||
|                                                        quality=int(os.getenv("SCREENSHOT_QUALITY", 72)), | ||||
|                                                        ) | ||||
|                 self.screenshot = await capture_full_page_async(page=self.page) | ||||
|  | ||||
|             except Exception as e: | ||||
|                 # It's likely the screenshot was too long/big and something crashed | ||||
|                 raise ScreenshotUnavailable(url=url, status_code=self.status_code) | ||||
|             finally: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 # Request garbage collection one more time before closing | ||||
|                 try: | ||||
|                     await self.page.request_gc() | ||||
|                 except: | ||||
|                     pass | ||||
|                  | ||||
|                 # Clean up resources properly | ||||
|                 try: | ||||
|                     await self.page.request_gc() | ||||
|                 except: | ||||
|                     pass | ||||
|  | ||||
|                 try: | ||||
|                     await self.page.close() | ||||
|                 except: | ||||
|                     pass | ||||
|                 self.page = None | ||||
|  | ||||
|                 try: | ||||
|                     await context.close() | ||||
|                 except: | ||||
|                     pass | ||||
|                 context = None | ||||
|  | ||||
|                 try: | ||||
|                     await browser.close() | ||||
|                 except: | ||||
|                     pass | ||||
|                 browser = None | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -6,8 +6,85 @@ from urllib.parse import urlparse | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \ | ||||
|     SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \ | ||||
|     SCREENSHOT_MAX_TOTAL_HEIGHT | ||||
| from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent | ||||
| from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError | ||||
| from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, \ | ||||
|     BrowserConnectError | ||||
|  | ||||
|  | ||||
| # Bug 3 in Playwright screenshot handling | ||||
| # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|  | ||||
| # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded | ||||
| # which will significantly increase the IO size between the server and client, it's recommended to use the lowest | ||||
| # acceptable screenshot quality here | ||||
async def capture_full_page(page):
    """Capture a full-page JPEG screenshot of a Pyppeteer page.

    Mirrors the Playwright version: scrolls the document in chunks, captures
    a JPEG per chunk, and stitches them in a separate worker process.
    Returns the JPEG bytes.

    NOTE(review): as with the Playwright variant, a page_height of 0 would
    leave screenshot_chunks empty and make the final index raise IndexError.
    """
    import os
    import time
    from multiprocessing import Process, Pipe

    start = time.time()

    # Document dimensions as reported by the browser's DOM.
    page_height = await page.evaluate("document.documentElement.scrollHeight")
    page_width = await page.evaluate("document.documentElement.scrollWidth")
    # Pyppeteer exposes the viewport as page.viewport (a dict), unlike
    # Playwright's page.viewport_size.
    original_viewport = page.viewport

    logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}")

    # Bug 3 in Playwright screenshot handling
    # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
    # JPEG is better here because the screenshots can be very very large

    # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
    # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
    # acceptable screenshot quality here


    step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Something that will not cause the GPU to overflow when taking the screenshot
    screenshot_chunks = []
    y = 0
    if page_height > page.viewport['height']:
        if page_height < step_size:
            step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
        await page.setViewport({'width': page.viewport['width'], 'height': step_size})

    while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
        # better than scrollTo incase they override it in the page
        await page.evaluate(
            """(y) => {
                document.documentElement.scrollTop = y;
                document.body.scrollTop = y;
            }""",
            y
        )

        screenshot_chunks.append(await page.screenshot(type_='jpeg',
                                                       fullPage=False,
                                                       quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
        y += step_size

    # Put the viewport back the way we found it.
    await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']})

    # Multiple chunks: stitch them together in a child process so the large
    # intermediate image buffers are reclaimed when the process exits.
    if len(screenshot_chunks) > 1:
        from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker
        logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together")
        parent_conn, child_conn = Pipe()
        p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT))
        p.start()
        screenshot = parent_conn.recv_bytes()
        p.join()
        logger.debug(
            f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")

        screenshot_chunks = None
        return screenshot

    logger.debug(
        f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s")
    # Single chunk: the page fit in one capture, return it directly.
    return screenshot_chunks[0]
|  | ||||
|  | ||||
| class fetcher(Fetcher): | ||||
|     fetcher_description = "Puppeteer/direct {}/Javascript".format( | ||||
| @@ -78,10 +155,13 @@ class fetcher(Fetcher): | ||||
|                          is_binary, | ||||
|                          empty_pages_are_a_change | ||||
|                          ): | ||||
|  | ||||
|         from changedetectionio.content_fetchers import visualselector_xpath_selectors | ||||
|         import re | ||||
|         self.delete_browser_steps_screenshots() | ||||
|         extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|  | ||||
|         n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|         extra_wait = min(n, 15) | ||||
|  | ||||
|         logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.") | ||||
|  | ||||
|         from pyppeteer import Pyppeteer | ||||
|         pyppeteer_instance = Pyppeteer() | ||||
| @@ -97,13 +177,24 @@ class fetcher(Fetcher): | ||||
|         except websockets.exceptions.InvalidURI: | ||||
|             raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://") | ||||
|         except Exception as e: | ||||
|             raise BrowserConnectError(msg=f"Error connecting to the browser {str(e)}") | ||||
|             raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'") | ||||
|  | ||||
|         # Better is to launch chrome with the URL as arg | ||||
|         # non-headless - newPage() will launch an extra tab/window, .browser should already contain 1 page/tab | ||||
|         # headless - ask a new page | ||||
|         self.page = (pages := await browser.pages) and len(pages) or await browser.newPage() | ||||
|  | ||||
|         if '--window-size' in self.browser_connection_url: | ||||
|             # Be sure the viewport is always the window-size, this is often not the same thing | ||||
|             match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url) | ||||
|             if match: | ||||
|                 logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}") | ||||
|                 await self.page.setViewport({ | ||||
|                     "width": int(match.group(1)), | ||||
|                     "height": int(match.group(2)) | ||||
|                 }) | ||||
|                 logger.debug(f"Puppeteer viewport size {self.page.viewport}") | ||||
|  | ||||
|         try: | ||||
|             from pyppeteerstealth import inject_evasions_into_page | ||||
|         except ImportError: | ||||
| @@ -148,14 +239,35 @@ class fetcher(Fetcher): | ||||
|         #            browsersteps_interface = steppable_browser_interface() | ||||
|         #            browsersteps_interface.page = self.page | ||||
|  | ||||
|         response = await self.page.goto(url, waitUntil="load") | ||||
|         async def handle_frame_navigation(event): | ||||
|             logger.debug(f"Frame navigated: {event}") | ||||
|             w = extra_wait - 2 if extra_wait > 4 else 2 | ||||
|             logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...") | ||||
|             await asyncio.sleep(w) | ||||
|             logger.debug("Issuing stopLoading command...") | ||||
|             await self.page._client.send('Page.stopLoading') | ||||
|             logger.debug("stopLoading command sent!") | ||||
|  | ||||
|         self.page._client.on('Page.frameStartedNavigating', lambda event: asyncio.create_task(handle_frame_navigation(event))) | ||||
|         self.page._client.on('Page.frameStartedLoading', lambda event: asyncio.create_task(handle_frame_navigation(event))) | ||||
|         self.page._client.on('Page.frameStoppedLoading', lambda event: logger.debug(f"Frame stopped loading: {event}")) | ||||
|  | ||||
|         if response is None: | ||||
|             await self.page.close() | ||||
|             await browser.close() | ||||
|             logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)") | ||||
|             raise EmptyReply(url=url, status_code=None) | ||||
|         response = None | ||||
|         attempt=0 | ||||
|         while not response: | ||||
|             logger.debug(f"Attempting page fetch {url} attempt {attempt}") | ||||
|             response = await self.page.goto(url) | ||||
|             await asyncio.sleep(1 + extra_wait) | ||||
|             if response: | ||||
|                 break | ||||
|             if not response: | ||||
|                 logger.warning("Page did not fetch! trying again!") | ||||
|             if response is None and attempt>=2: | ||||
|                 await self.page.close() | ||||
|                 await browser.close() | ||||
|                 logger.warning(f"Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content) exiting attmpt {attempt}") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|             attempt+=1 | ||||
|  | ||||
|         self.headers = response.headers | ||||
|  | ||||
| @@ -181,11 +293,10 @@ class fetcher(Fetcher): | ||||
|             raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|         if self.status_code != 200 and not ignore_status_codes: | ||||
|             screenshot = await self.page.screenshot(type_='jpeg', | ||||
|                                                     fullPage=True, | ||||
|                                                     quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|             screenshot = await capture_full_page(page=self.page) | ||||
|  | ||||
|             raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) | ||||
|  | ||||
|         content = await self.page.content | ||||
|  | ||||
|         if not empty_pages_are_a_change and len(content.strip()) == 0: | ||||
| @@ -199,64 +310,48 @@ class fetcher(Fetcher): | ||||
|         #            if self.browser_steps_get_valid_steps(): | ||||
|         #                self.iterate_browser_steps() | ||||
|  | ||||
|         await asyncio.sleep(1 + extra_wait) | ||||
|  | ||||
|         # So we can find an element on the page where its selector was entered manually (maybe not xPath etc) | ||||
|         # Setup the xPath/VisualSelector scraper | ||||
|         if current_include_filters is not None: | ||||
|         if current_include_filters: | ||||
|             js = json.dumps(current_include_filters) | ||||
|             await self.page.evaluate(f"var include_filters={js}") | ||||
|         else: | ||||
|             await self.page.evaluate(f"var include_filters=''") | ||||
|  | ||||
|         self.xpath_data = await self.page.evaluate( | ||||
|             "async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}") | ||||
|         self.instock_data = await self.page.evaluate("async () => {" + self.instock_data_js + "}") | ||||
|         MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|         self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, { | ||||
|             "visualselector_xpath_selectors": visualselector_xpath_selectors, | ||||
|             "max_height": MAX_TOTAL_HEIGHT | ||||
|         }) | ||||
|         if not self.xpath_data: | ||||
|             raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)") | ||||
|  | ||||
|         self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS) | ||||
|  | ||||
|         self.content = await self.page.content | ||||
|         # Bug 3 in Playwright screenshot handling | ||||
|         # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|         # JPEG is better here because the screenshots can be very very large | ||||
|  | ||||
|         # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded | ||||
|         # which will significantly increase the IO size between the server and client, it's recommended to use the lowest | ||||
|         # acceptable screenshot quality here | ||||
|         try: | ||||
|             self.screenshot = await self.page.screenshot(type_='jpeg', | ||||
|                                                          fullPage=True, | ||||
|                                                          quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|         except Exception as e: | ||||
|             logger.error("Error fetching screenshot") | ||||
|             # // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw' | ||||
|             # // @ todo after text extract, we can place some overlay text with red background to say 'croppped' | ||||
|             logger.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot') | ||||
|             try: | ||||
|                 self.screenshot = await self.page.screenshot(type_='jpeg', | ||||
|                                                              fullPage=False, | ||||
|                                                              quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|             except Exception as e: | ||||
|                 logger.error('ERROR: Failed to get viewport-only reduced screenshot :(') | ||||
|                 pass | ||||
|         finally: | ||||
|             # It's good to log here in the case that the browser crashes on shutting down but we still get the data we need | ||||
|             logger.success(f"Fetching '{url}' complete, closing page") | ||||
|             await self.page.close() | ||||
|             logger.success(f"Fetching '{url}' complete, closing browser") | ||||
|             await browser.close() | ||||
|         self.screenshot = await capture_full_page(page=self.page) | ||||
|  | ||||
|         # It's good to log here in the case that the browser crashes on shutting down but we still get the data we need | ||||
|         logger.success(f"Fetching '{url}' complete, closing page") | ||||
|         await self.page.close() | ||||
|         logger.success(f"Fetching '{url}' complete, closing browser") | ||||
|         await browser.close() | ||||
|         logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.") | ||||
|  | ||||
|     async def main(self, **kwargs): | ||||
|         await self.fetch_page(**kwargs) | ||||
|  | ||||
|     def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False, | ||||
|     async def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False, | ||||
|             current_include_filters=None, is_binary=False, empty_pages_are_a_change=False): | ||||
|  | ||||
|         #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints | ||||
|         max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180) | ||||
|         max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)) | ||||
|  | ||||
|         # This will work in 3.10 but not >= 3.11 because 3.11 wants tasks only | ||||
|         # Now we run this properly in async context since we're called from async worker | ||||
|         try: | ||||
|             asyncio.run(asyncio.wait_for(self.main( | ||||
|             await asyncio.wait_for(self.main( | ||||
|                 url=url, | ||||
|                 timeout=timeout, | ||||
|                 request_headers=request_headers, | ||||
| @@ -266,7 +361,7 @@ class fetcher(Fetcher): | ||||
|                 current_include_filters=current_include_filters, | ||||
|                 is_binary=is_binary, | ||||
|                 empty_pages_are_a_change=empty_pages_are_a_change | ||||
|             ), timeout=max_time)) | ||||
|             ), timeout=max_time) | ||||
|         except asyncio.TimeoutError: | ||||
|             raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds.")) | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| from loguru import logger | ||||
| import hashlib | ||||
| import os | ||||
| import asyncio | ||||
| from changedetectionio import strtobool | ||||
| from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived | ||||
| from changedetectionio.content_fetchers.base import Fetcher | ||||
| @@ -15,7 +16,7 @@ class fetcher(Fetcher): | ||||
|         self.proxy_override = proxy_override | ||||
|         # browser_connection_url is none because its always 'launched locally' | ||||
|  | ||||
|     def run(self, | ||||
|     def _run_sync(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
| @@ -25,9 +26,11 @@ class fetcher(Fetcher): | ||||
|             current_include_filters=None, | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|         """Synchronous version of run - the original requests implementation""" | ||||
|  | ||||
|         import chardet | ||||
|         import requests | ||||
|         from requests.exceptions import ProxyError, ConnectionError, RequestException | ||||
|  | ||||
|         if self.browser_steps_get_valid_steps(): | ||||
|             raise BrowserStepsInUnsupportedFetcher(url=url) | ||||
| @@ -35,7 +38,6 @@ class fetcher(Fetcher): | ||||
|         proxies = {} | ||||
|  | ||||
|         # Allows override the proxy on a per-request basis | ||||
|  | ||||
|         # https://requests.readthedocs.io/en/latest/user/advanced/#socks | ||||
|         # Should also work with `socks5://user:pass@host:port` type syntax. | ||||
|  | ||||
| @@ -52,14 +54,19 @@ class fetcher(Fetcher): | ||||
|         if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'): | ||||
|             from requests_file import FileAdapter | ||||
|             session.mount('file://', FileAdapter()) | ||||
|  | ||||
|         r = session.request(method=request_method, | ||||
|                             data=request_body.encode('utf-8') if type(request_body) is str else request_body, | ||||
|                             url=url, | ||||
|                             headers=request_headers, | ||||
|                             timeout=timeout, | ||||
|                             proxies=proxies, | ||||
|                             verify=False) | ||||
|         try: | ||||
|             r = session.request(method=request_method, | ||||
|                                 data=request_body.encode('utf-8') if type(request_body) is str else request_body, | ||||
|                                 url=url, | ||||
|                                 headers=request_headers, | ||||
|                                 timeout=timeout, | ||||
|                                 proxies=proxies, | ||||
|                                 verify=False) | ||||
|         except Exception as e: | ||||
|             msg = str(e) | ||||
|             if proxies and 'SOCKSHTTPSConnectionPool' in msg: | ||||
|                 msg = f"Proxy connection failed? {msg}" | ||||
|             raise Exception(msg) from e | ||||
|  | ||||
|         # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks. | ||||
|         # For example - some sites don't tell us it's utf-8, but return utf-8 content | ||||
| @@ -94,5 +101,48 @@ class fetcher(Fetcher): | ||||
|         else: | ||||
|             self.content = r.text | ||||
|  | ||||
|  | ||||
|         self.raw_content = r.content | ||||
|  | ||||
|     async def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|         """Async wrapper that runs the synchronous requests code in a thread pool""" | ||||
|          | ||||
|         loop = asyncio.get_event_loop() | ||||
|          | ||||
|         # Run the synchronous _run_sync in a thread pool to avoid blocking the event loop | ||||
|         await loop.run_in_executor( | ||||
|             None,  # Use default ThreadPoolExecutor | ||||
|             lambda: self._run_sync( | ||||
|                 url=url, | ||||
|                 timeout=timeout, | ||||
|                 request_headers=request_headers, | ||||
|                 request_body=request_body, | ||||
|                 request_method=request_method, | ||||
|                 ignore_status_codes=ignore_status_codes, | ||||
|                 current_include_filters=current_include_filters, | ||||
|                 is_binary=is_binary, | ||||
|                 empty_pages_are_a_change=empty_pages_are_a_change | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|     def quit(self, watch=None): | ||||
|  | ||||
|         # In case they switched to `requests` fetcher from something else | ||||
|         # Then the screenshot could be old, in any case, it's not used here. | ||||
|         # REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing | ||||
|         if strtobool(os.getenv("REMOVE_REQUESTS_OLD_SCREENSHOTS", 'true')): | ||||
|             screenshot = watch.get_screenshot() | ||||
|             if screenshot: | ||||
|                 try: | ||||
|                     os.unlink(screenshot) | ||||
|                 except Exception as e: | ||||
|                     logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}") | ||||
|  | ||||
|   | ||||
| @@ -1,190 +0,0 @@ | ||||
| module.exports = async ({page, context}) => { | ||||
|  | ||||
|     var { | ||||
|         url, | ||||
|         execute_js, | ||||
|         user_agent, | ||||
|         extra_wait_ms, | ||||
|         req_headers, | ||||
|         include_filters, | ||||
|         xpath_element_js, | ||||
|         screenshot_quality, | ||||
|         proxy_username, | ||||
|         proxy_password, | ||||
|         disk_cache_dir, | ||||
|         no_cache_list, | ||||
|         block_url_list, | ||||
|     } = context; | ||||
|  | ||||
|     await page.setBypassCSP(true) | ||||
|     await page.setExtraHTTPHeaders(req_headers); | ||||
|  | ||||
|     if (user_agent) { | ||||
|         await page.setUserAgent(user_agent); | ||||
|     } | ||||
|     // https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded | ||||
|  | ||||
|     await page.setDefaultNavigationTimeout(0); | ||||
|  | ||||
|     if (proxy_username) { | ||||
|         // Setting Proxy-Authentication header is deprecated, and doing so can trigger header change errors from Puppeteer | ||||
|         // https://github.com/puppeteer/puppeteer/issues/676 ? | ||||
|         // https://help.brightdata.com/hc/en-us/articles/12632549957649-Proxy-Manager-How-to-Guides#h_01HAKWR4Q0AFS8RZTNYWRDFJC2 | ||||
|         // https://cri.dev/posts/2020-03-30-How-to-solve-Puppeteer-Chrome-Error-ERR_INVALID_ARGUMENT/ | ||||
|         await page.authenticate({ | ||||
|             username: proxy_username, | ||||
|             password: proxy_password | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     await page.setViewport({ | ||||
|         width: 1024, | ||||
|         height: 768, | ||||
|         deviceScaleFactor: 1, | ||||
|     }); | ||||
|  | ||||
|     await page.setRequestInterception(true); | ||||
|     if (disk_cache_dir) { | ||||
|         console.log(">>>>>>>>>>>>>>> LOCAL DISK CACHE ENABLED <<<<<<<<<<<<<<<<<<<<<"); | ||||
|     } | ||||
|     const fs = require('fs'); | ||||
|     const crypto = require('crypto'); | ||||
|  | ||||
|     function file_is_expired(file_path) { | ||||
|         if (!fs.existsSync(file_path)) { | ||||
|             return true; | ||||
|         } | ||||
|         var stats = fs.statSync(file_path); | ||||
|         const now_date = new Date(); | ||||
|         const expire_seconds = 300; | ||||
|         if ((now_date / 1000) - (stats.mtime.getTime() / 1000) > expire_seconds) { | ||||
|             console.log("CACHE EXPIRED: " + file_path); | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|  | ||||
|     } | ||||
|  | ||||
|     page.on('request', async (request) => { | ||||
|         // General blocking of requests that waste traffic | ||||
|         if (block_url_list.some(substring => request.url().toLowerCase().includes(substring))) return request.abort(); | ||||
|  | ||||
|         if (disk_cache_dir) { | ||||
|             const url = request.url(); | ||||
|             const key = crypto.createHash('md5').update(url).digest("hex"); | ||||
|             const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/'; | ||||
|  | ||||
|             // https://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js | ||||
|  | ||||
|             if (fs.existsSync(dir_path + key)) { | ||||
|                 console.log("* CACHE HIT , using - " + dir_path + key + " - " + url); | ||||
|                 const cached_data = fs.readFileSync(dir_path + key); | ||||
|                 // @todo headers can come from dir_path+key+".meta" json file | ||||
|                 request.respond({ | ||||
|                     status: 200, | ||||
|                     //contentType: 'text/html', //@todo | ||||
|                     body: cached_data | ||||
|                 }); | ||||
|                 return; | ||||
|             } | ||||
|         } | ||||
|         request.continue(); | ||||
|     }); | ||||
|  | ||||
|  | ||||
|     if (disk_cache_dir) { | ||||
|         page.on('response', async (response) => { | ||||
|             const url = response.url(); | ||||
|             // Basic filtering for sane responses | ||||
|             if (response.request().method() != 'GET' || response.request().resourceType() == 'xhr' || response.request().resourceType() == 'document' || response.status() != 200) { | ||||
|                 console.log("Skipping (not useful) - Status:" + response.status() + " Method:" + response.request().method() + " ResourceType:" + response.request().resourceType() + " " + url); | ||||
|                 return; | ||||
|             } | ||||
|             if (no_cache_list.some(substring => url.toLowerCase().includes(substring))) { | ||||
|                 console.log("Skipping (no_cache_list) - " + url); | ||||
|                 return; | ||||
|             } | ||||
|             if (url.toLowerCase().includes('data:')) { | ||||
|                 console.log("Skipping (embedded-data) - " + url); | ||||
|                 return; | ||||
|             } | ||||
|             response.buffer().then(buffer => { | ||||
|                 if (buffer.length > 100) { | ||||
|                     console.log("Cache - Saving " + response.request().method() + " - " + url + " - " + response.request().resourceType()); | ||||
|  | ||||
|                     const key = crypto.createHash('md5').update(url).digest("hex"); | ||||
|                     const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/'; | ||||
|  | ||||
|                     if (!fs.existsSync(dir_path)) { | ||||
|                         fs.mkdirSync(dir_path, {recursive: true}) | ||||
|                     } | ||||
|  | ||||
|                     if (fs.existsSync(dir_path + key)) { | ||||
|                         if (file_is_expired(dir_path + key)) { | ||||
|                             fs.writeFileSync(dir_path + key, buffer); | ||||
|                         } | ||||
|                     } else { | ||||
|                         fs.writeFileSync(dir_path + key, buffer); | ||||
|                     } | ||||
|                 } | ||||
|             }); | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     const r = await page.goto(url, { | ||||
|         waitUntil: 'load' | ||||
|     }); | ||||
|  | ||||
|     await page.waitForTimeout(1000); | ||||
|     await page.waitForTimeout(extra_wait_ms); | ||||
|  | ||||
|     if (execute_js) { | ||||
|         await page.evaluate(execute_js); | ||||
|         await page.waitForTimeout(200); | ||||
|     } | ||||
|  | ||||
|     var xpath_data; | ||||
|     var instock_data; | ||||
|     try { | ||||
|         // Not sure the best way here, in the future this should be a new package added to npm then run in evaluatedCode | ||||
|         // (Once the old playwright is removed) | ||||
|         xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters); | ||||
|         instock_data = await page.evaluate(() => {%instock_scrape_code%}); | ||||
|     } catch (e) { | ||||
|         console.log(e); | ||||
|     } | ||||
|  | ||||
|     // Protocol error (Page.captureScreenshot): Cannot take screenshot with 0 width can come from a proxy auth failure | ||||
|     // Wrap it here (for now) | ||||
|  | ||||
|     var b64s = false; | ||||
|     try { | ||||
|         b64s = await page.screenshot({encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg'}); | ||||
|     } catch (e) { | ||||
|         console.log(e); | ||||
|     } | ||||
|  | ||||
|     // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw' | ||||
|     if (!b64s) { | ||||
|         // @todo after text extract, we can place some overlay text with red background to say 'croppped' | ||||
|         console.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot'); | ||||
|         try { | ||||
|             b64s = await page.screenshot({encoding: "base64", quality: screenshot_quality, type: 'jpeg'}); | ||||
|         } catch (e) { | ||||
|             console.log(e); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     var html = await page.content(); | ||||
|     return { | ||||
|         data: { | ||||
|             'content': html, | ||||
|             'headers': r.headers(), | ||||
|             'instock_data': instock_data, | ||||
|             'screenshot': b64s, | ||||
|             'status_code': r.status(), | ||||
|             'xpath_data': xpath_data | ||||
|         }, | ||||
|         type: 'application/json', | ||||
|     }; | ||||
| }; | ||||
| @@ -1,216 +1,236 @@ | ||||
| // Restock Detector | ||||
| // (c) Leigh Morresi dgtlmoon@gmail.com | ||||
| // | ||||
| // Assumes the product is in stock to begin with, unless the following appears above the fold ; | ||||
| // - outOfStockTexts appears above the fold (out of stock) | ||||
| // - negateOutOfStockRegex (really is in stock) | ||||
| async () => { | ||||
|  | ||||
| function isItemInStock() { | ||||
|     // @todo Pass these in so the same list can be used in non-JS fetchers | ||||
|     const outOfStockTexts = [ | ||||
|         ' أخبرني عندما يتوفر', | ||||
|         '0 in stock', | ||||
|         'actuellement indisponible', | ||||
|         'agotado', | ||||
|         'article épuisé', | ||||
|         'artikel zurzeit vergriffen', | ||||
|         'as soon as stock is available', | ||||
|         'ausverkauft', // sold out | ||||
|         'available for back order', | ||||
|         'awaiting stock', | ||||
|         'back in stock soon', | ||||
|         'back-order or out of stock', | ||||
|         'backordered', | ||||
|         'benachrichtigt mich', // notify me | ||||
|         'brak na stanie', | ||||
|         'brak w magazynie', | ||||
|         'coming soon', | ||||
|         'currently have any tickets for this', | ||||
|         'currently unavailable', | ||||
|         'dieser artikel ist bald wieder verfügbar', | ||||
|         'dostępne wkrótce', | ||||
|         'en rupture de stock', | ||||
|         'esgotado', | ||||
|         'indisponível', | ||||
|         'isn\'t in stock right now', | ||||
|         'isnt in stock right now', | ||||
|         'isn’t in stock right now', | ||||
|         'item is no longer available', | ||||
|         'let me know when it\'s available', | ||||
|         'mail me when available', | ||||
|         'message if back in stock', | ||||
|         'mevcut değil', | ||||
|         'nachricht bei', | ||||
|         'nicht auf lager', | ||||
|         'nicht lagernd', | ||||
|         'nicht lieferbar', | ||||
|         'nicht verfügbar', | ||||
|         'nicht vorrätig', | ||||
|         'nicht zur verfügung', | ||||
|         'nie znaleziono produktów', | ||||
|         'niet beschikbaar', | ||||
|         'niet leverbaar', | ||||
|         'niet op voorraad', | ||||
|         'no disponible', | ||||
|         'no longer in stock', | ||||
|         'no tickets available', | ||||
|         'not available', | ||||
|         'not currently available', | ||||
|         'not in stock', | ||||
|         'notify me when available', | ||||
|         'notify me', | ||||
|         'notify when available', | ||||
|         'não disponível', | ||||
|         'não estamos a aceitar encomendas', | ||||
|         'out of stock', | ||||
|         'out-of-stock', | ||||
|         'prodotto esaurito', | ||||
|         'produkt niedostępny', | ||||
|         'sold out', | ||||
|         'sold-out', | ||||
|         'stokta yok', | ||||
|         'temporarily out of stock', | ||||
|         'temporarily unavailable', | ||||
|         'there were no search results for', | ||||
|         'this item is currently unavailable', | ||||
|         'tickets unavailable', | ||||
|         'tijdelijk uitverkocht', | ||||
|         'tükendi', | ||||
|         'unavailable nearby', | ||||
|         'unavailable tickets', | ||||
|         'vergriffen', | ||||
|         'vorbestellen', | ||||
|         'vorbestellung ist bald möglich', | ||||
|         'we don\'t currently have any', | ||||
|         'we couldn\'t find any products that match', | ||||
|         'we do not currently have an estimate of when this product will be back in stock.', | ||||
|         'we don\'t know when or if this item will be back in stock.', | ||||
|         'we were not able to find a match', | ||||
|         'when this arrives in stock', | ||||
|         'zur zeit nicht an lager', | ||||
|         '品切れ', | ||||
|         '已售', | ||||
|         '已售完', | ||||
|         '품절' | ||||
|     ]; | ||||
|     function isItemInStock() { | ||||
|         // @todo Pass these in so the same list can be used in non-JS fetchers | ||||
|         const outOfStockTexts = [ | ||||
|             ' أخبرني عندما يتوفر', | ||||
|             '0 in stock', | ||||
|             'actuellement indisponible', | ||||
|             'agotado', | ||||
|             'article épuisé', | ||||
|             'artikel zurzeit vergriffen', | ||||
|             'as soon as stock is available', | ||||
|             'aucune offre n\'est disponible', | ||||
|             'ausverkauft', // sold out | ||||
|             'available for back order', | ||||
|             'awaiting stock', | ||||
|             'back in stock soon', | ||||
|             'back-order or out of stock', | ||||
|             'backordered', | ||||
|             'benachrichtigt mich', // notify me | ||||
|             'binnenkort leverbaar', // coming soon | ||||
|             'brak na stanie', | ||||
|             'brak w magazynie', | ||||
|             'coming soon', | ||||
|             'currently have any tickets for this', | ||||
|             'currently unavailable', | ||||
|             'dieser artikel ist bald wieder verfügbar', | ||||
|             'dostępne wkrótce', | ||||
|             'en rupture', | ||||
|             'esgotado', | ||||
|             'in kürze lieferbar', | ||||
|             'indisponible', | ||||
|             'indisponível', | ||||
|             'isn\'t in stock right now', | ||||
|             'isnt in stock right now', | ||||
|             'isn’t in stock right now', | ||||
|             'item is no longer available', | ||||
|             'let me know when it\'s available', | ||||
|             'mail me when available', | ||||
|             'message if back in stock', | ||||
|             'mevcut değil', | ||||
|             'nachricht bei', | ||||
|             'nicht auf lager', | ||||
|             'nicht lagernd', | ||||
|             'nicht lieferbar', | ||||
|             'nicht verfügbar', | ||||
|             'nicht vorrätig', | ||||
|             'nicht zur verfügung', | ||||
|             'nie znaleziono produktów', | ||||
|             'niet beschikbaar', | ||||
|             'niet leverbaar', | ||||
|             'niet op voorraad', | ||||
|             'no disponible', | ||||
|             'no featured offers available', | ||||
|             'no longer available', | ||||
|             'no longer in stock', | ||||
|             'no tickets available', | ||||
|             'non disponibile', | ||||
|             'non disponible', | ||||
|             'not available', | ||||
|             'not currently available', | ||||
|             'not in stock', | ||||
|             'notify me when available', | ||||
|             'notify me', | ||||
|             'notify when available', | ||||
|             'não disponível', | ||||
|             'não estamos a aceitar encomendas', | ||||
|             'out of stock', | ||||
|             'out-of-stock', | ||||
|             'plus disponible', | ||||
|             'prodotto esaurito', | ||||
|             'produkt niedostępny', | ||||
|             'rupture', | ||||
|             'sold out', | ||||
|             'sold-out', | ||||
|             'stok habis', | ||||
|             'stok kosong', | ||||
|             'stok varian ini habis', | ||||
|             'stokta yok', | ||||
|             'temporarily out of stock', | ||||
|             'temporarily unavailable', | ||||
|             'there were no search results for', | ||||
|             'this item is currently unavailable', | ||||
|             'tickets unavailable', | ||||
|             'tidak dijual', | ||||
|             'tidak tersedia', | ||||
|             'tijdelijk uitverkocht', | ||||
|             'tiket tidak tersedia', | ||||
|             'to subscribe to back in stock', | ||||
|             'tükendi', | ||||
|             'unavailable nearby', | ||||
|             'unavailable tickets', | ||||
|             'vergriffen', | ||||
|             'vorbestellen', | ||||
|             'vorbestellung ist bald möglich', | ||||
|             'we couldn\'t find any products that match', | ||||
|             'we do not currently have an estimate of when this product will be back in stock.', | ||||
|             'we don\'t currently have any', | ||||
|             'we don\'t know when or if this item will be back in stock.', | ||||
|             'we were not able to find a match', | ||||
|             'when this arrives in stock', | ||||
|             'when this item is available to order', | ||||
|             'zur zeit nicht an lager', | ||||
|             'épuisé', | ||||
|             '品切れ', | ||||
|             '已售', | ||||
|             '已售完', | ||||
|             '품절' | ||||
|         ]; | ||||
|  | ||||
|  | ||||
|     const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0); | ||||
|         const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0); | ||||
|  | ||||
|     function getElementBaseText(element) { | ||||
|         // .textContent can include text from children which may give the wrong results | ||||
|         // scan only immediate TEXT_NODEs, which will be a child of the element | ||||
|         var text = ""; | ||||
|         for (var i = 0; i < element.childNodes.length; ++i) | ||||
|             if (element.childNodes[i].nodeType === Node.TEXT_NODE) | ||||
|                 text += element.childNodes[i].textContent; | ||||
|         return text.toLowerCase().trim(); | ||||
|     } | ||||
|         function getElementBaseText(element) { | ||||
|             // .textContent can include text from children which may give the wrong results | ||||
|             // scan only immediate TEXT_NODEs, which will be a child of the element | ||||
|             var text = ""; | ||||
|             for (var i = 0; i < element.childNodes.length; ++i) | ||||
|                 if (element.childNodes[i].nodeType === Node.TEXT_NODE) | ||||
|                     text += element.childNodes[i].textContent; | ||||
|             return text.toLowerCase().trim(); | ||||
|         } | ||||
|  | ||||
|     const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig'); | ||||
|  | ||||
|     // The out-of-stock or in-stock-text is generally always above-the-fold | ||||
|     // and often below-the-fold is a list of related products that may or may not contain trigger text | ||||
|     // so it's good to filter to just the 'above the fold' elements | ||||
|     // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist | ||||
|         const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock|arrives approximately)', 'ig'); | ||||
|         // The out-of-stock or in-stock-text is generally always above-the-fold | ||||
|         // and often below-the-fold is a list of related products that may or may not contain trigger text | ||||
|         // so it's good to filter to just the 'above the fold' elements | ||||
|         // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist | ||||
|  | ||||
|         function elementIsInEyeBallRange(element) { | ||||
|             // outside the 'fold' or some weird text in the heading area | ||||
|             // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|             // Note: theres also an automated test that places the 'out of stock' text fairly low down | ||||
|             // Skip text that could be in the header area | ||||
|             if (element.getBoundingClientRect().bottom + window.scrollY <= 300 ) { | ||||
|                 return false; | ||||
|             } | ||||
|             // Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in there | ||||
|             if (element.getBoundingClientRect().bottom + window.scrollY >= 1300 ) { | ||||
|                 return false; | ||||
|             } | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
| // @todo - if it's SVG or IMG, go into image diff mode | ||||
| // %ELEMENTS% replaced at injection time because different interfaces use it with different settings | ||||
|  | ||||
|     console.log("Scanning %ELEMENTS%"); | ||||
|         function collectVisibleElements(parent, visibleElements) { | ||||
|             if (!parent) return; // Base case: if parent is null or undefined, return | ||||
|  | ||||
|     function collectVisibleElements(parent, visibleElements) { | ||||
|         if (!parent) return; // Base case: if parent is null or undefined, return | ||||
|             // Add the parent itself to the visible elements array if it's of the specified types | ||||
|             visibleElements.push(parent); | ||||
|  | ||||
|         // Add the parent itself to the visible elements array if it's of the specified types | ||||
|         visibleElements.push(parent); | ||||
|  | ||||
|         // Iterate over the parent's children | ||||
|         const children = parent.children; | ||||
|         for (let i = 0; i < children.length; i++) { | ||||
|             const child = children[i]; | ||||
|             if ( | ||||
|                 child.nodeType === Node.ELEMENT_NODE && | ||||
|                 window.getComputedStyle(child).display !== 'none' && | ||||
|                 window.getComputedStyle(child).visibility !== 'hidden' && | ||||
|                 child.offsetWidth >= 0 && | ||||
|                 child.offsetHeight >= 0 && | ||||
|                 window.getComputedStyle(child).contentVisibility !== 'hidden' | ||||
|             ) { | ||||
|                 // If the child is an element and is visible, recursively collect visible elements | ||||
|                 collectVisibleElements(child, visibleElements); | ||||
|             // Iterate over the parent's children | ||||
|             const children = parent.children; | ||||
|             for (let i = 0; i < children.length; i++) { | ||||
|                 const child = children[i]; | ||||
|                 if ( | ||||
|                     child.nodeType === Node.ELEMENT_NODE && | ||||
|                     window.getComputedStyle(child).display !== 'none' && | ||||
|                     window.getComputedStyle(child).visibility !== 'hidden' && | ||||
|                     child.offsetWidth >= 0 && | ||||
|                     child.offsetHeight >= 0 && | ||||
|                     window.getComputedStyle(child).contentVisibility !== 'hidden' | ||||
|                 ) { | ||||
|                     // If the child is an element and is visible, recursively collect visible elements | ||||
|                     collectVisibleElements(child, visibleElements); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     const elementsToScan = []; | ||||
|     collectVisibleElements(document.body, elementsToScan); | ||||
|         const elementsToScan = []; | ||||
|         collectVisibleElements(document.body, elementsToScan); | ||||
|  | ||||
|     var elementText = ""; | ||||
|         var elementText = ""; | ||||
|  | ||||
|     // REGEXS THAT REALLY MEAN IT'S IN STOCK | ||||
|     for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|         const element = elementsToScan[i]; | ||||
|         // REGEXS THAT REALLY MEAN IT'S IN STOCK | ||||
|         for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|             const element = elementsToScan[i]; | ||||
|  | ||||
|         // outside the 'fold' or some weird text in the heading area | ||||
|         // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|         if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|             continue | ||||
|             if (!elementIsInEyeBallRange(element)) { | ||||
|                 continue | ||||
|             } | ||||
|  | ||||
|             elementText = ""; | ||||
|             try { | ||||
|                 if (element.tagName.toLowerCase() === "input") { | ||||
|                     elementText = element.value.toLowerCase().trim(); | ||||
|                 } else { | ||||
|                     elementText = getElementBaseText(element); | ||||
|                 } | ||||
|             } catch (e) { | ||||
|                 console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e); | ||||
|             } | ||||
|             if (elementText.length) { | ||||
|                 // try which ones could mean its in stock | ||||
|                 if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) { | ||||
|                     console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`) | ||||
|                     element.style.border = "2px solid green"; // highlight the element that was detected as in stock | ||||
|                     return 'Possibly in stock'; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         elementText = ""; | ||||
|         try { | ||||
|         // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK | ||||
|         for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|             const element = elementsToScan[i]; | ||||
|  | ||||
|             if (!elementIsInEyeBallRange(element)) { | ||||
|                 continue | ||||
|             } | ||||
|             elementText = ""; | ||||
|             if (element.tagName.toLowerCase() === "input") { | ||||
|                 elementText = element.value.toLowerCase().trim(); | ||||
|             } else { | ||||
|                 elementText = getElementBaseText(element); | ||||
|             } | ||||
|         } catch (e) { | ||||
|             console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e); | ||||
|         } | ||||
|  | ||||
|         if (elementText.length) { | ||||
|             // try which ones could mean its in stock | ||||
|             if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) { | ||||
|                 console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`) | ||||
|                 return 'Possibly in stock'; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK | ||||
|     for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|         const element = elementsToScan[i]; | ||||
|         // outside the 'fold' or some weird text in the heading area | ||||
|         // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|         // Note: theres also an automated test that places the 'out of stock' text fairly low down | ||||
|         if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|             continue | ||||
|         } | ||||
|         elementText = ""; | ||||
|         if (element.tagName.toLowerCase() === "input") { | ||||
|             elementText = element.value.toLowerCase().trim(); | ||||
|         } else { | ||||
|             elementText = getElementBaseText(element); | ||||
|         } | ||||
|  | ||||
|         if (elementText.length) { | ||||
|             // and these mean its out of stock | ||||
|             for (const outOfStockText of outOfStockTexts) { | ||||
|                 if (elementText.includes(outOfStockText)) { | ||||
|                     console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`) | ||||
|                     return outOfStockText; // item is out of stock | ||||
|             if (elementText.length) { | ||||
|                 // and these mean its out of stock | ||||
|                 for (const outOfStockText of outOfStockTexts) { | ||||
|                     if (elementText.includes(outOfStockText)) { | ||||
|                         console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`) | ||||
|                         element.style.border = "2px solid red"; // highlight the element that was detected as out of stock | ||||
|                         return outOfStockText; // item is out of stock | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`) | ||||
|         return 'Possibly in stock'; // possibly in stock, cant decide otherwise. | ||||
|     } | ||||
|  | ||||
|     console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`) | ||||
|     return 'Possibly in stock'; // possibly in stock, cant decide otherwise. | ||||
| } | ||||
|  | ||||
| // returns the element text that makes it think it's out of stock | ||||
| return isItemInStock().trim() | ||||
|  | ||||
|  | ||||
|     return isItemInStock().trim() | ||||
| } | ||||
|   | ||||
| @@ -1,282 +1,284 @@ | ||||
| // Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com) | ||||
| // All rights reserved. | ||||
| async (options) => { | ||||
|  | ||||
| // @file Scrape the page looking for elements of concern (%ELEMENTS%) | ||||
| // http://matatk.agrip.org.uk/tests/position-and-width/ | ||||
| // https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate | ||||
| // | ||||
| // Some pages like https://www.londonstockexchange.com/stock/NCCL/ncondezi-energy-limited/analysis | ||||
| // will automatically force a scroll somewhere, so include the position offset | ||||
| // Lets hope the position doesnt change while we iterate the bbox's, but this is better than nothing | ||||
| var scroll_y = 0; | ||||
| try { | ||||
|     scroll_y = +document.documentElement.scrollTop || document.body.scrollTop | ||||
| } catch (e) { | ||||
|     console.log(e); | ||||
| } | ||||
|     let visualselector_xpath_selectors = options.visualselector_xpath_selectors | ||||
|     let max_height = options.max_height | ||||
|  | ||||
|  | ||||
| // Include the getXpath script directly, easier than fetching | ||||
| function getxpath(e) { | ||||
|     var n = e; | ||||
|     if (n && n.id) return '//*[@id="' + n.id + '"]'; | ||||
|     for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) { | ||||
|         for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling; | ||||
|         for (d = n.nextSibling; d;) { | ||||
|             if (d.nodeName === n.nodeName) { | ||||
|                 r = !0; | ||||
|                 break | ||||
|             } | ||||
|             d = d.nextSibling | ||||
|         } | ||||
|         o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode | ||||
|     } | ||||
|     return o.length ? "/" + o.reverse().join("/") : "" | ||||
| } | ||||
|  | ||||
| const findUpTag = (el) => { | ||||
|     let r = el | ||||
|     chained_css = []; | ||||
|     depth = 0; | ||||
|  | ||||
|     //  Strategy 1: If it's an input, with name, and there's only one, prefer that | ||||
|     if (el.name !== undefined && el.name.length) { | ||||
|         var proposed = el.tagName + "[name=" + el.name + "]"; | ||||
|         var proposed_element = window.document.querySelectorAll(proposed); | ||||
|         if (proposed_element.length) { | ||||
|             if (proposed_element.length === 1) { | ||||
|                 return proposed; | ||||
|             } else { | ||||
|                 // Some sites change ID but name= stays the same, we can hit it if we know the index | ||||
|                 // Find all the elements that match and work out the input[n] | ||||
|                 var n = Array.from(proposed_element).indexOf(el); | ||||
|                 // Return a Playwright selector for nthinput[name=zipcode] | ||||
|                 return proposed + " >> nth=" + n; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Strategy 2: Keep going up until we hit an ID tag, imagine it's like  #list-widget div h4 | ||||
|     while (r.parentNode) { | ||||
|         if (depth === 5) { | ||||
|             break; | ||||
|         } | ||||
|         if ('' !== r.id) { | ||||
|             chained_css.unshift("#" + CSS.escape(r.id)); | ||||
|             final_selector = chained_css.join(' > '); | ||||
|             // Be sure theres only one, some sites have multiples of the same ID tag :-( | ||||
|             if (window.document.querySelectorAll(final_selector).length === 1) { | ||||
|                 return final_selector; | ||||
|             } | ||||
|             return null; | ||||
|         } else { | ||||
|             chained_css.unshift(r.tagName.toLowerCase()); | ||||
|         } | ||||
|         r = r.parentNode; | ||||
|         depth += 1; | ||||
|     } | ||||
|     return null; | ||||
| } | ||||
|  | ||||
|  | ||||
| // @todo - if it's SVG or IMG, go into image diff mode | ||||
| // %ELEMENTS% replaced at injection time because different interfaces use it with different settings | ||||
|  | ||||
| var size_pos = []; | ||||
| // after page fetch, inject this JS | ||||
| // build a map of all elements and their positions (maybe that only include text?) | ||||
| var bbox; | ||||
| console.log("Scanning %ELEMENTS%"); | ||||
|  | ||||
| function collectVisibleElements(parent, visibleElements) { | ||||
|     if (!parent) return; // Base case: if parent is null or undefined, return | ||||
|  | ||||
|  | ||||
|     // Add the parent itself to the visible elements array if it's of the specified types | ||||
|     const tagName = parent.tagName.toLowerCase(); | ||||
|     if ("%ELEMENTS%".split(',').includes(tagName)) { | ||||
|         visibleElements.push(parent); | ||||
|     } | ||||
|  | ||||
|     // Iterate over the parent's children | ||||
|     const children = parent.children; | ||||
|     for (let i = 0; i < children.length; i++) { | ||||
|         const child = children[i]; | ||||
|         if ( | ||||
|             child.nodeType === Node.ELEMENT_NODE && | ||||
|             window.getComputedStyle(child).display !== 'none' && | ||||
|             window.getComputedStyle(child).visibility !== 'hidden' && | ||||
|             child.offsetWidth >= 0 && | ||||
|             child.offsetHeight >= 0 && | ||||
|             window.getComputedStyle(child).contentVisibility !== 'hidden' | ||||
|         ) { | ||||
|             // If the child is an element and is visible, recursively collect visible elements | ||||
|             collectVisibleElements(child, visibleElements); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Create an array to hold the visible elements | ||||
| const visibleElementsArray = []; | ||||
|  | ||||
| // Call collectVisibleElements with the starting parent element | ||||
| collectVisibleElements(document.body, visibleElementsArray); | ||||
|  | ||||
|  | ||||
| visibleElementsArray.forEach(function (element) { | ||||
|  | ||||
|     bbox = element.getBoundingClientRect(); | ||||
|  | ||||
|     // Skip really small ones, and where width or height ==0 | ||||
|     if (bbox['width'] * bbox['height'] < 10) { | ||||
|         return | ||||
|     } | ||||
|  | ||||
|     // Don't include elements that are offset from canvas | ||||
|     if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) { | ||||
|         return | ||||
|     } | ||||
|  | ||||
|     // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes | ||||
|     // it should not traverse when we know we can anchor off just an ID one level up etc.. | ||||
|     // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match | ||||
|  | ||||
|     // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us. | ||||
|     xpath_result = false; | ||||
|     var scroll_y = 0; | ||||
|     try { | ||||
|         var d = findUpTag(element); | ||||
|         if (d) { | ||||
|             xpath_result = d; | ||||
|         } | ||||
|         scroll_y = +document.documentElement.scrollTop || document.body.scrollTop | ||||
|     } catch (e) { | ||||
|         console.log(e); | ||||
|     } | ||||
|     // You could swap it and default to getXpath and then try the smarter one | ||||
|     // default back to the less intelligent one | ||||
|     if (!xpath_result) { | ||||
|         try { | ||||
|             // I've seen on FB and eBay that this doesnt work | ||||
|             // ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44) | ||||
|             xpath_result = getxpath(element); | ||||
|         } catch (e) { | ||||
|             console.log(e); | ||||
|             return | ||||
|  | ||||
| // Include the getXpath script directly, easier than fetching | ||||
|     function getxpath(e) { | ||||
|         var n = e; | ||||
|         if (n && n.id) return '//*[@id="' + n.id + '"]'; | ||||
|         for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) { | ||||
|             for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling; | ||||
|             for (d = n.nextSibling; d;) { | ||||
|                 if (d.nodeName === n.nodeName) { | ||||
|                     r = !0; | ||||
|                     break | ||||
|                 } | ||||
|                 d = d.nextSibling | ||||
|             } | ||||
|             o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode | ||||
|         } | ||||
|         return o.length ? "/" + o.reverse().join("/") : "" | ||||
|     } | ||||
|  | ||||
|     const findUpTag = (el) => { | ||||
|         let r = el | ||||
|         chained_css = []; | ||||
|         depth = 0; | ||||
|  | ||||
|         //  Strategy 1: If it's an input, with name, and there's only one, prefer that | ||||
|         if (el.name !== undefined && el.name.length) { | ||||
|             var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]"; | ||||
|             var proposed_element = window.document.querySelectorAll(proposed); | ||||
|             if (proposed_element.length) { | ||||
|                 if (proposed_element.length === 1) { | ||||
|                     return proposed; | ||||
|                 } else { | ||||
|                     // Some sites change ID but name= stays the same, we can hit it if we know the index | ||||
|                     // Find all the elements that match and work out the input[n] | ||||
|                     var n = Array.from(proposed_element).indexOf(el); | ||||
|                     // Return a Playwright selector for nthinput[name=zipcode] | ||||
|                     return proposed + " >> nth=" + n; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Strategy 2: Keep going up until we hit an ID tag, imagine it's like  #list-widget div h4 | ||||
|         while (r.parentNode) { | ||||
|             if (depth === 5) { | ||||
|                 break; | ||||
|             } | ||||
|             if ('' !== r.id) { | ||||
|                 chained_css.unshift("#" + CSS.escape(r.id)); | ||||
|                 final_selector = chained_css.join(' > '); | ||||
|                 // Be sure theres only one, some sites have multiples of the same ID tag :-( | ||||
|                 if (window.document.querySelectorAll(final_selector).length === 1) { | ||||
|                     return final_selector; | ||||
|                 } | ||||
|                 return null; | ||||
|             } else { | ||||
|                 chained_css.unshift(r.tagName.toLowerCase()); | ||||
|             } | ||||
|             r = r.parentNode; | ||||
|             depth += 1; | ||||
|         } | ||||
|         return null; | ||||
|     } | ||||
|  | ||||
|  | ||||
| // @todo - if it's SVG or IMG, go into image diff mode | ||||
|  | ||||
|     var size_pos = []; | ||||
| // after page fetch, inject this JS | ||||
| // build a map of all elements and their positions (maybe that only include text?) | ||||
|     var bbox; | ||||
|     console.log(`Scanning for "${visualselector_xpath_selectors}"`); | ||||
|  | ||||
|     function collectVisibleElements(parent, visibleElements) { | ||||
|         if (!parent) return; // Base case: if parent is null or undefined, return | ||||
|  | ||||
|  | ||||
|         // Add the parent itself to the visible elements array if it's of the specified types | ||||
|         const tagName = parent.tagName.toLowerCase(); | ||||
|         if (visualselector_xpath_selectors.split(',').includes(tagName)) { | ||||
|             visibleElements.push(parent); | ||||
|         } | ||||
|  | ||||
|         // Iterate over the parent's children | ||||
|         const children = parent.children; | ||||
|         for (let i = 0; i < children.length; i++) { | ||||
|             const child = children[i]; | ||||
|             const computedStyle = window.getComputedStyle(child); | ||||
|  | ||||
|             if ( | ||||
|                 child.nodeType === Node.ELEMENT_NODE && | ||||
|                 computedStyle.display !== 'none' && | ||||
|                 computedStyle.visibility !== 'hidden' && | ||||
|                 child.offsetWidth >= 0 && | ||||
|                 child.offsetHeight >= 0 && | ||||
|                 computedStyle.contentVisibility !== 'hidden' | ||||
|             ) { | ||||
|                 // If the child is an element and is visible, recursively collect visible elements | ||||
|                 collectVisibleElements(child, visibleElements); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now | ||||
| // Create an array to hold the visible elements | ||||
|     const visibleElementsArray = []; | ||||
|  | ||||
|     let text = element.textContent.trim().slice(0, 30).trim(); | ||||
|     while (/\n{2,}|\t{2,}/.test(text)) { | ||||
|         text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t') | ||||
|     } | ||||
| // Call collectVisibleElements with the starting parent element | ||||
|     collectVisibleElements(document.body, visibleElementsArray); | ||||
|  | ||||
|     // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training. | ||||
|     const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) &&  /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ; | ||||
|  | ||||
|     size_pos.push({ | ||||
|         xpath: xpath_result, | ||||
|         width: Math.round(bbox['width']), | ||||
|         height: Math.round(bbox['height']), | ||||
|         left: Math.floor(bbox['left']), | ||||
|         top: Math.floor(bbox['top']) + scroll_y, | ||||
|         // tagName used by Browser Steps | ||||
|         tagName: (element.tagName) ? element.tagName.toLowerCase() : '', | ||||
|         // tagtype used by Browser Steps | ||||
|         tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '', | ||||
|         isClickable: window.getComputedStyle(element).cursor === "pointer", | ||||
|         // Used by the keras trainer | ||||
|         fontSize: window.getComputedStyle(element).getPropertyValue('font-size'), | ||||
|         fontWeight: window.getComputedStyle(element).getPropertyValue('font-weight'), | ||||
|         hasDigitCurrency: hasDigitCurrency, | ||||
|         label: label, | ||||
|     visibleElementsArray.forEach(function (element) { | ||||
|  | ||||
|         bbox = element.getBoundingClientRect(); | ||||
|  | ||||
|         // Skip really small ones, and where width or height ==0 | ||||
|         if (bbox['width'] * bbox['height'] < 10) { | ||||
|             return | ||||
|         } | ||||
|  | ||||
|         // Don't include elements that are offset from canvas | ||||
|         if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) { | ||||
|             return | ||||
|         } | ||||
|  | ||||
|         // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes | ||||
|         // it should not traverse when we know we can anchor off just an ID one level up etc.. | ||||
|         // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match | ||||
|  | ||||
|         // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us. | ||||
|         xpath_result = false; | ||||
|         try { | ||||
|             var d = findUpTag(element); | ||||
|             if (d) { | ||||
|                 xpath_result = d; | ||||
|             } | ||||
|         } catch (e) { | ||||
|             console.log(e); | ||||
|         } | ||||
|         // You could swap it and default to getXpath and then try the smarter one | ||||
|         // default back to the less intelligent one | ||||
|         if (!xpath_result) { | ||||
|             try { | ||||
|                 // I've seen on FB and eBay that this doesnt work | ||||
|                 // ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44) | ||||
|                 xpath_result = getxpath(element); | ||||
|             } catch (e) { | ||||
|                 console.log(e); | ||||
|                 return | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now | ||||
|  | ||||
|         let text = element.textContent.trim().slice(0, 30).trim(); | ||||
|         while (/\n{2,}|\t{2,}/.test(text)) { | ||||
|             text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t') | ||||
|         } | ||||
|  | ||||
|         // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training. | ||||
|         const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6))) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text); | ||||
|         const computedStyle = window.getComputedStyle(element); | ||||
|  | ||||
|         if (Math.floor(bbox['top']) + scroll_y > max_height) { | ||||
|             return | ||||
|         } | ||||
|  | ||||
|         size_pos.push({ | ||||
|             xpath: xpath_result, | ||||
|             width: Math.round(bbox['width']), | ||||
|             height: Math.round(bbox['height']), | ||||
|             left: Math.floor(bbox['left']), | ||||
|             top: Math.floor(bbox['top']) + scroll_y, | ||||
|             // tagName used by Browser Steps | ||||
|             tagName: (element.tagName) ? element.tagName.toLowerCase() : '', | ||||
|             // tagtype used by Browser Steps | ||||
|             tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '', | ||||
|             isClickable: computedStyle.cursor === "pointer", | ||||
|             // Used by the keras trainer | ||||
|             fontSize: computedStyle.getPropertyValue('font-size'), | ||||
|             fontWeight: computedStyle.getPropertyValue('font-weight'), | ||||
|             hasDigitCurrency: hasDigitCurrency, | ||||
|             label: label, | ||||
|         }); | ||||
|  | ||||
|     }); | ||||
|  | ||||
| }); | ||||
|  | ||||
|  | ||||
| // Inject the current one set in the include_filters, which may be a CSS rule | ||||
| // used for displaying the current one in VisualSelector, where its not one we generated. | ||||
| if (include_filters.length) { | ||||
|     let results; | ||||
|     // Foreach filter, go and find it on the page and add it to the results so we can visualise it again | ||||
|     for (const f of include_filters) { | ||||
|         bbox = false; | ||||
|         q = false; | ||||
|     if (include_filters.length) { | ||||
|         let results; | ||||
|         // Foreach filter, go and find it on the page and add it to the results so we can visualise it again | ||||
|         for (const f of include_filters) { | ||||
|             bbox = false; | ||||
|  | ||||
|         if (!f.length) { | ||||
|             console.log("xpath_element_scraper: Empty filter, skipping"); | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         try { | ||||
|             // is it xpath? | ||||
|             if (f.startsWith('/') || f.startsWith('xpath')) { | ||||
|                 var qry_f = f.replace(/xpath(:|\d:)/, '') | ||||
|                 console.log("[xpath] Scanning for included filter " + qry_f) | ||||
|                 let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); | ||||
|                 results = []; | ||||
|                 for (let i = 0; i < xpathResult.snapshotLength; i++) { | ||||
|                     results.push(xpathResult.snapshotItem(i)); | ||||
|                 } | ||||
|             } else { | ||||
|                 console.log("[css] Scanning for included filter " + f) | ||||
|                 console.log("[css] Scanning for included filter " + f); | ||||
|                 results = document.querySelectorAll(f); | ||||
|             if (!f.length) { | ||||
|                 console.log("xpath_element_scraper: Empty filter, skipping"); | ||||
|                 continue; | ||||
|             } | ||||
|         } catch (e) { | ||||
|             // Maybe catch DOMException and alert? | ||||
|             console.log("xpath_element_scraper: Exception selecting element from filter " + f); | ||||
|             console.log(e); | ||||
|         } | ||||
|  | ||||
|         if (results != null && results.length) { | ||||
|  | ||||
|             // Iterate over the results | ||||
|             results.forEach(node => { | ||||
|                 // Try to resolve //something/text() back to its /something so we can atleast get the bounding box | ||||
|                 try { | ||||
|                     if (typeof node.nodeName == 'string' && node.nodeName === '#text') { | ||||
|                         node = node.parentElement | ||||
|             try { | ||||
|                 // is it xpath? | ||||
|                 if (f.startsWith('/') || f.startsWith('xpath')) { | ||||
|                     var qry_f = f.replace(/xpath(:|\d:)/, '') | ||||
|                     console.log("[xpath] Scanning for included filter " + qry_f) | ||||
|                     let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); | ||||
|                     results = []; | ||||
|                     for (let i = 0; i < xpathResult.snapshotLength; i++) { | ||||
|                         results.push(xpathResult.snapshotItem(i)); | ||||
|                     } | ||||
|                 } catch (e) { | ||||
|                     console.log(e) | ||||
|                     console.log("xpath_element_scraper: #text resolver") | ||||
|                 } | ||||
|  | ||||
|                 // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element. | ||||
|                 if (typeof node.getBoundingClientRect == 'function') { | ||||
|                     bbox = node.getBoundingClientRect(); | ||||
|                     console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y) | ||||
|                 } else { | ||||
|                     console.log("[css] Scanning for included filter " + f) | ||||
|                     console.log("[css] Scanning for included filter " + f); | ||||
|                     results = document.querySelectorAll(f); | ||||
|                 } | ||||
|             } catch (e) { | ||||
|                 // Maybe catch DOMException and alert? | ||||
|                 console.log("xpath_element_scraper: Exception selecting element from filter " + f); | ||||
|                 console.log(e); | ||||
|             } | ||||
|  | ||||
|             if (results != null && results.length) { | ||||
|  | ||||
|                 // Iterate over the results | ||||
|                 results.forEach(node => { | ||||
|                     // Try to resolve //something/text() back to its /something so we can atleast get the bounding box | ||||
|                     try { | ||||
|                         // Try and see we can find its ownerElement | ||||
|                         bbox = node.ownerElement.getBoundingClientRect(); | ||||
|                         console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) | ||||
|                         if (typeof node.nodeName == 'string' && node.nodeName === '#text') { | ||||
|                             node = node.parentElement | ||||
|                         } | ||||
|                     } catch (e) { | ||||
|                         console.log(e) | ||||
|                         console.log("xpath_element_scraper: error looking up q.ownerElement") | ||||
|                         console.log("xpath_element_scraper: #text resolver") | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 if (bbox && bbox['width'] > 0 && bbox['height'] > 0) { | ||||
|                     size_pos.push({ | ||||
|                         xpath: f, | ||||
|                         width: parseInt(bbox['width']), | ||||
|                         height: parseInt(bbox['height']), | ||||
|                         left: parseInt(bbox['left']), | ||||
|                         top: parseInt(bbox['top']) + scroll_y, | ||||
|                         highlight_as_custom_filter: true | ||||
|                     }); | ||||
|                 } | ||||
|             }); | ||||
|                     // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element. | ||||
|                     if (typeof node.getBoundingClientRect == 'function') { | ||||
|                         bbox = node.getBoundingClientRect(); | ||||
|                         console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y) | ||||
|                     } else { | ||||
|                         try { | ||||
|                             // Try and see we can find its ownerElement | ||||
|                             bbox = node.ownerElement.getBoundingClientRect(); | ||||
|                             console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) | ||||
|                         } catch (e) { | ||||
|                             console.log(e) | ||||
|                             console.log("xpath_element_scraper: error looking up node.ownerElement") | ||||
|                         } | ||||
|                     } | ||||
|  | ||||
|                     if (bbox && bbox['width'] > 0 && bbox['height'] > 0) { | ||||
|                         size_pos.push({ | ||||
|                             xpath: f, | ||||
|                             width: parseInt(bbox['width']), | ||||
|                             height: parseInt(bbox['height']), | ||||
|                             left: parseInt(bbox['left']), | ||||
|                             top: parseInt(bbox['top']) + scroll_y, | ||||
|                             highlight_as_custom_filter: true | ||||
|                         }); | ||||
|                     } | ||||
|                 }); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area | ||||
| // so that we dont select the wrapping element by mistake and be unable to select what we want | ||||
| size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1) | ||||
|     size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1) | ||||
|  | ||||
| // browser_width required for proper scaling in the frontend | ||||
|     // Return as a string to save playwright for juggling thousands of objects | ||||
|     return JSON.stringify({'size_pos': size_pos, 'browser_width': window.innerWidth}); | ||||
| } | ||||
|  | ||||
| // Window.width required for proper scaling in the frontend | ||||
| return {'size_pos': size_pos, 'browser_width': window.innerWidth}; | ||||
|   | ||||
							
								
								
									
										73
									
								
								changedetectionio/content_fetchers/screenshot_handler.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,73 @@ | ||||
| # Pages with a vertical height longer than this will use the 'stitch together' method. | ||||
|  | ||||
| # - Many GPUs have a max texture size of 16384x16384px (or lower on older devices). | ||||
| # - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits. | ||||
| # - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer. | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY | ||||
|  | ||||
|  | ||||
| def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height): | ||||
|     import os | ||||
|     import io | ||||
|     from PIL import Image, ImageDraw, ImageFont | ||||
|  | ||||
|     try: | ||||
|  | ||||
|         # Load images from byte chunks | ||||
|         images = [Image.open(io.BytesIO(b)) for b in chunks_bytes] | ||||
|         total_height = sum(im.height for im in images) | ||||
|         max_width = max(im.width for im in images) | ||||
|  | ||||
|         # Create stitched image | ||||
|         stitched = Image.new('RGB', (max_width, total_height)) | ||||
|         y_offset = 0 | ||||
|         for im in images: | ||||
|             stitched.paste(im, (0, y_offset)) | ||||
|             y_offset += im.height | ||||
|  | ||||
|         # Draw caption on top (overlaid, not extending canvas) | ||||
|         draw = ImageDraw.Draw(stitched) | ||||
|  | ||||
|         if original_page_height > capture_height: | ||||
|             caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long" | ||||
|             padding = 10 | ||||
|             font_size = 35 | ||||
|             font_color = (255, 0, 0) | ||||
|             background_color = (255, 255, 255) | ||||
|  | ||||
|  | ||||
|             # Try to load a proper font | ||||
|             try: | ||||
|                 font = ImageFont.truetype("arial.ttf", font_size) | ||||
|             except IOError: | ||||
|                 font = ImageFont.load_default() | ||||
|  | ||||
|             bbox = draw.textbbox((0, 0), caption_text, font=font) | ||||
|             text_width = bbox[2] - bbox[0] | ||||
|             text_height = bbox[3] - bbox[1] | ||||
|  | ||||
|             # Draw white rectangle background behind text | ||||
|             rect_top = 0 | ||||
|             rect_bottom = text_height + 2 * padding | ||||
|             draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color) | ||||
|  | ||||
|             # Draw text centered horizontally, 10px padding from top of the rectangle | ||||
|             text_x = (max_width - text_width) // 2 | ||||
|             text_y = padding | ||||
|             draw.text((text_x, text_y), caption_text, font=font, fill=font_color) | ||||
|  | ||||
|         # Encode and send image | ||||
|         output = io.BytesIO() | ||||
|         stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY))) | ||||
|         pipe_conn.send_bytes(output.getvalue()) | ||||
|  | ||||
|         stitched.close() | ||||
|     except Exception as e: | ||||
|         pipe_conn.send(f"error:{e}") | ||||
|     finally: | ||||
|         pipe_conn.close() | ||||
|  | ||||
|  | ||||
| @@ -10,16 +10,13 @@ class fetcher(Fetcher): | ||||
|     else: | ||||
|         fetcher_description = "WebDriver Chrome/Javascript" | ||||
|  | ||||
|     # Configs for Proxy setup | ||||
|     # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy" | ||||
|     selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy', | ||||
|                                         'proxyAutoconfigUrl', 'sslProxy', 'autodetect', | ||||
|                                         'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword'] | ||||
|     proxy = None | ||||
|     proxy_url = None | ||||
|  | ||||
|     def __init__(self, proxy_override=None, custom_browser_connection_url=None): | ||||
|         super().__init__() | ||||
|         from selenium.webdriver.common.proxy import Proxy as SeleniumProxy | ||||
|         from urllib.parse import urlparse | ||||
|         from selenium.webdriver.common.proxy import Proxy | ||||
|  | ||||
|         # .strip('"') is going to save someone a lot of time when they accidently wrap the env value | ||||
|         if not custom_browser_connection_url: | ||||
| @@ -28,27 +25,29 @@ class fetcher(Fetcher): | ||||
|             self.browser_connection_is_custom = True | ||||
|             self.browser_connection_url = custom_browser_connection_url | ||||
|  | ||||
|         # If any proxy settings are enabled, then we should setup the proxy object | ||||
|         proxy_args = {} | ||||
|         for k in self.selenium_proxy_settings_mappings: | ||||
|             v = os.getenv('webdriver_' + k, False) | ||||
|             if v: | ||||
|                 proxy_args[k] = v.strip('"') | ||||
|  | ||||
|         # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy | ||||
|         if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy: | ||||
|             proxy_args['httpProxy'] = self.system_http_proxy | ||||
|         if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy: | ||||
|             proxy_args['httpsProxy'] = self.system_https_proxy | ||||
|         ##### PROXY SETUP ##### | ||||
|  | ||||
|         # Allows override the proxy on a per-request basis | ||||
|         if proxy_override is not None: | ||||
|             proxy_args['httpProxy'] = proxy_override | ||||
|         proxy_sources = [ | ||||
|             self.system_http_proxy, | ||||
|             self.system_https_proxy, | ||||
|             os.getenv('webdriver_proxySocks'), | ||||
|             os.getenv('webdriver_socksProxy'), | ||||
|             os.getenv('webdriver_proxyHttp'), | ||||
|             os.getenv('webdriver_httpProxy'), | ||||
|             os.getenv('webdriver_proxyHttps'), | ||||
|             os.getenv('webdriver_httpsProxy'), | ||||
|             os.getenv('webdriver_sslProxy'), | ||||
|             proxy_override, # last one should override | ||||
|         ] | ||||
|         # The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server= | ||||
|         for k in filter(None, proxy_sources): | ||||
|             if not k: | ||||
|                 continue | ||||
|             self.proxy_url = k.strip() | ||||
|  | ||||
|         if proxy_args: | ||||
|             self.proxy = SeleniumProxy(raw=proxy_args) | ||||
|  | ||||
|     def run(self, | ||||
|     async def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
| @@ -59,62 +58,86 @@ class fetcher(Fetcher): | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|  | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|         from selenium.common.exceptions import WebDriverException | ||||
|         # request_body, request_method unused for now, until some magic in the future happens. | ||||
|         import asyncio | ||||
|          | ||||
|         # Wrap the entire selenium operation in a thread executor | ||||
|         def _run_sync(): | ||||
|             from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|             # request_body, request_method unused for now, until some magic in the future happens. | ||||
|  | ||||
|         options = ChromeOptions() | ||||
|         if self.proxy: | ||||
|             options.proxy = self.proxy | ||||
|             options = ChromeOptions() | ||||
|  | ||||
|         self.driver = webdriver.Remote( | ||||
|             command_executor=self.browser_connection_url, | ||||
|             options=options) | ||||
|             # Load Chrome options from env | ||||
|             CHROME_OPTIONS = [ | ||||
|                 line.strip() | ||||
|                 for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines() | ||||
|                 if line.strip() | ||||
|             ] | ||||
|  | ||||
|         try: | ||||
|             self.driver.get(url) | ||||
|         except WebDriverException as e: | ||||
|             # Be sure we close the session window | ||||
|             self.quit() | ||||
|             raise | ||||
|             for opt in CHROME_OPTIONS: | ||||
|                 options.add_argument(opt) | ||||
|  | ||||
|         self.driver.set_window_size(1280, 1024) | ||||
|         self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|             # 1. proxy_config /Proxy(proxy_config) selenium object is REALLY unreliable | ||||
|             # 2. selenium-wire cant be used because the websocket version conflicts with pypeteer-ng | ||||
|             # 3. selenium only allows ONE runner at a time by default! | ||||
|             # 4. driver must use quit() or it will continue to block/hold the selenium process!! | ||||
|  | ||||
|         if self.webdriver_js_execute_code is not None: | ||||
|             self.driver.execute_script(self.webdriver_js_execute_code) | ||||
|             # Selenium doesn't automatically wait for actions as good as Playwright, so wait again | ||||
|             self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|             if self.proxy_url: | ||||
|                 options.add_argument(f'--proxy-server={self.proxy_url}') | ||||
|  | ||||
|         # @todo - how to check this? is it possible? | ||||
|         self.status_code = 200 | ||||
|         # @todo somehow we should try to get this working for WebDriver | ||||
|         # raise EmptyReply(url=url, status_code=r.status_code) | ||||
|  | ||||
|         # @todo - dom wait loaded? | ||||
|         time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) | ||||
|         self.content = self.driver.page_source | ||||
|         self.headers = {} | ||||
|  | ||||
|         self.screenshot = self.driver.get_screenshot_as_png() | ||||
|  | ||||
|     # Does the connection to the webdriver work? run a test connection. | ||||
|     def is_ready(self): | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|  | ||||
|         self.driver = webdriver.Remote( | ||||
|             command_executor=self.command_executor, | ||||
|             options=ChromeOptions()) | ||||
|  | ||||
|         # driver.quit() seems to cause better exceptions | ||||
|         self.quit() | ||||
|         return True | ||||
|  | ||||
|     def quit(self): | ||||
|         if self.driver: | ||||
|             from selenium.webdriver.remote.remote_connection import RemoteConnection | ||||
|             from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver | ||||
|             driver = None | ||||
|             try: | ||||
|                 self.driver.quit() | ||||
|                 # Create the RemoteConnection and set timeout (e.g., 30 seconds) | ||||
|                 remote_connection = RemoteConnection( | ||||
|                     self.browser_connection_url, | ||||
|                 ) | ||||
|                 remote_connection.set_timeout(30)  # seconds | ||||
|  | ||||
|                 # Now create the driver with the RemoteConnection | ||||
|                 driver = RemoteWebDriver( | ||||
|                     command_executor=remote_connection, | ||||
|                     options=options | ||||
|                 ) | ||||
|  | ||||
|                 driver.set_page_load_timeout(int(os.getenv("WEBDRIVER_PAGELOAD_TIMEOUT", 45))) | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}") | ||||
|                 if driver: | ||||
|                     driver.quit() | ||||
|                 raise e | ||||
|  | ||||
|             try: | ||||
|                 driver.get(url) | ||||
|  | ||||
|                 if not "--window-size" in os.getenv("CHROME_OPTIONS", ""): | ||||
|                     driver.set_window_size(1280, 1024) | ||||
|  | ||||
|                 driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|                 if self.webdriver_js_execute_code is not None: | ||||
|                     driver.execute_script(self.webdriver_js_execute_code) | ||||
|                     # Selenium doesn't automatically wait for actions as good as Playwright, so wait again | ||||
|                     driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|                 # @todo - how to check this? is it possible? | ||||
|                 self.status_code = 200 | ||||
|                 # @todo somehow we should try to get this working for WebDriver | ||||
|                 # raise EmptyReply(url=url, status_code=r.status_code) | ||||
|  | ||||
|                 # @todo - dom wait loaded? | ||||
|                 import time | ||||
|                 time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) | ||||
|                 self.content = driver.page_source | ||||
|                 self.headers = {} | ||||
|                 self.screenshot = driver.get_screenshot_as_png() | ||||
|             except Exception as e: | ||||
|                 driver.quit() | ||||
|                 raise e | ||||
|  | ||||
|             driver.quit() | ||||
|  | ||||
|         # Run the selenium operations in a thread pool to avoid blocking the event loop | ||||
|         loop = asyncio.get_event_loop() | ||||
|         await loop.run_in_executor(None, _run_sync) | ||||
|  | ||||
|   | ||||
							
								
								
									
										535
									
								
								changedetectionio/custom_queue.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,535 @@ | ||||
| import queue | ||||
| import asyncio | ||||
| from blinker import signal | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
| class NotificationQueue(queue.Queue): | ||||
|     """ | ||||
|     Extended Queue that sends a 'notification_event' signal when notifications are added. | ||||
|      | ||||
|     This class extends the standard Queue and adds a signal emission after a notification | ||||
|     is put into the queue. The signal includes the watch UUID if available. | ||||
|     """ | ||||
|      | ||||
|     def __init__(self, maxsize=0): | ||||
|         super().__init__(maxsize) | ||||
|         try: | ||||
|             self.notification_event_signal = signal('notification_event') | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception creating notification_event signal: {e}") | ||||
|  | ||||
|     def put(self, item, block=True, timeout=None): | ||||
|         # Call the parent's put method first | ||||
|         super().put(item, block, timeout) | ||||
|          | ||||
|         # After putting the notification in the queue, emit signal with watch UUID | ||||
|         try: | ||||
|             if self.notification_event_signal and isinstance(item, dict): | ||||
|                 watch_uuid = item.get('uuid') | ||||
|                 if watch_uuid: | ||||
|                     # Send the notification_event signal with the watch UUID | ||||
|                     self.notification_event_signal.send(watch_uuid=watch_uuid) | ||||
|                     logger.trace(f"NotificationQueue: Emitted notification_event signal for watch UUID {watch_uuid}") | ||||
|                 else: | ||||
|                     # Send signal without UUID for system notifications | ||||
|                     self.notification_event_signal.send() | ||||
|                     logger.trace("NotificationQueue: Emitted notification_event signal for system notification") | ||||
|         except Exception as e: | ||||
|             logger.error(f"Exception emitting notification_event signal: {e}") | ||||
|  | ||||
| class SignalPriorityQueue(queue.PriorityQueue): | ||||
|     """ | ||||
|     Extended PriorityQueue that sends a signal when items with a UUID are added. | ||||
|      | ||||
|     This class extends the standard PriorityQueue and adds a signal emission | ||||
|     after an item is put into the queue. If the item contains a UUID, the signal | ||||
|     is sent with that UUID as a parameter. | ||||
|     """ | ||||
|      | ||||
|     def __init__(self, maxsize=0): | ||||
|         super().__init__(maxsize) | ||||
|         try: | ||||
|             self.queue_length_signal = signal('queue_length') | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception: {e}") | ||||
|  | ||||
|     def put(self, item, block=True, timeout=None): | ||||
|         # Call the parent's put method first | ||||
|         super().put(item, block, timeout) | ||||
|          | ||||
|         # After putting the item in the queue, check if it has a UUID and emit signal | ||||
|         if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item: | ||||
|             uuid = item.item['uuid'] | ||||
|             # Get the signal and send it if it exists | ||||
|             watch_check_update = signal('watch_check_update') | ||||
|             if watch_check_update: | ||||
|                 # Send the watch_uuid parameter | ||||
|                 watch_check_update.send(watch_uuid=uuid) | ||||
|          | ||||
|         # Send queue_length signal with current queue size | ||||
|         try: | ||||
|  | ||||
|             if self.queue_length_signal: | ||||
|                 self.queue_length_signal.send(length=self.qsize()) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception: {e}") | ||||
|  | ||||
|     def get(self, block=True, timeout=None): | ||||
|         # Call the parent's get method first | ||||
|         item = super().get(block, timeout) | ||||
|          | ||||
|         # Send queue_length signal with current queue size | ||||
|         try: | ||||
|             if self.queue_length_signal: | ||||
|                 self.queue_length_signal.send(length=self.qsize()) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception: {e}") | ||||
|         return item | ||||
|      | ||||
|     def get_uuid_position(self, target_uuid): | ||||
|         """ | ||||
|         Find the position of a watch UUID in the priority queue. | ||||
|         Optimized for large queues - O(n) complexity instead of O(n log n). | ||||
|          | ||||
|         Args: | ||||
|             target_uuid: The UUID to search for | ||||
|              | ||||
|         Returns: | ||||
|             dict: Contains position info or None if not found | ||||
|                 - position: 0-based position in queue (0 = next to be processed) | ||||
|                 - total_items: total number of items in queue | ||||
|                 - priority: the priority value of the found item | ||||
|         """ | ||||
|         with self.mutex: | ||||
|             queue_list = list(self.queue) | ||||
|             total_items = len(queue_list) | ||||
|              | ||||
|             if total_items == 0: | ||||
|                 return { | ||||
|                     'position': None, | ||||
|                     'total_items': 0, | ||||
|                     'priority': None, | ||||
|                     'found': False | ||||
|                 } | ||||
|              | ||||
|             # Find the target item and its priority first - O(n) | ||||
|             target_item = None | ||||
|             target_priority = None | ||||
|              | ||||
|             for item in queue_list: | ||||
|                 if (hasattr(item, 'item') and  | ||||
|                     isinstance(item.item, dict) and  | ||||
|                     item.item.get('uuid') == target_uuid): | ||||
|                     target_item = item | ||||
|                     target_priority = item.priority | ||||
|                     break | ||||
|              | ||||
|             if target_item is None: | ||||
|                 return { | ||||
|                     'position': None, | ||||
|                     'total_items': total_items, | ||||
|                     'priority': None, | ||||
|                     'found': False | ||||
|                 } | ||||
|              | ||||
|             # Count how many items have higher priority (lower numbers) - O(n) | ||||
|             position = 0 | ||||
|             for item in queue_list: | ||||
|                 # Items with lower priority numbers are processed first | ||||
|                 if item.priority < target_priority: | ||||
|                     position += 1 | ||||
|                 elif item.priority == target_priority and item != target_item: | ||||
|                     # For same priority, count items that come before this one | ||||
|                     # (Note: this is approximate since heap order isn't guaranteed for equal priorities) | ||||
|                     position += 1 | ||||
|              | ||||
|             return { | ||||
|                 'position': position, | ||||
|                 'total_items': total_items, | ||||
|                 'priority': target_priority, | ||||
|                 'found': True | ||||
|             } | ||||
|      | ||||
|     def get_all_queued_uuids(self, limit=None, offset=0): | ||||
|         """ | ||||
|         Get UUIDs currently in the queue with their positions. | ||||
|         For large queues, use limit/offset for pagination. | ||||
|          | ||||
|         Args: | ||||
|             limit: Maximum number of items to return (None = all) | ||||
|             offset: Number of items to skip (for pagination) | ||||
|          | ||||
|         Returns: | ||||
|             dict: Contains items and metadata | ||||
|                 - items: List of dicts with uuid, position, and priority | ||||
|                 - total_items: Total number of items in queue | ||||
|                 - returned_items: Number of items returned | ||||
|                 - has_more: Whether there are more items after this page | ||||
|         """ | ||||
|         with self.mutex: | ||||
|             queue_list = list(self.queue) | ||||
|             total_items = len(queue_list) | ||||
|              | ||||
|             if total_items == 0: | ||||
|                 return { | ||||
|                     'items': [], | ||||
|                     'total_items': 0, | ||||
|                     'returned_items': 0, | ||||
|                     'has_more': False | ||||
|                 } | ||||
|              | ||||
|             # For very large queues, warn about performance | ||||
|             if total_items > 1000 and limit is None: | ||||
|                 logger.warning(f"Getting all {total_items} queued items without limit - this may be slow") | ||||
|              | ||||
|             # Sort only if we need exact positions (expensive for large queues) | ||||
|             if limit is not None and limit <= 100: | ||||
|                 # For small requests, we can afford to sort | ||||
|                 queue_items = sorted(queue_list) | ||||
|                 end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items) | ||||
|                 items_to_process = queue_items[offset:end_idx] | ||||
|                  | ||||
|                 result = [] | ||||
|                 for position, item in enumerate(items_to_process, start=offset): | ||||
|                     if (hasattr(item, 'item') and  | ||||
|                         isinstance(item.item, dict) and  | ||||
|                         'uuid' in item.item): | ||||
|                          | ||||
|                         result.append({ | ||||
|                             'uuid': item.item['uuid'], | ||||
|                             'position': position, | ||||
|                             'priority': item.priority | ||||
|                         }) | ||||
|                  | ||||
|                 return { | ||||
|                     'items': result, | ||||
|                     'total_items': total_items, | ||||
|                     'returned_items': len(result), | ||||
|                     'has_more': (offset + len(result)) < total_items | ||||
|                 } | ||||
|             else: | ||||
|                 # For large requests, return items with approximate positions | ||||
|                 # This is much faster O(n) instead of O(n log n) | ||||
|                 result = [] | ||||
|                 processed = 0 | ||||
|                 skipped = 0 | ||||
|                  | ||||
|                 for item in queue_list: | ||||
|                     if (hasattr(item, 'item') and  | ||||
|                         isinstance(item.item, dict) and  | ||||
|                         'uuid' in item.item): | ||||
|                          | ||||
|                         if skipped < offset: | ||||
|                             skipped += 1 | ||||
|                             continue | ||||
|                          | ||||
|                         if limit and processed >= limit: | ||||
|                             break | ||||
|                          | ||||
|                         # Approximate position based on priority comparison | ||||
|                         approx_position = sum(1 for other in queue_list if other.priority < item.priority) | ||||
|                          | ||||
|                         result.append({ | ||||
|                             'uuid': item.item['uuid'], | ||||
|                             'position': approx_position,  # Approximate | ||||
|                             'priority': item.priority | ||||
|                         }) | ||||
|                         processed += 1 | ||||
|                  | ||||
|                 return { | ||||
|                     'items': result, | ||||
|                     'total_items': total_items, | ||||
|                     'returned_items': len(result), | ||||
|                     'has_more': (offset + len(result)) < total_items, | ||||
|                     'note': 'Positions are approximate for performance with large queues' | ||||
|                 } | ||||
|      | ||||
|     def get_queue_summary(self): | ||||
|         """ | ||||
|         Get a quick summary of queue state without expensive operations. | ||||
|         O(n) complexity - fast even for large queues. | ||||
|          | ||||
|         Returns: | ||||
|             dict: Queue summary statistics | ||||
|         """ | ||||
|         with self.mutex: | ||||
|             queue_list = list(self.queue) | ||||
|             total_items = len(queue_list) | ||||
|              | ||||
|             if total_items == 0: | ||||
|                 return { | ||||
|                     'total_items': 0, | ||||
|                     'priority_breakdown': {}, | ||||
|                     'immediate_items': 0, | ||||
|                     'clone_items': 0, | ||||
|                     'scheduled_items': 0 | ||||
|                 } | ||||
|              | ||||
|             # Count items by priority type - O(n) | ||||
|             immediate_items = 0  # priority 1 | ||||
|             clone_items = 0      # priority 5   | ||||
|             scheduled_items = 0  # priority > 100 (timestamps) | ||||
|             priority_counts = {} | ||||
|              | ||||
|             for item in queue_list: | ||||
|                 priority = item.priority | ||||
|                 priority_counts[priority] = priority_counts.get(priority, 0) + 1 | ||||
|                  | ||||
|                 if priority == 1: | ||||
|                     immediate_items += 1 | ||||
|                 elif priority == 5: | ||||
|                     clone_items += 1 | ||||
|                 elif priority > 100: | ||||
|                     scheduled_items += 1 | ||||
|              | ||||
|             return { | ||||
|                 'total_items': total_items, | ||||
|                 'priority_breakdown': priority_counts, | ||||
|                 'immediate_items': immediate_items, | ||||
|                 'clone_items': clone_items, | ||||
|                 'scheduled_items': scheduled_items, | ||||
|                 'min_priority': min(priority_counts.keys()) if priority_counts else None, | ||||
|                 'max_priority': max(priority_counts.keys()) if priority_counts else None | ||||
|             } | ||||
|  | ||||
|  | ||||
| class AsyncSignalPriorityQueue(asyncio.PriorityQueue): | ||||
|     """ | ||||
|     Async version of SignalPriorityQueue that sends signals when items are added/removed. | ||||
|      | ||||
|     This class extends asyncio.PriorityQueue and maintains the same signal behavior | ||||
|     as the synchronous version for real-time UI updates. | ||||
|     """ | ||||
|      | ||||
|     def __init__(self, maxsize=0): | ||||
|         super().__init__(maxsize) | ||||
|         try: | ||||
|             self.queue_length_signal = signal('queue_length') | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception: {e}") | ||||
|  | ||||
|     async def put(self, item): | ||||
|         # Call the parent's put method first | ||||
|         await super().put(item) | ||||
|          | ||||
|         # After putting the item in the queue, check if it has a UUID and emit signal | ||||
|         if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item: | ||||
|             uuid = item.item['uuid'] | ||||
|             # Get the signal and send it if it exists | ||||
|             watch_check_update = signal('watch_check_update') | ||||
|             if watch_check_update: | ||||
|                 # Send the watch_uuid parameter | ||||
|                 watch_check_update.send(watch_uuid=uuid) | ||||
|          | ||||
|         # Send queue_length signal with current queue size | ||||
|         try: | ||||
|             if self.queue_length_signal: | ||||
|                 self.queue_length_signal.send(length=self.qsize()) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception: {e}") | ||||
|  | ||||
|     async def get(self): | ||||
|         # Call the parent's get method first | ||||
|         item = await super().get() | ||||
|          | ||||
|         # Send queue_length signal with current queue size | ||||
|         try: | ||||
|             if self.queue_length_signal: | ||||
|                 self.queue_length_signal.send(length=self.qsize()) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception: {e}") | ||||
|         return item | ||||
|      | ||||
|     @property | ||||
|     def queue(self): | ||||
|         """ | ||||
|         Provide compatibility with sync PriorityQueue.queue access | ||||
|         Returns the internal queue for template access | ||||
|         """ | ||||
|         return self._queue if hasattr(self, '_queue') else [] | ||||
|      | ||||
|     def get_uuid_position(self, target_uuid): | ||||
|         """ | ||||
|         Find the position of a watch UUID in the async priority queue. | ||||
|         Optimized for large queues - O(n) complexity instead of O(n log n). | ||||
|          | ||||
|         Args: | ||||
|             target_uuid: The UUID to search for | ||||
|              | ||||
|         Returns: | ||||
|             dict: Contains position info or None if not found | ||||
|                 - position: 0-based position in queue (0 = next to be processed) | ||||
|                 - total_items: total number of items in queue | ||||
|                 - priority: the priority value of the found item | ||||
|         """ | ||||
|         queue_list = list(self._queue) | ||||
|         total_items = len(queue_list) | ||||
|          | ||||
|         if total_items == 0: | ||||
|             return { | ||||
|                 'position': None, | ||||
|                 'total_items': 0, | ||||
|                 'priority': None, | ||||
|                 'found': False | ||||
|             } | ||||
|          | ||||
|         # Find the target item and its priority first - O(n) | ||||
|         target_item = None | ||||
|         target_priority = None | ||||
|          | ||||
|         for item in queue_list: | ||||
|             if (hasattr(item, 'item') and  | ||||
|                 isinstance(item.item, dict) and  | ||||
|                 item.item.get('uuid') == target_uuid): | ||||
|                 target_item = item | ||||
|                 target_priority = item.priority | ||||
|                 break | ||||
|          | ||||
|         if target_item is None: | ||||
|             return { | ||||
|                 'position': None, | ||||
|                 'total_items': total_items, | ||||
|                 'priority': None, | ||||
|                 'found': False | ||||
|             } | ||||
|          | ||||
|         # Count how many items have higher priority (lower numbers) - O(n) | ||||
|         position = 0 | ||||
|         for item in queue_list: | ||||
|             if item.priority < target_priority: | ||||
|                 position += 1 | ||||
|             elif item.priority == target_priority and item != target_item: | ||||
|                 position += 1 | ||||
|          | ||||
|         return { | ||||
|             'position': position, | ||||
|             'total_items': total_items, | ||||
|             'priority': target_priority, | ||||
|             'found': True | ||||
|         } | ||||
|      | ||||
|     def get_all_queued_uuids(self, limit=None, offset=0): | ||||
|         """ | ||||
|         Get UUIDs currently in the async queue with their positions. | ||||
|         For large queues, use limit/offset for pagination. | ||||
|          | ||||
|         Args: | ||||
|             limit: Maximum number of items to return (None = all) | ||||
|             offset: Number of items to skip (for pagination) | ||||
|          | ||||
|         Returns: | ||||
|             dict: Contains items and metadata (same structure as sync version) | ||||
|         """ | ||||
|         queue_list = list(self._queue) | ||||
|         total_items = len(queue_list) | ||||
|          | ||||
|         if total_items == 0: | ||||
|             return { | ||||
|                 'items': [], | ||||
|                 'total_items': 0, | ||||
|                 'returned_items': 0, | ||||
|                 'has_more': False | ||||
|             } | ||||
|          | ||||
|         # Same logic as sync version but without mutex | ||||
|         if limit is not None and limit <= 100: | ||||
|             queue_items = sorted(queue_list) | ||||
|             end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items) | ||||
|             items_to_process = queue_items[offset:end_idx] | ||||
|              | ||||
|             result = [] | ||||
|             for position, item in enumerate(items_to_process, start=offset): | ||||
|                 if (hasattr(item, 'item') and  | ||||
|                     isinstance(item.item, dict) and  | ||||
|                     'uuid' in item.item): | ||||
|                      | ||||
|                     result.append({ | ||||
|                         'uuid': item.item['uuid'], | ||||
|                         'position': position, | ||||
|                         'priority': item.priority | ||||
|                     }) | ||||
|              | ||||
|             return { | ||||
|                 'items': result, | ||||
|                 'total_items': total_items, | ||||
|                 'returned_items': len(result), | ||||
|                 'has_more': (offset + len(result)) < total_items | ||||
|             } | ||||
|         else: | ||||
|             # Fast approximate positions for large queues | ||||
|             result = [] | ||||
|             processed = 0 | ||||
|             skipped = 0 | ||||
|              | ||||
|             for item in queue_list: | ||||
|                 if (hasattr(item, 'item') and  | ||||
|                     isinstance(item.item, dict) and  | ||||
|                     'uuid' in item.item): | ||||
|                      | ||||
|                     if skipped < offset: | ||||
|                         skipped += 1 | ||||
|                         continue | ||||
|                      | ||||
|                     if limit and processed >= limit: | ||||
|                         break | ||||
|                      | ||||
|                     approx_position = sum(1 for other in queue_list if other.priority < item.priority) | ||||
|                      | ||||
|                     result.append({ | ||||
|                         'uuid': item.item['uuid'], | ||||
|                         'position': approx_position, | ||||
|                         'priority': item.priority | ||||
|                     }) | ||||
|                     processed += 1 | ||||
|              | ||||
|             return { | ||||
|                 'items': result, | ||||
|                 'total_items': total_items, | ||||
|                 'returned_items': len(result), | ||||
|                 'has_more': (offset + len(result)) < total_items, | ||||
|                 'note': 'Positions are approximate for performance with large queues' | ||||
|             } | ||||
|      | ||||
|     def get_queue_summary(self): | ||||
|         """ | ||||
|         Get a quick summary of async queue state. | ||||
|         O(n) complexity - fast even for large queues. | ||||
|         """ | ||||
|         queue_list = list(self._queue) | ||||
|         total_items = len(queue_list) | ||||
|          | ||||
|         if total_items == 0: | ||||
|             return { | ||||
|                 'total_items': 0, | ||||
|                 'priority_breakdown': {}, | ||||
|                 'immediate_items': 0, | ||||
|                 'clone_items': 0, | ||||
|                 'scheduled_items': 0 | ||||
|             } | ||||
|          | ||||
|         immediate_items = 0 | ||||
|         clone_items = 0 | ||||
|         scheduled_items = 0 | ||||
|         priority_counts = {} | ||||
|          | ||||
|         for item in queue_list: | ||||
|             priority = item.priority | ||||
|             priority_counts[priority] = priority_counts.get(priority, 0) + 1 | ||||
|              | ||||
|             if priority == 1: | ||||
|                 immediate_items += 1 | ||||
|             elif priority == 5: | ||||
|                 clone_items += 1 | ||||
|             elif priority > 100: | ||||
|                 scheduled_items += 1 | ||||
|          | ||||
|         return { | ||||
|             'total_items': total_items, | ||||
|             'priority_breakdown': priority_counts, | ||||
|             'immediate_items': immediate_items, | ||||
|             'clone_items': clone_items, | ||||
|             'scheduled_items': scheduled_items, | ||||
|             'min_priority': min(priority_counts.keys()) if priority_counts else None, | ||||
|             'max_priority': max(priority_counts.keys()) if priority_counts else None | ||||
|         } | ||||
| @@ -3,6 +3,8 @@ import re | ||||
| from loguru import logger | ||||
| from wtforms.widgets.core import TimeInput | ||||
|  | ||||
| from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES | ||||
| from changedetectionio.conditions.form import ConditionFormRow | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from wtforms import ( | ||||
| @@ -171,7 +173,7 @@ class validateTimeZoneName(object): | ||||
|  | ||||
| class ScheduleLimitDaySubForm(Form): | ||||
|     enabled = BooleanField("not set", default=True) | ||||
|     start_time = TimeStringField("Start At", default="00:00", render_kw={"placeholder": "HH:MM"}, validators=[validators.Optional()]) | ||||
|     start_time = TimeStringField("Start At", default="00:00", validators=[validators.Optional()]) | ||||
|     duration = FormField(TimeDurationForm, label="Run duration") | ||||
|  | ||||
| class ScheduleLimitForm(Form): | ||||
| @@ -222,27 +224,37 @@ class StringDictKeyValue(StringField): | ||||
|  | ||||
|     def _value(self): | ||||
|         if self.data: | ||||
|             output = u'' | ||||
|             for k in self.data.keys(): | ||||
|                 output += "{}: {}\r\n".format(k, self.data[k]) | ||||
|  | ||||
|             output = '' | ||||
|             for k, v in self.data.items(): | ||||
|                 output += f"{k}: {v}\r\n" | ||||
|             return output | ||||
|         else: | ||||
|             return u'' | ||||
|             return '' | ||||
|  | ||||
|     # incoming | ||||
|     # incoming data processing + validation | ||||
|     def process_formdata(self, valuelist): | ||||
|         self.data = {} | ||||
|         errors = [] | ||||
|         if valuelist: | ||||
|             self.data = {} | ||||
|             # Remove empty strings | ||||
|             cleaned = list(filter(None, valuelist[0].split("\n"))) | ||||
|             for s in cleaned: | ||||
|                 parts = s.strip().split(':', 1) | ||||
|                 if len(parts) == 2: | ||||
|                     self.data.update({parts[0].strip(): parts[1].strip()}) | ||||
|             # Remove empty strings (blank lines) | ||||
|             cleaned = [line.strip() for line in valuelist[0].split("\n") if line.strip()] | ||||
|             for idx, s in enumerate(cleaned, start=1): | ||||
|                 if ':' not in s: | ||||
|                     errors.append(f"Line {idx} is missing a ':' separator.") | ||||
|                     continue | ||||
|                 parts = s.split(':', 1) | ||||
|                 key = parts[0].strip() | ||||
|                 value = parts[1].strip() | ||||
|  | ||||
|         else: | ||||
|             self.data = {} | ||||
|                 if not key: | ||||
|                     errors.append(f"Line {idx} has an empty key.") | ||||
|                 if not value: | ||||
|                     errors.append(f"Line {idx} has an empty value.") | ||||
|  | ||||
|                 self.data[key] = value | ||||
|  | ||||
|         if errors: | ||||
|             raise ValidationError("Invalid input:\n" + "\n".join(errors)) | ||||
|  | ||||
| class ValidateContentFetcherIsReady(object): | ||||
|     """ | ||||
| @@ -304,9 +316,11 @@ class ValidateAppRiseServers(object): | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         import apprise | ||||
|         apobj = apprise.Apprise() | ||||
|         # so that the custom endpoints are registered | ||||
|         from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper | ||||
|         from .notification.apprise_plugin.assets import apprise_asset | ||||
|         from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler  # noqa: F401 | ||||
|  | ||||
|         apobj = apprise.Apprise(asset=apprise_asset) | ||||
|  | ||||
|         for server_url in field.data: | ||||
|             url = server_url.strip() | ||||
|             if url.startswith("#"): | ||||
| @@ -509,6 +523,7 @@ class quickWatchForm(Form): | ||||
|     edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|  | ||||
|  | ||||
| # Common to a single watch and the global settings | ||||
| class commonSettingsForm(Form): | ||||
|     from . import processors | ||||
| @@ -581,7 +596,7 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     filter_text_replaced = BooleanField('Replaced/changed lines', default=True) | ||||
|     filter_text_removed = BooleanField('Removed lines', default=True) | ||||
|  | ||||
|     trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) | ||||
|     trigger_text = StringListField('Keyword triggers - Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) | ||||
|     if os.getenv("PLAYWRIGHT_DRIVER_URL"): | ||||
|         browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10) | ||||
|     text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()]) | ||||
| @@ -596,6 +611,10 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     notification_muted = BooleanField('Notifications Muted / Off', default=False) | ||||
|     notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False) | ||||
|  | ||||
|     conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL') | ||||
|     conditions = FieldList(FormField(ConditionFormRow), min_entries=1)  # Add rule logic here | ||||
|  | ||||
|  | ||||
|     def extra_tab_content(self): | ||||
|         return None | ||||
|  | ||||
| @@ -700,6 +719,12 @@ class globalSettingsRequestForm(Form): | ||||
|     jitter_seconds = IntegerField('Random jitter seconds ± check', | ||||
|                                   render_kw={"style": "width: 5em;"}, | ||||
|                                   validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
|      | ||||
|     workers = IntegerField('Number of fetch workers', | ||||
|                           render_kw={"style": "width: 5em;"}, | ||||
|                           validators=[validators.NumberRange(min=1, max=50, | ||||
|                                                              message="Should be between 1 and 50")]) | ||||
|      | ||||
|     extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5) | ||||
|     extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5) | ||||
|  | ||||
| @@ -712,6 +737,9 @@ class globalSettingsRequestForm(Form): | ||||
|                     self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.') | ||||
|                     return False | ||||
|  | ||||
| class globalSettingsApplicationUIForm(Form): | ||||
|     open_diff_in_new_tab = BooleanField('Open diff page in a new tab', default=True, validators=[validators.Optional()]) | ||||
|     socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()]) | ||||
|  | ||||
| # datastore.data['settings']['application'].. | ||||
| class globalSettingsApplicationForm(commonSettingsForm): | ||||
| @@ -731,6 +759,9 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|                               render_kw={"style": "width: 5em;"}, | ||||
|                               validators=[validators.NumberRange(min=0, | ||||
|                                                                  message="Should be atleast zero (disabled)")]) | ||||
|  | ||||
|     rss_content_format = SelectField('RSS Content format', choices=RSS_FORMAT_TYPES) | ||||
|  | ||||
|     removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) | ||||
|     shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()]) | ||||
| @@ -740,6 +771,7 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|                                                                   render_kw={"style": "width: 5em;"}, | ||||
|                                                                   validators=[validators.NumberRange(min=0, | ||||
|                                                                                                      message="Should contain zero or more attempts")]) | ||||
|     ui = FormField(globalSettingsApplicationUIForm) | ||||
|  | ||||
|  | ||||
| class globalSettingsForm(Form): | ||||
|   | ||||
							
								
								
									
										162
									
								
								changedetectionio/gc_cleanup.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,162 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import ctypes | ||||
| import gc | ||||
| import re | ||||
| import psutil | ||||
| import sys | ||||
| import threading | ||||
| import importlib | ||||
| from loguru import logger | ||||
|  | ||||
| def memory_cleanup(app=None): | ||||
|     """ | ||||
|     Perform comprehensive memory cleanup operations and log memory usage | ||||
|     at each step with nicely formatted numbers. | ||||
|      | ||||
|     Args: | ||||
|         app: Optional Flask app instance for clearing Flask-specific caches | ||||
|          | ||||
|     Returns: | ||||
|         str: Status message | ||||
|     """ | ||||
|     # Get current process | ||||
|     process = psutil.Process() | ||||
|      | ||||
|     # Log initial memory usage with nicely formatted numbers | ||||
|     current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|     logger.debug(f"Memory cleanup started - Current memory usage: {current_memory:,.2f} MB") | ||||
|  | ||||
|     # 1. Standard garbage collection - force full collection on all generations | ||||
|     gc.collect(0)  # Collect youngest generation | ||||
|     gc.collect(1)  # Collect middle generation | ||||
|     gc.collect(2)  # Collect oldest generation | ||||
|  | ||||
|     # Run full collection again to ensure maximum cleanup | ||||
|     gc.collect() | ||||
|     current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|     logger.debug(f"After full gc.collect() - Memory usage: {current_memory:,.2f} MB") | ||||
|      | ||||
|  | ||||
|     # 3. Call libc's malloc_trim to release memory back to the OS | ||||
|     libc = ctypes.CDLL("libc.so.6") | ||||
|     libc.malloc_trim(0) | ||||
|     current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|     logger.debug(f"After malloc_trim(0) - Memory usage: {current_memory:,.2f} MB") | ||||
|      | ||||
|     # 4. Clear Python's regex cache | ||||
|     re.purge() | ||||
|     current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|     logger.debug(f"After re.purge() - Memory usage: {current_memory:,.2f} MB") | ||||
|  | ||||
|     # 5. Reset thread-local storage | ||||
|     # Create a new thread local object to encourage cleanup of old ones | ||||
|     threading.local() | ||||
|     current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|     logger.debug(f"After threading.local() - Memory usage: {current_memory:,.2f} MB") | ||||
|  | ||||
|     # 6. Clear sys.intern cache if Python version supports it | ||||
|     try: | ||||
|         sys.intern.clear() | ||||
|         current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|         logger.debug(f"After sys.intern.clear() - Memory usage: {current_memory:,.2f} MB") | ||||
|     except (AttributeError, TypeError): | ||||
|         logger.debug("sys.intern.clear() not supported in this Python version") | ||||
|      | ||||
|     # 7. Clear XML/lxml caches if available | ||||
|     try: | ||||
|         # Check if lxml.etree is in use | ||||
|         lxml_etree = sys.modules.get('lxml.etree') | ||||
|         if lxml_etree: | ||||
|             # Clear module-level caches | ||||
|             if hasattr(lxml_etree, 'clear_error_log'): | ||||
|                 lxml_etree.clear_error_log() | ||||
|              | ||||
|             # Check for _ErrorLog and _RotatingErrorLog objects and clear them | ||||
|             for obj in gc.get_objects(): | ||||
|                 if hasattr(obj, '__class__') and hasattr(obj.__class__, '__name__'): | ||||
|                     class_name = obj.__class__.__name__ | ||||
|                     if class_name in ('_ErrorLog', '_RotatingErrorLog', '_DomainErrorLog') and hasattr(obj, 'clear'): | ||||
|                         try: | ||||
|                             obj.clear() | ||||
|                         except (AttributeError, TypeError): | ||||
|                             pass | ||||
|                      | ||||
|                     # Clear Element objects which can hold references to documents | ||||
|                     elif class_name in ('_Element', 'ElementBase') and hasattr(obj, 'clear'): | ||||
|                         try: | ||||
|                             obj.clear() | ||||
|                         except (AttributeError, TypeError): | ||||
|                             pass | ||||
|              | ||||
|             current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|             logger.debug(f"After lxml.etree cleanup - Memory usage: {current_memory:,.2f} MB") | ||||
|  | ||||
|         # Check if lxml.html is in use | ||||
|         lxml_html = sys.modules.get('lxml.html') | ||||
|         if lxml_html: | ||||
|             # Clear HTML-specific element types | ||||
|             for obj in gc.get_objects(): | ||||
|                 if hasattr(obj, '__class__') and hasattr(obj.__class__, '__name__'): | ||||
|                     class_name = obj.__class__.__name__ | ||||
|                     if class_name in ('HtmlElement', 'FormElement', 'InputElement', | ||||
|                                     'SelectElement', 'TextareaElement', 'CheckboxGroup', | ||||
|                                     'RadioGroup', 'MultipleSelectOptions', 'FieldsDict') and hasattr(obj, 'clear'): | ||||
|                         try: | ||||
|                             obj.clear() | ||||
|                         except (AttributeError, TypeError): | ||||
|                             pass | ||||
|  | ||||
|             current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|             logger.debug(f"After lxml.html cleanup - Memory usage: {current_memory:,.2f} MB") | ||||
|     except (ImportError, AttributeError): | ||||
|         logger.debug("lxml cleanup not applicable") | ||||
|      | ||||
|     # 8. Clear JSON parser caches if applicable | ||||
|     try: | ||||
|         # Check if json module is being used and try to clear its cache | ||||
|         json_module = sys.modules.get('json') | ||||
|         if json_module and hasattr(json_module, '_default_encoder'): | ||||
|             json_module._default_encoder.markers.clear() | ||||
|             current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|             logger.debug(f"After JSON parser cleanup - Memory usage: {current_memory:,.2f} MB") | ||||
|     except (AttributeError, KeyError): | ||||
|         logger.debug("JSON cleanup not applicable") | ||||
|      | ||||
|     # 9. Force Python's memory allocator to release unused memory | ||||
|     try: | ||||
|         if hasattr(sys, 'pypy_version_info'): | ||||
|             # PyPy has different memory management | ||||
|             gc.collect() | ||||
|         else: | ||||
|             # CPython - try to release unused memory | ||||
|             ctypes.pythonapi.PyGC_Collect() | ||||
|             current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|             logger.debug(f"After PyGC_Collect - Memory usage: {current_memory:,.2f} MB") | ||||
|     except (AttributeError, TypeError): | ||||
|         logger.debug("PyGC_Collect not supported") | ||||
|      | ||||
|     # 10. Clear Flask-specific caches if applicable | ||||
|     if app: | ||||
|         try: | ||||
|             # Clear Flask caches if they exist | ||||
|             for key in list(app.config.get('_cache', {}).keys()): | ||||
|                 app.config['_cache'].pop(key, None) | ||||
|              | ||||
|             # Clear Jinja2 template cache if available | ||||
|             if hasattr(app, 'jinja_env') and hasattr(app.jinja_env, 'cache'): | ||||
|                 app.jinja_env.cache.clear() | ||||
|              | ||||
|             current_memory = process.memory_info().rss / 1024 / 1024 | ||||
|             logger.debug(f"After Flask cache clear - Memory usage: {current_memory:,.2f} MB") | ||||
|         except (AttributeError, KeyError): | ||||
|             logger.debug("No Flask cache to clear") | ||||
|      | ||||
|     # Final garbage collection pass | ||||
|     gc.collect() | ||||
|     libc.malloc_trim(0) | ||||
|      | ||||
|     # Log final memory usage | ||||
|     final_memory = process.memory_info().rss / 1024 / 1024 | ||||
|     logger.info(f"Memory cleanup completed - Final memory usage: {final_memory:,.2f} MB") | ||||
|     return "cleaned" | ||||
| @@ -1,5 +1,6 @@ | ||||
| from typing import List | ||||
| from loguru import logger | ||||
| from lxml import etree | ||||
| from typing import List | ||||
| import json | ||||
| import re | ||||
|  | ||||
| @@ -298,18 +299,20 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w | ||||
|     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags | ||||
|     try: | ||||
|         stripped_text_from_html = _parse_json(json.loads(content), json_filter) | ||||
|     except json.JSONDecodeError: | ||||
|         # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work | ||||
|         stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter) | ||||
|     except json.JSONDecodeError as e: | ||||
|         logger.warning(str(e)) | ||||
|  | ||||
|         # Foreach <script json></script> blob.. just return the first that matches json_filter | ||||
|         # As a last resort, try to parse the whole <body> | ||||
|         soup = BeautifulSoup(content, 'html.parser') | ||||
|  | ||||
|         if ensure_is_ldjson_info_type: | ||||
|             bs_result = soup.findAll('script', {"type": "application/ld+json"}) | ||||
|             bs_result = soup.find_all('script', {"type": "application/ld+json"}) | ||||
|         else: | ||||
|             bs_result = soup.findAll('script') | ||||
|         bs_result += soup.findAll('body') | ||||
|             bs_result = soup.find_all('script') | ||||
|         bs_result += soup.find_all('body') | ||||
|  | ||||
|         bs_jsons = [] | ||||
|         for result in bs_result: | ||||
| @@ -363,22 +366,41 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None | ||||
| # wordlist - list of regex's (str) or words (str) | ||||
| # Preserves all linefeeds and other whitespacing, its not the job of this to remove that | ||||
| def strip_ignore_text(content, wordlist, mode="content"): | ||||
|     i = 0 | ||||
|     output = [] | ||||
|     ignore_text = [] | ||||
|     ignore_regex = [] | ||||
|     ignored_line_numbers = [] | ||||
|     ignore_regex_multiline = [] | ||||
|     ignored_lines = [] | ||||
|  | ||||
|     for k in wordlist: | ||||
|         # Is it a regex? | ||||
|         res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE) | ||||
|         if res: | ||||
|             ignore_regex.append(re.compile(perl_style_slash_enclosed_regex_to_options(k))) | ||||
|             res = re.compile(perl_style_slash_enclosed_regex_to_options(k)) | ||||
|             if res.flags & re.DOTALL or res.flags & re.MULTILINE: | ||||
|                 ignore_regex_multiline.append(res) | ||||
|             else: | ||||
|                 ignore_regex.append(res) | ||||
|         else: | ||||
|             ignore_text.append(k.strip()) | ||||
|  | ||||
|     for line in content.splitlines(keepends=True): | ||||
|         i += 1 | ||||
|     for r in ignore_regex_multiline: | ||||
|         for match in r.finditer(content): | ||||
|             content_lines = content[:match.end()].splitlines(keepends=True) | ||||
|             match_lines = content[match.start():match.end()].splitlines(keepends=True) | ||||
|  | ||||
|             end_line = len(content_lines) | ||||
|             start_line = end_line - len(match_lines) | ||||
|  | ||||
|             if end_line - start_line <= 1: | ||||
|                 # Match is empty or in the middle of the line | ||||
|                 ignored_lines.append(start_line) | ||||
|             else: | ||||
|                 for i in range(start_line, end_line): | ||||
|                     ignored_lines.append(i) | ||||
|  | ||||
|     line_index = 0 | ||||
|     lines = content.splitlines(keepends=True) | ||||
|     for line in lines: | ||||
|         # Always ignore blank lines in this mode. (when this function gets called) | ||||
|         got_match = False | ||||
|         for l in ignore_text: | ||||
| @@ -390,17 +412,19 @@ def strip_ignore_text(content, wordlist, mode="content"): | ||||
|                 if r.search(line): | ||||
|                     got_match = True | ||||
|  | ||||
|         if not got_match: | ||||
|             # Not ignored, and should preserve "keepends" | ||||
|             output.append(line) | ||||
|         else: | ||||
|             ignored_line_numbers.append(i) | ||||
|         if got_match: | ||||
|             ignored_lines.append(line_index) | ||||
|  | ||||
|         line_index += 1 | ||||
|  | ||||
|     ignored_lines = set([i for i in ignored_lines if i >= 0 and i < len(lines)]) | ||||
|  | ||||
|     # Used for finding out what to highlight | ||||
|     if mode == "line numbers": | ||||
|         return ignored_line_numbers | ||||
|         return [i + 1 for i in ignored_lines] | ||||
|  | ||||
|     return ''.join(output) | ||||
|     output_lines = set(range(len(lines))) - ignored_lines | ||||
|     return ''.join([lines[i] for i in output_lines]) | ||||
|  | ||||
| def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str: | ||||
|     from xml.sax.saxutils import escape as xml_escape | ||||
| @@ -411,50 +435,36 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False | ||||
|  | ||||
|     return re.sub(pattern, repl, html_content) | ||||
|  | ||||
| def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str: | ||||
|  | ||||
| # NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON | ||||
|  | ||||
|  | ||||
| def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str: | ||||
|     from inscriptis import get_text | ||||
|     from inscriptis.model.config import ParserConfig | ||||
|  | ||||
|     """Converts html string to a string with just the text. If ignoring | ||||
|     rendering anchor tag content is enable, anchor tag content are also | ||||
|     included in the text | ||||
|  | ||||
|     :param html_content: string with html content | ||||
|     :param render_anchor_tag_content: boolean flag indicating whether to extract | ||||
|     hyperlinks (the anchor tag content) together with text. This refers to the | ||||
|     'href' inside 'a' tags. | ||||
|     Anchor tag content is rendered in the following manner: | ||||
|     '[ text ](anchor tag content)' | ||||
|     :return: extracted text from the HTML | ||||
|     """ | ||||
|     #  if anchor tag content flag is set to True define a config for | ||||
|     #  extracting this content | ||||
|     if render_anchor_tag_content: | ||||
|         parser_config = ParserConfig( | ||||
|             annotation_rules={"a": ["hyperlink"]}, | ||||
|             display_links=True | ||||
|         ) | ||||
|     # otherwise set config to None/default | ||||
|     else: | ||||
|         parser_config = None | ||||
|  | ||||
|     # RSS Mode - Inscriptis will treat `title` as something else. | ||||
|     # Make it as a regular block display element (//item/title) | ||||
|     # This is a bit of a hack - the real way it to use XSLT to convert it to HTML #1874 | ||||
|     if is_rss: | ||||
|         html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content) | ||||
|         html_content = re.sub(r'</title>', r'</h1>', html_content) | ||||
|  | ||||
|     text_content = get_text(html_content, config=parser_config) | ||||
|  | ||||
|     return text_content | ||||
|  | ||||
|  | ||||
| # Does LD+JSON exist with a @type=='product' and a .price set anywhere? | ||||
| def has_ldjson_product_info(content): | ||||
|     try: | ||||
|         lc = content.lower() | ||||
|         if 'application/ld+json' in lc and lc.count('"price"') == 1 and '"pricecurrency"' in lc: | ||||
|         # Better than .lower() which can use a lot of ram | ||||
|         if (re.search(r'application/ld\+json', content, re.IGNORECASE) and | ||||
|             re.search(r'"price"', content, re.IGNORECASE) and | ||||
|             re.search(r'"pricecurrency"', content, re.IGNORECASE)): | ||||
|             return True | ||||
|  | ||||
| #       On some pages this is really terribly expensive when they dont really need it | ||||
|   | ||||
| @@ -1,4 +1,7 @@ | ||||
| from os import getenv | ||||
|  | ||||
| from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES | ||||
|  | ||||
| from changedetectionio.notification import ( | ||||
|     default_notification_body, | ||||
|     default_notification_format, | ||||
| @@ -9,6 +12,8 @@ from changedetectionio.notification import ( | ||||
| _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6 | ||||
| DEFAULT_SETTINGS_HEADERS_USERAGENT='Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36' | ||||
|  | ||||
|  | ||||
|  | ||||
| class model(dict): | ||||
|     base_config = { | ||||
|             'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!", | ||||
| @@ -48,12 +53,17 @@ class model(dict): | ||||
|                     'password': False, | ||||
|                     'render_anchor_tag_content': False, | ||||
|                     'rss_access_token': None, | ||||
|                     'rss_content_format': RSS_FORMAT_TYPES[0][0], | ||||
|                     'rss_hide_muted_watches': True, | ||||
|                     'schema_version' : 0, | ||||
|                     'shared_diff_access': False, | ||||
|                     'webdriver_delay': None , # Extra delay in seconds before extracting text | ||||
|                     'tags': {}, #@todo use Tag.model initialisers | ||||
|                     'timezone': None, # Default IANA timezone name | ||||
|                     'ui': { | ||||
|                         'open_diff_in_new_tab': True, | ||||
|                         'socket_io_enabled': True | ||||
|                     }, | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
| @@ -69,7 +79,7 @@ def parse_headers_from_text_file(filepath): | ||||
|         for l in f.readlines(): | ||||
|             l = l.strip() | ||||
|             if not l.startswith('#') and ':' in l: | ||||
|                 (k, v) = l.split(':') | ||||
|                 (k, v) = l.split(':', 1)  # Split only on the first colon | ||||
|                 headers[k.strip()] = v.strip() | ||||
|  | ||||
|     return headers | ||||
| @@ -1,3 +1,5 @@ | ||||
| from blinker import signal | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from . import watch_base | ||||
| @@ -41,6 +43,7 @@ class model(watch_base): | ||||
|         self.__datastore_path = kw.get('datastore_path') | ||||
|         if kw.get('datastore_path'): | ||||
|             del kw['datastore_path'] | ||||
|              | ||||
|         super(model, self).__init__(*arg, **kw) | ||||
|         if kw.get('default'): | ||||
|             self.update(kw['default']) | ||||
| @@ -60,6 +63,10 @@ class model(watch_base): | ||||
|  | ||||
|         return False | ||||
|  | ||||
|     @property | ||||
|     def has_unviewed(self): | ||||
|         return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2 | ||||
|  | ||||
|     def ensure_data_dir_exists(self): | ||||
|         if not os.path.isdir(self.watch_data_dir): | ||||
|             logger.debug(f"> Creating data dir {self.watch_data_dir}") | ||||
| @@ -83,7 +90,7 @@ class model(watch_base): | ||||
|                     flash, Markup, url_for | ||||
|                 ) | ||||
|                 message = Markup('<a href="{}#general">The URL {} is invalid and cannot be used, click to edit</a>'.format( | ||||
|                     url_for('edit_page', uuid=self.get('uuid')), self.get('url', ''))) | ||||
|                     url_for('ui.ui_edit.edit_page', uuid=self.get('uuid')), self.get('url', ''))) | ||||
|                 flash(message, 'error') | ||||
|                 return '' | ||||
|  | ||||
| @@ -120,6 +127,10 @@ class model(watch_base): | ||||
|             'remote_server_reply': None, | ||||
|             'track_ldjson_price_data': None | ||||
|         }) | ||||
|         watch_check_update = signal('watch_check_update') | ||||
|         if watch_check_update: | ||||
|             watch_check_update.send(watch_uuid=self.get('uuid')) | ||||
|  | ||||
|         return | ||||
|  | ||||
|     @property | ||||
| @@ -247,37 +258,32 @@ class model(watch_base): | ||||
|         bump = self.history | ||||
|         return self.__newest_history_key | ||||
|  | ||||
|     # Given an arbitrary timestamp, find the closest next key | ||||
|     # For example, last_viewed = 1000 so it should return the next 1001 timestamp | ||||
|     # | ||||
|     # used for the [diff] button so it can preset a smarter from_version | ||||
|     # Given an arbitrary timestamp, find the best history key for the [diff] button so it can preset a smarter from_version | ||||
|     @property | ||||
|     def get_next_snapshot_key_to_last_viewed(self): | ||||
|     def get_from_version_based_on_last_viewed(self): | ||||
|  | ||||
|         """Unfortunately for now timestamp is stored as string key""" | ||||
|         keys = list(self.history.keys()) | ||||
|         if not keys: | ||||
|             return None | ||||
|         if len(keys) == 1: | ||||
|             return keys[0] | ||||
|  | ||||
|         last_viewed = int(self.get('last_viewed')) | ||||
|         prev_k = keys[0] | ||||
|         sorted_keys = sorted(keys, key=lambda x: int(x)) | ||||
|         sorted_keys.reverse() | ||||
|  | ||||
|         # When the 'last viewed' timestamp is greater than the newest snapshot, return second last | ||||
|         if last_viewed > int(sorted_keys[0]): | ||||
|         # When the 'last viewed' timestamp is greater than or equal the newest snapshot, return second newest | ||||
|         if last_viewed >= int(sorted_keys[0]): | ||||
|             return sorted_keys[1] | ||||
|          | ||||
|         # When the 'last viewed' timestamp is between snapshots, return the older snapshot | ||||
|         for newer, older in list(zip(sorted_keys[0:], sorted_keys[1:])): | ||||
|             if last_viewed < int(newer) and last_viewed >= int(older): | ||||
|                 return older | ||||
|  | ||||
|         for k in sorted_keys: | ||||
|             if int(k) < last_viewed: | ||||
|                 if prev_k == sorted_keys[0]: | ||||
|                     # Return the second last one so we dont recommend the same version compares itself | ||||
|                     return sorted_keys[1] | ||||
|  | ||||
|                 return prev_k | ||||
|             prev_k = k | ||||
|  | ||||
|         return keys[0] | ||||
|         # When the 'last viewed' timestamp is less than the oldest snapshot, return oldest | ||||
|         return sorted_keys[-1] | ||||
|  | ||||
|     def get_history_snapshot(self, timestamp): | ||||
|         import brotli | ||||
| @@ -301,11 +307,11 @@ class model(watch_base): | ||||
|         with open(filepath, 'r', encoding='utf-8', errors='ignore') as f: | ||||
|             return f.read() | ||||
|  | ||||
|     # Save some text file to the appropriate path and bump the history | ||||
|    # Save some text file to the appropriate path and bump the history | ||||
|     # result_obj from fetch_site_status.run() | ||||
|     def save_history_text(self, contents, timestamp, snapshot_id): | ||||
|         import brotli | ||||
|  | ||||
|         import tempfile | ||||
|         logger.trace(f"{self.get('uuid')} - Updating history.txt with timestamp {timestamp}") | ||||
|  | ||||
|         self.ensure_data_dir_exists() | ||||
| @@ -313,26 +319,37 @@ class model(watch_base): | ||||
|         threshold = int(os.getenv('SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD', 1024)) | ||||
|         skip_brotli = strtobool(os.getenv('DISABLE_BROTLI_TEXT_SNAPSHOT', 'False')) | ||||
|  | ||||
|         # Decide on snapshot filename and destination path | ||||
|         if not skip_brotli and len(contents) > threshold: | ||||
|             snapshot_fname = f"{snapshot_id}.txt.br" | ||||
|             dest = os.path.join(self.watch_data_dir, snapshot_fname) | ||||
|             if not os.path.exists(dest): | ||||
|                 with open(dest, 'wb') as f: | ||||
|                     f.write(brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT)) | ||||
|             encoded_data = brotli.compress(contents.encode('utf-8'), mode=brotli.MODE_TEXT) | ||||
|         else: | ||||
|             snapshot_fname = f"{snapshot_id}.txt" | ||||
|             dest = os.path.join(self.watch_data_dir, snapshot_fname) | ||||
|             if not os.path.exists(dest): | ||||
|                 with open(dest, 'wb') as f: | ||||
|                     f.write(contents.encode('utf-8')) | ||||
|             encoded_data = contents.encode('utf-8') | ||||
|  | ||||
|         # Append to index | ||||
|         # @todo check last char was \n | ||||
|         dest = os.path.join(self.watch_data_dir, snapshot_fname) | ||||
|  | ||||
|         # Write snapshot file atomically if it doesn't exist | ||||
|         if not os.path.exists(dest): | ||||
|             with tempfile.NamedTemporaryFile('wb', delete=False, dir=self.watch_data_dir) as tmp: | ||||
|                 tmp.write(encoded_data) | ||||
|                 tmp.flush() | ||||
|                 os.fsync(tmp.fileno()) | ||||
|                 tmp_path = tmp.name | ||||
|             os.rename(tmp_path, dest) | ||||
|  | ||||
|         # Append to history.txt atomically | ||||
|         index_fname = os.path.join(self.watch_data_dir, "history.txt") | ||||
|         with open(index_fname, 'a') as f: | ||||
|             f.write("{},{}\n".format(timestamp, snapshot_fname)) | ||||
|             f.close() | ||||
|         index_line = f"{timestamp},{snapshot_fname}\n" | ||||
|  | ||||
|         # Lets try force flush here since it's usually a very small file | ||||
|         # If this still fails in the future then try reading all to memory first, re-writing etc | ||||
|         with open(index_fname, 'a', encoding='utf-8') as f: | ||||
|             f.write(index_line) | ||||
|             f.flush() | ||||
|             os.fsync(f.fileno()) | ||||
|  | ||||
|         # Update internal state | ||||
|         self.__newest_history_key = timestamp | ||||
|         self.__history_n += 1 | ||||
|  | ||||
| @@ -357,7 +374,7 @@ class model(watch_base): | ||||
|     # Iterate over all history texts and see if something new exists | ||||
|     # Always applying .strip() to start/end but optionally replace any other whitespace | ||||
|     def lines_contain_something_unique_compared_to_history(self, lines: list, ignore_whitespace=False): | ||||
|         local_lines = [] | ||||
|         local_lines = set([]) | ||||
|         if lines: | ||||
|             if ignore_whitespace: | ||||
|                 if isinstance(lines[0], str): # Can be either str or bytes depending on what was on the disk | ||||
| @@ -532,7 +549,7 @@ class model(watch_base): | ||||
|     def save_error_text(self, contents): | ||||
|         self.ensure_data_dir_exists() | ||||
|         target_path = os.path.join(self.watch_data_dir, "last-error.txt") | ||||
|         with open(target_path, 'w') as f: | ||||
|         with open(target_path, 'w', encoding='utf-8') as f: | ||||
|             f.write(contents) | ||||
|  | ||||
|     def save_xpath_data(self, data, as_error=False): | ||||
| @@ -547,7 +564,10 @@ class model(watch_base): | ||||
|         self.ensure_data_dir_exists() | ||||
|  | ||||
|         with open(target_path, 'wb') as f: | ||||
|             f.write(zlib.compress(json.dumps(data).encode())) | ||||
|             if not isinstance(data, str): | ||||
|                 f.write(zlib.compress(json.dumps(data).encode())) | ||||
|             else: | ||||
|                 f.write(zlib.compress(data.encode())) | ||||
|             f.close() | ||||
|  | ||||
|     # Save as PNG, PNG is larger but better for doing visual diff in the future | ||||
| @@ -569,7 +589,7 @@ class model(watch_base): | ||||
|         import brotli | ||||
|         filepath = os.path.join(self.watch_data_dir, 'last-fetched.br') | ||||
|  | ||||
|         if not os.path.isfile(filepath): | ||||
|         if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0: | ||||
|             # If a previous attempt doesnt yet exist, just snarf the previous snapshot instead | ||||
|             dates = list(self.history.keys()) | ||||
|             if len(dates): | ||||
| @@ -639,3 +659,44 @@ class model(watch_base): | ||||
|             if step_n: | ||||
|                 available.append(step_n.group(1)) | ||||
|         return available | ||||
|  | ||||
|     def compile_error_texts(self, has_proxies=None): | ||||
|         """Compile error texts for this watch. | ||||
|         Accepts has_proxies parameter to ensure it works even outside app context""" | ||||
|         from flask import url_for | ||||
|         from markupsafe import Markup | ||||
|  | ||||
|         output = []  # Initialize as list since we're using append | ||||
|         last_error = self.get('last_error','') | ||||
|  | ||||
|         try: | ||||
|             url_for('settings.settings_page') | ||||
|         except Exception as e: | ||||
|             has_app_context = False | ||||
|         else: | ||||
|             has_app_context = True | ||||
|  | ||||
|         # has app+request context, we can use url_for() | ||||
|         if has_app_context: | ||||
|             if last_error: | ||||
|                 if '403' in last_error: | ||||
|                     if has_proxies: | ||||
|                         output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try other proxies/location</a> '"))) | ||||
|                     else: | ||||
|                         output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try adding external proxies/locations</a> '"))) | ||||
|                 else: | ||||
|                     output.append(str(Markup(last_error))) | ||||
|  | ||||
|             if self.get('last_notification_error'): | ||||
|                 output.append(str(Markup(f"<div class=\"notification-error\"><a href=\"{url_for('settings.notification_logs')}\">{ self.get('last_notification_error') }</a></div>"))) | ||||
|  | ||||
|         else: | ||||
|             # Lo_Fi version | ||||
|             if last_error: | ||||
|                 output.append(str(Markup(last_error))) | ||||
|             if self.get('last_notification_error'): | ||||
|                 output.append(str(Markup(self.get('last_notification_error')))) | ||||
|  | ||||
|         res = "\n".join(output) | ||||
|         return res | ||||
|  | ||||
|   | ||||
| @@ -2,7 +2,7 @@ import os | ||||
| import uuid | ||||
|  | ||||
| from changedetectionio import strtobool | ||||
| from changedetectionio.notification import default_notification_format_for_watch | ||||
| default_notification_format_for_watch = 'System default' | ||||
|  | ||||
| class watch_base(dict): | ||||
|  | ||||
| @@ -36,6 +36,7 @@ class watch_base(dict): | ||||
|             'include_filters': [], | ||||
|             'last_checked': 0, | ||||
|             'last_error': False, | ||||
|             'last_notification_error': None, | ||||
|             'last_viewed': 0,  # history key value of the last viewed via the [diff] link | ||||
|             'method': 'GET', | ||||
|             'notification_alert_count': 0, | ||||
|   | ||||
							
								
								
									
										35
									
								
								changedetectionio/notification/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,35 @@ | ||||
| from changedetectionio.model import default_notification_format_for_watch | ||||
|  | ||||
| ult_notification_format_for_watch = 'System default' | ||||
| default_notification_format = 'HTML Color' | ||||
| default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n' | ||||
| default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}' | ||||
|  | ||||
| # The values (markdown etc) are from apprise NotifyFormat, | ||||
| # But to avoid importing the whole heavy module just use the same strings here. | ||||
| valid_notification_formats = { | ||||
|     'Text': 'text', | ||||
|     'Markdown': 'markdown', | ||||
|     'HTML': 'html', | ||||
|     'HTML Color': 'htmlcolor', | ||||
|     # Used only for editing a watch (not for global) | ||||
|     default_notification_format_for_watch: default_notification_format_for_watch | ||||
| } | ||||
|  | ||||
|  | ||||
| valid_tokens = { | ||||
|     'base_url': '', | ||||
|     'current_snapshot': '', | ||||
|     'diff': '', | ||||
|     'diff_added': '', | ||||
|     'diff_full': '', | ||||
|     'diff_patch': '', | ||||
|     'diff_removed': '', | ||||
|     'diff_url': '', | ||||
|     'preview_url': '', | ||||
|     'triggered_text': '', | ||||
|     'watch_tag': '', | ||||
|     'watch_title': '', | ||||
|     'watch_url': '', | ||||
|     'watch_uuid': '', | ||||
| } | ||||
							
								
								
									
										16
									
								
								changedetectionio/notification/apprise_plugin/assets.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,16 @@ | ||||
| from apprise import AppriseAsset | ||||
|  | ||||
| # Refer to: | ||||
| # https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object | ||||
|  | ||||
| APPRISE_APP_ID = "changedetection.io" | ||||
| APPRISE_APP_DESC = "ChangeDetection.io best and simplest website monitoring and change detection" | ||||
| APPRISE_APP_URL = "https://changedetection.io" | ||||
| APPRISE_AVATAR_URL = "https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png" | ||||
|  | ||||
| apprise_asset = AppriseAsset( | ||||
|     app_id=APPRISE_APP_ID, | ||||
|     app_desc=APPRISE_APP_DESC, | ||||
|     app_url=APPRISE_APP_URL, | ||||
|     image_url_logo=APPRISE_AVATAR_URL, | ||||
| ) | ||||
							
								
								
									
										112
									
								
								changedetectionio/notification/apprise_plugin/custom_handlers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,112 @@ | ||||
| import json | ||||
| import re | ||||
| from urllib.parse import unquote_plus | ||||
|  | ||||
| import requests | ||||
| from apprise.decorators import notify | ||||
| from apprise.utils.parse import parse_url as apprise_parse_url | ||||
| from loguru import logger | ||||
| from requests.structures import CaseInsensitiveDict | ||||
|  | ||||
| SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"} | ||||
|  | ||||
|  | ||||
| def notify_supported_methods(func): | ||||
|     for method in SUPPORTED_HTTP_METHODS: | ||||
|         func = notify(on=method)(func) | ||||
|         # Add support for https, for each supported http method | ||||
|         func = notify(on=f"{method}s")(func) | ||||
|     return func | ||||
|  | ||||
|  | ||||
| def _get_auth(parsed_url: dict) -> str | tuple[str, str]: | ||||
|     user: str | None = parsed_url.get("user") | ||||
|     password: str | None = parsed_url.get("password") | ||||
|  | ||||
|     if user is not None and password is not None: | ||||
|         return (unquote_plus(user), unquote_plus(password)) | ||||
|  | ||||
|     if user is not None: | ||||
|         return unquote_plus(user) | ||||
|  | ||||
|     return "" | ||||
|  | ||||
|  | ||||
def _get_headers(parsed_url: dict, body: str) -> CaseInsensitiveDict:
    """Build request headers from the '+'-prefixed query-string entries.

    Header names are percent-decoded and Title-Cased; values are only
    percent-decoded. When no Content-Type was supplied and the body parses
    as JSON, a JSON Content-Type is assumed.
    """
    decoded = {}
    for raw_key, raw_value in parsed_url["qsd+"].items():
        decoded[unquote_plus(raw_key).title()] = unquote_plus(raw_value)
    headers = CaseInsensitiveDict(decoded)

    # If Content-Type is not specified, guess whether the body is valid JSON
    if "Content-Type" not in headers:
        try:
            json.loads(body)
        except Exception:
            pass
        else:
            headers["Content-Type"] = "application/json; charset=utf-8"

    return headers
|  | ||||
|  | ||||
def _get_params(parsed_url: dict) -> CaseInsensitiveDict:
    """Collect the plain query-string parameters to forward with the request.

    https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
    Apprise flags its own arguments by prefixing them with "-" (exclusion)
    or "+" (header promotion); anything that shows up in those buckets is
    dropped here, so only genuine request parameters are forwarded to the
    straight HTTP call.
    """
    excluded = parsed_url["qsd-"]
    promoted = parsed_url["qsd+"]

    forwarded = {}
    for raw_key, raw_value in parsed_url["qsd"].items():
        if raw_key.strip("-") in excluded:
            continue
        if raw_key.strip("+") in promoted:
            continue
        forwarded[unquote_plus(raw_key)] = unquote_plus(raw_value)

    return CaseInsensitiveDict(forwarded)
|  | ||||
|  | ||||
@notify_supported_methods
def apprise_http_custom_handler(
    body: str,
    title: str,
    notify_type: str,
    meta: dict,
    *args,
    **kwargs,
) -> bool:
    """Apprise handler that performs a raw HTTP request to a custom endpoint.

    Registered (via @notify_supported_methods) for the get://, post://, ...
    schemes and their TLS variants gets://, posts://, etc.

    Args:
        body: Rendered notification body, sent as the request payload.
        title: Notification title (unused here; required by apprise's handler signature).
        notify_type: Apprise notification type (unused here).
        meta: Apprise metadata; 'url' is the full custom URL, 'schema' its scheme.

    Returns:
        True when the request was sent and returned a non-error status,
        False if the URL could not be parsed or any error occurred.
    """
    url: str = meta.get("url")
    schema: str = meta.get("schema")
    # The scheme encodes the HTTP verb; the trailing "s" only marks TLS
    # (see notify_supported_methods), so "posts" -> POST, "gets" -> GET.
    method: str = re.sub(r"s$", "", schema).upper()

    # Convert /foobar?+some-header=hello to proper header dictionary
    parsed_url: dict[str, str | dict | None] | None = apprise_parse_url(url)
    if parsed_url is None:
        return False

    auth = _get_auth(parsed_url=parsed_url)
    headers = _get_headers(parsed_url=parsed_url, body=body)
    params = _get_params(parsed_url=parsed_url)

    # Swap the apprise-specific scheme back to a real http(s):// URL
    url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))

    try:
        response = requests.request(
            method=method,
            url=url,
            auth=auth,
            headers=headers,
            params=params,
            # Encode explicitly so requests does not have to guess the charset
            data=body.encode("utf-8") if isinstance(body, str) else body,
        )

        # Any 4xx/5xx response counts as a failed notification
        response.raise_for_status()

        logger.info(f"Successfully sent custom notification to {url}")
        return True

    except requests.RequestException as e:
        logger.error(f"Remote host error while sending custom notification to {url}: {e}")
        return False

    except Exception as e:
        logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}")
        return False
| @@ -1,48 +1,15 @@ | ||||
| 
 | ||||
| import time | ||||
| from apprise import NotifyFormat | ||||
| import apprise | ||||
| from loguru import logger | ||||
| 
 | ||||
| 
 | ||||
| valid_tokens = { | ||||
|     'base_url': '', | ||||
|     'current_snapshot': '', | ||||
|     'diff': '', | ||||
|     'diff_added': '', | ||||
|     'diff_full': '', | ||||
|     'diff_patch': '', | ||||
|     'diff_removed': '', | ||||
|     'diff_url': '', | ||||
|     'preview_url': '', | ||||
|     'triggered_text': '', | ||||
|     'watch_tag': '', | ||||
|     'watch_title': '', | ||||
|     'watch_url': '', | ||||
|     'watch_uuid': '', | ||||
| } | ||||
| 
 | ||||
| default_notification_format_for_watch = 'System default' | ||||
| default_notification_format = 'HTML Color' | ||||
| default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n' | ||||
| default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}' | ||||
| 
 | ||||
| valid_notification_formats = { | ||||
|     'Text': NotifyFormat.TEXT, | ||||
|     'Markdown': NotifyFormat.MARKDOWN, | ||||
|     'HTML': NotifyFormat.HTML, | ||||
|     'HTML Color': 'htmlcolor', | ||||
|     # Used only for editing a watch (not for global) | ||||
|     default_notification_format_for_watch: default_notification_format_for_watch | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL | ||||
| 
 | ||||
| def process_notification(n_object, datastore): | ||||
|     # so that the custom endpoints are registered | ||||
|     from changedetectionio.apprise_plugin import apprise_custom_api_call_wrapper | ||||
|     from changedetectionio.safe_jinja import render as jinja_render | ||||
|     from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats | ||||
|     # be sure its registered | ||||
|     from .apprise_plugin.custom_handlers import apprise_http_custom_handler | ||||
| 
 | ||||
|     from .safe_jinja import render as jinja_render | ||||
|     now = time.time() | ||||
|     if n_object.get('notification_timestamp'): | ||||
|         logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s") | ||||
| @@ -59,15 +26,18 @@ def process_notification(n_object, datastore): | ||||
|         # Initially text or whatever | ||||
|         n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format]) | ||||
| 
 | ||||
|     logger.trace(f"Complete notification body including Jinja and placeholders calculated in  {time.time() - now:.3f}s") | ||||
|     logger.trace(f"Complete notification body including Jinja and placeholders calculated in  {time.time() - now:.2f}s") | ||||
| 
 | ||||
|     # https://github.com/caronc/apprise/wiki/Development_LogCapture | ||||
|     # Anything higher than or equal to WARNING (which covers things like Connection errors) | ||||
|     # raise it as an exception | ||||
| 
 | ||||
|     sent_objs = [] | ||||
|     from .apprise_asset import asset | ||||
|     apobj = apprise.Apprise(debug=True, asset=asset) | ||||
| 
 | ||||
|     if 'as_async' in n_object: | ||||
|         apprise_asset.async_mode = n_object.get('as_async') | ||||
| 
 | ||||
|     apobj = apprise.Apprise(debug=True, asset=apprise_asset) | ||||
| 
 | ||||
|     if not n_object.get('notification_urls'): | ||||
|         return None | ||||
| @@ -108,7 +78,7 @@ def process_notification(n_object, datastore): | ||||
|                     and not url.startswith('get') \ | ||||
|                     and not url.startswith('delete') \ | ||||
|                     and not url.startswith('put'): | ||||
|                 url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png' | ||||
|                 url += k + f"avatar_url={APPRISE_AVATAR_URL}" | ||||
| 
 | ||||
|             if url.startswith('tgram://'): | ||||
|                 # Telegram only supports a limit subset of HTML, remove the '<br>' we place in. | ||||
| @@ -157,8 +127,6 @@ def process_notification(n_object, datastore): | ||||
|             attach=n_object.get('screenshot', None) | ||||
|         ) | ||||
| 
 | ||||
|         # Give apprise time to register an error | ||||
|         time.sleep(3) | ||||
| 
 | ||||
|         # Returns empty string if nothing found, multi-line string otherwise | ||||
|         log_value = logs.getvalue() | ||||
| @@ -175,6 +143,7 @@ def process_notification(n_object, datastore): | ||||
| # ( Where we prepare the tokens in the notification to be replaced with actual values ) | ||||
| def create_notification_parameters(n_object, datastore): | ||||
|     from copy import deepcopy | ||||
|     from . import valid_tokens | ||||
| 
 | ||||
|     # in the case we send a test notification from the main settings, there is no UUID. | ||||
|     uuid = n_object['uuid'] if 'uuid' in n_object else '' | ||||
							
								
								
									
										246
									
								
								changedetectionio/notification_service.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,246 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| """ | ||||
| Notification Service Module | ||||
| Extracted from update_worker.py to provide standalone notification functionality | ||||
| for both sync and async workers | ||||
| """ | ||||
|  | ||||
| import time | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
class NotificationService:
    """
    Standalone notification service that handles all notification functionality
    previously embedded in the update_worker class.

    Holds a reference to the shared datastore (settings, watches, tags) and a
    queue onto which fully-prepared notification dicts are placed for a
    separate notification worker to consume.
    """

    def __init__(self, datastore, notification_q):
        # Shared application datastore (settings/application, watching, tags)
        self.datastore = datastore
        # Outgoing queue; items are complete notification dicts (n_object)
        self.notification_q = notification_q

    def queue_notification_for_watch(self, n_object, watch):
        """
        Queue a notification for a watch with full diff rendering and template variables.

        Mutates ``n_object`` in place: resolves the 'System default' format to
        the global setting, renders the snapshot/diff placeholder values
        ('diff', 'diff_added', 'diff_full', 'diff_patch', 'diff_removed' etc),
        adds watch metadata, then puts the finished object on the queue.
        """
        from changedetectionio import diff
        from changedetectionio.notification import default_notification_format_for_watch

        dates = []
        trigger_text = ''

        now = time.time()

        if watch:
            watch_history = watch.history
            dates = list(watch_history.keys())
            trigger_text = watch.get('trigger_text', [])

        # Add text that was triggered
        if len(dates):
            snapshot_contents = watch.get_history_snapshot(dates[-1])
        else:
            snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."

        # If we ended up here with "System default"
        if n_object.get('notification_format') == default_notification_format_for_watch:
            n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')

        html_colour_enable = False
        # HTML needs linebreak, but MarkDown and Text can use a linefeed
        if n_object.get('notification_format') == 'HTML':
            line_feed_sep = "<br>"
            # Snapshot will be plaintext on the disk, convert to some kind of HTML
            snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
        elif n_object.get('notification_format') == 'HTML Color':
            line_feed_sep = "<br>"
            # Snapshot will be plaintext on the disk, convert to some kind of HTML
            snapshot_contents = snapshot_contents.replace('\n', line_feed_sep)
            html_colour_enable = True
        else:
            line_feed_sep = "\n"

        # Extract and join any watch 'trigger_text' lines found in the snapshot
        triggered_text = ''
        if len(trigger_text):
            from . import html_tools
            triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
            if triggered_text:
                triggered_text = line_feed_sep.join(triggered_text)

        # Could be called as a 'test notification' with only 1 snapshot available
        # - these example texts act as stand-in previous/current snapshots.
        prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
        current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"

        if len(dates) > 1:
            prev_snapshot = watch.get_history_snapshot(dates[-2])
            current_snapshot = watch.get_history_snapshot(dates[-1])

        n_object.update({
            'current_snapshot': snapshot_contents,
            'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
            'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep),
            'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable),
            'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True),
            'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep),
            'notification_timestamp': now,
            'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
            'triggered_text': triggered_text,
            'uuid': watch.get('uuid') if watch else None,
            'watch_url': watch.get('url') if watch else None,
        })

        if watch:
            n_object.update(watch.extra_notification_token_values())

        logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s")
        logger.debug("Queued notification for sending")
        self.notification_q.put(n_object)

    def _check_cascading_vars(self, var_name, watch):
        """
        Check notification variables in cascading priority:
        Individual watch settings > Tag settings > Global settings
        Falls back to the built-in defaults for format/body/title, else None.
        """
        from changedetectionio.notification import (
            default_notification_format_for_watch,
            default_notification_body,
            default_notification_title
        )

        # Would be better if this was some kind of Object where Watch can reference the parent datastore etc
        v = watch.get(var_name)
        if v and not watch.get('notification_muted'):
            if var_name == 'notification_format' and v == default_notification_format_for_watch:
                return self.datastore.data['settings']['application'].get('notification_format')

            return v

        # Watch had nothing usable - try each of its tags (first match wins)
        tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
        if tags:
            for tag_uuid, tag in tags.items():
                v = tag.get(var_name)
                if v and not tag.get('notification_muted'):
                    return v

        if self.datastore.data['settings']['application'].get(var_name):
            return self.datastore.data['settings']['application'].get(var_name)

        # Otherwise could be defaults
        if var_name == 'notification_format':
            return default_notification_format_for_watch
        if var_name == 'notification_body':
            return default_notification_body
        if var_name == 'notification_title':
            return default_notification_title

        return None

    def send_content_changed_notification(self, watch_uuid):
        """
        Send notification when content changes are detected.

        Returns True when a notification was queued, False when the cascading
        rules produced no notification URLs, or None when the watch is unknown.
        """
        n_object = {}
        watch = self.datastore.data['watching'].get(watch_uuid)
        if not watch:
            return

        watch_history = watch.history
        dates = list(watch_history.keys())
        # Theoretically it's possible that this could be just 1 long,
        # - In the case that the timestamp key was not unique
        # NOTE(review): an empty history (0 dates) falls through without
        # raising - confirm callers only reach here after a recorded change.
        if len(dates) == 1:
            raise ValueError(
                "History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?"
            )

        # Should be a better parent getter in the model object

        # Prefer - Individual watch settings > Tag settings >  Global settings (in that order)
        n_object['notification_urls'] = self._check_cascading_vars('notification_urls', watch)
        n_object['notification_title'] = self._check_cascading_vars('notification_title', watch)
        n_object['notification_body'] = self._check_cascading_vars('notification_body', watch)
        n_object['notification_format'] = self._check_cascading_vars('notification_format', watch)

        # (Individual watch) Only prepare to notify if the rules above matched
        queued = False
        if n_object and n_object.get('notification_urls'):
            queued = True

            # Track how many alerts this watch has ever produced
            count = watch.get('notification_alert_count', 0) + 1
            self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})

            self.queue_notification_for_watch(n_object=n_object, watch=watch)

        return queued

    def send_filter_failure_notification(self, watch_uuid):
        """
        Send notification when CSS/XPath filters fail consecutively.

        Notification URLs come from the watch itself, falling back to the
        global application settings; with neither, nothing is queued.
        """
        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
        watch = self.datastore.data['watching'].get(watch_uuid)
        if not watch:
            return

        # The quadruple braces survive .format() as literal {{...}} Jinja
        # placeholders, rendered later in the notification pipeline.
        n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
                    'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
                        ", ".join(watch['include_filters']),
                        threshold),
                    'notification_format': 'text'}

        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']

        elif len(self.datastore.data['settings']['application']['notification_urls']):
            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']

        # Only prepare to notify if the rules above matched
        if 'notification_urls' in n_object:
            n_object.update({
                'watch_url': watch['url'],
                'uuid': watch_uuid,
                'screenshot': None
            })
            self.notification_q.put(n_object)
            logger.debug(f"Sent filter not found notification for {watch_uuid}")
        else:
            logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")

    def send_step_failure_notification(self, watch_uuid, step_n):
        """
        Send notification when browser steps fail consecutively.

        ``step_n`` is the zero-based step index; messages report it 1-based.
        """
        watch = self.datastore.data['watching'].get(watch_uuid, False)
        if not watch:
            return
        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
        n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1),
                    'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} "
                                         "did not appear on the page after {} attempts, did the page change layout? "
                                         "Does it need a delay added?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\n"
                                         "Thanks - Your omniscient changedetection.io installation :)\n".format(step_n+1, threshold),
                    'notification_format': 'text'}

        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']

        elif len(self.datastore.data['settings']['application']['notification_urls']):
            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']

        # Only prepare to notify if the rules above matched
        if 'notification_urls' in n_object:
            n_object.update({
                'watch_url': watch['url'],
                'uuid': watch_uuid
            })
            self.notification_q.put(n_object)
            # NOTE(review): error-level logging for a routine, successful send
            # looks unintentional - sibling method uses logger.debug here.
            logger.error(f"Sent step not found notification for {watch_uuid}")
|  | ||||
|  | ||||
# Convenience functions for creating notification service instances
def create_notification_service(datastore, notification_q):
    """
    Factory function to create a NotificationService instance
    """
    service = NotificationService(datastore, notification_q)
    return service
							
								
								
									
										82
									
								
								changedetectionio/pluggy_interface.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,82 @@ | ||||
| import pluggy | ||||
| import os | ||||
| import importlib | ||||
| import sys | ||||
|  | ||||
# Global plugin namespace for changedetection.io
# Hook markers and setuptools entry-point discovery (below) all use this name.
PLUGIN_NAMESPACE = "changedetectionio"

# Markers to declare hook specifications (host side, see ChangeDetectionSpec)
# and hook implementations (plugin side) within the shared namespace.
hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE)
hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE)
|  | ||||
|  | ||||
class ChangeDetectionSpec:
    """Hook specifications for extending changedetection.io functionality."""

    @hookspec
    def ui_edit_stats_extras(watch):
        """Return HTML content to add to the stats tab in the edit view.

        Args:
            watch: The watch object being edited

        Returns:
            str: HTML content to be inserted in the stats tab
        """
        # NOTE(review): declared without `self`; callers invoke it as
        # plugin_manager.hook.ui_edit_stats_extras(watch=...) (see
        # collect_ui_edit_stats_extras), which matches the argument name here.
        # Confirm the missing `self` is intentional and not an oversight.
        pass
|  | ||||
|  | ||||
# Set up Plugin Manager
# Single module-level manager shared by the whole application.
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)

# Register hookspecs - declares which hooks plugins may implement
plugin_manager.add_hookspecs(ChangeDetectionSpec)
|  | ||||
# Load plugins from subdirectories
def load_plugins_from_directories():
    """Discover and register plugin modules bundled inside this package.

    Scans each configured plugin directory for ``.py`` files (other than
    ``__init__.py``), imports them by dotted path, and registers each module
    with the pluggy plugin manager under its bare module name.
    """
    # Dictionary of directories to scan for plugins
    plugin_dirs = {
        'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
        # Add more plugin directories here as needed
    }

    # Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory

    for dir_name, dir_path in plugin_dirs.items():
        if not os.path.exists(dir_path):
            continue

        # Get all Python files (excluding __init__.py)
        for filename in os.listdir(dir_path):
            if filename.endswith(".py") and filename != "__init__.py":
                module_name = filename[:-3]  # Remove .py extension
                module_path = f"changedetectionio.{dir_name}.plugins.{module_name}"

                try:
                    module = importlib.import_module(module_path)
                    # Register the plugin with pluggy
                    plugin_manager.register(module, module_name)
                except Exception as e:
                    # Best-effort loading: one broken plugin (import error,
                    # syntax error, bad registration, or anything raised at
                    # module import time) must not prevent app startup.
                    # Previously only ImportError/AttributeError were caught,
                    # so e.g. a SyntaxError in a plugin crashed the process.
                    print(f"Error loading plugin {module_name}: {e}")
|  | ||||
# Load plugins shipped inside this package (runs at import time)
load_plugins_from_directories()

# Discover installed plugins from external packages (if any),
# via setuptools entry points declared under PLUGIN_NAMESPACE
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
|  | ||||
# Helper function to collect UI stats extras from all plugins
def collect_ui_edit_stats_extras(watch):
    """Gather ui_edit_stats_extras HTML from every registered plugin.

    Empty or None contributions are dropped; the remaining fragments are
    joined with newlines. Returns "" when no plugin contributed anything.
    """
    fragments = plugin_manager.hook.ui_edit_stats_extras(watch=watch)
    non_empty = [fragment for fragment in (fragments or []) if fragment]
    return "\n".join(non_empty) if non_empty else ""
| @@ -27,14 +27,14 @@ class difference_detection_processor(): | ||||
|         # Generic fetcher that should be extended (requests, playwright etc) | ||||
|         self.fetcher = Fetcher() | ||||
|  | ||||
|     def call_browser(self, preferred_proxy_id=None): | ||||
|     async def call_browser(self, preferred_proxy_id=None): | ||||
|  | ||||
|         from requests.structures import CaseInsensitiveDict | ||||
|  | ||||
|         url = self.watch.link | ||||
|  | ||||
|         # Protect against file://, file:/ access, check the real "link" without any meta "source:" etc prepended. | ||||
|         if re.search(r'^file:/', url.strip(), re.IGNORECASE): | ||||
|         # Protect against file:, file:/, file:// access, check the real "link" without any meta "source:" etc prepended. | ||||
|         if re.search(r'^file:', url.strip(), re.IGNORECASE): | ||||
|             if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')): | ||||
|                 raise Exception( | ||||
|                     "file:// type access is denied for security reasons." | ||||
| @@ -89,7 +89,7 @@ class difference_detection_processor(): | ||||
|                 proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url') | ||||
|                 logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}") | ||||
|             else: | ||||
|                 logger.debug(f"Skipping adding proxy data when custom Browser endpoint is specified. ") | ||||
|                 logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ") | ||||
|  | ||||
|         # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need. | ||||
|         # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc) | ||||
| @@ -147,19 +147,20 @@ class difference_detection_processor(): | ||||
|         # And here we go! call the right browser with browser-specific settings | ||||
|         empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|  | ||||
|         self.fetcher.run(url=url, | ||||
|                          timeout=timeout, | ||||
|                          request_headers=request_headers, | ||||
|                          request_body=request_body, | ||||
|                          request_method=request_method, | ||||
|                          ignore_status_codes=ignore_status_codes, | ||||
|                          current_include_filters=self.watch.get('include_filters'), | ||||
|                          is_binary=is_binary, | ||||
|                          empty_pages_are_a_change=empty_pages_are_a_change | ||||
|                          ) | ||||
|         # All fetchers are now async | ||||
|         await self.fetcher.run(url=url, | ||||
|                                timeout=timeout, | ||||
|                                request_headers=request_headers, | ||||
|                                request_body=request_body, | ||||
|                                request_method=request_method, | ||||
|                                ignore_status_codes=ignore_status_codes, | ||||
|                                current_include_filters=self.watch.get('include_filters'), | ||||
|                                is_binary=is_binary, | ||||
|                                empty_pages_are_a_change=empty_pages_are_a_change | ||||
|                                ) | ||||
|  | ||||
|         #@todo .quit here could go on close object, so we can run JS if change-detected | ||||
|         self.fetcher.quit() | ||||
|         self.fetcher.quit(watch=self.watch) | ||||
|  | ||||
|         # After init, call run_changedetection() which will do the actual change-detection | ||||
|  | ||||
|   | ||||
| @@ -7,7 +7,7 @@ import urllib3 | ||||
| import time | ||||
|  | ||||
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | ||||
| name = 'Re-stock & Price detection for single product pages' | ||||
| name = 'Re-stock & Price detection for pages with a SINGLE product' | ||||
| description = 'Detects if the product goes back to in-stock' | ||||
|  | ||||
| class UnableToExtractRestockData(Exception): | ||||
| @@ -79,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock: | ||||
|     # First phase, dead simple scanning of anything that looks useful | ||||
|     value = Restock() | ||||
|     if data: | ||||
|         logger.debug(f"Using jsonpath to find price/availability/etc") | ||||
|         logger.debug("Using jsonpath to find price/availability/etc") | ||||
|         price_parse = parse('$..(price|Price)') | ||||
|         pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )') | ||||
|         availability_parse = parse('$..(availability|Availability)') | ||||
| @@ -110,7 +110,7 @@ def get_itemprop_availability(html_content) -> Restock: | ||||
|  | ||||
|         # Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:) | ||||
|         if not value.get('price') or value.get('availability'): | ||||
|             logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..") | ||||
|             logger.debug("Alternatively digging through OpenGraph properties for restock/price info..") | ||||
|             jsonpath_expr = parse('$..properties') | ||||
|  | ||||
|             for match in jsonpath_expr.find(data): | ||||
|   | ||||
| @@ -15,7 +15,7 @@ def _task(watch, update_handler): | ||||
|     except FilterNotFoundInResponse as e: | ||||
|         text_after_filter = f"Filter not found in HTML: {str(e)}" | ||||
|     except ReplyWithContentButNoText as e: | ||||
|         text_after_filter = f"Filter found but no text (empty result)" | ||||
|         text_after_filter = "Filter found but no text (empty result)" | ||||
|     except Exception as e: | ||||
|         text_after_filter = f"Error: {str(e)}" | ||||
|  | ||||
| @@ -28,13 +28,13 @@ def _task(watch, update_handler): | ||||
|     return text_after_filter | ||||
|  | ||||
|  | ||||
| def prepare_filter_prevew(datastore, watch_uuid): | ||||
| def prepare_filter_prevew(datastore, watch_uuid, form_data): | ||||
|     '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])''' | ||||
|     from changedetectionio import forms, html_tools | ||||
|     from changedetectionio.model.Watch import model as watch_model | ||||
|     from concurrent.futures import ProcessPoolExecutor | ||||
|     from copy import deepcopy | ||||
|     from flask import request, jsonify | ||||
|     from flask import request | ||||
|     import brotli | ||||
|     import importlib | ||||
|     import os | ||||
| @@ -50,12 +50,12 @@ def prepare_filter_prevew(datastore, watch_uuid): | ||||
|  | ||||
|     if tmp_watch and tmp_watch.history and os.path.isdir(tmp_watch.watch_data_dir): | ||||
|         # Splice in the temporary stuff from the form | ||||
|         form = forms.processor_text_json_diff_form(formdata=request.form if request.method == 'POST' else None, | ||||
|                                                    data=request.form | ||||
|         form = forms.processor_text_json_diff_form(formdata=form_data if request.method == 'POST' else None, | ||||
|                                                    data=form_data | ||||
|                                                    ) | ||||
|  | ||||
|         # Only update vars that came in via the AJAX post | ||||
|         p = {k: v for k, v in form.data.items() if k in request.form.keys()} | ||||
|         p = {k: v for k, v in form.data.items() if k in form_data.keys()} | ||||
|         tmp_watch.update(p) | ||||
|         blank_watch_no_filters = watch_model() | ||||
|         blank_watch_no_filters['url'] = tmp_watch.get('url') | ||||
| @@ -103,13 +103,12 @@ def prepare_filter_prevew(datastore, watch_uuid): | ||||
|  | ||||
|     logger.trace(f"Parsed in {time.time() - now:.3f}s") | ||||
|  | ||||
|     return jsonify( | ||||
|         { | ||||
|     return ({ | ||||
|             'after_filter': text_after_filter, | ||||
|             'before_filter': text_before_filter.decode('utf-8') if isinstance(text_before_filter, bytes) else text_before_filter, | ||||
|             'duration': time.time() - now, | ||||
|             'trigger_line_numbers': trigger_line_numbers, | ||||
|             'ignore_line_numbers': ignore_line_numbers, | ||||
|         } | ||||
|     ) | ||||
|         }) | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -6,6 +6,7 @@ import os | ||||
| import re | ||||
| import urllib3 | ||||
|  | ||||
| from changedetectionio.conditions import execute_ruleset_against_all_plugins | ||||
| from changedetectionio.processors import difference_detection_processor | ||||
| from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE | ||||
| from changedetectionio import html_tools, content_fetchers | ||||
| @@ -251,6 +252,7 @@ class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|         # 615 Extract text by regex | ||||
|         extract_text = watch.get('extract_text', []) | ||||
|         extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text') | ||||
|         if len(extract_text) > 0: | ||||
|             regex_matched_output = [] | ||||
|             for s_re in extract_text: | ||||
| @@ -295,6 +297,8 @@ class perform_site_check(difference_detection_processor): | ||||
| ### CALCULATE MD5 | ||||
|         # If there's text to ignore | ||||
|         text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) | ||||
|         text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text') | ||||
|  | ||||
|         text_for_checksuming = stripped_text_from_html | ||||
|         if text_to_ignore: | ||||
|             text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore) | ||||
| @@ -307,8 +311,8 @@ class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|         ############ Blocking rules, after checksum ################# | ||||
|         blocked = False | ||||
|  | ||||
|         trigger_text = watch.get('trigger_text', []) | ||||
|         trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text') | ||||
|         if len(trigger_text): | ||||
|             # Assume blocked | ||||
|             blocked = True | ||||
| @@ -323,6 +327,7 @@ class perform_site_check(difference_detection_processor): | ||||
|                 blocked = False | ||||
|  | ||||
|         text_should_not_be_present = watch.get('text_should_not_be_present', []) | ||||
|         text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present') | ||||
|         if len(text_should_not_be_present): | ||||
|             # If anything matched, then we should block a change from happening | ||||
|             result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), | ||||
| @@ -331,6 +336,18 @@ class perform_site_check(difference_detection_processor): | ||||
|             if result: | ||||
|                 blocked = True | ||||
|  | ||||
|         # And check if 'conditions' will let this pass through | ||||
|         if watch.get('conditions') and watch.get('conditions_match_logic'): | ||||
|             conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'), | ||||
|                                                                     application_datastruct=self.datastore.data, | ||||
|                                                                     ephemeral_data={ | ||||
|                                                                         'text': stripped_text_from_html | ||||
|                                                                     } | ||||
|                                                                     ) | ||||
|  | ||||
|             if not conditions_result.get('result'): | ||||
|                 # Conditions say "Condition not met" so we block it. | ||||
|                 blocked = True | ||||
|  | ||||
|         # Looks like something changed, but did it match all the rules? | ||||
|         if blocked: | ||||
|   | ||||
							
								
								
									
										124
									
								
								changedetectionio/realtime/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,124 @@ | ||||
| # Real-time Socket.IO Implementation | ||||
|  | ||||
| This directory contains the Socket.IO implementation for changedetection.io's real-time updates. | ||||
|  | ||||
| ## Architecture Overview | ||||
|  | ||||
| The real-time system provides live updates to the web interface for: | ||||
| - Watch status changes (checking, completed, errors) | ||||
| - Queue length updates   | ||||
| - General statistics updates | ||||
|  | ||||
| ## Current Implementation | ||||
|  | ||||
| ### Socket.IO Configuration | ||||
| - **Async Mode**: `threading` (default) or `gevent` (optional via SOCKETIO_MODE env var) | ||||
| - **Server**: Flask-SocketIO with threading support | ||||
| - **Background Tasks**: Python threading with daemon threads | ||||
|  | ||||
| ### Async Worker Integration | ||||
| - **Workers**: Async workers using asyncio for watch processing | ||||
| - **Queue**: AsyncSignalPriorityQueue for job distribution | ||||
| - **Signals**: Blinker signals for real-time updates between workers and Socket.IO | ||||
|  | ||||
| ### Environment Variables | ||||
| - `SOCKETIO_MODE=threading` (default, recommended) | ||||
| - `SOCKETIO_MODE=gevent` (optional, has cross-platform limitations) | ||||
|  | ||||
| ## Architecture Decision: Why Threading Mode? | ||||
|  | ||||
| ### Previous Issues with Eventlet | ||||
| **Eventlet was completely removed** due to fundamental compatibility issues: | ||||
|  | ||||
| 1. **Monkey Patching Conflicts**: `eventlet.monkey_patch()` globally replaced Python's threading/socket modules, causing conflicts with: | ||||
|    - Playwright's synchronous browser automation | ||||
|    - Async worker event loops | ||||
|    - Various Python libraries expecting real threading | ||||
|  | ||||
| 2. **Python 3.12+ Compatibility**: Eventlet had issues with newer Python versions and asyncio integration | ||||
|  | ||||
| 3. **CVE-2023-29483**: Security vulnerability in eventlet's dnspython dependency | ||||
|  | ||||
| ### Current Solution Benefits | ||||
| ✅ **Threading Mode Advantages**: | ||||
| - Full compatibility with async workers and Playwright | ||||
| - No monkey patching - uses standard Python threading | ||||
| - Better Python 3.12+ support | ||||
| - Cross-platform compatibility (Windows, macOS, Linux) | ||||
| - No external async library dependencies | ||||
| - Fast shutdown capabilities | ||||
|  | ||||
| ✅ **Optional Gevent Support**: | ||||
| - Available via `SOCKETIO_MODE=gevent` for high-concurrency scenarios | ||||
| - Cross-platform limitations documented in requirements.txt | ||||
| - Not recommended as default due to Windows socket limits and macOS ARM build issues | ||||
|  | ||||
| ## Socket.IO Mode Configuration | ||||
|  | ||||
| ### Threading Mode (Default) | ||||
| ```python | ||||
| # Enabled automatically | ||||
| async_mode = 'threading' | ||||
| socketio = SocketIO(app, async_mode='threading') | ||||
| ``` | ||||
|  | ||||
| ### Gevent Mode (Optional) | ||||
| ```bash | ||||
| # Set environment variable | ||||
| export SOCKETIO_MODE=gevent | ||||
| ``` | ||||
|  | ||||
| ## Background Tasks | ||||
|  | ||||
| ### Queue Polling | ||||
| - **Threading Mode**: `threading.Thread` with `threading.Event` for shutdown | ||||
| - **Signal Handling**: Blinker signals for watch state changes | ||||
| - **Real-time Updates**: Direct Socket.IO `emit()` calls to connected clients | ||||
|  | ||||
| ### Worker Integration | ||||
| - **Async Workers**: Run in separate asyncio event loop thread | ||||
| - **Communication**: AsyncSignalPriorityQueue bridges async workers and Socket.IO | ||||
| - **Updates**: Real-time updates sent when workers complete tasks | ||||
|  | ||||
| ## Files in This Directory | ||||
|  | ||||
| - `socket_server.py`: Main Socket.IO initialization and event handling | ||||
| - `events.py`: Watch operation event handlers   | ||||
| - `__init__.py`: Module initialization | ||||
|  | ||||
| ## Production Deployment | ||||
|  | ||||
| ### Recommended WSGI Servers | ||||
| For production with Socket.IO threading mode: | ||||
| - **Gunicorn**: `gunicorn --worker-class gevent changedetection:app` (only if using gevent mode) | ||||
| - **uWSGI**: With threading support | ||||
| - **Docker**: Built-in Flask server works well for containerized deployments | ||||
|  | ||||
| ### Performance Considerations | ||||
| - Threading mode: Better memory usage, standard Python threading | ||||
| - Gevent mode: Higher concurrency but platform limitations | ||||
| - Async workers: Separate from Socket.IO, provides scalability | ||||
|  | ||||
| ## Environment Variables | ||||
|  | ||||
| | Variable | Default | Description | | ||||
| |----------|---------|-------------| | ||||
| | `SOCKETIO_MODE` | `threading` | Socket.IO async mode (`threading` or `gevent`) | | ||||
| | `FETCH_WORKERS` | `10` | Number of async workers for watch processing | | ||||
| | `CHANGEDETECTION_HOST` | `0.0.0.0` | Server bind address | | ||||
| | `CHANGEDETECTION_PORT` | `5000` | Server port | | ||||
|  | ||||
| ## Debugging Tips | ||||
|  | ||||
| 1. **Socket.IO Issues**: Check browser dev tools for WebSocket connection errors | ||||
| 2. **Threading Issues**: Monitor with `ps -T` to check thread count   | ||||
| 3. **Worker Issues**: Use `/worker-health` endpoint to check async worker status | ||||
| 4. **Queue Issues**: Use `/queue-status` endpoint to monitor job queue | ||||
| 5. **Performance**: Use `/gc-cleanup` endpoint to trigger memory cleanup | ||||
|  | ||||
| ## Migration Notes | ||||
|  | ||||
| If upgrading from eventlet-based versions: | ||||
| - Remove any `EVENTLET_*` environment variables | ||||
| - No code changes needed - Socket.IO mode is automatically configured | ||||
| - Optional: Set `SOCKETIO_MODE=gevent` if high concurrency is required and platform supports it | ||||
							
								
								
									
										3
									
								
								changedetectionio/realtime/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,3 @@ | ||||
| """ | ||||
| Socket.IO realtime updates module for changedetection.io | ||||
| """ | ||||
							
								
								
									
										58
									
								
								changedetectionio/realtime/events.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,58 @@ | ||||
| from flask_socketio import emit | ||||
| from loguru import logger | ||||
| from blinker import signal | ||||
|  | ||||
|  | ||||
def register_watch_operation_handlers(socketio, datastore):
    """Register Socket.IO event handlers for watch operations.

    Wires a single 'watch_operation' event onto the given SocketIO instance.
    The handler expects a payload dict with 'op' (pause | mute | recheck)
    and 'uuid' (the watch identifier) and always answers the caller with an
    'operation_result' event describing success or failure.
    """

    @socketio.on('watch_operation')
    def handle_watch_operation(data):
        """Handle watch operations like pause, mute, recheck via Socket.IO"""
        try:
            op = data.get('op')
            uuid = data.get('uuid')

            logger.debug(f"Socket.IO: Received watch operation '{op}' for UUID {uuid}")

            # Guard: both fields are mandatory.
            if not (op and uuid):
                emit('operation_result', {'success': False, 'error': 'Missing operation or UUID'})
                return

            # Guard: the watch must exist in the datastore.
            watch = datastore.data['watching'].get(uuid)
            if watch is None:
                emit('operation_result', {'success': False, 'error': 'Watch not found'})
                return

            # The two simple state toggles are dispatched via a lookup table;
            # 'recheck' needs queue plumbing so it keeps its own branch.
            toggles = {'pause': watch.toggle_pause, 'mute': watch.toggle_mute}

            if op in toggles:
                toggles[op]()
                logger.info(f"Socket.IO: Toggled {op} for watch {uuid}")
            elif op == 'recheck':
                # Import here to avoid circular imports
                from changedetectionio.flask_app import update_q
                from changedetectionio import queuedWatchMetaData
                from changedetectionio import worker_handler

                worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
                logger.info(f"Socket.IO: Queued recheck for watch {uuid}")
            else:
                emit('operation_result', {'success': False, 'error': f'Unknown operation: {op}'})
                return

            # Nudge the UI so the affected row refreshes.
            watch_check_update = signal('watch_check_update')
            if watch_check_update:
                watch_check_update.send(watch_uuid=uuid)

            # Acknowledge the operation back to the requesting client.
            emit('operation_result', {'success': True, 'operation': op, 'uuid': uuid})

        except Exception as e:
            logger.error(f"Socket.IO error in handle_watch_operation: {str(e)}")
            emit('operation_result', {'success': False, 'error': str(e)})
							
								
								
									
										396
									
								
								changedetectionio/realtime/socket_server.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						| @@ -0,0 +1,396 @@ | ||||
| import timeago | ||||
| from flask_socketio import SocketIO | ||||
|  | ||||
| import time | ||||
| import os | ||||
| from loguru import logger | ||||
| from blinker import signal | ||||
|  | ||||
| from changedetectionio import strtobool | ||||
|  | ||||
|  | ||||
class SignalHandler:
    """A standalone class to receive signals.

    Bridges blinker signals (emitted by workers and the Flask app) to
    Socket.IO events for connected browser clients.  On construction it
    subscribes to four signals — watch_check_update, queue_length,
    watch_deleted and notification_event — and starts a daemon polling
    thread that watches for changes in the set of currently-running
    watch UUIDs.
    """

    def __init__(self, socketio_instance, datastore):
        # socketio_instance: the Flask-SocketIO server used to emit to clients.
        # datastore: the application's watch/settings store.
        self.socketio_instance = socketio_instance
        self.datastore = datastore

        # Connect to the watch_check_update signal
        from changedetectionio.flask_app import watch_check_update as wcc
        # weak=False keeps a strong reference so the subscription survives GC.
        wcc.connect(self.handle_signal, weak=False)
        #        logger.info("SignalHandler: Connected to signal from direct import")

        # Connect to the queue_length signal
        queue_length_signal = signal('queue_length')
        queue_length_signal.connect(self.handle_queue_length, weak=False)
        #       logger.info("SignalHandler: Connected to queue_length signal")

        watch_delete_signal = signal('watch_deleted')
        watch_delete_signal.connect(self.handle_deleted_signal, weak=False)

        # Connect to the notification_event signal
        notification_event_signal = signal('notification_event')
        notification_event_signal.connect(self.handle_notification_event, weak=False)
        logger.info("SignalHandler: Connected to notification_event signal")

        # Create and start the queue update thread using standard threading
        import threading
        # daemon=True so the thread never blocks interpreter shutdown.
        self.polling_emitter_thread = threading.Thread(
            target=self.polling_emit_running_or_queued_watches_threaded, 
            daemon=True
        )
        self.polling_emitter_thread.start()
        logger.info("Started polling thread using threading (eventlet-free)")

        # Store the thread reference in socketio for clean shutdown
        self.socketio_instance.polling_emitter_thread = self.polling_emitter_thread

    def handle_signal(self, *args, **kwargs):
        """Receive watch_check_update and emit a per-watch update to clients.

        Expects 'watch_uuid' in kwargs; an optional 'app_context' (a Flask
        app) is entered before emitting so Jinja filters used downstream
        have a request/app context available.
        """
        logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs")
        # Safely extract the watch UUID from kwargs
        watch_uuid = kwargs.get('watch_uuid')
        app_context = kwargs.get('app_context')

        if watch_uuid:
            # Get the watch object from the datastore
            watch = self.datastore.data['watching'].get(watch_uuid)
            if watch:
                if app_context:
                    # note
                    with app_context.app_context():
                        with app_context.test_request_context():
                            # Forward to handle_watch_update with the watch parameter
                            handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore)
                else:
                    handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore)

                logger.trace(f"Signal handler processed watch UUID {watch_uuid}")
            else:
                logger.warning(f"Watch UUID {watch_uuid} not found in datastore")

    def handle_deleted_signal(self, *args, **kwargs):
        """Broadcast a 'watch_deleted' event so clients can drop the row."""
        watch_uuid = kwargs.get('watch_uuid')
        if watch_uuid:
            # Emit the queue size to all connected clients
            self.socketio_instance.emit("watch_deleted", {
                "uuid": watch_uuid,
                "event_timestamp": time.time()
            })
        logger.debug(f"Watch UUID {watch_uuid} was deleted")

    def handle_queue_length(self, *args, **kwargs):
        """Handle queue_length signal and emit to all clients"""
        try:
            queue_length = kwargs.get('length', 0)
            logger.debug(f"SignalHandler: Queue length update received: {queue_length}")

            # Emit the queue size to all connected clients
            self.socketio_instance.emit("queue_size", {
                "q_length": queue_length,
                "event_timestamp": time.time()
            })

        except Exception as e:
            logger.error(f"Socket.IO error in handle_queue_length: {str(e)}")

    def handle_notification_event(self, *args, **kwargs):
        """Handle notification_event signal and emit to all clients"""
        try:
            watch_uuid = kwargs.get('watch_uuid')
            logger.debug(f"SignalHandler: Notification event received for watch UUID: {watch_uuid}")

            # Emit the notification event to all connected clients
            self.socketio_instance.emit("notification_event", {
                "watch_uuid": watch_uuid,
                "event_timestamp": time.time()
            })
             
            logger.trace(f"Socket.IO: Emitted notification_event for watch UUID {watch_uuid}")

        except Exception as e:
            logger.error(f"Socket.IO error in handle_notification_event: {str(e)}")


    def polling_emit_running_or_queued_watches_threaded(self):
        """Threading version of polling for Windows compatibility.

        Runs until the app's exit event is set.  Each cycle it diffs the
        set of running watch UUIDs against the previous cycle and fires
        watch_check_update only for UUIDs whose state changed, avoiding
        redundant emissions.  The exit flag is re-checked roughly every
        0.5s so shutdown stays fast despite the 10s polling interval.
        """
        import time
        import threading
        logger.info("Queue update thread started (threading mode)")
         
        # Import here to avoid circular imports
        from changedetectionio.flask_app import app
        from changedetectionio import worker_handler
        watch_check_update = signal('watch_check_update')
         
        # Track previous state to avoid unnecessary emissions
        previous_running_uuids = set()
         
        # Run until app shutdown - check exit flag more frequently for fast shutdown
        # NOTE(review): assumes flask_app sets `app.config.exit` as an attribute
        # (a threading.Event) — confirm; a plain dict key would not be found by getattr.
        exit_event = getattr(app.config, 'exit', threading.Event())
         
        while not exit_event.is_set():
            try:
                # Get current running UUIDs from async workers
                running_uuids = set(worker_handler.get_running_uuids())
                 
                # Only send updates for UUIDs that changed state
                newly_running = running_uuids - previous_running_uuids
                no_longer_running = previous_running_uuids - running_uuids
                 
                # Send updates for newly running UUIDs (but exit fast if shutdown requested)
                for uuid in newly_running:
                    if exit_event.is_set():
                        break
                    logger.trace(f"Threading polling: UUID {uuid} started processing")
                    with app.app_context():
                        watch_check_update.send(app_context=app, watch_uuid=uuid)
                    time.sleep(0.01)  # Small yield
                 
                # Send updates for UUIDs that finished processing (but exit fast if shutdown requested)
                if not exit_event.is_set():
                    for uuid in no_longer_running:
                        if exit_event.is_set():
                            break
                        logger.trace(f"Threading polling: UUID {uuid} finished processing")
                        with app.app_context():
                            watch_check_update.send(app_context=app, watch_uuid=uuid)
                        time.sleep(0.01)  # Small yield
                 
                # Update tracking for next iteration
                previous_running_uuids = running_uuids
                 
                # Sleep between polling cycles, but check exit flag every 0.5 seconds for fast shutdown
                for _ in range(20):  # 20 * 0.5 = 10 seconds total
                    if exit_event.is_set():
                        break
                    time.sleep(0.5)
                 
            except Exception as e:
                logger.error(f"Error in threading polling: {str(e)}")
                # Even during error recovery, check for exit quickly
                for _ in range(1):  # 1 * 0.5 = 0.5 seconds
                    if exit_event.is_set():
                        break
                    time.sleep(0.5)
         
        # Check if we're in pytest environment - if so, be more gentle with logging
        import sys
        in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
         
        if not in_pytest:
            logger.info("Queue update thread stopped (threading mode)")
|  | ||||
|  | ||||
def handle_watch_update(socketio, **kwargs):
    """Handle watch update signal from blinker.

    Builds a compact snapshot of a single watch (state, timestamps,
    error info) plus a couple of global counters and broadcasts both to
    every connected Socket.IO client as a 'watch_update' event.
    Expects 'watch' and 'datastore' in kwargs.
    """
    try:
        watch = kwargs.get('watch')
        datastore = kwargs.get('datastore')

        # Deferred imports keep module load free of circular dependencies.
        from changedetectionio.flask_app import update_q
        from changedetectionio.flask_app import _jinja2_filter_datetime
        from changedetectionio import worker_handler

        # UUIDs the async workers are processing right now.
        running_uuids = worker_handler.get_running_uuids()

        # UUIDs currently sitting in the job queue.
        queue_list = [q_item.item['uuid']
                      for q_item in update_q.queue
                      if hasattr(q_item, 'item') and 'uuid' in q_item.item]

        # Compiled error text (falsy when the watch has no errors).
        error_texts = watch.compile_error_texts()

        # Human-friendly "last changed" label; only meaningful once at
        # least two history snapshots exist.
        if watch.history_n >= 2 and int(watch.last_changed) > 0:
            last_changed_text = timeago.format(int(watch.last_changed), time.time())
        else:
            last_changed_text = 'Not yet'

        # Simplified per-watch payload sent to clients.
        watch_data = {
            'checking_now': watch.get('uuid') in running_uuids,
            'fetch_time': watch.get('fetch_time'),
            'has_error': bool(error_texts),
            'last_changed': watch.get('last_changed'),
            'last_checked': watch.get('last_checked'),
            'error_text': error_texts,
            'history_n': watch.history_n,
            'last_checked_text': _jinja2_filter_datetime(watch),
            'last_changed_text': last_changed_text,
            'queued': watch.get('uuid') in queue_list,
            'paused': bool(watch.get('paused')),
            'notification_muted': bool(watch.get('notification_muted')),
            'unviewed': watch.has_unviewed,
            'uuid': watch.get('uuid'),
            'event_timestamp': time.time()
        }

        # Global counters shown in the UI header.
        errored_count = sum(1 for w in datastore.data['watching'].values() if w.get('last_error'))

        general_stats = {
            'count_errors': errored_count,
            'has_unviewed': datastore.has_unviewed
        }

        # Broadcast to all clients (broadcasting is emit()'s default behavior).
        socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats})

        # Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues
        logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}")

    except Exception as e:
        logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
|  | ||||
|  | ||||
| def init_socketio(app, datastore): | ||||
|     """Initialize SocketIO with the main Flask app""" | ||||
|     import platform | ||||
|     import sys | ||||
|      | ||||
|     # Platform-specific async_mode selection for better stability | ||||
|     system = platform.system().lower() | ||||
|     python_version = sys.version_info | ||||
|      | ||||
|     # Check for SocketIO mode configuration via environment variable | ||||
|     # Default is 'threading' for best cross-platform compatibility | ||||
|     socketio_mode = os.getenv('SOCKETIO_MODE', 'threading').lower() | ||||
|      | ||||
|     if socketio_mode == 'gevent': | ||||
|         # Use gevent mode (higher concurrency but platform limitations) | ||||
|         try: | ||||
|             import gevent | ||||
|             async_mode = 'gevent' | ||||
|             logger.info(f"SOCKETIO_MODE=gevent: Using {async_mode} mode for Socket.IO") | ||||
|         except ImportError: | ||||
|             async_mode = 'threading' | ||||
|             logger.warning(f"SOCKETIO_MODE=gevent but gevent not available, falling back to {async_mode} mode") | ||||
|     elif socketio_mode == 'threading': | ||||
|         # Use threading mode (default - best compatibility) | ||||
|         async_mode = 'threading' | ||||
|         logger.info(f"SOCKETIO_MODE=threading: Using {async_mode} mode for Socket.IO") | ||||
|     else: | ||||
|         # Invalid mode specified, use default | ||||
|         async_mode = 'threading' | ||||
|         logger.warning(f"Invalid SOCKETIO_MODE='{socketio_mode}', using default {async_mode} mode for Socket.IO") | ||||
|      | ||||
|     # Log platform info for debugging | ||||
|     logger.info(f"Platform: {system}, Python: {python_version.major}.{python_version.minor}, Socket.IO mode: {async_mode}") | ||||
|  | ||||
|     # Restrict SocketIO CORS to same origin by default, can be overridden with env var | ||||
|     cors_origins = os.environ.get('SOCKETIO_CORS_ORIGINS', None) | ||||
|  | ||||
|     socketio = SocketIO(app, | ||||
|                         async_mode=async_mode, | ||||
|                         cors_allowed_origins=cors_origins,  # None means same-origin only | ||||
|                         logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')), | ||||
|                         engineio_logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False'))) | ||||
|  | ||||
|     # Set up event handlers | ||||
|     logger.info("Socket.IO: Registering connect event handler") | ||||
|  | ||||
|     @socketio.on('checkbox-operation') | ||||
|     def event_checkbox_operations(data): | ||||
|         from changedetectionio.blueprint.ui import _handle_operations | ||||
|         from changedetectionio import queuedWatchMetaData | ||||
|         from changedetectionio import worker_handler | ||||
|         from changedetectionio.flask_app import update_q, watch_check_update | ||||
|         logger.trace(f"Got checkbox operations event: {data}") | ||||
|  | ||||
|         datastore = socketio.datastore | ||||
|  | ||||
|         _handle_operations( | ||||
|             op=data.get('op'), | ||||
|             uuids=data.get('uuids'), | ||||
|             datastore=datastore, | ||||
|             extra_data=data.get('extra_data'), | ||||
|             worker_handler=worker_handler, | ||||
|             update_q=update_q, | ||||
|             queuedWatchMetaData=queuedWatchMetaData, | ||||
|             watch_check_update=watch_check_update, | ||||
|             emit_flash=False | ||||
|         ) | ||||
|  | ||||
|  | ||||
|     @socketio.on('connect') | ||||
|     def handle_connect(): | ||||
|         """Handle client connection""" | ||||
|         #        logger.info("Socket.IO: CONNECT HANDLER CALLED - Starting connection process") | ||||
|         from flask import request | ||||
|         from flask_login import current_user | ||||
|         from changedetectionio.flask_app import update_q | ||||
|  | ||||
|         # Access datastore from socketio | ||||
|         datastore = socketio.datastore | ||||
|         #        logger.info(f"Socket.IO: Current user authenticated: {current_user.is_authenticated if hasattr(current_user, 'is_authenticated') else 'No current_user'}") | ||||
|  | ||||
|         # Check if authentication is required and user is not authenticated | ||||
|         has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False) | ||||
|         #        logger.info(f"Socket.IO: Password enabled: {has_password_enabled}") | ||||
|         if has_password_enabled and not current_user.is_authenticated: | ||||
|             logger.warning("Socket.IO: Rejecting unauthenticated connection") | ||||
|             return False  # Reject the connection | ||||
|  | ||||
|         # Send the current queue size to the newly connected client | ||||
|         try: | ||||
|             queue_size = update_q.qsize() | ||||
|             socketio.emit("queue_size", { | ||||
|                 "q_length": queue_size, | ||||
|                 "event_timestamp": time.time() | ||||
|             }, room=request.sid)  # Send only to this client | ||||
|             logger.debug(f"Socket.IO: Sent initial queue size {queue_size} to new client") | ||||
|         except Exception as e: | ||||
|             logger.error(f"Socket.IO error sending initial queue size: {str(e)}") | ||||
|  | ||||
|         logger.info("Socket.IO: Client connected") | ||||
|  | ||||
|     #    logger.info("Socket.IO: Registering disconnect event handler") | ||||
|     @socketio.on('disconnect') | ||||
|     def handle_disconnect(): | ||||
|         """Handle client disconnection""" | ||||
|         logger.info("Socket.IO: Client disconnected") | ||||
|  | ||||
|     # Create a dedicated signal handler that will receive signals and emit them to clients | ||||
|     signal_handler = SignalHandler(socketio, datastore) | ||||
|  | ||||
|     # Register watch operation event handlers | ||||
|     from .events import register_watch_operation_handlers | ||||
|     register_watch_operation_handlers(socketio, datastore) | ||||
|  | ||||
|     # Store the datastore reference on the socketio object for later use | ||||
|     socketio.datastore = datastore | ||||
|  | ||||
|     # No stop event needed for threading mode - threads check app.config.exit directly | ||||
|  | ||||
|     # Add a shutdown method to the socketio object | ||||
|     def shutdown(): | ||||
|         """Shutdown the SocketIO server fast and aggressively""" | ||||
|         try: | ||||
|             logger.info("Socket.IO: Fast shutdown initiated...") | ||||
|  | ||||
|             # For threading mode, give the thread a very short time to exit gracefully | ||||
|             if hasattr(socketio, 'polling_emitter_thread'): | ||||
|                 if socketio.polling_emitter_thread.is_alive(): | ||||
|                     logger.info("Socket.IO: Waiting 1 second for polling thread to stop...") | ||||
|                     socketio.polling_emitter_thread.join(timeout=1.0)  # Only 1 second timeout | ||||
|                     if socketio.polling_emitter_thread.is_alive(): | ||||
|                         logger.info("Socket.IO: Polling thread still running after timeout - continuing with shutdown") | ||||
|                     else: | ||||
|                         logger.info("Socket.IO: Polling thread stopped quickly") | ||||
|                 else: | ||||
|                     logger.info("Socket.IO: Polling thread already stopped") | ||||
|  | ||||
|             logger.info("Socket.IO: Fast shutdown complete") | ||||
|         except Exception as e: | ||||
|             logger.error(f"Socket.IO error during shutdown: {str(e)}") | ||||
|  | ||||
|     # Attach the shutdown method to the socketio object | ||||
|     socketio.shutdown = shutdown | ||||
|  | ||||
|     logger.info("Socket.IO initialized and attached to main Flask app") | ||||
|     logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}") | ||||
|     return socketio | ||||
| @@ -14,7 +14,8 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||||
| find tests/test_*py -type f|while read test_name | ||||
| do | ||||
|   echo "TEST RUNNING $test_name" | ||||
|   pytest $test_name | ||||
|   # REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser | ||||
|   REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name | ||||
| done | ||||
|  | ||||
| echo "RUNNING WITH BASE_URL SET" | ||||
| @@ -22,7 +23,7 @@ echo "RUNNING WITH BASE_URL SET" | ||||
| # Now re-run some tests with BASE_URL enabled | ||||
| # Re #65 - Ability to include a link back to the installation, in the notification. | ||||
| export BASE_URL="https://really-unique-domain.io" | ||||
| pytest tests/test_notification.py | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py | ||||
|  | ||||
|  | ||||
| # Re-run with HIDE_REFERER set - could affect login | ||||
| @@ -32,11 +33,14 @@ pytest tests/test_access_control.py | ||||
| # Re-run a few tests that will trigger brotli based storage | ||||
| export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5 | ||||
| pytest tests/test_access_control.py | ||||
| pytest tests/test_notification.py | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py | ||||
| pytest tests/test_backend.py | ||||
| pytest tests/test_rss.py | ||||
| pytest tests/test_unique_lines.py | ||||
|  | ||||
| # Try high concurrency | ||||
| FETCH_WORKERS=130 pytest  tests/test_history_consistency.py -v -l | ||||
|  | ||||
| # Check file:// will pickup a file when enabled | ||||
| echo "Hello world" > /tmp/test-file.txt | ||||
| ALLOW_FILE_URI=yes pytest tests/test_security.py | ||||
|   | ||||
| @@ -82,3 +82,25 @@ done | ||||
|  | ||||
|  | ||||
| docker kill squid-one squid-two squid-custom | ||||
|  | ||||
| # Test that the UI is returning the correct error message when a proxy is not available | ||||
|  | ||||
| # Requests | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|  | ||||
| # Playwright | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|  | ||||
| # Puppeteer fast | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|  | ||||
| # Selenium | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| <?xml version="1.0" encoding="UTF-8" standalone="no"?> | ||||
| <svg | ||||
|    version="1.1" | ||||
|    id="Layer_1" | ||||
|    id="copy" | ||||
|    x="0px" | ||||
|    y="0px" | ||||
|    viewBox="0 0 115.77 122.88" | ||||
|   | ||||
| Before Width: | Height: | Size: 2.5 KiB After Width: | Height: | Size: 2.5 KiB | 
| @@ -6,7 +6,7 @@ | ||||
|    height="7.5005589" | ||||
|    width="11.248507" | ||||
|    version="1.1" | ||||
|    id="Layer_1" | ||||
|    id="email" | ||||
|    viewBox="0 0 7.1975545 4.7993639" | ||||
|    xml:space="preserve" | ||||
|    xmlns="http://www.w3.org/2000/svg" | ||||
|   | ||||
| Before Width: | Height: | Size: 1.9 KiB After Width: | Height: | Size: 1.9 KiB | 
| Before Width: | Height: | Size: 569 B After Width: | Height: | Size: 569 B | 
| Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |