mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-30 14:17:40 +00:00 
			
		
		
		
	Compare commits
	
		
			177 Commits
		
	
	
		
			bugfix-las
			...
			0.50.13
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | a27265450c | ||
|   | cc5455c3dc | ||
|   | 9db7fb83eb | ||
|   | f0061110c9 | ||
|   | a13fedc0d6 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 7576bec66a | ||
|   | 7672190923 | ||
|   | 0ade4307b0 | ||
|   | 8c03b65dc6 | ||
|   | 8a07459e43 | ||
|   | cd8e115118 | ||
|   | 4ff7b20fcf | ||
|   | 8120f00148 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 127abf49f1 | ||
|   | db81c3c5e2 | ||
|   | 9952af7a52 | ||
|   | 790577c1b6 | ||
|   | bab362fb7d | ||
|   | a177d02406 | ||
|   | 8b8f280565 | ||
|   | e752875504 | ||
|   | 0a4562fc09 | ||
|   | c84ac2eab1 | ||
|   | 3ae07ac633 | ||
|   | 8379fdb1f8 | ||
|   | 3f77e075b9 | ||
|   | 685bd01156 | ||
|   | 20bcca578a | ||
|   | f05f143b46 | ||
|   | d7f00679a0 | ||
|   | b7da6f0ca7 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | e4a81ebe08 | ||
|   | a4edc46af0 | ||
|   | 767db3b79b | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 4f6e9dcc56 | ||
|   | aa4e182549 | ||
|   | fe1f7c30e1 | ||
|   | e5ed1ae349 | ||
|   | d1b1dd70f4 | ||
|   | 93b14c9fc8 | ||
|   | c9c5de20d8 | ||
|   | 011fa3540e | ||
|   | c3c3671f8b | ||
|   | 5980bd9bcd | ||
|   | 438871429c | ||
|   | 173ce5bfa2 | ||
|   | 106b1f85fa | ||
|   | a5c7f343d0 | ||
|   | 401886bcda | ||
|   | c66fca9de9 | ||
|   | daee4c5c17 | ||
|   | af5d0b6963 | ||
|   | f92dd81c8f | ||
|   | 55cdcfe3ea | ||
|   | 2f7520a6c5 | ||
|   | 4fdc5d7da2 | ||
|   | 308f30b2e8 | ||
|   | 4fa2042d12 | ||
|   | 2a4e1bad4e | ||
|   | 8a317eead5 | ||
|   | b58094877f | ||
|   | afe252126c | ||
|   | 342e6119f1 | ||
|   | e4ff87e970 | ||
|   | e45a544f15 | ||
|   | 9a5abaa17a | ||
|   | b8ecfff861 | ||
|   | 58e2a41c95 | ||
|   | a7214db9c3 | ||
|   | b9da4af64f | ||
|   | b77105be7b | ||
|   | 3d5a544ea6 | ||
|   | 4f362385e1 | ||
|   | a01d6169d2 | ||
|   | 9beda3911d | ||
|   | 5ed596bfa9 | ||
|   | 99ca8787ab | ||
|   | 8f1a6feb90 | ||
|   | c0e229201b | ||
|   | 66bc7fbc04 | ||
|   | 530bd40ca5 | ||
|   | 36004cf74b | ||
|   | c7374245e1 | ||
|   | 59df59e9cd | ||
|   | c0c2898b91 | ||
|   | abac660bac | ||
|   | 26de64d873 | ||
|   | 79d9a8ca28 | ||
|   | 5c391fbcad | ||
|   | d7e24f64a5 | ||
|   | d6427d823f | ||
|   | 47eb874f47 | ||
|   | 37019355fd | ||
|   | a8e7f8236e | ||
|   | 2414b61fcb | ||
|   | a63ffa89b1 | ||
|   | 59e93c29d0 | ||
|   | d7173bb96e | ||
|   | d544e11a20 | ||
|   | 7f0c19c61c | ||
|   | 30e84f1030 | ||
|   | d5af91d8f7 | ||
|   | 4b18c633ba | ||
|   | 08728d7d03 | ||
|   | 73f3beda00 | ||
|   | 7b8d335c43 | ||
|   | ba0b6071e6 | ||
|   | a6603d5ad6 | ||
|   | 26833781a7 | ||
|   | f3ed9bdbb5 | ||
|   | 0f65178190 | ||
|   | a58fc82575 | ||
|   | 2575c03ae0 | ||
|   | 9b7372fff0 | ||
|   | fcd6ebe0ee | ||
|   | c162ec9d52 | ||
|   | bb7f7f473b | ||
|   | a9ca511004 | ||
|   | 8df61f5eaa | ||
|   | 162f573967 | ||
|   | eada0ef08d | ||
|   | f57bc10973 | ||
|   | d2e8f822d6 | ||
|   | 5fd8200fd9 | ||
|   | d0da8c9825 | ||
|   | fd7574d21b | ||
|   | c70706a27b | ||
|   | 968c364999 | ||
|   | 031cb76b7d | ||
|   | af568d064c | ||
|   | a75f57de43 | ||
|   | 72a1c3dda1 | ||
|   | ffde79ecac | ||
|   | 66ad43b2df | ||
|   | 6b0e56ca80 | ||
|   | 5a2d84d8b4 | ||
|   | a941156f26 | ||
|   | a1fdeeaa29 | ||
|   | 40ea2604a7 | ||
|   | ceda526093 | ||
|   | 4197254c53 | ||
|   | a0b7efb436 | ||
|   | 5f5e8ede6c | ||
|   | 52ca855a29 | ||
|   | 079efd0a85 | ||
|   | 3a583a4e5d | ||
|   | cfb4decf67 | ||
|   | 8067d5170b | ||
|   | 5551acf67d | ||
|   | 45a030bac6 | ||
|   | 96dc49e229 | ||
|   | 5f43d988a3 | ||
|   | 4269079c54 | ||
|   | cdfb3f206c | ||
|   | 9f326783e5 | ||
|   | 4e6e680d79 | ||
|   | 1378b5b2ff | ||
|   | 456c6e3f58 | ||
|   | 61be7f68db | ||
|   | 0e38a3c881 | ||
|   | 2c630e9853 | ||
|   | 786e0d1fab | ||
|   | 78b7aee512 | ||
|   | 9d9d01863a | ||
|   | 108cdf84a5 | ||
|   | 8c6f6f1578 | ||
|   | df4ffaaff8 | ||
|   | d522c65e50 | ||
|   | c3b2a8b019 | ||
|   | 28d3151090 | ||
|   | 2a1c832f8d | ||
|   | 0170adb171 | ||
|   | cb62404b8c | ||
|   | 8f9c46bd3f | ||
|   | 97291ce6d0 | ||
|   | f689e5418e | ||
|   | f751f0b0ef | 
| @@ -29,3 +29,34 @@ venv/ | ||||
|  | ||||
| # Visual Studio | ||||
| .vscode/ | ||||
|  | ||||
| # Test and development files | ||||
| test-datastore/ | ||||
| tests/ | ||||
| *.md | ||||
| !README.md | ||||
|  | ||||
| # Temporary and log files | ||||
| *.log | ||||
| *.tmp | ||||
| tmp/ | ||||
| temp/ | ||||
|  | ||||
| # Training data and large files | ||||
| train-data/ | ||||
| works-data/ | ||||
|  | ||||
| # Container files | ||||
| Dockerfile* | ||||
| docker-compose*.yml | ||||
| .dockerignore | ||||
|  | ||||
| # Development certificates and keys | ||||
| *.pem | ||||
| *.key | ||||
| *.crt | ||||
| profile_output.prof | ||||
|  | ||||
| # Large binary files that shouldn't be in container | ||||
| *.pdf | ||||
| chrome.json | ||||
							
								
								
									
										6
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
								
							| @@ -2,7 +2,7 @@ | ||||
| # Test that we can still build on Alpine (musl modified libc https://musl.libc.org/) | ||||
| # Some packages won't install via PyPI because they don't have a wheel available under this architecture. | ||||
|  | ||||
| FROM ghcr.io/linuxserver/baseimage-alpine:3.21 | ||||
| FROM ghcr.io/linuxserver/baseimage-alpine:3.22 | ||||
| ENV PYTHONUNBUFFERED=1 | ||||
|  | ||||
| COPY requirements.txt /requirements.txt | ||||
| @@ -18,17 +18,19 @@ RUN \ | ||||
|     libxslt-dev \ | ||||
|     openssl-dev \ | ||||
|     python3-dev \ | ||||
|     file \ | ||||
|     zip \ | ||||
|     zlib-dev && \ | ||||
|   apk add --update --no-cache \ | ||||
|     libjpeg \ | ||||
|     libxslt \ | ||||
|     file \ | ||||
|     nodejs \ | ||||
|     poppler-utils \ | ||||
|     python3 && \ | ||||
|   echo "**** pip3 install test of changedetection.io ****" && \ | ||||
|   python3 -m venv /lsiopy  && \ | ||||
|   pip install -U pip wheel setuptools && \ | ||||
|   pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \ | ||||
|   pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements.txt && \ | ||||
|   apk del --purge \ | ||||
|     build-dependencies | ||||
|   | ||||
							
								
								
									
										2
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							| @@ -30,7 +30,7 @@ jobs: | ||||
|  | ||||
|     steps: | ||||
|     - name: Checkout repository | ||||
|       uses: actions/checkout@v4 | ||||
|       uses: actions/checkout@v5 | ||||
|  | ||||
|     # Initializes the CodeQL tools for scanning. | ||||
|     - name: Initialize CodeQL | ||||
|   | ||||
							
								
								
									
										18
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										18
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							| @@ -39,9 +39,9 @@ jobs: | ||||
|     # Or if we are in a tagged release scenario. | ||||
|     if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != '' | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - uses: actions/checkout@v5 | ||||
|       - name: Set up Python 3.11 | ||||
|         uses: actions/setup-python@v5 | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           python-version: 3.11 | ||||
|  | ||||
| @@ -103,6 +103,13 @@ jobs: | ||||
| #          provenance: false | ||||
|  | ||||
|       # A new tagged release is required, which builds :tag and :latest | ||||
|       - name: Debug release info | ||||
|         if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') | ||||
|         run: | | ||||
|           echo "Release tag: ${{ github.event.release.tag_name }}" | ||||
|           echo "Github ref: ${{ github.ref }}" | ||||
|           echo "Github ref name: ${{ github.ref_name }}" | ||||
|            | ||||
|       - name: Docker meta :tag | ||||
|         if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') | ||||
|         uses: docker/metadata-action@v5 | ||||
| @@ -112,9 +119,10 @@ jobs: | ||||
|                 ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io | ||||
|                 ghcr.io/dgtlmoon/changedetection.io | ||||
|             tags: | | ||||
|                 type=semver,pattern={{version}} | ||||
|                 type=semver,pattern={{major}}.{{minor}} | ||||
|                 type=semver,pattern={{major}} | ||||
|                 type=semver,pattern={{version}},value=${{ github.event.release.tag_name }} | ||||
|                 type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }} | ||||
|                 type=semver,pattern={{major}},value=${{ github.event.release.tag_name }} | ||||
|                 type=raw,value=latest | ||||
|  | ||||
|       - name: Build and push :tag | ||||
|         id: docker_build_tag_release | ||||
|   | ||||
							
								
								
									
										10
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										10
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							| @@ -7,9 +7,9 @@ jobs: | ||||
|     runs-on: ubuntu-latest | ||||
|  | ||||
|     steps: | ||||
|     - uses: actions/checkout@v4 | ||||
|     - uses: actions/checkout@v5 | ||||
|     - name: Set up Python | ||||
|       uses: actions/setup-python@v5 | ||||
|       uses: actions/setup-python@v6 | ||||
|       with: | ||||
|         python-version: "3.11" | ||||
|     - name: Install pypa/build | ||||
| @@ -34,12 +34,12 @@ jobs: | ||||
|     - build | ||||
|     steps: | ||||
|     - name: Download all the dists | ||||
|       uses: actions/download-artifact@v4 | ||||
|       uses: actions/download-artifact@v5 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|     - name: Set up Python 3.11 | ||||
|       uses: actions/setup-python@v5 | ||||
|       uses: actions/setup-python@v6 | ||||
|       with: | ||||
|         python-version: '3.11' | ||||
|     - name: Test that the basic pip built package runs without error | ||||
| @@ -72,7 +72,7 @@ jobs: | ||||
|  | ||||
|     steps: | ||||
|     - name: Download all the dists | ||||
|       uses: actions/download-artifact@v4 | ||||
|       uses: actions/download-artifact@v5 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|   | ||||
							
								
								
									
										46
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										46
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							| @@ -23,12 +23,32 @@ on: | ||||
|   # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing | ||||
|   # @todo: some kind of path filter for requirements.txt and Dockerfile | ||||
| jobs: | ||||
|   test-container-build: | ||||
|   builder: | ||||
|     name: Build ${{ matrix.platform }} (${{ matrix.dockerfile == './Dockerfile' && 'main' || 'alpine' }}) | ||||
|     runs-on: ubuntu-latest | ||||
|     strategy: | ||||
|       matrix: | ||||
|         include: | ||||
|           # Main Dockerfile platforms | ||||
|           - platform: linux/amd64 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm64 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm/v7 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm/v8 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm64/v8 | ||||
|             dockerfile: ./Dockerfile | ||||
|           # Alpine Dockerfile platforms (musl via alpine check) | ||||
|           - platform: linux/amd64 | ||||
|             dockerfile: ./.github/test/Dockerfile-alpine | ||||
|           - platform: linux/arm64 | ||||
|             dockerfile: ./.github/test/Dockerfile-alpine | ||||
|     steps: | ||||
|         - uses: actions/checkout@v4 | ||||
|         - uses: actions/checkout@v5 | ||||
|         - name: Set up Python 3.11 | ||||
|           uses: actions/setup-python@v5 | ||||
|           uses: actions/setup-python@v6 | ||||
|           with: | ||||
|             python-version: 3.11 | ||||
|  | ||||
| @@ -47,24 +67,14 @@ jobs: | ||||
|             version: latest | ||||
|             driver-opts: image=moby/buildkit:master | ||||
|  | ||||
|         # https://github.com/dgtlmoon/changedetection.io/pull/1067 | ||||
|         # Check we can still build under alpine/musl | ||||
|         - name: Test that the docker containers can build (musl via alpine check) | ||||
|           id: docker_build_musl | ||||
|           uses: docker/build-push-action@v6 | ||||
|           with: | ||||
|             context: ./ | ||||
|             file: ./.github/test/Dockerfile-alpine | ||||
|             platforms: linux/amd64,linux/arm64 | ||||
|  | ||||
|         - name: Test that the docker containers can build | ||||
|         - name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }}) | ||||
|           id: docker_build | ||||
|           uses: docker/build-push-action@v6 | ||||
|           # https://github.com/docker/build-push-action#customizing | ||||
|           with: | ||||
|             context: ./ | ||||
|             file: ./Dockerfile | ||||
|             platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 | ||||
|             cache-from: type=local,src=/tmp/.buildx-cache | ||||
|             cache-to: type=local,dest=/tmp/.buildx-cache | ||||
|             file: ${{ matrix.dockerfile }} | ||||
|             platforms: ${{ matrix.platform }} | ||||
|             cache-from: type=gha | ||||
|             cache-to: type=gha,mode=max | ||||
|  | ||||
|   | ||||
							
								
								
									
										22
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										22
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							| @@ -7,14 +7,18 @@ jobs: | ||||
|   lint-code: | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - name: Lint with flake8 | ||||
|       - uses: actions/checkout@v5 | ||||
|       - name: Lint with Ruff | ||||
|         run: | | ||||
|           pip3 install flake8 | ||||
|           # stop the build if there are Python syntax errors or undefined names | ||||
|           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics | ||||
|           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide | ||||
|           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics | ||||
|           pip install ruff | ||||
|           # Check for syntax errors and undefined names | ||||
|           ruff check . --select E9,F63,F7,F82 | ||||
|           # Complete check with errors treated as warnings | ||||
|           ruff check . --exit-zero | ||||
|       - name: Validate OpenAPI spec | ||||
|         run: | | ||||
|           pip install openapi-spec-validator | ||||
|           python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))" | ||||
|  | ||||
|   test-application-3-10: | ||||
|     needs: lint-code | ||||
| @@ -28,7 +32,6 @@ jobs: | ||||
|     uses: ./.github/workflows/test-stack-reusable-workflow.yml | ||||
|     with: | ||||
|       python-version: '3.11' | ||||
|       skip-pypuppeteer: true | ||||
|  | ||||
|   test-application-3-12: | ||||
|     needs: lint-code | ||||
| @@ -42,5 +45,4 @@ jobs: | ||||
|     uses: ./.github/workflows/test-stack-reusable-workflow.yml | ||||
|     with: | ||||
|       python-version: '3.13' | ||||
|       skip-pypuppeteer: true | ||||
|        | ||||
|       skip-pypuppeteer: true | ||||
| @@ -7,7 +7,7 @@ on: | ||||
|         description: 'Python version to use' | ||||
|         required: true | ||||
|         type: string | ||||
|         default: '3.10' | ||||
|         default: '3.11' | ||||
|       skip-pypuppeteer: | ||||
|         description: 'Skip PyPuppeteer (not supported in 3.11/3.12)' | ||||
|         required: false | ||||
| @@ -20,11 +20,11 @@ jobs: | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       # Mainly just for link/flake8 | ||||
|       - name: Set up Python ${{ env.PYTHON_VERSION }} | ||||
|         uses: actions/setup-python@v5 | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           python-version: ${{ env.PYTHON_VERSION }} | ||||
|  | ||||
| @@ -86,10 +86,10 @@ jobs: | ||||
|         run: | | ||||
|           # Playwright via Sockpuppetbrowser fetch | ||||
|           # tests/visualselector/test_fetch_data.py will do browser steps   | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|  | ||||
|  | ||||
|       - name: Playwright and SocketPuppetBrowser - Headers and requests | ||||
| @@ -172,13 +172,33 @@ jobs: | ||||
|           curl --retry-connrefused --retry 6  -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|  | ||||
|           # Check whether TRACE log is enabled. | ||||
|           # Also, check whether TRACE is came from STDERR | ||||
|           docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1 | ||||
|           # Also, check whether TRACE came from STDOUT | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1 | ||||
|           # Check whether DEBUG came from STDOUT | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 | ||||
|  | ||||
|           docker kill test-changedetectionio | ||||
|  | ||||
|       - name: Test HTTPS SSL mode | ||||
|         run: | | ||||
|           openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost" | ||||
|           docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it | ||||
|           # -k because its self-signed | ||||
|           curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid | ||||
|        | ||||
|           docker kill test-changedetectionio-ssl | ||||
|  | ||||
|       - name: Test IPv6 Mode | ||||
|         run: | | ||||
|           # IPv6 - :: bind to all interfaces inside container (like 0.0.0.0), ::1 would be localhost only | ||||
|           docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it on localhost | ||||
|           curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid | ||||
|           docker kill test-changedetectionio-ipv6 | ||||
|  | ||||
|       - name: Test changedetection.io SIGTERM and SIGINT signal shutdown | ||||
|         run: | | ||||
|            | ||||
|   | ||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -16,6 +16,7 @@ dist/ | ||||
| .env | ||||
| .venv/ | ||||
| venv/ | ||||
| .python-version | ||||
|  | ||||
| # IDEs | ||||
| .idea | ||||
|   | ||||
							
								
								
									
										9
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| repos: | ||||
|   - repo: https://github.com/astral-sh/ruff-pre-commit | ||||
|     rev: v0.11.2 | ||||
|     hooks: | ||||
|       # Lint (and apply safe fixes) | ||||
|       - id: ruff | ||||
|         args: [--fix] | ||||
|       # Format | ||||
|       - id: ruff-format | ||||
							
								
								
									
										48
									
								
								.ruff.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								.ruff.toml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| # Minimum supported version | ||||
| target-version = "py310" | ||||
|  | ||||
| # Formatting options | ||||
| line-length = 100 | ||||
| indent-width = 4 | ||||
|  | ||||
| exclude = [ | ||||
|     "__pycache__", | ||||
|     ".eggs", | ||||
|     ".git", | ||||
|     ".tox", | ||||
|     ".venv", | ||||
|     "*.egg-info", | ||||
|     "*.pyc", | ||||
| ] | ||||
|  | ||||
| [lint] | ||||
| # https://docs.astral.sh/ruff/rules/ | ||||
| select = [ | ||||
|     "B", # flake8-bugbear | ||||
|     "B9", | ||||
|     "C",  | ||||
|     "E", # pycodestyle | ||||
|     "F", # Pyflakes | ||||
|     "I", # isort | ||||
|     "N", # pep8-naming | ||||
|     "UP", # pyupgrade | ||||
|     "W", # pycodestyle | ||||
| ] | ||||
| ignore = [ | ||||
|     "B007", # unused-loop-control-variable | ||||
|     "B909", # loop-iterator-mutation | ||||
|     "E203", # whitespace-before-punctuation | ||||
|     "E266", # multiple-leading-hashes-for-block-comment | ||||
|     "E501", # line-too-long | ||||
|     "F403", # undefined-local-with-import-star | ||||
|     "N802", # invalid-function-name | ||||
|     "N806", # non-lowercase-variable-in-function | ||||
|     "N815", # mixed-case-variable-in-class-scope | ||||
| ] | ||||
|  | ||||
| [lint.mccabe] | ||||
| max-complexity = 12 | ||||
|  | ||||
| [format] | ||||
| indent-style = "space" | ||||
| quote-style = "preserve" | ||||
							
								
								
									
										43
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										43
									
								
								Dockerfile
									
									
									
									
									
								
							| @@ -1,14 +1,10 @@ | ||||
| # pip dependencies install stage | ||||
|  | ||||
| # @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py | ||||
| #        If you know how to fix it, please do! and test it for both 3.10 and 3.11 | ||||
|  | ||||
| ARG PYTHON_VERSION=3.11 | ||||
|  | ||||
| FROM python:${PYTHON_VERSION}-slim-bookworm AS builder | ||||
|  | ||||
| # See `cryptography` pin comment in requirements.txt | ||||
| ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 | ||||
|  | ||||
| RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     g++ \ | ||||
| @@ -19,6 +15,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     libssl-dev \ | ||||
|     libxslt-dev \ | ||||
|     make \ | ||||
|     patch \ | ||||
|     pkg-config \ | ||||
|     zlib1g-dev | ||||
|  | ||||
| RUN mkdir /install | ||||
| @@ -26,13 +24,32 @@ WORKDIR /install | ||||
|  | ||||
| COPY requirements.txt /requirements.txt | ||||
|  | ||||
| # --extra-index-url https://www.piwheels.org/simple  is for cryptography module to be prebuilt (or rustc etc needs to be installed) | ||||
| RUN pip install --extra-index-url https://www.piwheels.org/simple  --target=/dependencies -r /requirements.txt | ||||
| # Use cache mounts and multiple wheel sources for faster ARM builds | ||||
| ENV PIP_CACHE_DIR=/tmp/pip-cache | ||||
| # Help Rust find OpenSSL for cryptography package compilation on ARM | ||||
| ENV PKG_CONFIG_PATH="/usr/lib/pkgconfig:/usr/lib/arm-linux-gnueabihf/pkgconfig:/usr/lib/aarch64-linux-gnu/pkgconfig" | ||||
| ENV PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1 | ||||
| ENV OPENSSL_DIR="/usr" | ||||
| ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf" | ||||
| ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl" | ||||
| # Additional environment variables for cryptography Rust build | ||||
| ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1 | ||||
| RUN --mount=type=cache,target=/tmp/pip-cache \ | ||||
|     pip install \ | ||||
|     --extra-index-url https://www.piwheels.org/simple \ | ||||
|     --extra-index-url https://pypi.anaconda.org/ARM-software/simple \ | ||||
|     --cache-dir=/tmp/pip-cache \ | ||||
|     --target=/dependencies \ | ||||
|     -r /requirements.txt | ||||
|  | ||||
| # Playwright is an alternative to Selenium | ||||
| # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported) | ||||
| RUN pip install --target=/dependencies playwright~=1.48.0 \ | ||||
| RUN --mount=type=cache,target=/tmp/pip-cache \ | ||||
|     pip install \ | ||||
|     --cache-dir=/tmp/pip-cache \ | ||||
|     --target=/dependencies \ | ||||
|     playwright~=1.48.0 \ | ||||
|     || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." | ||||
|  | ||||
| # Final image stage | ||||
| @@ -45,6 +62,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     locales \ | ||||
|     # For pdftohtml | ||||
|     poppler-utils \ | ||||
|     # favicon type detection and other uses | ||||
|     file \ | ||||
|     zlib1g \ | ||||
|     && apt-get clean && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
| @@ -65,13 +84,21 @@ EXPOSE 5000 | ||||
|  | ||||
| # The actual flask app module | ||||
| COPY changedetectionio /app/changedetectionio | ||||
|  | ||||
| # Also for OpenAPI validation wrapper - needs the YML | ||||
| RUN [ ! -d "/app/docs" ] && mkdir /app/docs | ||||
| COPY docs/api-spec.yaml /app/docs/api-spec.yaml | ||||
|  | ||||
| # Starting wrapper | ||||
| COPY changedetection.py /app/changedetection.py | ||||
|  | ||||
| # Github Action test purpose(test-only.yml). | ||||
| # On production, it is effectively LOGGER_LEVEL=''. | ||||
| ARG LOGGER_LEVEL='' | ||||
| ENV LOGGER_LEVEL "$LOGGER_LEVEL" | ||||
| ENV LOGGER_LEVEL="$LOGGER_LEVEL" | ||||
|  | ||||
| # Default | ||||
| ENV LC_ALL=en_US.UTF-8 | ||||
|  | ||||
| WORKDIR /app | ||||
| CMD ["python", "./changedetection.py", "-d", "/datastore"] | ||||
|   | ||||
							
								
								
									
										2
									
								
								LICENSE
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								LICENSE
									
									
									
									
									
								
							| @@ -186,7 +186,7 @@ | ||||
|       same "printed page" as the copyright notice for easier | ||||
|       identification within third-party archives. | ||||
|  | ||||
|    Copyright [yyyy] [name of copyright owner] | ||||
|    Copyright 2025 Web Technologies s.r.o. | ||||
|  | ||||
|    Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|    you may not use this file except in compliance with the License. | ||||
|   | ||||
| @@ -1,13 +1,15 @@ | ||||
| recursive-include changedetectionio/api * | ||||
| recursive-include changedetectionio/apprise_plugin * | ||||
| recursive-include changedetectionio/blueprint * | ||||
| recursive-include changedetectionio/content_fetchers * | ||||
| recursive-include changedetectionio/conditions * | ||||
| recursive-include changedetectionio/content_fetchers * | ||||
| recursive-include changedetectionio/model * | ||||
| recursive-include changedetectionio/notification * | ||||
| recursive-include changedetectionio/processors * | ||||
| recursive-include changedetectionio/realtime * | ||||
| recursive-include changedetectionio/static * | ||||
| recursive-include changedetectionio/templates * | ||||
| recursive-include changedetectionio/tests * | ||||
| recursive-include changedetectionio/widgets * | ||||
| prune changedetectionio/static/package-lock.json | ||||
| prune changedetectionio/static/styles/node_modules | ||||
| prune changedetectionio/static/styles/package-lock.json | ||||
|   | ||||
| @@ -1,11 +1,21 @@ | ||||
| ## Web Site Change Detection, Monitoring and Notification. | ||||
| # Monitor website changes | ||||
|  | ||||
| Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more | ||||
| Detect WebPage Changes Automatically — Monitor Web Page Changes in Real Time | ||||
|  | ||||
| Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more. | ||||
|  | ||||
| Detect web page content changes and get instant alerts. | ||||
|  | ||||
|  | ||||
| [Changedetection.io is the best tool to monitor web-pages for changes](https://changedetection.io). Track website content changes and receive notifications via Discord, Email, Slack, Telegram and 90+ more. | ||||
|  | ||||
| Ideal for monitoring price changes, content edits, conditional changes and more. | ||||
|  | ||||
| [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes"  title="Self-hosted web page change monitoring, list of websites with changes"  />](https://changedetection.io) | ||||
|  | ||||
|  | ||||
| [**Don't have time? Let us host it for you! try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)  | ||||
| [**Don't have time? Try our extremely affordable subscription, use our proxies and support!**](https://changedetection.io)  | ||||
|  | ||||
|  | ||||
|  | ||||
| ### Target specific parts of the webpage using the Visual Selector tool. | ||||
|   | ||||
							
								
								
									
										28
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										28
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,11 +1,13 @@ | ||||
| ## Web Site Change Detection, Restock monitoring and notifications. | ||||
| # Detect Website Changes Automatically — Monitor Web Page Changes in Real Time | ||||
|  | ||||
| **_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._** | ||||
| Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more. | ||||
|  | ||||
| _Live your data-life pro-actively._  | ||||
| **Detect web page content changes and get instant alerts.**   | ||||
|  | ||||
| Ideal for monitoring price changes, content edits, conditional changes and more. | ||||
|  | ||||
|  | ||||
| [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web site page change monitoring"  title="Self-hosted web site page change monitoring"  />](https://changedetection.io?src=github) | ||||
| [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Web site page change monitoring"  title="Web site page change monitoring"  />](https://changedetection.io?src=github) | ||||
|  | ||||
| [![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md) | ||||
|  | ||||
| @@ -13,6 +15,7 @@ _Live your data-life pro-actively._ | ||||
|  | ||||
| [**Get started with website page change monitoring straight away. Don't have time? Try our $8.99/month subscription, use our proxies and support!**](https://changedetection.io) , _half the price of other website change monitoring services!_ | ||||
|  | ||||
|  | ||||
| - Chrome browser included. | ||||
| - Nothing to install, access via browser login after signup. | ||||
| - Super fast, no registration needed setup. | ||||
| @@ -89,7 +92,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W | ||||
| #### Key Features | ||||
|  | ||||
| - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions! | ||||
| - Target elements with xPath(1.0) and CSS Selectors, Easily monitor complex JSON with JSONPath or jq | ||||
| - Target elements with xPath 1 and xPath 2, CSS Selectors, Easily monitor complex JSON with JSONPath or jq | ||||
| - Switch between fast non-JS and Chrome JS based "fetchers" | ||||
| - Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums) | ||||
| - Easily specify how often a site should be checked | ||||
| @@ -99,12 +102,16 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W | ||||
| - Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration) | ||||
| - Send a screenshot with the notification when a change is detected in the web page | ||||
|  | ||||
| We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link. | ||||
|  | ||||
| [Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using; they offer Residential, ISP, Rotating and many other proxy types to suit your project.  | ||||
| We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services. Bright Data will match any first deposit up to $150 using our signup link. | ||||
|  | ||||
| Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/ | ||||
|  | ||||
| ### Conditional web page changes | ||||
|  | ||||
| Easily [configure conditional actions](https://changedetection.io/tutorial/conditional-actions-web-page-changes), for example, only trigger when a price is above or below a preset amount, or [when a web page includes (or does not include) a keyword](https://changedetection.io/tutorial/how-monitor-keywords-any-website) | ||||
|  | ||||
| <img src="./docs/web-page-change-conditions.png" style="max-width:80%;" alt="Conditional web page changes"  title="Conditional web page changes"  /> | ||||
|  | ||||
| ### Schedule web page watches in any timezone, limit by day of week and time. | ||||
|  | ||||
| Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours. | ||||
| @@ -273,7 +280,10 @@ Excel import is recommended - that way you can better organise tags/groups of we | ||||
|  | ||||
| ## API Support | ||||
|  | ||||
| Supports managing the website watch list [via our API](https://changedetection.io/docs/api_v1/index.html) | ||||
| Full REST API for programmatic management of watches, tags, notifications and more.  | ||||
|  | ||||
| - **[Interactive API Documentation](https://changedetection.io/docs/api_v1/index.html)** - Complete API reference with live testing | ||||
| - **[OpenAPI Specification](docs/api-spec.yaml)** - Generate SDKs for any programming language | ||||
|  | ||||
| ## Support us | ||||
|  | ||||
|   | ||||
| @@ -3,4 +3,6 @@ | ||||
| # Only exists for direct CLI usage | ||||
|  | ||||
| import changedetectionio | ||||
| changedetectionio.main() | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     changedetectionio.main() | ||||
|   | ||||
							
								
								
									
										98
									
								
								changedetectionio/PLUGIN_README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								changedetectionio/PLUGIN_README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| # Creating Plugins for changedetection.io | ||||
|  | ||||
| This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways. | ||||
|  | ||||
| ## Plugin Types | ||||
|  | ||||
| ### UI Stats Tab Plugins | ||||
|  | ||||
| These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch. | ||||
|  | ||||
| #### Creating a UI Stats Tab Plugin | ||||
|  | ||||
| 1. Create a Python file in a directory that will be loaded by the plugin system. | ||||
|  | ||||
| 2. Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook: | ||||
|  | ||||
| ```python | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add custom content to the stats tab""" | ||||
|     # Calculate or retrieve your stats | ||||
|     my_stat = calculate_something(watch) | ||||
|      | ||||
|     # Return HTML content as a string | ||||
|     html = f""" | ||||
|     <div class="my-plugin-stats"> | ||||
|         <h4>My Plugin Statistics</h4> | ||||
|         <p>My statistic: {my_stat}</p> | ||||
|     </div> | ||||
|     """ | ||||
|     return html | ||||
| ``` | ||||
|  | ||||
| 3. The HTML you return will be included in the Stats tab. | ||||
|  | ||||
| ## Plugin Loading | ||||
|  | ||||
| Plugins can be loaded from: | ||||
|  | ||||
| 1. Built-in plugin directories in the codebase | ||||
| 2. External packages using setuptools entry points | ||||
|  | ||||
| To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`. | ||||
|  | ||||
| ## Example Plugin | ||||
|  | ||||
| Here's a simple example of a plugin that adds a word count statistic to the Stats tab: | ||||
|  | ||||
| ```python | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| def count_words_in_history(watch): | ||||
|     """Count words in the latest snapshot""" | ||||
|     try: | ||||
|         if not watch.history.keys(): | ||||
|             return 0 | ||||
|              | ||||
|         latest_key = list(watch.history.keys())[-1] | ||||
|         latest_content = watch.get_history_snapshot(latest_key) | ||||
|         return len(latest_content.split()) | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error counting words: {str(e)}") | ||||
|         return 0 | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add word count to the Stats tab""" | ||||
|     word_count = count_words_in_history(watch) | ||||
|      | ||||
|     html = f""" | ||||
|     <div class="word-count-stats"> | ||||
|         <h4>Content Analysis</h4> | ||||
|         <table class="pure-table"> | ||||
|             <tbody> | ||||
|                 <tr> | ||||
|                     <td>Word count (latest snapshot)</td> | ||||
|                     <td>{word_count}</td> | ||||
|                 </tr> | ||||
|             </tbody> | ||||
|         </table> | ||||
|     </div> | ||||
|     """ | ||||
|     return html | ||||
| ``` | ||||
|  | ||||
| ## Testing Your Plugin | ||||
|  | ||||
| 1. Place your plugin in one of the directories scanned by the plugin system | ||||
| 2. Restart changedetection.io | ||||
| 3. Go to the Edit page of a watch and check the Stats tab to see your content | ||||
| @@ -2,24 +2,23 @@ | ||||
|  | ||||
| # Read more https://github.com/dgtlmoon/changedetection.io/wiki | ||||
|  | ||||
| __version__ = '0.49.9' | ||||
| __version__ = '0.50.13' | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from json.decoder import JSONDecodeError | ||||
| import os | ||||
| os.environ['EVENTLET_NO_GREENDNS'] = 'yes' | ||||
| import eventlet | ||||
| import eventlet.wsgi | ||||
| import getopt | ||||
| import platform | ||||
| import signal | ||||
| import socket | ||||
|  | ||||
| import sys | ||||
|  | ||||
| # Eventlet completely removed - using threading mode for SocketIO | ||||
| # This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts | ||||
| from changedetectionio import store | ||||
| from changedetectionio.flask_app import changedetection_app | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
| # Only global so we can access it in the signal handler | ||||
| app = None | ||||
| datastore = None | ||||
| @@ -29,16 +28,44 @@ def get_version(): | ||||
|  | ||||
| # Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown | ||||
| def sigshutdown_handler(_signo, _stack_frame): | ||||
|     global app | ||||
|     global datastore | ||||
|     name = signal.Signals(_signo).name | ||||
|     logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown') | ||||
|     datastore.sync_to_json() | ||||
|     logger.success('Sync JSON to disk complete.') | ||||
|     # This will throw a SystemExit exception, because eventlet.wsgi.server doesn't know how to deal with it. | ||||
|     # Solution: move to gevent or other server in the future (#2014) | ||||
|     datastore.stop_thread = True | ||||
|     logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated') | ||||
|      | ||||
|     # Set exit flag immediately to stop all loops | ||||
|     app.config.exit.set() | ||||
|     datastore.stop_thread = True | ||||
|      | ||||
|     # Shutdown workers and queues immediately | ||||
|     try: | ||||
|         from changedetectionio import worker_handler | ||||
|         worker_handler.shutdown_workers() | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error shutting down workers: {str(e)}") | ||||
|      | ||||
|     # Close janus queues properly | ||||
|     try: | ||||
|         from changedetectionio.flask_app import update_q, notification_q | ||||
|         update_q.close() | ||||
|         notification_q.close() | ||||
|         logger.debug("Janus queues closed successfully") | ||||
|     except Exception as e: | ||||
|         logger.critical(f"CRITICAL: Failed to close janus queues: {e}") | ||||
|      | ||||
|     # Shutdown socketio server fast | ||||
|     from changedetectionio.flask_app import socketio_server | ||||
|     if socketio_server and hasattr(socketio_server, 'shutdown'): | ||||
|         try: | ||||
|             socketio_server.shutdown() | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error shutting down Socket.IO server: {str(e)}") | ||||
|      | ||||
|     # Save data quickly | ||||
|     try: | ||||
|         datastore.sync_to_json() | ||||
|         logger.success('Fast sync to disk complete.') | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error syncing to disk: {str(e)}") | ||||
|      | ||||
|     sys.exit() | ||||
|  | ||||
| def main(): | ||||
| @@ -47,9 +74,8 @@ def main(): | ||||
|  | ||||
|     datastore_path = None | ||||
|     do_cleanup = False | ||||
|     host = '' | ||||
|     ipv6_enabled = False | ||||
|     port = os.environ.get('PORT') or 5000 | ||||
|     host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip() | ||||
|     port = int(os.environ.get('PORT', 5000)) | ||||
|     ssl_mode = False | ||||
|  | ||||
|     # On Windows, create and use a default path. | ||||
| @@ -90,10 +116,6 @@ def main(): | ||||
|         if opt == '-d': | ||||
|             datastore_path = arg | ||||
|  | ||||
|         if opt == '-6': | ||||
|             logger.success("Enabling IPv6 listen support") | ||||
|             ipv6_enabled = True | ||||
|  | ||||
|         # Cleanup (remove text files that aren't in the index) | ||||
|         if opt == '-c': | ||||
|             do_cleanup = True | ||||
| @@ -105,10 +127,24 @@ def main(): | ||||
|         if opt == '-l': | ||||
|             logger_level = int(arg) if arg.isdigit() else arg.upper() | ||||
|  | ||||
|  | ||||
|     logger.success(f"changedetection.io version {get_version()} starting.") | ||||
|     # Launch using SocketIO run method for proper integration (if enabled) | ||||
|     ssl_cert_file = os.getenv("SSL_CERT_FILE", 'cert.pem') | ||||
|     ssl_privkey_file = os.getenv("SSL_PRIVKEY_FILE", 'privkey.pem') | ||||
|     if os.getenv("SSL_CERT_FILE") and os.getenv("SSL_PRIVKEY_FILE"): | ||||
|         ssl_mode = True | ||||
|  | ||||
|     # SSL mode could have been set by -s too, therefore fall back to the default values | ||||
|     if ssl_mode: | ||||
|         if not os.path.isfile(ssl_cert_file) or not os.path.isfile(ssl_privkey_file): | ||||
|             logger.critical(f"Cannot start SSL/HTTPS mode, Please be sure that {ssl_cert_file}' and '{ssl_privkey_file}' exist in in {os.getcwd()}") | ||||
|             os._exit(2) | ||||
|  | ||||
|     # Without this, a logger will be duplicated | ||||
|     logger.remove() | ||||
|     try: | ||||
|         log_level_for_stdout = { 'DEBUG', 'SUCCESS' } | ||||
|         log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' } | ||||
|         logger.configure(handlers=[ | ||||
|             {"sink": sys.stdout, "level": logger_level, | ||||
|              "filter" : lambda record: record['level'].name in log_level_for_stdout}, | ||||
| @@ -145,8 +181,26 @@ def main(): | ||||
|  | ||||
|     app = changedetection_app(app_config, datastore) | ||||
|  | ||||
|     # Get the SocketIO instance from the Flask app (created in flask_app.py) | ||||
|     from changedetectionio.flask_app import socketio_server | ||||
|     global socketio | ||||
|     socketio = socketio_server | ||||
|  | ||||
|     signal.signal(signal.SIGTERM, sigshutdown_handler) | ||||
|     signal.signal(signal.SIGINT, sigshutdown_handler) | ||||
|      | ||||
|     # Custom signal handler for memory cleanup | ||||
|     def sigusr_clean_handler(_signo, _stack_frame): | ||||
|         from changedetectionio.gc_cleanup import memory_cleanup | ||||
|         logger.info('SIGUSR1 received: Running memory cleanup') | ||||
|         return memory_cleanup(app) | ||||
|  | ||||
|     # Register the SIGUSR1 signal handler | ||||
|     # Only register the signal handler if running on Linux | ||||
|     if platform.system() == "Linux": | ||||
|         signal.signal(signal.SIGUSR1, sigusr_clean_handler) | ||||
|     else: | ||||
|         logger.info("SIGUSR1 handler only registered on Linux, skipped.") | ||||
|  | ||||
|     # Go into cleanup mode | ||||
|     if do_cleanup: | ||||
| @@ -156,10 +210,11 @@ def main(): | ||||
|  | ||||
|  | ||||
|     @app.context_processor | ||||
|     def inject_version(): | ||||
|     def inject_template_globals(): | ||||
|         return dict(right_sticky="v{}".format(datastore.data['version_tag']), | ||||
|                     new_version_available=app.config['NEW_VERSION_AVAILABLE'], | ||||
|                     has_password=datastore.data['settings']['application']['password'] != False | ||||
|                     has_password=datastore.data['settings']['application']['password'] != False, | ||||
|                     socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True) | ||||
|                     ) | ||||
|  | ||||
|     # Monitored websites will not receive a Referer header when a user clicks on an outgoing link. | ||||
| @@ -183,15 +238,21 @@ def main(): | ||||
|         from werkzeug.middleware.proxy_fix import ProxyFix | ||||
|         app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1) | ||||
|  | ||||
|     s_type = socket.AF_INET6 if ipv6_enabled else socket.AF_INET | ||||
|  | ||||
|     if ssl_mode: | ||||
|         # @todo finalise SSL config, but this should get you in the right direction if you need it. | ||||
|         eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), s_type), | ||||
|                                                certfile='cert.pem', | ||||
|                                                keyfile='privkey.pem', | ||||
|                                                server_side=True), app) | ||||
|  | ||||
|     # SocketIO instance is already initialized in flask_app.py | ||||
|     if socketio_server: | ||||
|         if ssl_mode: | ||||
|             logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}") | ||||
|             socketio.run(app, host=host, port=int(port), debug=False, | ||||
|                          ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True) | ||||
|         else: | ||||
|             socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True) | ||||
|     else: | ||||
|         eventlet.wsgi.server(eventlet.listen((host, int(port)), s_type), app) | ||||
|  | ||||
|         # Run Flask app without Socket.IO if disabled | ||||
|         logger.info("Starting Flask app without Socket.IO server") | ||||
|         if ssl_mode: | ||||
|             logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}") | ||||
|             app.run(host=host, port=int(port), debug=False, | ||||
|                     ssl_context=(ssl_cert_file, ssl_privkey_file)) | ||||
|         else: | ||||
|             app.run(host=host, port=int(port), debug=False) | ||||
|   | ||||
| @@ -3,7 +3,7 @@ from changedetectionio.strtobool import strtobool | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request | ||||
| import validators | ||||
| from . import auth | ||||
| from . import auth, validate_openapi_request | ||||
|  | ||||
|  | ||||
| class Import(Resource): | ||||
| @@ -12,17 +12,9 @@ class Import(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('importWatches') | ||||
|     def post(self): | ||||
|         """ | ||||
|         @api {post} /api/v1/import Import a list of watched URLs | ||||
|         @apiDescription Accepts a line-feed separated list of URLs to import, additionally with ?tag_uuids=(tag  id), ?tag=(name), ?proxy={key}, ?dedupe=true (default true) one URL per line. | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/import --data-binary @list-of-sites.txt -H"x-api-key:8a111a21bc2f8f1dd9b9353bbd46049a" | ||||
|         @apiName Import | ||||
|         @apiGroup Watch | ||||
|         @apiSuccess (200) {List} OK List of watch UUIDs added | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
|         """Import a list of watched URLs.""" | ||||
|  | ||||
|         extras = {} | ||||
|  | ||||
|   | ||||
							
								
								
									
										108
									
								
								changedetectionio/api/Notifications.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										108
									
								
								changedetectionio/api/Notifications.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,108 @@ | ||||
| from flask_expects_json import expects_json | ||||
| from flask_restful import Resource, abort | ||||
| from flask import request | ||||
| from . import auth, validate_openapi_request | ||||
| from . import schema_create_notification_urls, schema_delete_notification_urls | ||||
|  | ||||
| class Notifications(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
|         # datastore is a black box dependency | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getNotifications') | ||||
|     def get(self): | ||||
|         """Return Notification URL List.""" | ||||
|  | ||||
|         notification_urls = self.datastore.data.get('settings', {}).get('application', {}).get('notification_urls', [])         | ||||
|  | ||||
|         return { | ||||
|                 'notification_urls': notification_urls, | ||||
|                }, 200 | ||||
|      | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('addNotifications') | ||||
|     @expects_json(schema_create_notification_urls) | ||||
|     def post(self): | ||||
|         """Create Notification URLs.""" | ||||
|  | ||||
|         json_data = request.get_json() | ||||
|         notification_urls = json_data.get("notification_urls", []) | ||||
|  | ||||
|         from wtforms import ValidationError | ||||
|         try: | ||||
|             validate_notification_urls(notification_urls) | ||||
|         except ValidationError as e: | ||||
|             return str(e), 400 | ||||
|  | ||||
|         added_urls = [] | ||||
|  | ||||
|         for url in notification_urls: | ||||
|             clean_url = url.strip() | ||||
|             added_url = self.datastore.add_notification_url(clean_url) | ||||
|             if added_url: | ||||
|                 added_urls.append(added_url) | ||||
|  | ||||
|         if not added_urls: | ||||
|             return "No valid notification URLs were added", 400 | ||||
|  | ||||
|         return {'notification_urls': added_urls}, 201 | ||||
|      | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('replaceNotifications') | ||||
|     @expects_json(schema_create_notification_urls) | ||||
|     def put(self): | ||||
|         """Replace Notification URLs.""" | ||||
|         json_data = request.get_json() | ||||
|         notification_urls = json_data.get("notification_urls", []) | ||||
|  | ||||
|         from wtforms import ValidationError | ||||
|         try: | ||||
|             validate_notification_urls(notification_urls) | ||||
|         except ValidationError as e: | ||||
|             return str(e), 400 | ||||
|          | ||||
|         if not isinstance(notification_urls, list): | ||||
|             return "Invalid input format", 400 | ||||
|  | ||||
|         clean_urls = [url.strip() for url in notification_urls if isinstance(url, str)] | ||||
|         self.datastore.data['settings']['application']['notification_urls'] = clean_urls | ||||
|         self.datastore.needs_write = True | ||||
|  | ||||
|         return {'notification_urls': clean_urls}, 200 | ||||
|          | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('deleteNotifications') | ||||
|     @expects_json(schema_delete_notification_urls) | ||||
|     def delete(self): | ||||
|         """Delete Notification URLs.""" | ||||
|  | ||||
|         json_data = request.get_json() | ||||
|         urls_to_delete = json_data.get("notification_urls", []) | ||||
|         if not isinstance(urls_to_delete, list): | ||||
|             abort(400, message="Expected a list of notification URLs.") | ||||
|  | ||||
|         notification_urls = self.datastore.data['settings']['application'].get('notification_urls', []) | ||||
|         deleted = [] | ||||
|  | ||||
|         for url in urls_to_delete: | ||||
|             clean_url = url.strip() | ||||
|             if clean_url in notification_urls: | ||||
|                 notification_urls.remove(clean_url) | ||||
|                 deleted.append(clean_url) | ||||
|  | ||||
|         if not deleted: | ||||
|             abort(400, message="No matching notification URLs found.") | ||||
|  | ||||
|         self.datastore.data['settings']['application']['notification_urls'] = notification_urls | ||||
|         self.datastore.needs_write = True | ||||
|  | ||||
|         return 'OK', 204 | ||||
|      | ||||
| def validate_notification_urls(notification_urls): | ||||
|     from changedetectionio.forms import ValidateAppRiseServers | ||||
|     validator = ValidateAppRiseServers() | ||||
|     class DummyForm: pass | ||||
|     dummy_form = DummyForm() | ||||
|     field = type("Field", (object,), {"data": notification_urls, "gettext": lambda self, x: x})() | ||||
|     validator(dummy_form, field) | ||||
| @@ -1,6 +1,6 @@ | ||||
| from flask_restful import Resource, abort | ||||
| from flask import request | ||||
| from . import auth | ||||
| from . import auth, validate_openapi_request | ||||
|  | ||||
| class Search(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
| @@ -8,21 +8,9 @@ class Search(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('searchWatches') | ||||
|     def get(self): | ||||
|         """ | ||||
|         @api {get} /api/v1/search Search for watches | ||||
|         @apiDescription Search watches by URL or title text | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl "http://localhost:5000/api/v1/search?q=https://example.com/page1" -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             curl "http://localhost:5000/api/v1/search?q=https://example.com/page1?tag=Favourites" -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             curl "http://localhost:5000/api/v1/search?q=https://example.com?partial=true" -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|         @apiName Search | ||||
|         @apiGroup Watch Management | ||||
|         @apiQuery {String} q Search query to match against watch URLs and titles | ||||
|         @apiQuery {String} [tag] Optional name of tag to limit results (name not UUID) | ||||
|         @apiQuery {String} [partial] Allow partial matching of URL query | ||||
|         @apiSuccess (200) {Object} JSON Object containing matched watches | ||||
|         """ | ||||
|         """Search for watches by URL or title text.""" | ||||
|         query = request.args.get('q', '').strip() | ||||
|         tag_limit = request.args.get('tag', '').strip() | ||||
|         from changedetectionio.strtobool import strtobool | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| from flask_restful import Resource | ||||
| from . import auth | ||||
| from . import auth, validate_openapi_request | ||||
|  | ||||
|  | ||||
| class SystemInfo(Resource): | ||||
| @@ -9,23 +9,9 @@ class SystemInfo(Resource): | ||||
|         self.update_q = kwargs['update_q'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getSystemInfo') | ||||
|     def get(self): | ||||
|         """ | ||||
|         @api {get} /api/v1/systeminfo Return system info | ||||
|         @apiDescription Return some info about the current system state | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             HTTP/1.0 200 | ||||
|             { | ||||
|                 'queue_size': 10 , | ||||
|                 'overdue_watches': ["watch-uuid-list"], | ||||
|                 'uptime': 38344.55, | ||||
|                 'watch_count': 800, | ||||
|                 'version': "0.40.1" | ||||
|             } | ||||
|         @apiName Get Info | ||||
|         @apiGroup System Information | ||||
|         """ | ||||
|         """Return system info.""" | ||||
|         import time | ||||
|         overdue_watches = [] | ||||
|  | ||||
|   | ||||
| @@ -1,39 +1,46 @@ | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio import worker_handler | ||||
| from flask_expects_json import expects_json | ||||
| from flask_restful import abort, Resource | ||||
|  | ||||
| from flask import request | ||||
| from . import auth | ||||
|  | ||||
| # Import schemas from __init__.py | ||||
| from . import schema_tag, schema_create_tag, schema_update_tag | ||||
| from . import schema_tag, schema_create_tag, schema_update_tag, validate_openapi_request | ||||
|  | ||||
|  | ||||
| class Tag(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
|         # datastore is a black box dependency | ||||
|         self.datastore = kwargs['datastore'] | ||||
|         self.update_q = kwargs['update_q'] | ||||
|  | ||||
|     # Get information about a single tag | ||||
|     # curl http://localhost:5000/api/v1/tag/<string:uuid> | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getTag') | ||||
|     def get(self, uuid): | ||||
|         """ | ||||
|         @api {get} /api/v1/tag/:uuid Single tag - get data or toggle notification muting. | ||||
|         @apiDescription Retrieve tag information and set notification_muted status | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             curl "http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=muted" -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|         @apiName Tag | ||||
|         @apiGroup Tag | ||||
|         @apiParam {uuid} uuid Tag unique ID. | ||||
|         @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state | ||||
|         @apiSuccess (200) {String} OK When muted operation OR full JSON object of the tag | ||||
|         @apiSuccess (200) {JSON} TagJSON JSON Full JSON object of the tag | ||||
|         """ | ||||
|         """Get data for a single tag/group, toggle notification muting, or recheck all.""" | ||||
|         from copy import deepcopy | ||||
|         tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid)) | ||||
|         if not tag: | ||||
|             abort(404, message=f'No tag exists with the UUID of {uuid}') | ||||
|  | ||||
|         if request.args.get('recheck'): | ||||
|             # Recheck all, including muted | ||||
|             # Get most overdue first | ||||
|             i=0 | ||||
|             for k in sorted(self.datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)): | ||||
|                 watch_uuid = k[0] | ||||
|                 watch = k[1] | ||||
|                 if not watch['paused'] and tag['uuid'] not in watch['tags']: | ||||
|                     continue | ||||
|                 worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})) | ||||
|                 i+=1 | ||||
|  | ||||
|             return f"OK, {i} watches queued", 200 | ||||
|  | ||||
|         if request.args.get('muted', '') == 'muted': | ||||
|             self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True | ||||
|             return "OK", 200 | ||||
| @@ -44,16 +51,9 @@ class Tag(Resource): | ||||
|         return tag | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('deleteTag') | ||||
|     def delete(self, uuid): | ||||
|         """ | ||||
|         @api {delete} /api/v1/tag/:uuid Delete a tag and remove it from all watches | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|         @apiParam {uuid} uuid Tag unique ID. | ||||
|         @apiName DeleteTag | ||||
|         @apiGroup Tag | ||||
|         @apiSuccess (200) {String} OK Was deleted | ||||
|         """ | ||||
|         """Delete a tag/group and remove it from all watches.""" | ||||
|         if not self.datastore.data['settings']['application']['tags'].get(uuid): | ||||
|             abort(400, message='No tag exists with the UUID of {}'.format(uuid)) | ||||
|  | ||||
| @@ -68,21 +68,10 @@ class Tag(Resource): | ||||
|         return 'OK', 204 | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('updateTag') | ||||
|     @expects_json(schema_update_tag) | ||||
|     def put(self, uuid): | ||||
|         """ | ||||
|         @api {put} /api/v1/tag/:uuid Update tag information | ||||
|         @apiExample {curl} Example usage: | ||||
|             Update (PUT) | ||||
|             curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"title": "New Tag Title"}' | ||||
|  | ||||
|         @apiDescription Updates an existing tag using JSON | ||||
|         @apiParam {uuid} uuid Tag unique ID. | ||||
|         @apiName UpdateTag | ||||
|         @apiGroup Tag | ||||
|         @apiSuccess (200) {String} OK Was updated | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
|         """Update tag information.""" | ||||
|         tag = self.datastore.data['settings']['application']['tags'].get(uuid) | ||||
|         if not tag: | ||||
|             abort(404, message='No tag exists with the UUID of {}'.format(uuid)) | ||||
| @@ -94,17 +83,10 @@ class Tag(Resource): | ||||
|  | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('createTag') | ||||
|     # Only cares for {'title': 'xxxx'} | ||||
|     def post(self): | ||||
|         """ | ||||
|         @api {post} /api/v1/watch Create a single tag | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"name": "Work related"}' | ||||
|         @apiName Create | ||||
|         @apiGroup Tag | ||||
|         @apiSuccess (200) {String} OK Was created | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
|         """Create a single tag/group.""" | ||||
|  | ||||
|         json_data = request.get_json() | ||||
|         title = json_data.get("title",'').strip() | ||||
| @@ -122,28 +104,9 @@ class Tags(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('listTags') | ||||
|     def get(self): | ||||
|         """ | ||||
|         @api {get} /api/v1/tags List tags | ||||
|         @apiDescription Return list of available tags | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/tags -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             { | ||||
|                 "cc0cfffa-f449-477b-83ea-0caafd1dc091": { | ||||
|                     "title": "Tech News", | ||||
|                     "notification_muted": false, | ||||
|                     "date_created": 1677103794 | ||||
|                 }, | ||||
|                 "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": { | ||||
|                     "title": "Shopping", | ||||
|                     "notification_muted": true, | ||||
|                     "date_created": 1676662819 | ||||
|                 } | ||||
|             } | ||||
|         @apiName ListTags | ||||
|         @apiGroup Tag Management | ||||
|         @apiSuccess (200) {String} OK JSON dict | ||||
|         """ | ||||
|         """List tags/groups.""" | ||||
|         result = {} | ||||
|         for uuid, tag in self.datastore.data['settings']['application']['tags'].items(): | ||||
|             result[uuid] = { | ||||
|   | ||||
| @@ -3,14 +3,15 @@ from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from flask_expects_json import expects_json | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio import worker_handler | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request, make_response | ||||
| from flask import request, make_response, send_from_directory | ||||
| import validators | ||||
| from . import auth | ||||
| import copy | ||||
|  | ||||
| # Import schemas from __init__.py | ||||
| from . import schema, schema_create_watch, schema_update_watch | ||||
| from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request | ||||
|  | ||||
|  | ||||
| class Watch(Resource): | ||||
| @@ -24,30 +25,16 @@ class Watch(Resource): | ||||
|     # @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK" | ||||
|     # ?recheck=true | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getWatch') | ||||
|     def get(self, uuid): | ||||
|         """ | ||||
|         @api {get} /api/v1/watch/:uuid Single watch - get data, recheck, pause, mute. | ||||
|         @apiDescription Retrieve watch information and set muted/paused status | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091  -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             curl "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=unmuted"  -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             curl "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?paused=unpaused"  -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|         @apiName Watch | ||||
|         @apiGroup Watch | ||||
|         @apiParam {uuid} uuid Watch unique ID. | ||||
|         @apiQuery {Boolean} [recheck] Recheck this watch `recheck=1` | ||||
|         @apiQuery {String} [paused] =`paused` or =`unpaused` , Sets the PAUSED state | ||||
|         @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state | ||||
|         @apiSuccess (200) {String} OK When paused/muted/recheck operation OR full JSON object of the watch | ||||
|         @apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch | ||||
|         """ | ||||
|         """Get information about a single watch, recheck, pause, or mute.""" | ||||
|         from copy import deepcopy | ||||
|         watch = deepcopy(self.datastore.data['watching'].get(uuid)) | ||||
|         if not watch: | ||||
|             abort(404, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
|  | ||||
|         if request.args.get('recheck'): | ||||
|             self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             return "OK", 200 | ||||
|         if request.args.get('paused', '') == 'paused': | ||||
|             self.datastore.data['watching'].get(uuid).pause() | ||||
| @@ -71,16 +58,9 @@ class Watch(Resource): | ||||
|         return watch | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('deleteWatch') | ||||
|     def delete(self, uuid): | ||||
|         """ | ||||
|         @api {delete} /api/v1/watch/:uuid Delete a watch and related history | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|         @apiParam {uuid} uuid Watch unique ID. | ||||
|         @apiName Delete | ||||
|         @apiGroup Watch | ||||
|         @apiSuccess (200) {String} OK Was deleted | ||||
|         """ | ||||
|         """Delete a watch and related history.""" | ||||
|         if not self.datastore.data['watching'].get(uuid): | ||||
|             abort(400, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
|  | ||||
| @@ -88,21 +68,10 @@ class Watch(Resource): | ||||
|         return 'OK', 204 | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('updateWatch') | ||||
|     @expects_json(schema_update_watch) | ||||
|     def put(self, uuid): | ||||
|         """ | ||||
|         @api {put} /api/v1/watch/:uuid Update watch information | ||||
|         @apiExample {curl} Example usage: | ||||
|             Update (PUT) | ||||
|             curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "new list"}' | ||||
|  | ||||
|         @apiDescription Updates an existing watch using JSON, accepts the same structure as returned in <a href="#api-Watch-Watch">get single watch information</a> | ||||
|         @apiParam {uuid} uuid Watch unique ID. | ||||
|         @apiName Update a watch | ||||
|         @apiGroup Watch | ||||
|         @apiSuccess (200) {String} OK Was updated | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
|         """Update watch information.""" | ||||
|         watch = self.datastore.data['watching'].get(uuid) | ||||
|         if not watch: | ||||
|             abort(404, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
| @@ -125,22 +94,9 @@ class WatchHistory(Resource): | ||||
|     # Get a list of available history for a watch by UUID | ||||
|     # curl http://localhost:5000/api/v1/watch/<string:uuid>/history | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getWatchHistory') | ||||
|     def get(self, uuid): | ||||
|         """ | ||||
|         @api {get} /api/v1/watch/<string:uuid>/history Get a list of all historical snapshots available for a watch | ||||
|         @apiDescription Requires `uuid`, returns list | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" | ||||
|             { | ||||
|                 "1676649279": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/cb7e9be8258368262246910e6a2a4c30.txt", | ||||
|                 "1677092785": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/e20db368d6fc633e34f559ff67bb4044.txt", | ||||
|                 "1677103794": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/02efdd37dacdae96554a8cc85dc9c945.txt" | ||||
|             } | ||||
|         @apiName Get list of available stored snapshots for watch | ||||
|         @apiGroup Watch History | ||||
|         @apiSuccess (200) {String} OK | ||||
|         @apiSuccess (404) {String} ERR Not found | ||||
|         """ | ||||
|         """Get a list of all historical snapshots available for a watch.""" | ||||
|         watch = self.datastore.data['watching'].get(uuid) | ||||
|         if not watch: | ||||
|             abort(404, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
| @@ -153,18 +109,9 @@ class WatchSingleHistory(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getWatchSnapshot') | ||||
|     def get(self, uuid, timestamp): | ||||
|         """ | ||||
|         @api {get} /api/v1/watch/<string:uuid>/history/<int:timestamp> Get single snapshot from watch | ||||
|         @apiDescription Requires watch `uuid` and `timestamp`. `timestamp` of "`latest`" for latest available snapshot, or <a href="#api-Watch_History-Get_list_of_available_stored_snapshots_for_watch">use the list returned here</a> | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" | ||||
|         @apiName Get single snapshot content | ||||
|         @apiGroup Watch History | ||||
|         @apiParam {String} [html]       Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp) | ||||
|         @apiSuccess (200) {String} OK | ||||
|         @apiSuccess (404) {String} ERR Not found | ||||
|         """ | ||||
|         """Get single snapshot from watch.""" | ||||
|         watch = self.datastore.data['watching'].get(uuid) | ||||
|         if not watch: | ||||
|             abort(404, message=f"No watch exists with the UUID of {uuid}") | ||||
| @@ -190,6 +137,39 @@ class WatchSingleHistory(Resource): | ||||
|  | ||||
|         return response | ||||
|  | ||||
| class WatchFavicon(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
|         # datastore is a black box dependency | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getWatchFavicon') | ||||
|     def get(self, uuid): | ||||
|         """Get favicon for a watch.""" | ||||
|         watch = self.datastore.data['watching'].get(uuid) | ||||
|         if not watch: | ||||
|             abort(404, message=f"No watch exists with the UUID of {uuid}") | ||||
|  | ||||
|         favicon_filename = watch.get_favicon_filename() | ||||
|         if favicon_filename: | ||||
|             try: | ||||
|                 import magic | ||||
|                 mime = magic.from_file( | ||||
|                     os.path.join(watch.watch_data_dir, favicon_filename), | ||||
|                     mime=True | ||||
|                 ) | ||||
|             except ImportError: | ||||
|                 # Fallback, no python-magic | ||||
|                 import mimetypes | ||||
|                 mime, encoding = mimetypes.guess_type(favicon_filename) | ||||
|  | ||||
|             response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename)) | ||||
|             response.headers['Content-type'] = mime | ||||
|             response.headers['Cache-Control'] = 'max-age=300, must-revalidate'  # Cache for 5 minutes, then revalidate | ||||
|             return response | ||||
|  | ||||
|         abort(404, message=f'No Favicon available for {uuid}') | ||||
|  | ||||
|  | ||||
| class CreateWatch(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
| @@ -198,18 +178,10 @@ class CreateWatch(Resource): | ||||
|         self.update_q = kwargs['update_q'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('createWatch') | ||||
|     @expects_json(schema_create_watch) | ||||
|     def post(self): | ||||
|         """ | ||||
|         @api {post} /api/v1/watch Create a single watch | ||||
|         @apiDescription Requires atleast `url` set, can accept the same structure as <a href="#api-Watch-Watch">get single watch information</a> to create. | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "nice list"}' | ||||
|         @apiName Create | ||||
|         @apiGroup Watch | ||||
|         @apiSuccess (200) {String} OK Was created | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
|         """Create a single watch.""" | ||||
|  | ||||
|         json_data = request.get_json() | ||||
|         url = json_data['url'].strip() | ||||
| @@ -236,41 +208,15 @@ class CreateWatch(Resource): | ||||
|  | ||||
|         new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags) | ||||
|         if new_uuid: | ||||
|             self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) | ||||
|             worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) | ||||
|             return {'uuid': new_uuid}, 201 | ||||
|         else: | ||||
|             return "Invalid or unsupported URL", 400 | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('listWatches') | ||||
|     def get(self): | ||||
|         """ | ||||
|         @api {get} /api/v1/watch List watches | ||||
|         @apiDescription Return concise list of available watches and some very basic info | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             { | ||||
|                 "6a4b7d5c-fee4-4616-9f43-4ac97046b595": { | ||||
|                     "last_changed": 1677103794, | ||||
|                     "last_checked": 1677103794, | ||||
|                     "last_error": false, | ||||
|                     "title": "", | ||||
|                     "url": "http://www.quotationspage.com/random.php" | ||||
|                 }, | ||||
|                 "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": { | ||||
|                     "last_changed": 0, | ||||
|                     "last_checked": 1676662819, | ||||
|                     "last_error": false, | ||||
|                     "title": "QuickLook", | ||||
|                     "url": "https://github.com/QL-Win/QuickLook/tags" | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|         @apiParam {String} [recheck_all]       Optional Set to =1 to force recheck of all watches | ||||
|         @apiParam {String} [tag]               Optional name of tag to limit results | ||||
|         @apiName ListWatches | ||||
|         @apiGroup Watch Management | ||||
|         @apiSuccess (200) {String} OK JSON dict | ||||
|         """ | ||||
|         """List watches.""" | ||||
|         list = {} | ||||
|  | ||||
|         tag_limit = request.args.get('tag', '').lower() | ||||
| @@ -291,7 +237,7 @@ class CreateWatch(Resource): | ||||
|  | ||||
|         if request.args.get('recheck_all'): | ||||
|             for uuid in self.datastore.data['watching'].keys(): | ||||
|                 self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|                 worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             return {'status': "OK"}, 200 | ||||
|  | ||||
|         return list, 200 | ||||
| @@ -1,4 +1,10 @@ | ||||
| import copy | ||||
| import yaml | ||||
| import functools | ||||
| from flask import request, abort | ||||
| from loguru import logger | ||||
| from openapi_core import OpenAPI | ||||
| from openapi_core.contrib.flask import FlaskOpenAPIRequest | ||||
| from . import api_schema | ||||
| from ..model import watch_base | ||||
|  | ||||
| @@ -8,6 +14,7 @@ schema = api_schema.build_watch_json_schema(watch_base_config) | ||||
|  | ||||
| schema_create_watch = copy.deepcopy(schema) | ||||
| schema_create_watch['required'] = ['url'] | ||||
| del schema_create_watch['properties']['last_viewed'] | ||||
|  | ||||
| schema_update_watch = copy.deepcopy(schema) | ||||
| schema_update_watch['additionalProperties'] = False | ||||
| @@ -19,8 +26,53 @@ schema_create_tag['required'] = ['title'] | ||||
| schema_update_tag = copy.deepcopy(schema_tag) | ||||
| schema_update_tag['additionalProperties'] = False | ||||
|  | ||||
| schema_notification_urls = copy.deepcopy(schema) | ||||
| schema_create_notification_urls = copy.deepcopy(schema_notification_urls) | ||||
| schema_create_notification_urls['required'] = ['notification_urls'] | ||||
| schema_delete_notification_urls = copy.deepcopy(schema_notification_urls) | ||||
| schema_delete_notification_urls['required'] = ['notification_urls'] | ||||
|  | ||||
| @functools.cache | ||||
| def get_openapi_spec(): | ||||
|     import os | ||||
|     spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml') | ||||
|     with open(spec_path, 'r') as f: | ||||
|         spec_dict = yaml.safe_load(f) | ||||
|     _openapi_spec = OpenAPI.from_dict(spec_dict) | ||||
|     return _openapi_spec | ||||
|  | ||||
| def validate_openapi_request(operation_id): | ||||
|     """Decorator to validate incoming requests against OpenAPI spec.""" | ||||
|     def decorator(f): | ||||
|         @functools.wraps(f) | ||||
|         def wrapper(*args, **kwargs): | ||||
|             try: | ||||
|                 # Skip OpenAPI validation for GET requests since they don't have request bodies | ||||
|                 if request.method.upper() != 'GET': | ||||
|                     spec = get_openapi_spec() | ||||
|                     openapi_request = FlaskOpenAPIRequest(request) | ||||
|                     result = spec.unmarshal_request(openapi_request) | ||||
|                     if result.errors: | ||||
|                         from werkzeug.exceptions import BadRequest | ||||
|                         error_details = [] | ||||
|                         for error in result.errors: | ||||
|                             error_details.append(str(error)) | ||||
|                         raise BadRequest(f"OpenAPI validation failed: {error_details}") | ||||
|             except BadRequest: | ||||
|                 # Re-raise BadRequest exceptions (validation failures) | ||||
|                 raise | ||||
|             except Exception as e: | ||||
|                 # If OpenAPI spec loading fails, log but don't break existing functionality | ||||
|                 logger.critical(f"OpenAPI validation warning for {operation_id}: {e}") | ||||
|                 abort(500) | ||||
|             return f(*args, **kwargs) | ||||
|         return wrapper | ||||
|     return decorator | ||||
|  | ||||
| # Import all API resources | ||||
| from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch | ||||
| from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch, WatchFavicon | ||||
| from .Tags import Tags, Tag | ||||
| from .Import import Import | ||||
| from .SystemInfo import SystemInfo | ||||
| from .Notifications import Notifications | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,7 @@ | ||||
| # Responsible for building the storage dict into a set of rules ("JSON Schema") acceptable via the API | ||||
| # Probably other ways to solve this when the backend switches to some ORM | ||||
| from changedetectionio.notification import valid_notification_formats | ||||
|  | ||||
|  | ||||
| def build_time_between_check_json_schema(): | ||||
|     # Setup time between check schema | ||||
| @@ -76,6 +78,13 @@ def build_watch_json_schema(d): | ||||
|               ]: | ||||
|         schema['properties'][v]['anyOf'].append({'type': 'string', "maxLength": 5000}) | ||||
|  | ||||
|     for v in ['last_viewed']: | ||||
|         schema['properties'][v] = { | ||||
|             "type": "integer", | ||||
|             "description": "Unix timestamp in seconds of the last time the watch was viewed.", | ||||
|             "minimum": 0 | ||||
|         } | ||||
|  | ||||
|     # None or Boolean | ||||
|     schema['properties']['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'}) | ||||
|  | ||||
| @@ -98,8 +107,6 @@ def build_watch_json_schema(d): | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     from changedetectionio.notification import valid_notification_formats | ||||
|  | ||||
|     schema['properties']['notification_format'] = {'type': 'string', | ||||
|                                                    'enum': list(valid_notification_formats.keys()) | ||||
|                                                    } | ||||
|   | ||||
							
								
								
									
										465
									
								
								changedetectionio/async_update_worker.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										465
									
								
								changedetectionio/async_update_worker.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,465 @@ | ||||
| from .processors.exceptions import ProcessorException | ||||
| import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions | ||||
| from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse | ||||
| from changedetectionio import html_tools | ||||
| from changedetectionio.flask_app import watch_check_update | ||||
|  | ||||
| import asyncio | ||||
| import importlib | ||||
| import os | ||||
| import queue | ||||
| import time | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| # Async version of update_worker | ||||
| # Processes jobs from AsyncSignalPriorityQueue instead of threaded queue | ||||
|  | ||||
| async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|     """ | ||||
|     Async worker function that processes watch check jobs from the queue. | ||||
|      | ||||
|     Args: | ||||
|         worker_id: Unique identifier for this worker | ||||
|         q: AsyncSignalPriorityQueue containing jobs to process | ||||
|         notification_q: Standard queue for notifications | ||||
|         app: Flask application instance | ||||
|         datastore: Application datastore | ||||
|     """ | ||||
|     # Set a descriptive name for this task | ||||
|     task = asyncio.current_task() | ||||
|     if task: | ||||
|         task.set_name(f"async-worker-{worker_id}") | ||||
|      | ||||
|     logger.info(f"Starting async worker {worker_id}") | ||||
|      | ||||
|     while not app.config.exit.is_set(): | ||||
|         update_handler = None | ||||
|         watch = None | ||||
|  | ||||
|         try: | ||||
|             # Use native janus async interface - no threads needed! | ||||
|             queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0) | ||||
|              | ||||
|         except asyncio.TimeoutError: | ||||
|             # No jobs available, continue loop | ||||
|             continue | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}") | ||||
|              | ||||
|             # Log queue health for debugging | ||||
|             try: | ||||
|                 queue_size = q.qsize() | ||||
|                 is_empty = q.empty() | ||||
|                 logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}") | ||||
|             except Exception as health_e: | ||||
|                 logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}") | ||||
|              | ||||
|             await asyncio.sleep(0.1) | ||||
|             continue | ||||
|          | ||||
|         uuid = queued_item_data.item.get('uuid') | ||||
|         fetch_start_time = round(time.time()) | ||||
|          | ||||
|         # Mark this UUID as being processed | ||||
|         from changedetectionio import worker_handler | ||||
|         worker_handler.set_uuid_processing(uuid, processing=True) | ||||
|          | ||||
|         try: | ||||
|             if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'): | ||||
|                 changed_detected = False | ||||
|                 contents = b'' | ||||
|                 process_changedetection_results = True | ||||
|                 update_obj = {} | ||||
|  | ||||
|                 # Clear last errors | ||||
|                 datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None | ||||
|                 datastore.data['watching'][uuid]['last_checked'] = fetch_start_time | ||||
|  | ||||
|                 watch = datastore.data['watching'].get(uuid) | ||||
|  | ||||
|                 logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}") | ||||
|  | ||||
|                 try: | ||||
|                     watch_check_update.send(watch_uuid=uuid) | ||||
|  | ||||
|                     # Processor is what we are using for detecting the "Change" | ||||
|                     processor = watch.get('processor', 'text_json_diff') | ||||
|  | ||||
|                     # Init a new 'difference_detection_processor' | ||||
|                     processor_module_name = f"changedetectionio.processors.{processor}.processor" | ||||
|                     try: | ||||
|                         processor_module = importlib.import_module(processor_module_name) | ||||
|                     except ModuleNotFoundError as e: | ||||
|                         print(f"Processor module '{processor}' not found.") | ||||
|                         raise e | ||||
|  | ||||
|                     update_handler = processor_module.perform_site_check(datastore=datastore, | ||||
|                                                                          watch_uuid=uuid) | ||||
|  | ||||
|                     # All fetchers are now async, so call directly | ||||
|                     await update_handler.call_browser() | ||||
|  | ||||
|                     # Run change detection (this is synchronous) | ||||
|                     changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch) | ||||
|  | ||||
|                 except PermissionError as e: | ||||
|                     logger.critical(f"File permission error updating file, watch: {uuid}") | ||||
|                     logger.critical(str(e)) | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except ProcessorException as e: | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot) | ||||
|                     if e.xpath_data: | ||||
|                         watch.save_xpath_data(data=e.xpath_data) | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message}) | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except content_fetchers_exceptions.ReplyWithContentButNoText as e: | ||||
|                     extra_help = "" | ||||
|                     if e.has_filters: | ||||
|                         has_img = html_tools.include_filters(include_filters='img', | ||||
|                                                              html_content=e.html_content) | ||||
|                         if has_img: | ||||
|                             extra_help = ", it's possible that the filters you have give an empty result or contain only an image." | ||||
|                         else: | ||||
|                             extra_help = ", it's possible that the filters were found, but contained no usable text." | ||||
|  | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={ | ||||
|                         'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}" | ||||
|                     }) | ||||
|  | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot, as_error=True) | ||||
|  | ||||
|                     if e.xpath_data: | ||||
|                         watch.save_xpath_data(data=e.xpath_data) | ||||
|                          | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except content_fetchers_exceptions.Non200ErrorCodeReceived as e: | ||||
|                     if e.status_code == 403: | ||||
|                         err_text = "Error - 403 (Access denied) received" | ||||
|                     elif e.status_code == 404: | ||||
|                         err_text = "Error - 404 (Page not found) received" | ||||
|                     elif e.status_code == 407: | ||||
|                         err_text = "Error - 407 (Proxy authentication required) received, did you need a username and password for the proxy?" | ||||
|                     elif e.status_code == 500: | ||||
|                         err_text = "Error - 500 (Internal server error) received from the web site" | ||||
|                     else: | ||||
|                         extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else '' | ||||
|                         err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}" | ||||
|  | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot, as_error=True) | ||||
|                     if e.xpath_data: | ||||
|                         watch.save_xpath_data(data=e.xpath_data, as_error=True) | ||||
|                     if e.page_text: | ||||
|                         watch.save_error_text(contents=e.page_text) | ||||
|  | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except FilterNotFoundInResponse as e: | ||||
|                     if not datastore.data['watching'].get(uuid): | ||||
|                         continue | ||||
|  | ||||
|                     err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary." | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) | ||||
|  | ||||
|                     # Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot) | ||||
|  | ||||
|                     if e.xpath_data: | ||||
|                         watch.save_xpath_data(data=e.xpath_data) | ||||
|  | ||||
|                     # Only when enabled, send the notification | ||||
|                     if watch.get('filter_failure_notification_send', False): | ||||
|                         c = watch.get('consecutive_filter_failures', 0) | ||||
|                         c += 1 | ||||
|                         # Send notification if we reached the threshold? | ||||
|                         threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) | ||||
|                         logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}") | ||||
|                         if c >= threshold: | ||||
|                             if not watch.get('notification_muted'): | ||||
|                                 logger.debug(f"Sending filter failed notification for {uuid}") | ||||
|                                 await send_filter_failure_notification(uuid, notification_q, datastore) | ||||
|                             c = 0 | ||||
|                             logger.debug(f"Reset filter failure count back to zero") | ||||
|  | ||||
|                         datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c}) | ||||
|                     else: | ||||
|                         logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping") | ||||
|  | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e: | ||||
|                     # Yes fine, so nothing todo, don't continue to process. | ||||
|                     process_changedetection_results = False | ||||
|                     changed_detected = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.BrowserConnectError as e: | ||||
|                     datastore.update_watch(uuid=uuid, | ||||
|                                          update_obj={'last_error': e.msg}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.BrowserFetchTimedOut as e: | ||||
|                     datastore.update_watch(uuid=uuid, | ||||
|                                          update_obj={'last_error': e.msg}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.BrowserStepsStepException as e: | ||||
|                     if not datastore.data['watching'].get(uuid): | ||||
|                         continue | ||||
|  | ||||
|                     error_step = e.step_n + 1 | ||||
|                     from playwright._impl._errors import TimeoutError, Error | ||||
|  | ||||
|                     # Generally enough info for TimeoutError (couldnt locate the element after default seconds) | ||||
|                     err_text = f"Browser step at position {error_step} could not run, check the watch, add a delay if necessary, view Browser Steps to see screenshot at that step." | ||||
|  | ||||
|                     if e.original_e.name == "TimeoutError": | ||||
|                         # Just the first line is enough, the rest is the stack trace | ||||
|                         err_text += " Could not find the target." | ||||
|                     else: | ||||
|                         # Other Error, more info is good. | ||||
|                         err_text += " " + str(e.original_e).splitlines()[0] | ||||
|  | ||||
|                     logger.debug(f"BrowserSteps exception at step {error_step} {str(e.original_e)}") | ||||
|  | ||||
|                     datastore.update_watch(uuid=uuid, | ||||
|                                          update_obj={'last_error': err_text, | ||||
|                                                    'browser_steps_last_error_step': error_step}) | ||||
|  | ||||
|                     if watch.get('filter_failure_notification_send', False): | ||||
|                         c = watch.get('consecutive_filter_failures', 0) | ||||
|                         c += 1 | ||||
|                         # Send notification if we reached the threshold? | ||||
|                         threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0) | ||||
|                         logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}") | ||||
|                         if threshold > 0 and c >= threshold: | ||||
|                             if not watch.get('notification_muted'): | ||||
|                                 await send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n, notification_q=notification_q, datastore=datastore) | ||||
|                             c = 0 | ||||
|  | ||||
|                         datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c}) | ||||
|  | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 except content_fetchers_exceptions.EmptyReply as e: | ||||
|                     # Some kind of custom to-str handler in the exception handler that does this? | ||||
|                     err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code) | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                 'last_check_status': e.status_code}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.ScreenshotUnavailable as e: | ||||
|                     err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'" | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                 'last_check_status': e.status_code}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.JSActionExceptions as e: | ||||
|                     err_text = "Error running JS Actions - Page request - "+e.message | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot, as_error=True) | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                 'last_check_status': e.status_code}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.PageUnloadable as e: | ||||
|                     err_text = "Page request from server didnt respond correctly" | ||||
|                     if e.message: | ||||
|                         err_text = "{} - {}".format(err_text, e.message) | ||||
|  | ||||
|                     if e.screenshot: | ||||
|                         watch.save_screenshot(screenshot=e.screenshot, as_error=True) | ||||
|  | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                 'last_check_status': e.status_code, | ||||
|                                                                 'has_ldjson_price_data': None}) | ||||
|                     process_changedetection_results = False | ||||
|                      | ||||
|                 except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e: | ||||
|                     err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher." | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) | ||||
|                     process_changedetection_results = False | ||||
|                     logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}") | ||||
|  | ||||
|                 except Exception as e: | ||||
|                     logger.error(f"Worker {worker_id} exception processing watch UUID: {uuid}") | ||||
|                     logger.error(str(e)) | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)}) | ||||
|                     process_changedetection_results = False | ||||
|  | ||||
|                 else: | ||||
|                     if not datastore.data['watching'].get(uuid): | ||||
|                         continue | ||||
|  | ||||
|                     update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|  | ||||
|                     if not watch.get('ignore_status_codes'): | ||||
|                         update_obj['consecutive_filter_failures'] = 0 | ||||
|  | ||||
|                     update_obj['last_error'] = False | ||||
|                     cleanup_error_artifacts(uuid, datastore) | ||||
|  | ||||
|                 if not datastore.data['watching'].get(uuid): | ||||
|                     continue | ||||
|  | ||||
|                 if process_changedetection_results: | ||||
|                     try: | ||||
|                         datastore.update_watch(uuid=uuid, update_obj=update_obj) | ||||
|  | ||||
|                         if changed_detected or not watch.history_n: | ||||
|                             if update_handler.screenshot: | ||||
|                                 watch.save_screenshot(screenshot=update_handler.screenshot) | ||||
|  | ||||
|                             if update_handler.xpath_data: | ||||
|                                 watch.save_xpath_data(data=update_handler.xpath_data) | ||||
|  | ||||
|                             # Ensure unique timestamp for history | ||||
|                             if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key): | ||||
|                                 logger.warning(f"Timestamp {fetch_start_time} already exists, waiting 1 seconds") | ||||
|                                 fetch_start_time += 1 | ||||
|                                 await asyncio.sleep(1) | ||||
|  | ||||
|                             watch.save_history_text(contents=contents, | ||||
|                                                     timestamp=int(fetch_start_time), | ||||
|                                                     snapshot_id=update_obj.get('previous_md5', 'none')) | ||||
|  | ||||
|                             empty_pages_are_a_change = datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|                             if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change): | ||||
|                                 watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time)) | ||||
|  | ||||
|                             # Send notifications on second+ check | ||||
|                             if watch.history_n >= 2: | ||||
|                                 logger.info(f"Change detected in UUID {uuid} - {watch['url']}") | ||||
|                                 if not watch.get('notification_muted'): | ||||
|                                     await send_content_changed_notification(uuid, notification_q, datastore) | ||||
|  | ||||
|                     except Exception as e: | ||||
|                         logger.critical(f"Worker {worker_id} exception in process_changedetection_results") | ||||
|                         logger.critical(str(e)) | ||||
|                         datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) | ||||
|  | ||||
|                 # Always record attempt count | ||||
|                 count = watch.get('check_count', 0) + 1 | ||||
|  | ||||
|                 # Always record page title (used in notifications, and can change even when the content is the same) | ||||
|                 try: | ||||
|                     page_title = html_tools.extract_title(data=update_handler.fetcher.content) | ||||
|                     logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'") | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title}) | ||||
|                 except Exception as e: | ||||
|                     logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}") | ||||
|  | ||||
|                 # Record server header | ||||
|                 try: | ||||
|                     server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255] | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header}) | ||||
|                 except Exception as e: | ||||
|                     pass | ||||
|  | ||||
|                 # Store favicon if necessary | ||||
|                 if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'): | ||||
|                     watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'), | ||||
|                                        favicon_base_64=update_handler.fetcher.favicon_blob.get('base64') | ||||
|                                        ) | ||||
|  | ||||
|                 datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3), | ||||
|                                                                'check_count': count}) | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}") | ||||
|             logger.error(f"Worker {worker_id} traceback:", exc_info=True) | ||||
|              | ||||
|             # Also update the watch with error information | ||||
|             if datastore and uuid in datastore.data['watching']: | ||||
|                 datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Worker error: {str(e)}"}) | ||||
|          | ||||
|         finally: | ||||
|             # Always cleanup - this runs whether there was an exception or not | ||||
|             if uuid: | ||||
|                 try: | ||||
|                     # Mark UUID as no longer being processed | ||||
|                     worker_handler.set_uuid_processing(uuid, processing=False) | ||||
|                      | ||||
|                     # Send completion signal | ||||
|                     if watch: | ||||
|                         #logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}") | ||||
|                         watch_check_update.send(watch_uuid=watch['uuid']) | ||||
|  | ||||
|                     update_handler = None | ||||
|                     logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s") | ||||
|                 except Exception as cleanup_error: | ||||
|                     logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}") | ||||
|              | ||||
|             # Brief pause before continuing to avoid tight error loops (only on error) | ||||
|             if 'e' in locals(): | ||||
|                 await asyncio.sleep(1.0) | ||||
|             else: | ||||
|                 # Small yield for normal completion | ||||
|                 await asyncio.sleep(0.01) | ||||
|  | ||||
|         # Check if we should exit | ||||
|         if app.config.exit.is_set(): | ||||
|             break | ||||
|  | ||||
|     # Check if we're in pytest environment - if so, be more gentle with logging | ||||
|     import sys | ||||
|     in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ | ||||
|      | ||||
|     if not in_pytest: | ||||
|         logger.info(f"Worker {worker_id} shutting down") | ||||
|  | ||||
|  | ||||
| def cleanup_error_artifacts(uuid, datastore): | ||||
|     """Helper function to clean up error artifacts""" | ||||
|     cleanup_files = ["last-error-screenshot.png", "last-error.txt"] | ||||
|     for f in cleanup_files: | ||||
|         full_path = os.path.join(datastore.datastore_path, uuid, f) | ||||
|         if os.path.isfile(full_path): | ||||
|             os.unlink(full_path) | ||||
|  | ||||
|  | ||||
|  | ||||
| async def send_content_changed_notification(watch_uuid, notification_q, datastore): | ||||
|     """Helper function to queue notifications using the new notification service""" | ||||
|     try: | ||||
|         from changedetectionio.notification_service import create_notification_service | ||||
|          | ||||
|         # Create notification service instance | ||||
|         notification_service = create_notification_service(datastore, notification_q) | ||||
|          | ||||
|         notification_service.send_content_changed_notification(watch_uuid) | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error sending notification for {watch_uuid}: {e}") | ||||
|  | ||||
|  | ||||
| async def send_filter_failure_notification(watch_uuid, notification_q, datastore): | ||||
|     """Helper function to send filter failure notifications using the new notification service""" | ||||
|     try: | ||||
|         from changedetectionio.notification_service import create_notification_service | ||||
|          | ||||
|         # Create notification service instance | ||||
|         notification_service = create_notification_service(datastore, notification_q) | ||||
|          | ||||
|         notification_service.send_filter_failure_notification(watch_uuid) | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error sending filter failure notification for {watch_uuid}: {e}") | ||||
|  | ||||
|  | ||||
| async def send_step_failure_notification(watch_uuid, step_n, notification_q, datastore): | ||||
|     """Helper function to send step failure notifications using the new notification service""" | ||||
|     try: | ||||
|         from changedetectionio.notification_service import create_notification_service | ||||
|          | ||||
|         # Create notification service instance | ||||
|         notification_service = create_notification_service(datastore, notification_q) | ||||
|          | ||||
|         notification_service.send_step_failure_notification(watch_uuid, step_n) | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error sending step failure notification for {watch_uuid}: {e}") | ||||
| @@ -20,10 +20,7 @@ def login_optionally_required(func): | ||||
|         has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False) | ||||
|  | ||||
|         # Permitted | ||||
|         if request.endpoint and 'static_content' in request.endpoint and request.view_args and request.view_args.get('group') == 'styles': | ||||
|             return func(*args, **kwargs) | ||||
|         # Permitted | ||||
|         elif request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'): | ||||
|         if request.endpoint and 'diff_history_page' in request.endpoint and datastore.data['settings']['application'].get('shared_diff_access'): | ||||
|             return func(*args, **kwargs) | ||||
|         elif request.method in flask_login.config.EXEMPT_METHODS: | ||||
|             return func(*args, **kwargs) | ||||
|   | ||||
| @@ -23,47 +23,55 @@ from loguru import logger | ||||
| browsersteps_sessions = {} | ||||
| io_interface_context = None | ||||
| import json | ||||
| import base64 | ||||
| import hashlib | ||||
| from flask import Response | ||||
| import asyncio | ||||
| import threading | ||||
|  | ||||
| def run_async_in_browser_loop(coro): | ||||
|     """Run async coroutine using the existing async worker event loop""" | ||||
|     from changedetectionio import worker_handler | ||||
|      | ||||
|     # Use the existing async worker event loop instead of creating a new one | ||||
|     if worker_handler.USE_ASYNC_WORKERS and worker_handler.async_loop and not worker_handler.async_loop.is_closed(): | ||||
|         logger.debug("Browser steps using existing async worker event loop") | ||||
|         future = asyncio.run_coroutine_threadsafe(coro, worker_handler.async_loop) | ||||
|         return future.result() | ||||
|     else: | ||||
|         # Fallback: create a new event loop (for sync workers or if async loop not available) | ||||
|         logger.debug("Browser steps creating temporary event loop") | ||||
|         loop = asyncio.new_event_loop() | ||||
|         asyncio.set_event_loop(loop) | ||||
|         try: | ||||
|             return loop.run_until_complete(coro) | ||||
|         finally: | ||||
|             loop.close() | ||||
|  | ||||
| def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates") | ||||
|  | ||||
|     def start_browsersteps_session(watch_uuid): | ||||
|         from . import nonContext | ||||
|     async def start_browsersteps_session(watch_uuid): | ||||
|         from . import browser_steps | ||||
|         import time | ||||
|         global browsersteps_sessions | ||||
|         global io_interface_context | ||||
|  | ||||
|         from playwright.async_api import async_playwright | ||||
|  | ||||
|         # We keep the playwright session open for many minutes | ||||
|         keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60 | ||||
|  | ||||
|         browsersteps_start_session = {'start_time': time.time()} | ||||
|  | ||||
|         # You can only have one of these running | ||||
|         # This should be very fine to leave running for the life of the application | ||||
|         # @idea - Make it global so the pool of watch fetchers can use it also | ||||
|         if not io_interface_context: | ||||
|             io_interface_context = nonContext.c_sync_playwright() | ||||
|             # Start the Playwright context, which is actually a nodejs sub-process and communicates over STDIN/STDOUT pipes | ||||
|             io_interface_context = io_interface_context.start() | ||||
|         # Create a new async playwright instance for browser steps | ||||
|         playwright_instance = async_playwright() | ||||
|         playwright_context = await playwright_instance.start() | ||||
|  | ||||
|         keepalive_ms = ((keepalive_seconds + 3) * 1000) | ||||
|         base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"') | ||||
|         a = "?" if not '?' in base_url else '&' | ||||
|         base_url += a + f"timeout={keepalive_ms}" | ||||
|  | ||||
|         try: | ||||
|             browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url) | ||||
|         except Exception as e: | ||||
|             if 'ECONNREFUSED' in str(e): | ||||
|                 return make_response('Unable to start the Playwright Browser session, is it running?', 401) | ||||
|             else: | ||||
|                 # Other errors, bad URL syntax, bad reply etc | ||||
|                 return make_response(str(e), 401) | ||||
|         browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms) | ||||
|         browsersteps_start_session['browser'] = browser | ||||
|         browsersteps_start_session['playwright_context'] = playwright_context | ||||
|  | ||||
|         proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid) | ||||
|         proxy = None | ||||
| @@ -85,15 +93,20 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                 logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}") | ||||
|  | ||||
|         # Tell Playwright to connect to Chrome and setup a new session via our stepper interface | ||||
|         browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui( | ||||
|             playwright_browser=browsersteps_start_session['browser'], | ||||
|         browserstepper = browser_steps.browsersteps_live_ui( | ||||
|             playwright_browser=browser, | ||||
|             proxy=proxy, | ||||
|             start_url=datastore.data['watching'][watch_uuid].link, | ||||
|             headers=datastore.data['watching'][watch_uuid].get('headers') | ||||
|         ) | ||||
|          | ||||
|         # Initialize the async connection | ||||
|         await browserstepper.connect(proxy=proxy) | ||||
|          | ||||
|         browsersteps_start_session['browserstepper'] = browserstepper | ||||
|  | ||||
|         # For test | ||||
|         #browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time())) | ||||
|         #await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time())) | ||||
|  | ||||
|         return browsersteps_start_session | ||||
|  | ||||
| @@ -102,10 +115,8 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     @browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET']) | ||||
|     def browsersteps_start_session(): | ||||
|         # A new session was requested, return sessionID | ||||
|  | ||||
|         import asyncio | ||||
|         import uuid | ||||
|         global browsersteps_sessions | ||||
|  | ||||
|         browsersteps_session_id = str(uuid.uuid4()) | ||||
|         watch_uuid = request.args.get('uuid') | ||||
|  | ||||
| @@ -114,7 +125,19 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|  | ||||
|         logger.debug("Starting connection with playwright") | ||||
|         logger.debug("browser_steps.py connecting") | ||||
|         browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid) | ||||
|  | ||||
|         try: | ||||
|             # Run the async function in the dedicated browser steps event loop | ||||
|             browsersteps_sessions[browsersteps_session_id] = run_async_in_browser_loop( | ||||
|                 start_browsersteps_session(watch_uuid) | ||||
|             ) | ||||
|         except Exception as e: | ||||
|             if 'ECONNREFUSED' in str(e): | ||||
|                 return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401) | ||||
|             else: | ||||
|                 # Other errors, bad URL syntax, bad reply etc | ||||
|                 return make_response(str(e), 401) | ||||
|  | ||||
|         logger.debug("Starting connection with playwright - done") | ||||
|         return {'browsersteps_session_id': browsersteps_session_id} | ||||
|  | ||||
| @@ -149,7 +172,6 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     def browsersteps_ui_update(): | ||||
|         import base64 | ||||
|         import playwright._impl._errors | ||||
|         global browsersteps_sessions | ||||
|         from changedetectionio.blueprint.browser_steps import browser_steps | ||||
|  | ||||
|         remaining =0 | ||||
| @@ -172,12 +194,15 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|             step_optional_value = request.form.get('optional_value') | ||||
|             is_last_step = strtobool(request.form.get('is_last_step')) | ||||
|  | ||||
|             # @todo try.. accept.. nice errors not popups.. | ||||
|             try: | ||||
|  | ||||
|                 browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation, | ||||
|                                          selector=step_selector, | ||||
|                                          optional_value=step_optional_value) | ||||
|                 # Run the async call_action method in the dedicated browser steps event loop | ||||
|                 run_async_in_browser_loop( | ||||
|                     browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action( | ||||
|                         action_name=step_operation, | ||||
|                         selector=step_selector, | ||||
|                         optional_value=step_optional_value | ||||
|                     ) | ||||
|                 ) | ||||
|  | ||||
|             except Exception as e: | ||||
|                 logger.error(f"Exception when calling step operation {step_operation} {str(e)}") | ||||
| @@ -191,7 +216,11 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|  | ||||
|         # Screenshots and other info only needed on requesting a step (POST) | ||||
|         try: | ||||
|             (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state() | ||||
|             # Run the async get_current_state method in the dedicated browser steps event loop | ||||
|             (screenshot, xpath_data) = run_async_in_browser_loop( | ||||
|                 browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state() | ||||
|             ) | ||||
|                  | ||||
|             if is_last_step: | ||||
|                 watch = datastore.data['watching'].get(uuid) | ||||
|                 u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url | ||||
| @@ -199,13 +228,10 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                     watch.save_screenshot(screenshot=screenshot) | ||||
|                     watch.save_xpath_data(data=xpath_data) | ||||
|  | ||||
|         except playwright._impl._api_types.Error as e: | ||||
|             return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401) | ||||
|         except Exception as e: | ||||
|             return make_response("Error fetching screenshot and element data - " + str(e), 401) | ||||
|             return make_response(f"Error fetching screenshot and element data - {str(e)}", 401) | ||||
|  | ||||
|         # SEND THIS BACK TO THE BROWSER | ||||
|  | ||||
|         output = { | ||||
|             "screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}", | ||||
|             "xpath_data": xpath_data, | ||||
|   | ||||
| @@ -4,7 +4,7 @@ import re | ||||
| from random import randint | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT | ||||
| from changedetectionio.content_fetchers.base import manage_user_agent | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
|  | ||||
| @@ -35,6 +35,7 @@ browser_step_ui_config = {'Choose one': '0 0', | ||||
|                           'Make all child elements visible': '1 0', | ||||
|                           'Press Enter': '0 0', | ||||
|                           'Select by label': '1 1', | ||||
|                           '<select> by option text': '1 1', | ||||
|                           'Scroll down': '0 0', | ||||
|                           'Uncheck checkbox': '1 0', | ||||
|                           'Wait for seconds': '0 1', | ||||
| @@ -54,14 +55,17 @@ browser_step_ui_config = {'Choose one': '0 0', | ||||
| class steppable_browser_interface(): | ||||
|     page = None | ||||
|     start_url = None | ||||
|  | ||||
|     action_timeout = 10 * 1000 | ||||
|  | ||||
|     def __init__(self, start_url): | ||||
|         self.start_url = start_url | ||||
|  | ||||
|     # Convert and perform "Click Button" for example | ||||
|     def call_action(self, action_name, selector=None, optional_value=None): | ||||
|     async def call_action(self, action_name, selector=None, optional_value=None): | ||||
|         if self.page is None: | ||||
|             logger.warning("Cannot call action on None page object") | ||||
|             return | ||||
|              | ||||
|         now = time.time() | ||||
|         call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower()) | ||||
|         if call_action_name == 'choose_one': | ||||
| @@ -72,136 +76,216 @@ class steppable_browser_interface(): | ||||
|         if selector and selector.startswith('/') and not selector.startswith('//'): | ||||
|             selector = "xpath=" + selector | ||||
|  | ||||
|         # Check if action handler exists | ||||
|         if not hasattr(self, "action_" + call_action_name): | ||||
|             logger.warning(f"Action handler for '{call_action_name}' not found") | ||||
|             return | ||||
|              | ||||
|         action_handler = getattr(self, "action_" + call_action_name) | ||||
|  | ||||
|         # Support for Jinja2 variables in the value and selector | ||||
|  | ||||
|         if selector and ('{%' in selector or '{{' in selector): | ||||
|             selector = jinja_render(template_str=selector) | ||||
|  | ||||
|         if optional_value and ('{%' in optional_value or '{{' in optional_value): | ||||
|             optional_value = jinja_render(template_str=optional_value) | ||||
|  | ||||
|         action_handler(selector, optional_value) | ||||
|         self.page.wait_for_timeout(1.5 * 1000) | ||||
|         # Trigger click and cautiously handle potential navigation | ||||
|         # This means the page redirects/reloads/changes JS etc etc | ||||
|         if call_action_name.startswith('click_'): | ||||
|             try: | ||||
|                 # Set up navigation expectation before the click (like sync version) | ||||
|                 async with self.page.expect_event("framenavigated", timeout=3000) as navigation_info: | ||||
|                     await action_handler(selector, optional_value) | ||||
|                  | ||||
|                 # Check if navigation actually occurred | ||||
|                 try: | ||||
|                     await navigation_info.value  # This waits for the navigation promise | ||||
|                     logger.debug(f"Navigation occurred on {call_action_name}.") | ||||
|                 except Exception: | ||||
|                     logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.") | ||||
|                      | ||||
|             except Exception as e: | ||||
|                 # If expect_event itself times out, that means no navigation occurred - that's OK | ||||
|                 if "framenavigated" in str(e) and "exceeded" in str(e): | ||||
|                     logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.") | ||||
|                 else: | ||||
|                     raise e | ||||
|         else: | ||||
|             # Some other action that probably a navigation is not expected | ||||
|             await action_handler(selector, optional_value) | ||||
|  | ||||
|  | ||||
|         # Safely wait for timeout | ||||
|         await self.page.wait_for_timeout(1.5 * 1000) | ||||
|         logger.debug(f"Call action done in {time.time()-now:.2f}s") | ||||
|  | ||||
|     def action_goto_url(self, selector=None, value=None): | ||||
|         # self.page.set_viewport_size({"width": 1280, "height": 5000}) | ||||
|     async def action_goto_url(self, selector=None, value=None): | ||||
|         if not value: | ||||
|             logger.warning("No URL provided for goto_url action") | ||||
|             return None | ||||
|              | ||||
|         now = time.time() | ||||
|         response = self.page.goto(value, timeout=0, wait_until='load') | ||||
|         # Should be the same as the puppeteer_fetch.js methods, means, load with no timeout set (skip timeout) | ||||
|         #and also wait for seconds ? | ||||
|         #await page.waitForTimeout(1000); | ||||
|         #await page.waitForTimeout(extra_wait_ms); | ||||
|         response = await self.page.goto(value, timeout=0, wait_until='load') | ||||
|         logger.debug(f"Time to goto URL {time.time()-now:.2f}s") | ||||
|         return response | ||||
|  | ||||
|     # Incase they request to go back to the start | ||||
|     def action_goto_site(self, selector=None, value=None): | ||||
|         return self.action_goto_url(value=self.start_url) | ||||
|     async def action_goto_site(self, selector=None, value=None): | ||||
|         return await self.action_goto_url(value=re.sub(r'^source:', '', self.start_url, flags=re.IGNORECASE)) | ||||
|  | ||||
|     def action_click_element_containing_text(self, selector=None, value=''): | ||||
|     async def action_click_element_containing_text(self, selector=None, value=''): | ||||
|         logger.debug("Clicking element containing text") | ||||
|         if not len(value.strip()): | ||||
|         if not value or not len(value.strip()): | ||||
|             return | ||||
|              | ||||
|         elem = self.page.get_by_text(value) | ||||
|         if elem.count(): | ||||
|             elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|         if await elem.count(): | ||||
|             await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|  | ||||
|     def action_click_element_containing_text_if_exists(self, selector=None, value=''): | ||||
|  | ||||
|     async def action_click_element_containing_text_if_exists(self, selector=None, value=''): | ||||
|         logger.debug("Clicking element containing text if exists") | ||||
|         if not len(value.strip()): | ||||
|         if not value or not len(value.strip()): | ||||
|             return | ||||
|              | ||||
|         elem = self.page.get_by_text(value) | ||||
|         logger.debug(f"Clicking element containing text - {elem.count()} elements found") | ||||
|         if elem.count(): | ||||
|             elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|         else: | ||||
|         count = await elem.count() | ||||
|         logger.debug(f"Clicking element containing text - {count} elements found") | ||||
|         if count: | ||||
|             await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|                  | ||||
|  | ||||
|     async def action_enter_text_in_field(self, selector, value): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|     def action_enter_text_in_field(self, selector, value): | ||||
|         if not len(selector.strip()): | ||||
|             return | ||||
|         await self.page.fill(selector, value, timeout=self.action_timeout) | ||||
|  | ||||
|         self.page.fill(selector, value, timeout=self.action_timeout) | ||||
|     async def action_execute_js(self, selector, value): | ||||
|         if not value: | ||||
|             return None | ||||
|              | ||||
|         return await self.page.evaluate(value) | ||||
|  | ||||
|     def action_execute_js(self, selector, value): | ||||
|         response = self.page.evaluate(value) | ||||
|         return response | ||||
|  | ||||
|     def action_click_element(self, selector, value): | ||||
|     async def action_click_element(self, selector, value): | ||||
|         logger.debug("Clicking element") | ||||
|         if not len(selector.strip()): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|         self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500)) | ||||
|         await self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500)) | ||||
|  | ||||
|     def action_click_element_if_exists(self, selector, value): | ||||
|     async def action_click_element_if_exists(self, selector, value): | ||||
|         import playwright._impl._errors as _api_types | ||||
|         logger.debug("Clicking element if exists") | ||||
|         if not len(selector.strip()): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|              | ||||
|         try: | ||||
|             self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500)) | ||||
|         except _api_types.TimeoutError as e: | ||||
|             await self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500)) | ||||
|         except _api_types.TimeoutError: | ||||
|             return | ||||
|         except _api_types.Error as e: | ||||
|         except _api_types.Error: | ||||
|             # Element was there, but page redrew and now its long long gone | ||||
|             return | ||||
|                  | ||||
|  | ||||
|     def action_click_x_y(self, selector, value): | ||||
|         if not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value): | ||||
|             raise Exception("'Click X,Y' step should be in the format of '100 , 90'") | ||||
|     async def action_click_x_y(self, selector, value): | ||||
|         if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value): | ||||
|             logger.warning("'Click X,Y' step should be in the format of '100 , 90'") | ||||
|             return | ||||
|  | ||||
|         x, y = value.strip().split(',') | ||||
|         x = int(float(x.strip())) | ||||
|         y = int(float(y.strip())) | ||||
|         self.page.mouse.click(x=x, y=y, delay=randint(200, 500)) | ||||
|         try: | ||||
|             x, y = value.strip().split(',') | ||||
|             x = int(float(x.strip())) | ||||
|             y = int(float(y.strip())) | ||||
|              | ||||
|             await self.page.mouse.click(x=x, y=y, delay=randint(200, 500)) | ||||
|                  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error parsing x,y coordinates: {str(e)}") | ||||
|  | ||||
|     def action_scroll_down(self, selector, value): | ||||
|     async def action__select_by_option_text(self, selector, value): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|         await self.page.select_option(selector, label=value, timeout=self.action_timeout) | ||||
|  | ||||
|     async def action_scroll_down(self, selector, value): | ||||
|         # Some sites this doesnt work on for some reason | ||||
|         self.page.mouse.wheel(0, 600) | ||||
|         self.page.wait_for_timeout(1000) | ||||
|         await self.page.mouse.wheel(0, 600) | ||||
|         await self.page.wait_for_timeout(1000) | ||||
|  | ||||
|     def action_wait_for_seconds(self, selector, value): | ||||
|         self.page.wait_for_timeout(float(value.strip()) * 1000) | ||||
|     async def action_wait_for_seconds(self, selector, value): | ||||
|         try: | ||||
|             seconds = float(value.strip()) if value else 1.0 | ||||
|             await self.page.wait_for_timeout(seconds * 1000) | ||||
|         except (ValueError, TypeError) as e: | ||||
|             logger.error(f"Invalid value for wait_for_seconds: {str(e)}") | ||||
|  | ||||
|     def action_wait_for_text(self, selector, value): | ||||
|     async def action_wait_for_text(self, selector, value): | ||||
|         if not value: | ||||
|             return | ||||
|              | ||||
|         import json | ||||
|         v = json.dumps(value) | ||||
|         self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000) | ||||
|         await self.page.wait_for_function( | ||||
|             f'document.querySelector("body").innerText.includes({v});', | ||||
|             timeout=30000 | ||||
|         ) | ||||
|              | ||||
|  | ||||
|     def action_wait_for_text_in_element(self, selector, value): | ||||
|     async def action_wait_for_text_in_element(self, selector, value): | ||||
|         if not selector or not value: | ||||
|             return | ||||
|              | ||||
|         import json | ||||
|         s = json.dumps(selector) | ||||
|         v = json.dumps(value) | ||||
|         self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000) | ||||
|          | ||||
|         await self.page.wait_for_function( | ||||
|             f'document.querySelector({s}).innerText.includes({v});', | ||||
|             timeout=30000 | ||||
|         ) | ||||
|  | ||||
|     # @todo - in the future make some popout interface to capture what needs to be set | ||||
|     # https://playwright.dev/python/docs/api/class-keyboard | ||||
|     def action_press_enter(self, selector, value): | ||||
|         self.page.keyboard.press("Enter", delay=randint(200, 500)) | ||||
|     async def action_press_enter(self, selector, value): | ||||
|         await self.page.keyboard.press("Enter", delay=randint(200, 500)) | ||||
|              | ||||
|  | ||||
|     def action_press_page_up(self, selector, value): | ||||
|         self.page.keyboard.press("PageUp", delay=randint(200, 500)) | ||||
|     async def action_press_page_up(self, selector, value): | ||||
|         await self.page.keyboard.press("PageUp", delay=randint(200, 500)) | ||||
|  | ||||
|     def action_press_page_down(self, selector, value): | ||||
|         self.page.keyboard.press("PageDown", delay=randint(200, 500)) | ||||
|     async def action_press_page_down(self, selector, value): | ||||
|         await self.page.keyboard.press("PageDown", delay=randint(200, 500)) | ||||
|  | ||||
|     def action_check_checkbox(self, selector, value): | ||||
|         self.page.locator(selector).check(timeout=self.action_timeout) | ||||
|     async def action_check_checkbox(self, selector, value): | ||||
|         if not selector: | ||||
|             return | ||||
|  | ||||
|     def action_uncheck_checkbox(self, selector, value): | ||||
|         self.page.locator(selector).uncheck(timeout=self.action_timeout) | ||||
|         await self.page.locator(selector).check(timeout=self.action_timeout) | ||||
|  | ||||
|     def action_remove_elements(self, selector, value): | ||||
|     async def action_uncheck_checkbox(self, selector, value): | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         await self.page.locator(selector).uncheck(timeout=self.action_timeout) | ||||
|              | ||||
|  | ||||
|     async def action_remove_elements(self, selector, value): | ||||
|         """Removes all elements matching the given selector from the DOM.""" | ||||
|         self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())") | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         await self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())") | ||||
|  | ||||
|     def action_make_all_child_elements_visible(self, selector, value): | ||||
|     async def action_make_all_child_elements_visible(self, selector, value): | ||||
|         """Recursively makes all child elements inside the given selector fully visible.""" | ||||
|         self.page.locator(selector).locator("*").evaluate_all(""" | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         await self.page.locator(selector).locator("*").evaluate_all(""" | ||||
|             els => els.forEach(el => { | ||||
|                 el.style.display = 'block';   // Forces it to be displayed | ||||
|                 el.style.visibility = 'visible';   // Ensures it's not hidden | ||||
| @@ -224,7 +308,9 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|     # bump and kill this if idle after X sec | ||||
|     age_start = 0 | ||||
|     headers = {} | ||||
|  | ||||
|     # Track if resources are properly cleaned up | ||||
|     _is_cleaned_up = False | ||||
|      | ||||
|     # use a special driver, maybe locally etc | ||||
|     command_executor = os.getenv( | ||||
|         "PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL" | ||||
| @@ -243,17 +329,23 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|         self.age_start = time.time() | ||||
|         self.playwright_browser = playwright_browser | ||||
|         self.start_url = start_url | ||||
|         if self.context is None: | ||||
|             self.connect(proxy=proxy) | ||||
|         self._is_cleaned_up = False | ||||
|         self.proxy = proxy | ||||
|         # Note: connect() is now async and must be called separately | ||||
|  | ||||
|     def __del__(self): | ||||
|         # Ensure cleanup happens if object is garbage collected | ||||
|         # Note: cleanup is now async, so we can only mark as cleaned up here | ||||
|         self._is_cleaned_up = True | ||||
|  | ||||
|     # Connect and setup a new context | ||||
|     def connect(self, proxy=None): | ||||
|     async def connect(self, proxy=None): | ||||
|         # Should only get called once - test that | ||||
|         keep_open = 1000 * 60 * 5 | ||||
|         now = time.time() | ||||
|  | ||||
|         # @todo handle multiple contexts, bind a unique id from the browser on each req? | ||||
|         self.context = self.playwright_browser.new_context( | ||||
|         self.context = await self.playwright_browser.new_context( | ||||
|             accept_downloads=False,  # Should never be needed | ||||
|             bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others | ||||
|             extra_http_headers=self.headers, | ||||
| @@ -264,64 +356,142 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|             user_agent=manage_user_agent(headers=self.headers), | ||||
|         ) | ||||
|  | ||||
|  | ||||
|         self.page = self.context.new_page() | ||||
|         self.page = await self.context.new_page() | ||||
|  | ||||
|         # self.page.set_default_navigation_timeout(keep_open) | ||||
|         self.page.set_default_timeout(keep_open) | ||||
|         # @todo probably this doesnt work | ||||
|         self.page.on( | ||||
|             "close", | ||||
|             self.mark_as_closed, | ||||
|         ) | ||||
|         # Set event handlers | ||||
|         self.page.on("close", self.mark_as_closed) | ||||
|         # Listen for all console events and handle errors | ||||
|         self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|         logger.debug(f"Time to browser setup {time.time()-now:.2f}s") | ||||
|         self.page.wait_for_timeout(1 * 1000) | ||||
|  | ||||
|         await self.page.wait_for_timeout(1 * 1000) | ||||
|  | ||||
|     def mark_as_closed(self): | ||||
|         logger.debug("Page closed, cleaning up..") | ||||
|         # Note: This is called from a sync context (event handler) | ||||
|         # so we'll just mark as cleaned up and let __del__ handle the rest | ||||
|         self._is_cleaned_up = True | ||||
|  | ||||
|     async def cleanup(self): | ||||
|         """Properly clean up all resources to prevent memory leaks""" | ||||
|         if self._is_cleaned_up: | ||||
|             return | ||||
|              | ||||
|         logger.debug("Cleaning up browser steps resources") | ||||
|          | ||||
|         # Clean up page | ||||
|         if hasattr(self, 'page') and self.page is not None: | ||||
|             try: | ||||
|                 # Force garbage collection before closing | ||||
|                 await self.page.request_gc() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error during page garbage collection: {str(e)}") | ||||
|                  | ||||
|             try: | ||||
|                 # Remove event listeners before closing | ||||
|                 self.page.remove_listener("close", self.mark_as_closed) | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error removing event listeners: {str(e)}") | ||||
|                  | ||||
|             try: | ||||
|                 await self.page.close() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error closing page: {str(e)}") | ||||
|              | ||||
|             self.page = None | ||||
|  | ||||
|         # Clean up context | ||||
|         if hasattr(self, 'context') and self.context is not None: | ||||
|             try: | ||||
|                 await self.context.close() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error closing context: {str(e)}") | ||||
|              | ||||
|             self.context = None | ||||
|              | ||||
|         self._is_cleaned_up = True | ||||
|         logger.debug("Browser steps resources cleanup complete") | ||||
|  | ||||
|     @property | ||||
|     def has_expired(self): | ||||
|         if not self.page: | ||||
|         if not self.page or self._is_cleaned_up: | ||||
|             return True | ||||
|          | ||||
|         # Check if session has expired based on age | ||||
|         max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10))  # Default 10 minutes | ||||
|         if (time.time() - self.age_start) > max_age_seconds: | ||||
|             logger.debug(f"Browser steps session expired after {max_age_seconds} seconds") | ||||
|             return True | ||||
|              | ||||
|         return False | ||||
|  | ||||
|  | ||||
|     def get_current_state(self): | ||||
|     async def get_current_state(self): | ||||
|         """Return the screenshot and interactive elements mapping, generally always called after action_()""" | ||||
|         import importlib.resources | ||||
|         import json | ||||
|         # because we for now only run browser steps in playwright mode (not puppeteer mode) | ||||
|         from changedetectionio.content_fetchers.playwright import capture_full_page_async | ||||
|  | ||||
|         # Safety check - don't proceed if resources are cleaned up | ||||
|         if self._is_cleaned_up or self.page is None: | ||||
|             logger.warning("Attempted to get current state after cleanup") | ||||
|             return (None, None) | ||||
|  | ||||
|         xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() | ||||
|  | ||||
|         now = time.time() | ||||
|         self.page.wait_for_timeout(1 * 1000) | ||||
|         await self.page.wait_for_timeout(1 * 1000) | ||||
|  | ||||
|         screenshot = None | ||||
|         xpath_data = None | ||||
|          | ||||
|         try: | ||||
|             # Get screenshot first | ||||
|             screenshot = await capture_full_page_async(page=self.page) | ||||
|             if not screenshot: | ||||
|                 logger.error("No screenshot was retrieved :((") | ||||
|  | ||||
|         full_height = self.page.evaluate("document.documentElement.scrollHeight") | ||||
|             logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s") | ||||
|  | ||||
|         if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD: | ||||
|             logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.") | ||||
|             screenshot = capture_stitched_together_full_page(self.page) | ||||
|         else: | ||||
|             screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40) | ||||
|             # Then get interactive elements | ||||
|             now = time.time() | ||||
|             await self.page.evaluate("var include_filters=''") | ||||
|             await self.page.request_gc() | ||||
|  | ||||
|         logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s") | ||||
|             scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' | ||||
|  | ||||
|         now = time.time() | ||||
|         self.page.evaluate("var include_filters=''") | ||||
|         # Go find the interactive elements | ||||
|         # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers? | ||||
|         elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' | ||||
|         xpath_element_js = xpath_element_js.replace('%ELEMENTS%', elements) | ||||
|             MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|             xpath_data = json.loads(await self.page.evaluate(xpath_element_js, { | ||||
|                 "visualselector_xpath_selectors": scan_elements, | ||||
|                 "max_height": MAX_TOTAL_HEIGHT | ||||
|             })) | ||||
|             await self.page.request_gc() | ||||
|  | ||||
|         xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") | ||||
|         # So the JS will find the smallest one first | ||||
|         xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) | ||||
|         logger.debug(f"Time to scrape xpath element data in browser {time.time()-now:.2f}s") | ||||
|             # Sort elements by size | ||||
|             xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) | ||||
|             logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s") | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error getting current state: {str(e)}") | ||||
|             # If the page has navigated (common with logins) then the context is destroyed on navigation, continue | ||||
|             # I'm not sure that this is required anymore because we have the "expect navigation wrapper" at the top | ||||
|             if "Execution context was destroyed" in str(e): | ||||
|                 logger.debug("Execution context was destroyed, most likely because of navigation, continuing...") | ||||
|             pass | ||||
|  | ||||
|         # playwright._impl._api_types.Error: Browser closed. | ||||
|         # @todo show some countdown timer? | ||||
|             # Attempt recovery - force garbage collection | ||||
|             try: | ||||
|                 await self.page.request_gc() | ||||
|             except: | ||||
|                 pass | ||||
|          | ||||
|         # Request garbage collection one final time | ||||
|         try: | ||||
|             await self.page.request_gc() | ||||
|         except: | ||||
|             pass | ||||
|              | ||||
|         return (screenshot, xpath_data) | ||||
|  | ||||
|   | ||||
| @@ -1,17 +0,0 @@ | ||||
| from playwright.sync_api import PlaywrightContextManager | ||||
|  | ||||
# Playwright expects to be driven as a context manager, but this code does
# something hacky: it holds the session open for as long as possible, shuts it
# down, and then opens a new one. That means PlaywrightContextManager's
# __enter__()/__exit__() pairing cannot be used in the usual way.
# To work around this, goodbye() acts the same as __exit__().
#
# The real cause may be that the context is opened correctly with __enter__()
# but the connection then times out, after which there is some lock condition
# where the context can't be destroyed without hanging.

class c_PlaywrightContextManager(PlaywrightContextManager):
    """PlaywrightContextManager that can be shut down by an explicit call."""

    def goodbye(self) -> None:
        """Shut the manager down by calling __exit__() with no arguments."""
        self.__exit__()
|  | ||||
def c_sync_playwright() -> PlaywrightContextManager:
    """Create and return a new c_PlaywrightContextManager instance."""
    manager = c_PlaywrightContextManager()
    return manager
| @@ -1,6 +1,7 @@ | ||||
| from flask import Blueprint, request, redirect, url_for, flash, render_template | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
| from changedetectionio import worker_handler | ||||
| from changedetectionio.blueprint.imports.importer import ( | ||||
|     import_url_list,  | ||||
|     import_distill_io_json,  | ||||
| @@ -24,7 +25,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                 importer_handler = import_url_list() | ||||
|                 importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff')) | ||||
|                 for uuid in importer_handler.new_uuids: | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|                     worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|  | ||||
|                 if len(importer_handler.remaining_data) == 0: | ||||
|                     return redirect(url_for('watchlist.index')) | ||||
| @@ -37,7 +38,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                 d_importer = import_distill_io_json() | ||||
|                 d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore) | ||||
|                 for uuid in d_importer.new_uuids: | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|                     worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|  | ||||
|             # XLSX importer | ||||
|             if request.files and request.files.get('xlsx_file'): | ||||
| @@ -60,7 +61,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                     w_importer.run(data=file, flash=flash, datastore=datastore) | ||||
|  | ||||
|                 for uuid in w_importer.new_uuids: | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|                     worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|  | ||||
|         # Could be some remaining, or we could be on GET | ||||
|         form = forms.importForm(formdata=request.form if request.method == 'POST' else None) | ||||
|   | ||||
| @@ -4,6 +4,7 @@ from flask import Blueprint, flash, redirect, url_for | ||||
| from flask_login import login_required | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio import worker_handler | ||||
| from queue import PriorityQueue | ||||
|  | ||||
| PRICE_DATA_TRACK_ACCEPT = 'accepted' | ||||
| @@ -19,7 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue | ||||
|         datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT | ||||
|         datastore.data['watching'][uuid]['processor'] = 'restock_diff' | ||||
|         datastore.data['watching'][uuid].clear_watch() | ||||
|         update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|         worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|         return redirect(url_for("watchlist.index")) | ||||
|  | ||||
|     @login_required | ||||
|   | ||||
| @@ -108,10 +108,13 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|  | ||||
|                 fe.link(link=diff_link) | ||||
|  | ||||
|                 # @todo watch should be a getter - watch.get('title') (internally if URL else..) | ||||
|                 # Same logic as watch-overview.html | ||||
|                 if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'): | ||||
|                     watch_label = watch.label | ||||
|                 else: | ||||
|                     watch_label = watch.get('url') | ||||
|  | ||||
|                 watch_title = watch.get('title') if watch.get('title') else watch.get('url') | ||||
|                 fe.title(title=watch_title) | ||||
|                 fe.title(title=watch_label) | ||||
|                 try: | ||||
|  | ||||
|                     html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]), | ||||
| @@ -127,7 +130,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                 # @todo User could decide if <link> goes to the diff page, or to the watch link | ||||
|                 rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n" | ||||
|  | ||||
|                 content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link) | ||||
|                 content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link) | ||||
|  | ||||
|                 # Out of range chars could also break feedgen | ||||
|                 if scan_invalid_chars_in_rss(content): | ||||
|   | ||||
| @@ -67,7 +67,32 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                     del (app_update['password']) | ||||
|  | ||||
|                 datastore.data['settings']['application'].update(app_update) | ||||
|                  | ||||
|                 # Handle dynamic worker count adjustment | ||||
|                 old_worker_count = datastore.data['settings']['requests'].get('workers', 1) | ||||
|                 new_worker_count = form.data['requests'].get('workers', 1) | ||||
|                  | ||||
|                 datastore.data['settings']['requests'].update(form.data['requests']) | ||||
|                  | ||||
|                 # Adjust worker count if it changed | ||||
|                 if new_worker_count != old_worker_count: | ||||
|                     from changedetectionio import worker_handler | ||||
|                     from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds | ||||
|                      | ||||
|                     result = worker_handler.adjust_async_worker_count( | ||||
|                         new_count=new_worker_count, | ||||
|                         update_q=update_q, | ||||
|                         notification_q=notification_q, | ||||
|                         app=app, | ||||
|                         datastore=ds | ||||
|                     ) | ||||
|                      | ||||
|                     if result['status'] == 'success': | ||||
|                         flash(f"Worker count adjusted: {result['message']}", 'notice') | ||||
|                     elif result['status'] == 'not_supported': | ||||
|                         flash("Dynamic worker adjustment not supported for sync workers", 'warning') | ||||
|                     elif result['status'] == 'error': | ||||
|                         flash(f"Error adjusting workers: {result['message']}", 'error') | ||||
|  | ||||
|                 if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password): | ||||
|                     datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| {% extends 'base.html' %} | ||||
|  | ||||
| {% block content %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script> | ||||
|     const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}"; | ||||
| @@ -22,6 +22,7 @@ | ||||
|             <li class="tab"><a href="#notifications">Notifications</a></li> | ||||
|             <li class="tab"><a href="#fetching">Fetching</a></li> | ||||
|             <li class="tab"><a href="#filters">Global Filters</a></li> | ||||
|             <li class="tab"><a href="#ui-options">UI Options</a></li> | ||||
|             <li class="tab"><a href="#api">API</a></li> | ||||
|             <li class="tab"><a href="#timedate">Time & Date</a></li> | ||||
|             <li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li> | ||||
| @@ -74,18 +75,10 @@ | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.application.form.pager_size) }} | ||||
|                         <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.application.form.rss_content_format) }} | ||||
|                         <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.extract_title_as_title) }} | ||||
|                         <span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }} | ||||
|                         <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span> | ||||
| @@ -134,6 +127,12 @@ | ||||
|                         {{ render_field(form.application.form.webdriver_delay) }} | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_field(form.requests.form.workers) }} | ||||
|                     {% set worker_info = get_worker_status_info() %} | ||||
|                     <span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br> | ||||
|                     Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group inline-radio"> | ||||
|                     {{ render_field(form.requests.form.default_ua) }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
| @@ -196,7 +195,7 @@ nav | ||||
|  | ||||
|             <div class="tab-pane-inner" id="api"> | ||||
|                 <h4>API Access</h4> | ||||
|                 <p>Drive your changedetection.io via API, More about <a href="https://github.com/dgtlmoon/changedetection.io/wiki/API-Reference">API access here</a></p> | ||||
|                 <p>Drive your changedetection.io via API, More about <a href="https://changedetection.io/docs/api_v1/index.html">API access and examples here</a>.</p> | ||||
|  | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.api_access_token_enabled) }} | ||||
| @@ -217,7 +216,7 @@ nav | ||||
|                         <a id="chrome-extension-link" | ||||
|                            title="Try our new Chrome Extension!" | ||||
|                            href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop"> | ||||
|                             <img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}" alt="Chrome"> | ||||
|                             <img alt="Chrome store icon" src="{{ url_for('static_content', group='images', filename='google-chrome-icon.png') }}" alt="Chrome"> | ||||
|                             Chrome Webstore | ||||
|                         </a> | ||||
|                     </p> | ||||
| @@ -240,6 +239,28 @@ nav | ||||
|                     </p> | ||||
|                 </div> | ||||
|             </div> | ||||
|             <div class="tab-pane-inner" id="ui-options"> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }} | ||||
|                     <span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }} | ||||
|                     <span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }} | ||||
|                     <span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }} | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_field(form.application.form.pager_size) }} | ||||
|                     <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span> | ||||
|                 </div> | ||||
|  | ||||
|             </div> | ||||
|             <div class="tab-pane-inner" id="proxies"> | ||||
|                 <div id="recommended-proxy"> | ||||
|                     <div> | ||||
| @@ -302,8 +323,8 @@ nav | ||||
|             <div id="actions"> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_button(form.save_button) }} | ||||
|                     <a href="{{url_for('watchlist.index')}}" class="pure-button button-small button-cancel">Back</a> | ||||
|                     <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a> | ||||
|                     <a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">Back</a> | ||||
|                     <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">Clear Snapshot History</a> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </form> | ||||
|   | ||||
| @@ -104,6 +104,9 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|             uuid = list(datastore.data['settings']['application']['tags'].keys()).pop() | ||||
|  | ||||
|         default = datastore.data['settings']['application']['tags'].get(uuid) | ||||
|         if not default: | ||||
|             flash("Tag not found", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         form = group_restock_settings_form( | ||||
|                                        formdata=request.form if request.method == 'POST' else None, | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| {% extends 'base.html' %} | ||||
| {% block content %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_ternary_field %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script> | ||||
|     const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="group-settings")}}"; | ||||
| @@ -13,6 +13,7 @@ | ||||
|     /*const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');*/ | ||||
| /*{% endif %}*/ | ||||
|  | ||||
| {% set has_tag_filters_extra='' %} | ||||
|  | ||||
| </script> | ||||
|  | ||||
| @@ -46,59 +47,12 @@ | ||||
|             </div> | ||||
|  | ||||
|             <div class="tab-pane-inner" id="filters-and-triggers"> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% set field = render_field(form.include_filters, | ||||
|                             rows=5, | ||||
|                             placeholder="#example | ||||
| xpath://body/div/span[contains(@class, 'example-class')]", | ||||
|                             class="m-d") | ||||
|                         %} | ||||
|                         {{ field }} | ||||
|                         {% if '/text()' in  field %} | ||||
|                           <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br> | ||||
|                         {% endif %} | ||||
|                         <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br> | ||||
|                     <div data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</div> | ||||
|                     <ul id="advanced-help-selectors"> | ||||
|                         <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> | ||||
|                         <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed). | ||||
|                             <ul> | ||||
|                                 <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required,  <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li> | ||||
|                                 {% if jq_support %} | ||||
|                                 <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li> | ||||
|                                 {% else %} | ||||
|                                 <li>jq support not installed</li> | ||||
|                                 {% endif %} | ||||
|                             </ul> | ||||
|                         </li> | ||||
|                         <li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code> | ||||
|                             <ul> | ||||
|                                 <li>Example:  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a | ||||
|                                 href="http://xpather.com/" target="new">test your XPath here</a></li> | ||||
|                                 <li>Example: Get all titles from an RSS feed <code>//title/text()</code></li> | ||||
|                                 <li>To use XPath1.0: Prefix with <code>xpath1:</code></li> | ||||
|                             </ul> | ||||
|                             </li> | ||||
|                     </ul> | ||||
|                     Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a | ||||
|                                 href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br> | ||||
|                 </span> | ||||
|                     </div> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_field(form.subtractive_selectors, rows=5, placeholder="header | ||||
| footer | ||||
| nav | ||||
| .stockticker | ||||
| //*[contains(text(), 'Advertisement')]") }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                           <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li> | ||||
|                           <li> Don't paste HTML here, use only CSS and XPath selectors </li> | ||||
|                           <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li> | ||||
|                         </ul> | ||||
|                       </span> | ||||
|                 </fieldset> | ||||
|  | ||||
|                 <p>These settings are <strong><i>added</i></strong> to any existing watch configurations.</p> | ||||
|                 {% include "edit/include_subtract.html" %} | ||||
|                 <div class="text-filtering border-fieldset"> | ||||
|                     <h3>Text filtering</h3> | ||||
|                     {% include "edit/text-options.html" %} | ||||
|                 </div> | ||||
|             </div> | ||||
|  | ||||
|         {# rendered sub Template #} | ||||
| @@ -110,9 +64,9 @@ nav | ||||
|             <div class="tab-pane-inner" id="notifications"> | ||||
|                 <fieldset> | ||||
|                     <div  class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_muted) }} | ||||
|                       {{ render_ternary_field(form.notification_muted, BooleanField=True) }} | ||||
|                     </div> | ||||
|                     {% if is_html_webdriver %} | ||||
|                     {% if 1 %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_screenshot) }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|   | ||||
| @@ -10,7 +10,7 @@ | ||||
|             <legend>Add a new organisational tag</legend> | ||||
|             <div id="watch-add-wrapper-zone"> | ||||
|                 <div> | ||||
|                     {{ render_simple_field(form.name, placeholder="watch label / tag") }} | ||||
|                     {{ render_simple_field(form.name, placeholder="Watch group / tag") }} | ||||
|                 </div> | ||||
|                 <div> | ||||
|                     {{ render_simple_field(form.save_button, title="Save" ) }} | ||||
|   | ||||
| @@ -1,14 +1,112 @@ | ||||
| import time | ||||
| from flask import Blueprint, request, redirect, url_for, flash, render_template, session | ||||
| from loguru import logger | ||||
| from functools import wraps | ||||
|  | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint | ||||
| from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint | ||||
| from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint | ||||
|  | ||||
| def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_update_threads, queuedWatchMetaData): | ||||
| def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True): | ||||
|     from flask import request, flash | ||||
|  | ||||
|     if op == 'delete': | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.delete(uuid) | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches deleted") | ||||
|  | ||||
|     elif op == 'pause': | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.data['watching'][uuid]['paused'] = True | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches paused") | ||||
|  | ||||
|     elif op == 'unpause': | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.data['watching'][uuid.strip()]['paused'] = False | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches unpaused") | ||||
|  | ||||
|     elif (op == 'mark-viewed'): | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.set_last_viewed(uuid, int(time.time())) | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches updated") | ||||
|  | ||||
|     elif (op == 'mute'): | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.data['watching'][uuid]['notification_muted'] = True | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches muted") | ||||
|  | ||||
|     elif (op == 'unmute'): | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.data['watching'][uuid]['notification_muted'] = False | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches un-muted") | ||||
|  | ||||
|     elif (op == 'recheck'): | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 # Recheck and require a full reprocessing | ||||
|                 worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches queued for rechecking") | ||||
|  | ||||
|     elif (op == 'clear-errors'): | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.data['watching'][uuid]["last_error"] = False | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches errors cleared") | ||||
|  | ||||
|     elif (op == 'clear-history'): | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.clear_watch_history(uuid) | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches cleared/reset.") | ||||
|  | ||||
|     elif (op == 'notification-default'): | ||||
|         from changedetectionio.notification import ( | ||||
|             default_notification_format_for_watch | ||||
|         ) | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.data['watching'][uuid]['notification_title'] = None | ||||
|                 datastore.data['watching'][uuid]['notification_body'] = None | ||||
|                 datastore.data['watching'][uuid]['notification_urls'] = [] | ||||
|                 datastore.data['watching'][uuid]['notification_format'] = default_notification_format_for_watch | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches set to use default notification settings") | ||||
|  | ||||
|     elif (op == 'assign-tag'): | ||||
|         op_extradata = extra_data | ||||
|         if op_extradata: | ||||
|             tag_uuid = datastore.add_tag(title=op_extradata) | ||||
|             if op_extradata and tag_uuid: | ||||
|                 for uuid in uuids: | ||||
|                     if datastore.data['watching'].get(uuid): | ||||
|                         # Bug in old versions caused by bad edit page/tag handler | ||||
|                         if isinstance(datastore.data['watching'][uuid]['tags'], str): | ||||
|                             datastore.data['watching'][uuid]['tags'] = [] | ||||
|  | ||||
|                         datastore.data['watching'][uuid]['tags'].append(tag_uuid) | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches were tagged") | ||||
|  | ||||
|     if uuids: | ||||
|         for uuid in uuids: | ||||
|             watch_check_update.send(watch_uuid=uuid) | ||||
|  | ||||
| def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update): | ||||
|     ui_blueprint = Blueprint('ui', __name__, template_folder="templates") | ||||
|      | ||||
|     # Register the edit blueprint | ||||
| @@ -20,9 +118,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat | ||||
|     ui_blueprint.register_blueprint(notification_blueprint) | ||||
|      | ||||
|     # Register the views blueprint | ||||
|     views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData) | ||||
|     views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData, watch_check_update) | ||||
|     ui_blueprint.register_blueprint(views_blueprint) | ||||
|      | ||||
|  | ||||
|     # Import the login decorator | ||||
|     from changedetectionio.auth_decorator import login_optionally_required | ||||
|  | ||||
| @@ -35,7 +133,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat | ||||
|             flash('Watch not found', 'error') | ||||
|         else: | ||||
|             flash("Cleared snapshot history for watch {}".format(uuid)) | ||||
|  | ||||
|         return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|     @ui_blueprint.route("/clear_history", methods=['GET', 'POST']) | ||||
| @@ -47,7 +144,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat | ||||
|             if confirmtext == 'clear': | ||||
|                 for uuid in datastore.data['watching'].keys(): | ||||
|                     datastore.clear_watch_history(uuid) | ||||
|  | ||||
|                 flash("Cleared snapshot history for all watches") | ||||
|             else: | ||||
|                 flash('Incorrect confirmation text.', 'error') | ||||
| @@ -63,12 +159,20 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat | ||||
|     def mark_all_viewed(): | ||||
|         # Save the current newest history as the most recently viewed | ||||
|         with_errors = request.args.get('with_errors') == "1" | ||||
|         tag_limit = request.args.get('tag') | ||||
|         logger.debug(f"Limiting to tag {tag_limit}") | ||||
|         now = int(time.time()) | ||||
|         for watch_uuid, watch in datastore.data['watching'].items(): | ||||
|             if with_errors and not watch.get('last_error'): | ||||
|                 continue | ||||
|             datastore.set_last_viewed(watch_uuid, int(time.time())) | ||||
|  | ||||
|         return redirect(url_for('watchlist.index')) | ||||
|             if tag_limit and ( not watch.get('tags') or tag_limit not in watch['tags'] ): | ||||
|                 logger.debug(f"Skipping watch {watch_uuid}") | ||||
|                 continue | ||||
|  | ||||
|             datastore.set_last_viewed(watch_uuid, now) | ||||
|  | ||||
|         return redirect(url_for('watchlist.index', tag=tag_limit)) | ||||
|  | ||||
|     @ui_blueprint.route("/delete", methods=['GET']) | ||||
|     @login_optionally_required | ||||
| @@ -98,7 +202,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat | ||||
|         new_uuid = datastore.clone(uuid) | ||||
|  | ||||
|         if not datastore.data['watching'].get(uuid).get('paused'): | ||||
|             update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid})) | ||||
|             worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid})) | ||||
|  | ||||
|         flash('Cloned, you are editing the new watch.') | ||||
|  | ||||
| @@ -114,18 +218,19 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat | ||||
|  | ||||
|         i = 0 | ||||
|  | ||||
|         running_uuids = [] | ||||
|         for t in running_update_threads: | ||||
|             running_uuids.append(t.current_uuid) | ||||
|         running_uuids = worker_handler.get_running_uuids() | ||||
|  | ||||
|         if uuid: | ||||
|             if uuid not in running_uuids: | ||||
|                 update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|                 worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|                 i += 1 | ||||
|  | ||||
|         else: | ||||
|             # Recheck all, including muted | ||||
|             for watch_uuid, watch in datastore.data['watching'].items(): | ||||
|             # Get most overdue first | ||||
|             for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)): | ||||
|                 watch_uuid = k[0] | ||||
|                 watch = k[1] | ||||
|                 if not watch['paused']: | ||||
|                     if watch_uuid not in running_uuids: | ||||
|                         if with_errors and not watch.get('last_error'): | ||||
| @@ -134,13 +239,13 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat | ||||
|                         if tag != None and tag not in watch['tags']: | ||||
|                             continue | ||||
|  | ||||
|                         update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})) | ||||
|                         worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})) | ||||
|                         i += 1 | ||||
|  | ||||
|         if i == 1: | ||||
|             flash("Queued 1 watch for rechecking.") | ||||
|         if i > 1: | ||||
|             flash("Queued {} watches for rechecking.".format(i)) | ||||
|             flash(f"Queued {i} watches for rechecking.") | ||||
|         if i == 0: | ||||
|             flash("No watches available to recheck.") | ||||
|  | ||||
| @@ -150,100 +255,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat | ||||
|     @login_optionally_required | ||||
|     def form_watch_list_checkbox_operations(): | ||||
|         op = request.form['op'] | ||||
|         uuids = request.form.getlist('uuids') | ||||
|  | ||||
|         if (op == 'delete'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     datastore.delete(uuid.strip()) | ||||
|             flash("{} watches deleted".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'pause'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     datastore.data['watching'][uuid.strip()]['paused'] = True | ||||
|             flash("{} watches paused".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'unpause'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     datastore.data['watching'][uuid.strip()]['paused'] = False | ||||
|             flash("{} watches unpaused".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'mark-viewed'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     datastore.set_last_viewed(uuid, int(time.time())) | ||||
|             flash("{} watches updated".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'mute'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     datastore.data['watching'][uuid.strip()]['notification_muted'] = True | ||||
|             flash("{} watches muted".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'unmute'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     datastore.data['watching'][uuid.strip()]['notification_muted'] = False | ||||
|             flash("{} watches un-muted".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'recheck'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     # Recheck and require a full reprocessing | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|             flash("{} watches queued for rechecking".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'clear-errors'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     datastore.data['watching'][uuid]["last_error"] = False | ||||
|             flash(f"{len(uuids)} watches errors cleared") | ||||
|  | ||||
|         elif (op == 'clear-history'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     datastore.clear_watch_history(uuid) | ||||
|             flash("{} watches cleared/reset.".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'notification-default'): | ||||
|             from changedetectionio.notification import ( | ||||
|                 default_notification_format_for_watch | ||||
|             ) | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     datastore.data['watching'][uuid.strip()]['notification_title'] = None | ||||
|                     datastore.data['watching'][uuid.strip()]['notification_body'] = None | ||||
|                     datastore.data['watching'][uuid.strip()]['notification_urls'] = [] | ||||
|                     datastore.data['watching'][uuid.strip()]['notification_format'] = default_notification_format_for_watch | ||||
|             flash("{} watches set to use default notification settings".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'assign-tag'): | ||||
|             op_extradata = request.form.get('op_extradata', '').strip() | ||||
|             if op_extradata: | ||||
|                 tag_uuid = datastore.add_tag(title=op_extradata) | ||||
|                 if op_extradata and tag_uuid: | ||||
|                     for uuid in uuids: | ||||
|                         uuid = uuid.strip() | ||||
|                         if datastore.data['watching'].get(uuid): | ||||
|                             # Bug in old versions caused by bad edit page/tag handler | ||||
|                             if isinstance(datastore.data['watching'][uuid]['tags'], str): | ||||
|                                 datastore.data['watching'][uuid]['tags'] = [] | ||||
|  | ||||
|                             datastore.data['watching'][uuid]['tags'].append(tag_uuid) | ||||
|  | ||||
|             flash(f"{len(uuids)} watches were tagged") | ||||
|         uuids = [u.strip() for u in request.form.getlist('uuids') if u] | ||||
|         extra_data = request.form.get('op_extradata', '').strip() | ||||
|         _handle_operations( | ||||
|             datastore=datastore, | ||||
|             extra_data=extra_data, | ||||
|             queuedWatchMetaData=queuedWatchMetaData, | ||||
|             uuids=uuids, | ||||
|             worker_handler=worker_handler, | ||||
|             update_q=update_q, | ||||
|             watch_check_update=watch_check_update, | ||||
|             op=op, | ||||
|         ) | ||||
|  | ||||
|         return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|   | ||||
| @@ -9,6 +9,7 @@ from jinja2 import Environment, FileSystemLoader | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
| from changedetectionio.time_handler import is_within_schedule | ||||
| from changedetectionio import worker_handler | ||||
|  | ||||
| def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData): | ||||
|     edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates") | ||||
| @@ -201,7 +202,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             ############################# | ||||
|             if not datastore.data['watching'][uuid].get('paused') and is_in_schedule: | ||||
|                 # Queue the watch for immediate recheck, with a higher priority | ||||
|                 update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|                 worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|  | ||||
|             # Diff page [edit] link should go back to diff page | ||||
|             if request.args.get("next") and request.args.get("next") == 'diff': | ||||
| @@ -213,9 +214,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             if request.method == 'POST' and not form.validate(): | ||||
|                 flash("An error occurred, please see below.", "error") | ||||
|  | ||||
|             visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid) | ||||
|  | ||||
|  | ||||
|             # JQ is difficult to install on windows and must be manually added (outside requirements.txt) | ||||
|             jq_support = True | ||||
|             try: | ||||
| @@ -225,21 +223,26 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|  | ||||
|             watch = datastore.data['watching'].get(uuid) | ||||
|  | ||||
|             # if system or watch is configured to need a chrome type browser | ||||
|             system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' | ||||
|  | ||||
|             watch_uses_webdriver = False | ||||
|             watch_needs_selenium_or_playwright = False | ||||
|             if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): | ||||
|                 watch_uses_webdriver = True | ||||
|                 watch_needs_selenium_or_playwright = True | ||||
|  | ||||
|  | ||||
|             from zoneinfo import available_timezones | ||||
|  | ||||
|             # Only works reliably with Playwright | ||||
|  | ||||
|             # Import the global plugin system | ||||
|             from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras | ||||
|              | ||||
|             template_args = { | ||||
|                 'available_processors': processors.available_processors(), | ||||
|                 'available_timezones': sorted(available_timezones()), | ||||
|                 'browser_steps_config': browser_step_ui_config, | ||||
|                 'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), | ||||
|                 'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '', | ||||
|                 'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(), | ||||
|                 'extra_processor_config': form.extra_tab_content(), | ||||
|                 'extra_title': f" - Edit - {watch.label}", | ||||
| @@ -247,14 +250,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                 'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False, | ||||
|                 'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0, | ||||
|                 'has_special_tag_options': _watch_has_tag_options_set(watch=watch), | ||||
|                 'watch_uses_webdriver': watch_uses_webdriver, | ||||
|                 'jq_support': jq_support, | ||||
|                 'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), | ||||
|                 'settings_application': datastore.data['settings']['application'], | ||||
|                 'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'), | ||||
|                 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'), | ||||
|                 'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch), | ||||
|                 'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid), | ||||
|                 'timezone_default_config': datastore.data['settings']['application'].get('timezone'), | ||||
|                 'using_global_webdriver_wait': not default['webdriver_delay'], | ||||
|                 'uuid': uuid, | ||||
|                 'watch': watch | ||||
|                 'watch': watch, | ||||
|                 'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright, | ||||
|             } | ||||
|  | ||||
|             included_content = None | ||||
|   | ||||
| @@ -4,7 +4,6 @@ from loguru import logger | ||||
|  | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
| from changedetectionio.notification import process_notification | ||||
|  | ||||
| def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     notification_blueprint = Blueprint('ui_notification', __name__, template_folder="../ui/templates") | ||||
| @@ -18,8 +17,11 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|  | ||||
|         # Watch_uuid could be unset in the case it`s used in tag editor, global settings | ||||
|         import apprise | ||||
|         from ...apprise_plugin.assets import apprise_asset | ||||
|         from ...apprise_plugin.custom_handlers import apprise_http_custom_handler  # noqa: F401 | ||||
|         from changedetectionio.notification.handler import process_notification | ||||
|         from changedetectionio.notification.apprise_plugin.assets import apprise_asset | ||||
|  | ||||
|         from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler | ||||
|  | ||||
|         apobj = apprise.Apprise(asset=apprise_asset) | ||||
|  | ||||
|         is_global_settings_form = request.args.get('mode', '') == 'global-settings' | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| {% extends 'base.html' %} | ||||
| {% block content %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning, render_fieldlist_of_formfields_as_table %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script> | ||||
| @@ -72,15 +72,16 @@ | ||||
|                         <div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div> | ||||
|                         <div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.tags) }} | ||||
|                         <span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                         {{ render_field(form.processor) }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.title, class="m-d") }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.tags) }} | ||||
|                         <span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span> | ||||
|                         {{ render_field(form.title, class="m-d", placeholder=watch.label) }} | ||||
|                         <span class="pure-form-message-inline">Automatically uses the page title if found, you can also use your own title/description here</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group time-between-check border-fieldset"> | ||||
| 
 | ||||
| @@ -101,15 +102,16 @@ | ||||
|                         </div> | ||||
| <br> | ||||
|               </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.extract_title_as_title) }} | ||||
|                     </div> | ||||
| 
 | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.filter_failure_notification_send) }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                          Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore. | ||||
|                         </span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_ternary_field(form.use_page_title_in_list) }} | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|             </div> | ||||
| 
 | ||||
| @@ -204,7 +206,9 @@ Math: {{ 1 + 1 }}") }} | ||||
|             </div> | ||||
| 
 | ||||
|             <div class="tab-pane-inner" id="browser-steps"> | ||||
|             {% if playwright_enabled and watch_uses_webdriver %} | ||||
|             {% if watch_needs_selenium_or_playwright %} | ||||
|                 {# Only works with playwright #} | ||||
|                 {% if system_has_playwright_configured %} | ||||
|                 <img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality"> | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
| @@ -223,7 +227,6 @@ Math: {{ 1 + 1 }}") }} | ||||
|                         <div class="flex-wrapper" > | ||||
| 
 | ||||
|                             <div id="browser-steps-ui" class="noselect"> | ||||
| 
 | ||||
|                                 <div class="noselect"  id="browsersteps-selector-wrapper" style="width: 100%"> | ||||
|                                     <span class="loader" > | ||||
|                                         <span id="browsersteps-click-start"> | ||||
| @@ -238,31 +241,32 @@ Math: {{ 1 + 1 }}") }} | ||||
|                                 </div> | ||||
|                             </div> | ||||
|                             <div id="browser-steps-fieldlist" > | ||||
|                                 <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span> | ||||
|                                 <span id="browser-seconds-remaining">Press "Play" to start.</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span> | ||||
|                                 {{ render_field(form.browser_steps) }} | ||||
|                             </div> | ||||
|                         </div> | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|                 {% else %} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         {% if not watch_uses_webdriver %} | ||||
|                             {{ only_webdriver_type_watches_warning() }} | ||||
|                         {% endif %} | ||||
|                         {%  if not playwright_enabled %} | ||||
|                             {{ playwright_warning() }} | ||||
|                         {% endif %} | ||||
|                     </span> | ||||
|                     {# it's configured to use selenium or chrome but system says its not configured #} | ||||
|                     {{ playwright_warning() }} | ||||
|                     {% if system_has_webdriver_configured %} | ||||
|                         <strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong> | ||||
|                     {% endif %} | ||||
|                 {% endif %} | ||||
|             {% else %} | ||||
|                 {# "This functionality needs chrome.." #} | ||||
|                 {{ only_playwright_type_watches_warning() }} | ||||
|             {% endif %} | ||||
|             </div> | ||||
| 
 | ||||
| 
 | ||||
|             <div class="tab-pane-inner" id="notifications"> | ||||
|                 <fieldset> | ||||
|                     <div  class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_muted) }} | ||||
|                       {{ render_ternary_field(form.notification_muted, BooleanField=true) }} | ||||
|                     </div> | ||||
|                     {% if watch_uses_webdriver %} | ||||
|                     {% if watch_needs_selenium_or_playwright %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_screenshot) }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
| @@ -289,25 +293,13 @@ Math: {{ 1 + 1 }}") }} | ||||
|                     <script> | ||||
|                         const verify_condition_rule_url="{{url_for('conditions.verify_condition_single_rule', watch_uuid=uuid)}}"; | ||||
|                     </script> | ||||
|                 <style> | ||||
|                     .verifyRuleRow { | ||||
|                         background-color: #4caf50; | ||||
|                         color: white; | ||||
|                         border: none; | ||||
|                         cursor: pointer; | ||||
|                         font-weight: bold; | ||||
|                     } | ||||
|                     .verifyRuleRow:hover { | ||||
|                         background-color: #45a049; | ||||
|                     } | ||||
|                 </style> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_field(form.conditions_match_logic) }} | ||||
|                     {{ render_fieldlist_of_formfields_as_table(form.conditions) }} | ||||
|                     {{ render_conditions_fieldlist_of_formfields_as_table(form.conditions) }} | ||||
|                     <div class="pure-form-message-inline"> | ||||
| 
 | ||||
|                         <p id="verify-state-text">Use the verify (✓) button to test if a condition passes against the current snapshot.</p> | ||||
|                         Did you know that <strong>conditions</strong> can be extended with your own custom plugin? tutorials coming soon!<br> | ||||
|                        Read a quick tutorial about <a href="https://changedetection.io/tutorial/conditional-actions-web-page-changes">using conditional web page changes here</a>.<br> | ||||
|                     </div> | ||||
|                 </div> | ||||
|             </div> | ||||
| @@ -326,61 +318,8 @@ Math: {{ 1 + 1 }}") }} | ||||
|                                 </li> | ||||
|                             </ul> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% set field = render_field(form.include_filters, | ||||
|                             rows=5, | ||||
|                             placeholder=has_tag_filters_extra+"#example | ||||
| xpath://body/div/span[contains(@class, 'example-class')]", | ||||
|                             class="m-d") | ||||
|                         %} | ||||
|                         {{ field }} | ||||
|                         {% if '/text()' in  field %} | ||||
|                           <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br> | ||||
|                         {% endif %} | ||||
|                         <span class="pure-form-message-inline">One CSS, xPath, JSON Path/JQ selector per line, <i>any</i> rules that matches will be used.<br> | ||||
|                         <span data-target="#advanced-help-selectors" class="toggle-show pure-button button-tag button-xsmall">Show advanced help and tips</span><br> | ||||
|                     <ul id="advanced-help-selectors" style="display: none;"> | ||||
|                         <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> | ||||
|                         <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed). | ||||
|                             <ul> | ||||
|                                 <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required,  <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li> | ||||
|                                 {% if jq_support %} | ||||
|                                 <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>. Prefix <code>jqraw:</code> outputs the results as text instead of a JSON list.</li> | ||||
|                                 {% else %} | ||||
|                                 <li>jq support not installed</li> | ||||
|                                 {% endif %} | ||||
|                             </ul> | ||||
|                         </li> | ||||
|                         <li>XPath - Limit text to this XPath rule, simply start with a forward-slash. To specify XPath to be used explicitly or the XPath rule starts with an XPath function: Prefix with <code>xpath:</code> | ||||
|                             <ul> | ||||
|                                 <li>Example:  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath:count(//*[contains(@class, 'sametext')])</code>, <a | ||||
|                                 href="http://xpather.com/" target="new">test your XPath here</a></li> | ||||
|                                 <li>Example: Get all titles from an RSS feed <code>//title/text()</code></li> | ||||
|                                 <li>To use XPath1.0: Prefix with <code>xpath1:</code></li> | ||||
|                             </ul> | ||||
|                             </li> | ||||
|                     <li> | ||||
|                         Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a | ||||
|                                 href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br> | ||||
|                     </li> | ||||
|                     </ul> | ||||
| 
 | ||||
|                 </span> | ||||
|                     </div> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header | ||||
| footer | ||||
| nav | ||||
| .stockticker | ||||
| //*[contains(text(), 'Advertisement')]") }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                           <li> Remove HTML element(s) by CSS and XPath selectors before text conversion. </li> | ||||
|                           <li> Don't paste HTML here, use only CSS and XPath selectors </li> | ||||
|                           <li> Add multiple elements, CSS or XPath selectors per line to ignore multiple parts of the HTML. </li> | ||||
|                         </ul> | ||||
|                       </span> | ||||
|                 </fieldset> | ||||
| {% include "edit/include_subtract.html" %} | ||||
|                 <div class="text-filtering border-fieldset"> | ||||
|                 <fieldset class="pure-group" id="text-filtering-type-options"> | ||||
|                     <h3>Text filtering</h3> | ||||
| @@ -408,76 +347,9 @@ nav | ||||
|                     {{ render_checkbox_field(form.trim_text_whitespace) }} | ||||
|                     <span class="pure-form-message-inline">Remove any whitespace before and after each line of text</span> | ||||
|                 </fieldset> | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.trigger_text, rows=5, placeholder="Some text to wait for in a line | ||||
| /some.regex\d{2}/ for case-INsensitive regex | ||||
| ") }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                     <ul> | ||||
|                         <li>Text to wait for before triggering a change/notification, all text and regex are tested <i>case-insensitive</i>.</li> | ||||
|                         <li>Trigger text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li> | ||||
|                         <li>Each line is processed separately (think of each line as "OR")</li> | ||||
|                         <li>Note: Wrap in forward slash / to use regex  example: <code>/foo\d/</code></li> | ||||
|                     </ul> | ||||
|                         </span> | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|                 <fieldset class="pure-group"> | ||||
|                     {{ render_field(form.ignore_text, rows=5, placeholder="Some text to ignore in a line | ||||
| /some.regex\d{2}/ for case-INsensitive regex | ||||
| ") }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                             <li>Matching text will be <strong>ignored</strong> in the text snapshot (you can still see it, but it won't trigger a change)</li> | ||||
|                             <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> | ||||
|                             <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li> | ||||
|                             <li>Changing this will affect the comparison checksum which may trigger an alert</li> | ||||
|                         </ul> | ||||
|                 </span> | ||||
| 
 | ||||
|                 </fieldset> | ||||
| 
 | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock | ||||
| Sold out | ||||
| Not in stock | ||||
| Unavailable") }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                             <ul> | ||||
|                                 <li>Block change-detection while this text is on the page, all text and regex are tested <i>case-insensitive</i>, good for waiting for when a product is available again</li> | ||||
|                                 <li>Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li> | ||||
|                                 <li>All lines here must not exist (think of each line as "OR")</li> | ||||
|                                 <li>Note: Wrap in forward slash / to use regex  example: <code>/foo\d/</code></li> | ||||
|                             </ul> | ||||
|                         </span> | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.extract_text, rows=5, placeholder="/.+?\d+ comments.+?/ | ||||
|  or | ||||
| keyword") }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                     <ul> | ||||
|                         <li>Extracts text in the final output (line by line) after other filters using regular expressions or string match; | ||||
|                             <ul> | ||||
|                                 <li>Regular expression ‐ example <code>/reports.+?2022/i</code></li> | ||||
|                                 <li>Don't forget to consider the white-space at the start of a line <code>/.+?reports.+?2022/i</code></li> | ||||
|                                 <li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li> | ||||
|                                 <li>Keyword ‐ example <code>Out of stock</code></li> | ||||
|                                 <li>Use groups to extract just that text ‐ example <code>/reports.+?(\d+)/i</code> returns a list of years only</li> | ||||
|                                 <li>Example - match lines containing a keyword <code>/.*icecream.*/</code></li> | ||||
|                             </ul> | ||||
|                         </li> | ||||
|                         <li>One line per regular-expression/string match</li> | ||||
|                     </ul> | ||||
|                         </span> | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|                 {% include "edit/text-options.html" %} | ||||
|                 </div> | ||||
|             </div> | ||||
|               </div> | ||||
|               <div id="text-preview" style="display: none;" > | ||||
|                     <script> | ||||
|                         const preview_text_edit_filters_url="{{url_for('ui.ui_edit.watch_get_preview_rendered', uuid=uuid)}}"; | ||||
| @@ -511,13 +383,15 @@ keyword") }} | ||||
| 
 | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% if playwright_enabled and watch_uses_webdriver %} | ||||
|                         {% if watch_needs_selenium_or_playwright %} | ||||
|                             {% if system_has_playwright_configured %} | ||||
|                             <span class="pure-form-message-inline" id="visual-selector-heading"> | ||||
|                                 The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items. | ||||
|                             </span> | ||||
| 
 | ||||
|                             <div id="selector-header"> | ||||
|                                 <a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a> | ||||
|                                 <!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text --> | ||||
|                                 <i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i> | ||||
|                             </div> | ||||
|                             <div id="selector-wrapper" style="display: none"> | ||||
| @@ -529,13 +403,16 @@ keyword") }} | ||||
|                             </div> | ||||
|                             <div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong> <span class="text">Loading...</span></div> | ||||
|                         {% else %} | ||||
|                             {% if not watch_uses_webdriver %} | ||||
|                                 {{ only_webdriver_type_watches_warning() }} | ||||
|                             {% endif %} | ||||
|                             {% if not playwright_enabled %} | ||||
|                                 {{ playwright_warning() }} | ||||
|                             {% endif %} | ||||
|                             {# The watch needed chrome but system says that playwright is not ready #} | ||||
|                             {{ playwright_warning() }} | ||||
|                         {% endif %} | ||||
|                             {% if system_has_webdriver_configured %} | ||||
|                                 <strong>Selenium/Webdriver can't be used here because it won't fetch screenshots reliably.</strong> | ||||
|                             {% endif %} | ||||
|                     {% else %} | ||||
|                         {# "This functionality needs chrome.." #} | ||||
|                         {{ only_playwright_type_watches_warning() }} | ||||
|                     {% endif %} | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|             </div> | ||||
| @@ -575,6 +452,13 @@ keyword") }} | ||||
|                         </tr> | ||||
|                         </tbody> | ||||
|                     </table> | ||||
| 
 | ||||
|                     {% if ui_edit_stats_extras %} | ||||
|                     <div class="plugin-stats-extras"> <!-- from pluggy plugin --> | ||||
|                         {{ ui_edit_stats_extras|safe }} | ||||
|                     </div> | ||||
|                     {% endif %} | ||||
| 
 | ||||
|                     {% if watch.history_n %} | ||||
|                         <p> | ||||
|                              <a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a> | ||||
| @@ -587,11 +471,11 @@ keyword") }} | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_button(form.save_button) }} | ||||
|                     <a href="{{url_for('ui.form_delete', uuid=uuid)}}" | ||||
|                        class="pure-button button-small button-error ">Delete</a> | ||||
|                        class="pure-button button-error ">Delete</a> | ||||
|                     {% if watch.history_n %}<a href="{{url_for('ui.clear_watch_history', uuid=uuid)}}" | ||||
|                        class="pure-button button-small button-error ">Clear History</a>{% endif %} | ||||
|                        class="pure-button button-error">Clear History</a>{% endif %} | ||||
|                     <a href="{{url_for('ui.form_clone', uuid=uuid)}}" | ||||
|                        class="pure-button button-small ">Clone & Edit</a> | ||||
|                        class="pure-button">Clone & Edit</a> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </form> | ||||
| @@ -1,14 +1,14 @@ | ||||
| from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort | ||||
| from flask_login import current_user | ||||
| import os | ||||
| import time | ||||
| from copy import deepcopy | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
| from changedetectionio import html_tools | ||||
| from changedetectionio import worker_handler | ||||
|  | ||||
| def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData): | ||||
| def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update): | ||||
|     views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates") | ||||
|      | ||||
|     @views_blueprint.route("/preview/<string:uuid>", methods=['GET']) | ||||
| @@ -77,9 +77,46 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|  | ||||
|         return output | ||||
|  | ||||
|     @views_blueprint.route("/diff/<string:uuid>", methods=['GET', 'POST']) | ||||
|     @views_blueprint.route("/diff/<string:uuid>", methods=['POST']) | ||||
|     @login_optionally_required | ||||
|     def diff_history_page(uuid): | ||||
|     def diff_history_page_build_report(uuid): | ||||
|         from changedetectionio import forms | ||||
|  | ||||
|         # More for testing, possible to return the first/only | ||||
|         if uuid == 'first': | ||||
|             uuid = list(datastore.data['watching'].keys()).pop() | ||||
|  | ||||
|         try: | ||||
|             watch = datastore.data['watching'][uuid] | ||||
|         except KeyError: | ||||
|             flash("No history found for the specified link, bad link?", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         # For submission of requesting an extract | ||||
|         extract_form = forms.extractDataForm(formdata=request.form, | ||||
|                                              data={'extract_regex': request.form.get('extract_regex', '')} | ||||
|                                              ) | ||||
|         if not extract_form.validate(): | ||||
|             flash("An error occurred, please see below.", "error") | ||||
|             return _render_diff_template(uuid, extract_form) | ||||
|  | ||||
|         else: | ||||
|             extract_regex = request.form.get('extract_regex', '').strip() | ||||
|             output = watch.extract_regex_from_all_history(extract_regex) | ||||
|             if output: | ||||
|                 watch_dir = os.path.join(datastore.datastore_path, uuid) | ||||
|                 response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True)) | ||||
|                 response.headers['Content-type'] = 'text/csv' | ||||
|                 response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' | ||||
|                 response.headers['Pragma'] = 'no-cache' | ||||
|                 response.headers['Expires'] = "0" | ||||
|                 return response | ||||
|  | ||||
|             flash('No matches found while scanning all of the watch history for that RegEx.', 'error') | ||||
|         return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract') | ||||
|  | ||||
|     def _render_diff_template(uuid, extract_form=None): | ||||
|         """Helper function to render the diff template with all required data""" | ||||
|         from changedetectionio import forms | ||||
|  | ||||
|         # More for testing, possible to return the first/only | ||||
| @@ -93,62 +130,36 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             flash("No history found for the specified link, bad link?", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         # For submission of requesting an extract | ||||
|         extract_form = forms.extractDataForm(request.form) | ||||
|         if request.method == 'POST': | ||||
|             if not extract_form.validate(): | ||||
|                 flash("An error occurred, please see below.", "error") | ||||
|  | ||||
|             else: | ||||
|                 extract_regex = request.form.get('extract_regex').strip() | ||||
|                 output = watch.extract_regex_from_all_history(extract_regex) | ||||
|                 if output: | ||||
|                     watch_dir = os.path.join(datastore.datastore_path, uuid) | ||||
|                     response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True)) | ||||
|                     response.headers['Content-type'] = 'text/csv' | ||||
|                     response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' | ||||
|                     response.headers['Pragma'] = 'no-cache' | ||||
|                     response.headers['Expires'] = 0 | ||||
|                     return response | ||||
|  | ||||
|                 flash('Nothing matches that RegEx', 'error') | ||||
|                 redirect(url_for('ui_views.diff_history_page', uuid=uuid)+'#extract') | ||||
|         # Use provided form or create a new one | ||||
|         if extract_form is None: | ||||
|             extract_form = forms.extractDataForm(formdata=request.form, | ||||
|                                                  data={'extract_regex': request.form.get('extract_regex', '')} | ||||
|                                                  ) | ||||
|  | ||||
|         history = watch.history | ||||
|         dates = list(history.keys()) | ||||
|  | ||||
|         if len(dates) < 2: | ||||
|             flash("Not enough saved change detection snapshots to produce a report.", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|         # If a "from_version" was requested, then find it (or the closest one) | ||||
|         # Also set "from version" to be the closest version to the one that was last viewed. | ||||
|  | ||||
|         # Save the current newest history as the most recently viewed | ||||
|         datastore.set_last_viewed(uuid, time.time()) | ||||
|         best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed | ||||
|         from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2] | ||||
|         from_version = request.args.get('from_version', from_version_timestamp ) | ||||
|  | ||||
|         # Read as binary and force decode as UTF-8 | ||||
|         # Windows may fail decode in python if we just use 'r' mode (chardet decode exception) | ||||
|         from_version = request.args.get('from_version') | ||||
|         from_version_index = -2  # second newest | ||||
|         if from_version and from_version in dates: | ||||
|             from_version_index = dates.index(from_version) | ||||
|         else: | ||||
|             from_version = dates[from_version_index] | ||||
|         # Use the current one if nothing was specified | ||||
|         to_version = request.args.get('to_version', str(dates[-1])) | ||||
|  | ||||
|         try: | ||||
|             from_version_file_contents = watch.get_history_snapshot(dates[from_version_index]) | ||||
|             to_version_file_contents = watch.get_history_snapshot(timestamp=to_version) | ||||
|         except Exception as e: | ||||
|             from_version_file_contents = f"Unable to read to-version at index {dates[from_version_index]}.\n" | ||||
|  | ||||
|         to_version = request.args.get('to_version') | ||||
|         to_version_index = -1 | ||||
|         if to_version and to_version in dates: | ||||
|             to_version_index = dates.index(to_version) | ||||
|         else: | ||||
|             to_version = dates[to_version_index] | ||||
|             logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}") | ||||
|             to_version_file_contents = f"Unable to read to-version at {to_version}.\n" | ||||
|  | ||||
|         try: | ||||
|             to_version_file_contents = watch.get_history_snapshot(dates[to_version_index]) | ||||
|             from_version_file_contents = watch.get_history_snapshot(timestamp=from_version) | ||||
|         except Exception as e: | ||||
|             to_version_file_contents = "Unable to read to-version at index{}.\n".format(dates[to_version_index]) | ||||
|             logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}") | ||||
|             from_version_file_contents = f"Unable to read to-version {from_version}.\n" | ||||
|  | ||||
|         screenshot_url = watch.get_screenshot() | ||||
|  | ||||
| @@ -162,7 +173,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|         if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False): | ||||
|             password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access') | ||||
|  | ||||
|         output = render_template("diff.html", | ||||
|         datastore.set_last_viewed(uuid, time.time()) | ||||
|  | ||||
|         return render_template("diff.html", | ||||
|                                  current_diff_url=watch['url'], | ||||
|                                  from_version=str(from_version), | ||||
|                                  to_version=str(to_version), | ||||
| @@ -185,7 +198,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                                  watch_a=watch | ||||
|                                  ) | ||||
|  | ||||
|         return output | ||||
|     @views_blueprint.route("/diff/<string:uuid>", methods=['GET']) | ||||
|     @login_optionally_required | ||||
|     def diff_history_page(uuid): | ||||
|         return _render_diff_template(uuid) | ||||
|  | ||||
|     @views_blueprint.route("/form/add/quickwatch", methods=['POST']) | ||||
|     @login_optionally_required | ||||
| @@ -212,7 +228,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                 return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag'))) | ||||
|             else: | ||||
|                 # Straight into the queue. | ||||
|                 update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) | ||||
|                 worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) | ||||
|                 flash("Watch added.") | ||||
|  | ||||
|         return redirect(url_for('watchlist.index', tag=request.args.get('tag',''))) | ||||
|   | ||||
| @@ -44,12 +44,16 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|         # Sort by last_changed and add the uuid which is usually the key.. | ||||
|         sorted_watches = [] | ||||
|         with_errors = request.args.get('with_errors') == "1" | ||||
|         unread_only = request.args.get('unread') == "1" | ||||
|         errored_count = 0 | ||||
|         search_q = request.args.get('q').strip().lower() if request.args.get('q') else False | ||||
|         for uuid, watch in datastore.data['watching'].items(): | ||||
|             if with_errors and not watch.get('last_error'): | ||||
|                 continue | ||||
|  | ||||
|             if unread_only and (watch.viewed or watch.last_changed == 0) : | ||||
|                 continue | ||||
|  | ||||
|             if active_tag_uuid and not active_tag_uuid in watch['tags']: | ||||
|                     continue | ||||
|             if watch.get('last_error'): | ||||
| @@ -72,31 +76,32 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                                 per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic") | ||||
|  | ||||
|         sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title']) | ||||
|  | ||||
|         output = render_template( | ||||
|             "watch-overview.html", | ||||
|                                  active_tag=active_tag, | ||||
|                                  active_tag_uuid=active_tag_uuid, | ||||
|                                  app_rss_token=datastore.data['settings']['application'].get('rss_access_token'), | ||||
|                                  datastore=datastore, | ||||
|                                  errored_count=errored_count, | ||||
|                                  form=form, | ||||
|                                  guid=datastore.data['app_guid'], | ||||
|                                  has_proxies=datastore.proxy_list, | ||||
|                                  has_unviewed=datastore.has_unviewed, | ||||
|                                  hosted_sticky=os.getenv("SALTED_PASS", False) == False, | ||||
|                                  now_time_server=time.time(), | ||||
|                                  pagination=pagination, | ||||
|                                  queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue], | ||||
|                                  search_q=request.args.get('q', '').strip(), | ||||
|                                  sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'), | ||||
|                                  sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'), | ||||
|                                  system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'), | ||||
|                                  tags=sorted_tags, | ||||
|                                  watches=sorted_watches | ||||
|                                  ) | ||||
|             active_tag=active_tag, | ||||
|             active_tag_uuid=active_tag_uuid, | ||||
|             app_rss_token=datastore.data['settings']['application'].get('rss_access_token'), | ||||
|             datastore=datastore, | ||||
|             errored_count=errored_count, | ||||
|             form=form, | ||||
|             guid=datastore.data['app_guid'], | ||||
|             has_proxies=datastore.proxy_list, | ||||
|             has_unviewed=datastore.has_unviewed, | ||||
|             hosted_sticky=os.getenv("SALTED_PASS", False) == False, | ||||
|             now_time_server=round(time.time()), | ||||
|             pagination=pagination, | ||||
|             queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue], | ||||
|             search_q=request.args.get('q', '').strip(), | ||||
|             sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'), | ||||
|             sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'), | ||||
|             system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'), | ||||
|             tags=sorted_tags, | ||||
|             watches=sorted_watches | ||||
|         ) | ||||
|  | ||||
|         if session.get('share-link'): | ||||
|             del(session['share-link']) | ||||
|             del (session['share-link']) | ||||
|  | ||||
|         resp = make_response(output) | ||||
|  | ||||
|   | ||||
| @@ -1,10 +1,16 @@ | ||||
| {% extends 'base.html' %} | ||||
| {% block content %} | ||||
| {% from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title %} | ||||
| {%- extends 'base.html' -%} | ||||
| {%- block content -%} | ||||
| {%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%} | ||||
| <script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script> | ||||
| <script>let nowtimeserver={{ now_time_server }};</script> | ||||
|  | ||||
| <script>let favicon_baseURL="{{ url_for('static_content', group='favicon', filename="PLACEHOLDER")}}";</script> | ||||
| <script> | ||||
| // Initialize Feather icons after the page loads | ||||
| document.addEventListener('DOMContentLoaded', function() { | ||||
|     feather.replace(); | ||||
| }); | ||||
| </script> | ||||
| <style> | ||||
| .checking-now .last-checked { | ||||
|     background-image: linear-gradient(to bottom, transparent 0%, rgba(0,0,0,0.05) 40%, rgba(0,0,0,0.1) 100%); | ||||
| @@ -13,19 +19,20 @@ | ||||
|     transition: background-size 0.9s ease | ||||
| } | ||||
| </style> | ||||
| <div class="box"> | ||||
| <div class="box" id="form-quick-watch-add"> | ||||
|  | ||||
|     <form class="pure-form" action="{{ url_for('ui.ui_views.form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form"> | ||||
|         <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" > | ||||
|         <fieldset> | ||||
|             <legend>Add a new change detection watch</legend> | ||||
|             <legend>Add a new web page change detection watch</legend> | ||||
|             <div id="watch-add-wrapper-zone"> | ||||
|  | ||||
|                     {{ render_nolabel_field(form.url, placeholder="https://...", required=true) }} | ||||
|                     {{ render_nolabel_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="watch label / tag") }} | ||||
|                     {{ render_nolabel_field(form.watch_submit_button, title="Watch this URL!" ) }} | ||||
|                     {{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }} | ||||
|             </div> | ||||
|             <div id="watch-group-tag"> | ||||
|                {{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="Watch group / tag", class="transparent-field") }} | ||||
|             </div> | ||||
|             <div id="quick-watch-processor-type"> | ||||
|                 {{ render_simple_field(form.processor) }} | ||||
|             </div> | ||||
| @@ -33,219 +40,231 @@ | ||||
|         </fieldset> | ||||
|         <span style="color:#eee; font-size: 80%;"><img alt="Create a shareable link" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></span> | ||||
|     </form> | ||||
|  | ||||
| </div> | ||||
| <div class="box"> | ||||
|     <form class="pure-form" action="{{ url_for('ui.form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form"> | ||||
|     <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" > | ||||
|     <input type="hidden" id="op_extradata" name="op_extradata" value="" > | ||||
|     <div id="checkbox-operations"> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="pause">Pause</button> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="unpause">UnPause</button> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="mute">Mute</button> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="unmute">UnMute</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag">Tag</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed">Mark viewed</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors">Clear errors</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history">Clear/reset history</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete">Delete</button> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="pause"><i data-feather="pause" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Pause</button> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="unpause"><i data-feather="play" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnPause</button> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="mute"><i data-feather="volume-x" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mute</button> | ||||
|         <button class="pure-button button-secondary button-xsmall"  name="op" value="unmute"><i data-feather="volume-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnMute</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="recheck"><i data-feather="refresh-cw" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Recheck</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag"><i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Tag</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed"><i data-feather="eye" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mark viewed</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="notification-default"><i data-feather="bell" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Use default notification</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors"><i data-feather="x-circle" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear errors</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear/reset history</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Delete</button> | ||||
|     </div> | ||||
|     {% if watches|length >= pagination.per_page %} | ||||
|     {%- if watches|length >= pagination.per_page -%} | ||||
|         {{ pagination.info }} | ||||
|     {% endif %} | ||||
|     {% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %} | ||||
|     {%- endif -%} | ||||
|     {%- if search_q -%}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%} | ||||
|     <div> | ||||
|         <a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a> | ||||
|  | ||||
|     <!-- tag list --> | ||||
|     {% for uuid, tag in tags %} | ||||
|         {% if tag != "" %} | ||||
|     {%- for uuid, tag in tags -%} | ||||
|         {%- if tag != "" -%} | ||||
|             <a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a> | ||||
|         {% endif %} | ||||
|     {% endfor %} | ||||
|         {%- endif -%} | ||||
|     {%- endfor -%} | ||||
|     </div> | ||||
|  | ||||
|     {% set sort_order = sort_order or 'asc' %} | ||||
|     {% set sort_attribute = sort_attribute or 'last_changed'  %} | ||||
|     {% set pagination_page = request.args.get('page', 0) %} | ||||
|     {% set cols_required = 6 %} | ||||
|     {% set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") %} | ||||
|     {% if any_has_restock_price_processor %} | ||||
|         {% set cols_required = cols_required + 1 %} | ||||
|     {% endif %} | ||||
|     {%- set sort_order = sort_order or 'asc' -%} | ||||
|     {%- set sort_attribute = sort_attribute or 'last_changed'  -%} | ||||
|     {%- set pagination_page = request.args.get('page', 0) -%} | ||||
|     {%- set cols_required = 6 -%} | ||||
|     {%- set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") -%} | ||||
|     {%- if any_has_restock_price_processor -%} | ||||
|         {%- set cols_required = cols_required + 1 -%} | ||||
|     {%- endif -%} | ||||
|     {%- set ui_settings = datastore.data['settings']['application']['ui'] -%} | ||||
|  | ||||
|     <div id="watch-table-wrapper"> | ||||
|  | ||||
|         <table class="pure-table pure-table-striped watch-table"> | ||||
|         {%- set table_classes = [ | ||||
|             'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled', | ||||
|         ] -%} | ||||
|         <table class="pure-table pure-table-striped watch-table {{ table_classes | reject('equalto', '') | join(' ') }}"> | ||||
|             <thead> | ||||
|             <tr> | ||||
|                 {% set link_order = "desc" if sort_order  == 'asc' else "asc" %} | ||||
|                 {% set arrow_span = "" %} | ||||
|                 {%- set link_order = "desc" if sort_order  == 'asc' else "asc" -%} | ||||
|                 {%- set arrow_span = "" -%} | ||||
|                 <th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}"  href="{{url_for('watchlist.index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th> | ||||
|                 <th class="empty-cell"></th> | ||||
|                 <th> | ||||
|                     <a class="{{ 'active '+link_order if sort_attribute == 'paused' else 'inactive' }}" href="{{url_for('watchlist.index', sort='paused', order=link_order, tag=active_tag_uuid)}}"><i data-feather="pause" style="vertical-align: bottom; width: 14px; height: 14px;  margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a> | ||||
|                       | ||||
|                     <a class="{{ 'active '+link_order if sort_attribute == 'notification_muted' else 'inactive' }}" href="{{url_for('watchlist.index', sort='notification_muted', order=link_order, tag=active_tag_uuid)}}"><i data-feather="volume-2" style="vertical-align: bottom; width: 14px; height: 14px;  margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a> | ||||
|                 </th> | ||||
|                 <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th> | ||||
|              {% if any_has_restock_price_processor %} | ||||
|              {%- if any_has_restock_price_processor -%} | ||||
|                 <th>Restock & Price</th> | ||||
|              {% endif %} | ||||
|              {%- endif -%} | ||||
|                 <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th> | ||||
|                 <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th> | ||||
|                 <th class="empty-cell"></th> | ||||
|             </tr> | ||||
|             </thead> | ||||
|             <tbody> | ||||
|             {% if not watches|length %} | ||||
|             {%- if not watches|length -%} | ||||
|             <tr> | ||||
|                 <td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td> | ||||
|             </tr> | ||||
|             {% endif %} | ||||
|             {% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %} | ||||
|             {%- endif -%} | ||||
|  | ||||
|                 {% set is_unviewed = watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %} | ||||
|                 {% set checking_now = is_checking_now(watch) %} | ||||
|             <tr id="{{ watch.uuid }}" | ||||
|                 class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }} | ||||
|                 {% if watch.last_error is defined and watch.last_error != False %}error{% endif %} | ||||
|                 {% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %} | ||||
|                 {% if watch.paused is defined and watch.paused != False %}paused{% endif %} | ||||
|                 {% if is_unviewed %}unviewed{% endif %} | ||||
|                 {% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %} | ||||
|                 {% if watch.uuid in queued_uuids %}queued{% endif %} | ||||
|                 {% if checking_now %}checking-now{% endif %} | ||||
|                 "> | ||||
|                 <td class="inline checkbox-uuid" ><input name="uuids"  type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td> | ||||
|             {%- for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) -%} | ||||
|                 {%- set checking_now = is_checking_now(watch) -%} | ||||
|                 {%- set history_n = watch.history_n -%} | ||||
|                 {%- set favicon = watch.get_favicon_filename() -%} | ||||
|                 {%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list')  -%} | ||||
|                 {#  Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #} | ||||
|                 {%- set row_classes = [ | ||||
|                     loop.cycle('pure-table-odd', 'pure-table-even'), | ||||
|                     'processor-' ~ watch['processor'], | ||||
|                     'has-error' if watch.compile_error_texts()|length > 2 else '', | ||||
|                     'paused' if watch.paused is defined and watch.paused != False else '', | ||||
|                     'unviewed' if watch.has_unviewed else '', | ||||
|                     'has-restock-info' if watch.has_restock_info else 'no-restock-info', | ||||
|                     'has-favicon' if favicon else '', | ||||
|                     'in-stock' if watch.has_restock_info and watch['restock']['in_stock'] else '', | ||||
|                     'not-in-stock' if watch.has_restock_info and not watch['restock']['in_stock'] else '', | ||||
|                     'queued' if watch.uuid in queued_uuids else '', | ||||
|                     'checking-now' if checking_now else '', | ||||
|                     'notification_muted' if watch.notification_muted else '', | ||||
|                     'single-history' if history_n == 1 else '', | ||||
|                     'multiple-history' if history_n >= 2 else '', | ||||
|                     'use-html-title' if system_use_url_watchlist else 'no-html-title', | ||||
|                 ] -%} | ||||
|             <tr id="{{ watch.uuid }}" data-watch-uuid="{{ watch.uuid }}" class="{{ row_classes | reject('equalto', '') | join(' ') }}"> | ||||
|                 <td class="inline checkbox-uuid" ><div><input name="uuids"  type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td> | ||||
|                 <td class="inline watch-controls"> | ||||
|                     {% if not watch.paused %} | ||||
|                     <a class="state-off" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a> | ||||
|                     {% else %} | ||||
|                     <a class="state-on" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a> | ||||
|                     {% endif %} | ||||
|                     {% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %} | ||||
|                     <a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a> | ||||
|                 </td> | ||||
|                 <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}} | ||||
|                     <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a> | ||||
|                     <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a> | ||||
|  | ||||
|                     {% if watch.get_fetch_backend == "html_webdriver" | ||||
|                          or (  watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver'  ) | ||||
|                          or "extra_browser_" in watch.get_fetch_backend | ||||
|                     %} | ||||
|                     <img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" > | ||||
|                     {% endif %} | ||||
|  | ||||
|                     {%if watch.is_pdf  %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %} | ||||
|                     {% if watch.has_browser_steps %}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" title="Browser Steps is enabled" >{% endif %} | ||||
|                     {% if watch.last_error is defined and watch.last_error != False %} | ||||
|                     <div class="fetch-error">{{ watch.last_error }} | ||||
|  | ||||
|                         {% if '403' in watch.last_error %} | ||||
|                             {% if has_proxies %} | ||||
|                                 <a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>  | ||||
|                             {% endif %} | ||||
|                             <a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a> | ||||
|                          | ||||
|                         {% endif %} | ||||
|                         {% if 'empty result or contain only an image' in watch.last_error %} | ||||
|                             <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images">more help here</a>. | ||||
|                         {% endif %} | ||||
|                     <div> | ||||
|                     <a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a> | ||||
|                     <a class="ajax-op state-on pause-toggle"  data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a> | ||||
|                     <a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a> | ||||
|                     <a class="ajax-op state-on mute-toggle" data-op="mute"  style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a> | ||||
|                     </div> | ||||
|                     {% endif %} | ||||
|                     {% if watch.last_notification_error is defined and watch.last_notification_error != False %} | ||||
|                     <div class="fetch-error notification-error"><a href="{{url_for('settings.notification_logs')}}">{{ watch.last_notification_error }}</a></div> | ||||
|                     {% endif %} | ||||
|  | ||||
|                     {% if watch['processor'] == 'text_json_diff'  %} | ||||
|                         {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  %} | ||||
|                         <div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> | ||||
|                         {% endif %} | ||||
|                     {% endif %} | ||||
|                     {% if watch['processor'] == 'restock_diff' %} | ||||
|                         <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon" > Price</span> | ||||
|                     {% endif %} | ||||
|                     {% for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() %} | ||||
|                       <span class="watch-tag-list">{{ watch_tag.title }}</span> | ||||
|                     {% endfor %} | ||||
|                 </td> | ||||
|             <!-- @todo make it so any watch handler obj can expose this ---> | ||||
| {% if any_has_restock_price_processor %} | ||||
|  | ||||
|                 <td class="title-col inline"> | ||||
|                     <div class="flex-wrapper"> | ||||
|                     {% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %} | ||||
|                         <div>{# A page might have hundreds of these images; set IMG options for lazy loading, and don't set SRC if we don't have it so it doesn't fetch the placeholder #} | ||||
|                             <img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {%  endif %} /> | ||||
|                         </div> | ||||
|                     {%  endif %} | ||||
|                         <div> | ||||
|                         <span class="watch-title"> | ||||
|                             {% if system_use_url_watchlist or watch.get('use_page_title_in_list') %} | ||||
|                                 {{watch.label}} | ||||
|                             {% else %} | ||||
|                                 {{watch.url}} | ||||
|                             {% endif %} | ||||
|                            <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a> | ||||
|                         </span> | ||||
|                             <div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div> | ||||
|                             {%- if watch['processor'] == 'text_json_diff'  -%} | ||||
|                                 {%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  -%} | ||||
|                                 <div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> | ||||
|                                 {%- endif -%} | ||||
|                             {%- endif -%} | ||||
|                             {%- if watch['processor'] == 'restock_diff' -%} | ||||
|                                 <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon" > Price</span> | ||||
|                             {%- endif -%} | ||||
|                             {%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%} | ||||
|                               <span class="watch-tag-list">{{ watch_tag.title }}</span> | ||||
|                             {%- endfor -%} | ||||
|                         </div> | ||||
|                     <div class="status-icons"> | ||||
|                             <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a> | ||||
|                             {%- if watch.get_fetch_backend == "html_webdriver" | ||||
|                                  or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver'  ) | ||||
|                                  or "extra_browser_" in watch.get_fetch_backend | ||||
|                             -%} | ||||
|                             <img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" > | ||||
|                             {%- endif -%} | ||||
|                             {%- if watch.is_pdf  -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%} | ||||
|                             {%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%} | ||||
|  | ||||
|                     </div> | ||||
|                     </div> | ||||
|                 </td> | ||||
| {%- if any_has_restock_price_processor -%} | ||||
|                 <td class="restock-and-price"> | ||||
|                     {% if watch['processor'] == 'restock_diff'  %} | ||||
|                         {% if watch.has_restock_info %} | ||||
|                     {%- if watch['processor'] == 'restock_diff'  -%} | ||||
|                         {%- if watch.has_restock_info -%} | ||||
|                             <span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price"> | ||||
|                                 <!-- maybe some object watch['processor'][restock_diff] or.. --> | ||||
|                                  {% if watch['restock']['in_stock'] %} In stock {% else %} Not in stock {% endif %} | ||||
|                                  {%- if watch['restock']['in_stock']-%}  In stock {%- else-%}  Not in stock {%- endif -%} | ||||
|                             </span> | ||||
|                         {% endif %} | ||||
|                         {%- endif -%} | ||||
|  | ||||
|                         {% if watch.get('restock') and watch['restock']['price'] != None %} | ||||
|                             {% if watch['restock']['price'] != None %} | ||||
|                         {%- if watch.get('restock') and watch['restock']['price'] != None -%} | ||||
|                             {%- if watch['restock']['price'] != None -%} | ||||
|                                 <span class="restock-label price" title="Price"> | ||||
|                                 {{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }} | ||||
|                                 </span> | ||||
|                             {% endif %} | ||||
|                         {% elif not watch.has_restock_info %} | ||||
|                             {%- endif -%} | ||||
|                         {%- elif not watch.has_restock_info -%} | ||||
|                             <span class="restock-label error">No information</span> | ||||
|                         {% endif %} | ||||
|                     {% endif %} | ||||
|                         {%- endif -%} | ||||
|                     {%- endif -%} | ||||
|                 </td> | ||||
| {% endif %} | ||||
| {%- endif -%} | ||||
|             {#last_checked becomes fetch-start-time#} | ||||
|                 <td class="last-checked" data-timestamp="{{ watch.last_checked }}" {% if checking_now %} data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" {% endif %} > | ||||
|                     {% if checking_now %} | ||||
|                         <span class="spinner"></span><span> Checking now</span> | ||||
|                     {% else %} | ||||
|                         {{watch|format_last_checked_time|safe}}</td> | ||||
|                     {% endif %} | ||||
|  | ||||
|                 <td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %} | ||||
|                     {{watch.last_changed|format_timestamp_timeago}} | ||||
|                     {% else %} | ||||
|                     Not yet | ||||
|                     {% endif %} | ||||
|                 <td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" > | ||||
|                     <div class="spinner-wrapper" style="display:none;" > | ||||
|                         <span class="spinner"></span><span> Checking now</span> | ||||
|                     </div> | ||||
|                     <span class="innertext">{{watch|format_last_checked_time|safe}}</span> | ||||
|                 </td> | ||||
|                 <td> | ||||
|                     <a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" | ||||
|                        class="recheck pure-button pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a> | ||||
|                 <td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%} | ||||
|                     {{watch.last_changed|format_timestamp_timeago}} | ||||
|                     {%- else -%} | ||||
|                     Not yet | ||||
|                     {%- endif -%} | ||||
|                 </td> | ||||
|                 <td class="buttons"> | ||||
|                     <div> | ||||
|                     {%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%} | ||||
|                     <a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a> | ||||
|                     <a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a> | ||||
|                     <a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a> | ||||
|                     {% if watch.history_n >= 2 %} | ||||
|  | ||||
|                         {%  if is_unviewed %} | ||||
|                            <a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a> | ||||
|                         {% else %} | ||||
|                            <a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a> | ||||
|                         {% endif %} | ||||
|  | ||||
|                     {% else %} | ||||
|                         {% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%} | ||||
|                             <a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary">Preview</a> | ||||
|                         {% endif %} | ||||
|                     {% endif %} | ||||
|                     <a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a> | ||||
|                     <a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a> | ||||
|                     </div> | ||||
|                 </td> | ||||
|             </tr> | ||||
|             {% endfor %} | ||||
|             {%- endfor -%} | ||||
|             </tbody> | ||||
|         </table> | ||||
|         <ul id="post-list-buttons"> | ||||
|             {% if errored_count %} | ||||
|             <li> | ||||
|                 <a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error ">With errors ({{ errored_count }})</a> | ||||
|             <li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a> | ||||
|             </li> | ||||
|             {% endif %} | ||||
|             {% if has_unviewed %} | ||||
|             <li> | ||||
|                 <a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Mark all viewed</a> | ||||
|             <li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" > | ||||
|                 <a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a> | ||||
|             </li> | ||||
|             {% endif %} | ||||
|             <li> | ||||
|                <a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Recheck | ||||
|                 all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a> | ||||
|         {%-  if active_tag_uuid -%} | ||||
|             <li id="post-list-mark-views-tag"> | ||||
|                 <a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a> | ||||
|             </li> | ||||
|         {%-  endif -%} | ||||
|             <li id="post-list-unread" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread</a> | ||||
|             </li> | ||||
|             <li> | ||||
|                 <a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='Generic_Feed-icon.svg')}}" height="15"></a> | ||||
|                <a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck | ||||
|                 all {% if active_tag_uuid %}  in '{{active_tag.title}}'{%endif%}</a> | ||||
|             </li> | ||||
|             <li> | ||||
|                 <a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a> | ||||
|             </li> | ||||
|         </ul> | ||||
|         {{ pagination.links }} | ||||
|     </div> | ||||
|     </form> | ||||
| </div> | ||||
| {% endblock %} | ||||
| {%- endblock -%} | ||||
| @@ -1,14 +1,12 @@ | ||||
| from flask import Blueprint | ||||
|  | ||||
| from json_logic.builtins import BUILTINS | ||||
|  | ||||
| from .exceptions import EmptyConditionRuleRowNotUsable | ||||
| from .pluggy_interface import plugin_manager  # Import the pluggy plugin manager | ||||
| from . import default_plugin | ||||
|  | ||||
| from loguru import logger | ||||
| # List of all supported JSON Logic operators | ||||
| operator_choices = [ | ||||
|     (None, "Choose one"), | ||||
|     (None, "Choose one - Operator"), | ||||
|     (">", "Greater Than"), | ||||
|     ("<", "Less Than"), | ||||
|     (">=", "Greater Than or Equal To"), | ||||
| @@ -16,12 +14,11 @@ operator_choices = [ | ||||
|     ("==", "Equals"), | ||||
|     ("!=", "Not Equals"), | ||||
|     ("in", "Contains"), | ||||
|     ("!in", "Does Not Contain"), | ||||
| ] | ||||
|  | ||||
| # Fields available in the rules | ||||
| field_choices = [ | ||||
|     (None, "Choose one"), | ||||
|     (None, "Choose one - Field"), | ||||
| ] | ||||
|  | ||||
| # The data we will feed the JSON Rules to see if it passes the test/conditions or not | ||||
| @@ -94,20 +91,41 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat | ||||
|     EXECUTE_DATA = {} | ||||
|     result = True | ||||
|      | ||||
|     ruleset_settings = application_datastruct['watching'].get(current_watch_uuid) | ||||
|     watch = application_datastruct['watching'].get(current_watch_uuid) | ||||
|  | ||||
|     if ruleset_settings.get("conditions"): | ||||
|         logic_operator = "and" if ruleset_settings.get("conditions_match_logic", "ALL") == "ALL" else "or" | ||||
|         complete_rules = filter_complete_rules(ruleset_settings['conditions']) | ||||
|     if watch and watch.get("conditions"): | ||||
|         logic_operator = "and" if watch.get("conditions_match_logic", "ALL") == "ALL" else "or" | ||||
|         complete_rules = filter_complete_rules(watch['conditions']) | ||||
|         if complete_rules: | ||||
|             # Give all plugins a chance to update the data dict again (that we will test the conditions against) | ||||
|             for plugin in plugin_manager.get_plugins(): | ||||
|                 new_execute_data = plugin.add_data(current_watch_uuid=current_watch_uuid, | ||||
|                                                    application_datastruct=application_datastruct, | ||||
|                                                    ephemeral_data=ephemeral_data) | ||||
|                 try: | ||||
|                     import concurrent.futures | ||||
|                     import time | ||||
|                      | ||||
|                     with concurrent.futures.ThreadPoolExecutor() as executor: | ||||
|                         future = executor.submit( | ||||
|                             plugin.add_data, | ||||
|                             current_watch_uuid=current_watch_uuid, | ||||
|                             application_datastruct=application_datastruct, | ||||
|                             ephemeral_data=ephemeral_data | ||||
|                         ) | ||||
|                         logger.debug(f"Trying plugin {plugin}....") | ||||
|  | ||||
|                 if new_execute_data and isinstance(new_execute_data, dict): | ||||
|                     EXECUTE_DATA.update(new_execute_data) | ||||
|                         # Set a timeout of 10 seconds | ||||
|                         try: | ||||
|                             new_execute_data = future.result(timeout=10) | ||||
|                             if new_execute_data and isinstance(new_execute_data, dict): | ||||
|                                 EXECUTE_DATA.update(new_execute_data) | ||||
|  | ||||
|                         except concurrent.futures.TimeoutError: | ||||
|                             # The plugin took too long, abort processing for this watch | ||||
|                             raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.") | ||||
|                 except Exception as e: | ||||
|                     # Log the error but continue with the next plugin | ||||
|                     import logging | ||||
|                     logging.error(f"Error executing plugin {plugin.__class__.__name__}: {str(e)}") | ||||
|                     continue | ||||
|  | ||||
|             # Create the ruleset | ||||
|             ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules) | ||||
| @@ -132,3 +150,18 @@ for plugin in plugin_manager.get_plugins(): | ||||
|     if isinstance(new_field_choices, list): | ||||
|         field_choices.extend(new_field_choices) | ||||
|  | ||||
| def collect_ui_edit_stats_extras(watch): | ||||
|     """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras""" | ||||
|     extras_content = [] | ||||
|      | ||||
|     for plugin in plugin_manager.get_plugins(): | ||||
|         try: | ||||
|             content = plugin.ui_edit_stats_extras(watch=watch) | ||||
|             if content: | ||||
|                 extras_content.append(content) | ||||
|         except Exception as e: | ||||
|             # Skip plugins that don't implement the hook or have errors | ||||
|             pass | ||||
|              | ||||
|     return "\n".join(extras_content) if extras_content else "" | ||||
|  | ||||
|   | ||||
| @@ -21,17 +21,21 @@ def register_operators(): | ||||
|     def length_max(_, text, strlen): | ||||
|         return len(text) <= int(strlen) | ||||
|  | ||||
|     # ✅ Custom function for case-insensitive regex matching | ||||
|     # Custom function for case-insensitive regex matching | ||||
|     def contains_regex(_, text, pattern): | ||||
|         """Returns True if `text` contains `pattern` (case-insensitive regex match).""" | ||||
|         return bool(re.search(pattern, str(text), re.IGNORECASE)) | ||||
|  | ||||
|     # ✅ Custom function for NOT matching case-insensitive regex | ||||
|     # Custom function for NOT matching case-insensitive regex | ||||
|     def not_contains_regex(_, text, pattern): | ||||
|         """Returns True if `text` does NOT contain `pattern` (case-insensitive regex match).""" | ||||
|         return not bool(re.search(pattern, str(text), re.IGNORECASE)) | ||||
|  | ||||
|     def not_contains(_, text, pattern): | ||||
|         return not pattern in text | ||||
|  | ||||
|     return { | ||||
|         "!in": not_contains, | ||||
|         "!contains_regex": not_contains_regex, | ||||
|         "contains_regex": contains_regex, | ||||
|         "ends_with": ends_with, | ||||
| @@ -43,6 +47,7 @@ def register_operators(): | ||||
| @hookimpl | ||||
| def register_operator_choices(): | ||||
|     return [ | ||||
|         ("!in", "Does NOT Contain"), | ||||
|         ("starts_with", "Text Starts With"), | ||||
|         ("ends_with", "Text Ends With"), | ||||
|         ("length_min", "Length minimum"), | ||||
|   | ||||
| @@ -19,7 +19,7 @@ class ConditionFormRow(Form): | ||||
|         validators=[validators.Optional()] | ||||
|     ) | ||||
|  | ||||
|     value = StringField("Value", validators=[validators.Optional()]) | ||||
|     value = StringField("Value", validators=[validators.Optional()], render_kw={"placeholder": "A value"}) | ||||
|  | ||||
|     def validate(self, extra_validators=None): | ||||
|         # First, run the default validators | ||||
|   | ||||
| @@ -1,5 +1,8 @@ | ||||
| import pluggy | ||||
| from . import default_plugin  # Import the default plugin | ||||
| import os | ||||
| import importlib | ||||
| import sys | ||||
| from . import default_plugin | ||||
|  | ||||
| # ✅ Ensure that the namespace in HookspecMarker matches PluginManager | ||||
| PLUGIN_NAMESPACE = "changedetectionio_conditions" | ||||
| @@ -30,6 +33,11 @@ class ConditionsSpec: | ||||
|     def add_data(current_watch_uuid, application_datastruct, ephemeral_data): | ||||
|         """Add to the datadict""" | ||||
|         pass | ||||
|          | ||||
|     @hookspec | ||||
|     def ui_edit_stats_extras(watch): | ||||
|         """Return HTML content to add to the stats tab in the edit view""" | ||||
|         pass | ||||
|  | ||||
| # ✅ Set up Pluggy Plugin Manager | ||||
| plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE) | ||||
| @@ -40,5 +48,27 @@ plugin_manager.add_hookspecs(ConditionsSpec) | ||||
| # ✅ Register built-in plugins manually | ||||
| plugin_manager.register(default_plugin, "default_plugin") | ||||
|  | ||||
| # ✅ Load plugins from the plugins directory | ||||
| def load_plugins_from_directory(): | ||||
|     plugins_dir = os.path.join(os.path.dirname(__file__), 'plugins') | ||||
|     if not os.path.exists(plugins_dir): | ||||
|         return | ||||
|          | ||||
|     # Get all Python files (excluding __init__.py) | ||||
|     for filename in os.listdir(plugins_dir): | ||||
|         if filename.endswith(".py") and filename != "__init__.py": | ||||
|             module_name = filename[:-3]  # Remove .py extension | ||||
|             module_path = f"changedetectionio.conditions.plugins.{module_name}" | ||||
|              | ||||
|             try: | ||||
|                 module = importlib.import_module(module_path) | ||||
|                 # Register the plugin with pluggy | ||||
|                 plugin_manager.register(module, module_name) | ||||
|             except (ImportError, AttributeError) as e: | ||||
|                 print(f"Error loading plugin {module_name}: {e}") | ||||
|  | ||||
| # Load plugins from the plugins directory | ||||
| load_plugins_from_directory() | ||||
|  | ||||
| # ✅ Discover installed plugins from external packages (if any) | ||||
| plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) | ||||
|   | ||||
							
								
								
									
										1
									
								
								changedetectionio/conditions/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								changedetectionio/conditions/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | ||||
| # Import plugins package to make them discoverable | ||||
							
								
								
									
										119
									
								
								changedetectionio/conditions/plugins/levenshtein_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								changedetectionio/conditions/plugins/levenshtein_plugin.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,119 @@ | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS=100000 | ||||
|  | ||||
| # Support both plugin systems | ||||
| conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions") | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| def levenshtein_ratio_recent_history(watch, incoming_text=None): | ||||
|     try: | ||||
|         from Levenshtein import ratio, distance | ||||
|         k = list(watch.history.keys()) | ||||
|         a = None | ||||
|         b = None | ||||
|  | ||||
|         # When called from ui_edit_stats_extras, we don't have incoming_text | ||||
|         if incoming_text is None: | ||||
|             a = watch.get_history_snapshot(timestamp=k[-1])  # Latest snapshot | ||||
|             b = watch.get_history_snapshot(timestamp=k[-2])  # Previous snapshot | ||||
|  | ||||
|         # Needs atleast one snapshot | ||||
|         elif len(k) >= 1: # Should be atleast one snapshot to compare against | ||||
|             a = watch.get_history_snapshot(timestamp=k[-1]) # Latest saved snapshot | ||||
|             b = incoming_text if incoming_text else k[-2] | ||||
|  | ||||
|         if a and b: | ||||
|             distance_value = distance(a, b) | ||||
|             ratio_value = ratio(a, b) | ||||
|             return { | ||||
|                 'distance': distance_value, | ||||
|                 'ratio': ratio_value, | ||||
|                 'percent_similar': round(ratio_value * 100, 2) | ||||
|             } | ||||
|     except Exception as e: | ||||
|         logger.warning(f"Unable to calc similarity: {str(e)}") | ||||
|  | ||||
|     return '' | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def register_operators(): | ||||
|     """Conditions hook: this plugin contributes no operators, so nothing (None) is returned.""" | ||||
|     pass | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def register_operator_choices(): | ||||
|     """Conditions hook: this plugin contributes no operator choices, so nothing (None) is returned.""" | ||||
|     pass | ||||
|  | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def register_field_choices(): | ||||
|     """Conditions hook: return the (field key, label) pairs this plugin offers for rules.""" | ||||
|     return [ | ||||
|         ("levenshtein_ratio", "Levenshtein - Text similarity ratio"), | ||||
|         ("levenshtein_distance", "Levenshtein - Text change distance"), | ||||
|     ] | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def add_data(current_watch_uuid, application_datastruct, ephemeral_data): | ||||
|     res = {} | ||||
|     watch = application_datastruct['watching'].get(current_watch_uuid) | ||||
|     # ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc | ||||
|  | ||||
|     if watch and 'text' in ephemeral_data: | ||||
|         lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text','')) | ||||
|         if isinstance(lev_data, dict): | ||||
|             res['levenshtein_ratio'] = lev_data.get('ratio', 0) | ||||
|             res['levenshtein_similarity'] = lev_data.get('percent_similar', 0) | ||||
|             res['levenshtein_distance'] = lev_data.get('distance', 0) | ||||
|  | ||||
|     return res | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add Levenshtein stats to the UI using the global plugin system""" | ||||
|     """Generate the HTML for Levenshtein stats - shared by both plugin systems""" | ||||
|     if len(watch.history.keys()) < 2: | ||||
|         return "<p>Not enough history to calculate Levenshtein metrics</p>" | ||||
|  | ||||
|  | ||||
|     # Protection against the algorithm getting stuck on huge documents | ||||
|     k = list(watch.history.keys()) | ||||
|     if any( | ||||
|             len(watch.get_history_snapshot(timestamp=k[idx])) > LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS | ||||
|             for idx in (-1, -2) | ||||
|             if len(k) >= abs(idx) | ||||
|     ): | ||||
|         return "<p>Snapshot too large for edit statistics, skipping.</p>" | ||||
|  | ||||
|     try: | ||||
|         lev_data = levenshtein_ratio_recent_history(watch) | ||||
|         if not lev_data or not isinstance(lev_data, dict): | ||||
|             return "<p>Unable to calculate Levenshtein metrics</p>" | ||||
|              | ||||
|         html = f""" | ||||
|         <div class="levenshtein-stats"> | ||||
|             <h4>Levenshtein Text Similarity Details</h4> | ||||
|             <table class="pure-table"> | ||||
|                 <tbody> | ||||
|                     <tr> | ||||
|                         <td>Raw distance (edits needed)</td> | ||||
|                         <td>{lev_data['distance']}</td> | ||||
|                     </tr> | ||||
|                     <tr> | ||||
|                         <td>Similarity ratio</td> | ||||
|                         <td>{lev_data['ratio']:.4f}</td> | ||||
|                     </tr> | ||||
|                     <tr> | ||||
|                         <td>Percent similar</td> | ||||
|                         <td>{lev_data['percent_similar']}%</td> | ||||
|                     </tr> | ||||
|                 </tbody> | ||||
|             </table> | ||||
|             <p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p> | ||||
|         </div> | ||||
|         """ | ||||
|         return html | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error generating Levenshtein UI extras: {str(e)}") | ||||
|         return "<p>Error calculating Levenshtein metrics</p>" | ||||
|          | ||||
							
								
								
									
										82
									
								
								changedetectionio/conditions/plugins/wordcount_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								changedetectionio/conditions/plugins/wordcount_plugin.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,82 @@ | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| # Support both plugin systems | ||||
| conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions") | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| def count_words_in_history(watch, incoming_text=None): | ||||
|     """Count words in snapshot text""" | ||||
|     try: | ||||
|         if incoming_text is not None: | ||||
|             # When called from add_data with incoming text | ||||
|             return len(incoming_text.split()) | ||||
|         elif watch.history.keys(): | ||||
|             # When called from UI extras to count latest snapshot | ||||
|             latest_key = list(watch.history.keys())[-1] | ||||
|             latest_content = watch.get_history_snapshot(latest_key) | ||||
|             return len(latest_content.split()) | ||||
|         return 0 | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error counting words: {str(e)}") | ||||
|         return 0 | ||||
|  | ||||
| # Implement condition plugin hooks | ||||
| @conditions_hookimpl | ||||
| def register_operators(): | ||||
|     """Conditions hook: this plugin adds no operators and returns an empty dict.""" | ||||
|     # No custom operators needed | ||||
|     return {} | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def register_operator_choices(): | ||||
|     """Conditions hook: this plugin adds no operator choices and returns an empty list.""" | ||||
|     # No custom operator choices needed | ||||
|     return [] | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def register_field_choices(): | ||||
|     """Conditions hook: return the (field key, label) pairs this plugin offers for rules.""" | ||||
|     # Add a field that will be available in conditions | ||||
|     return [ | ||||
|         ("word_count", "Word count of content"), | ||||
|     ] | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def add_data(current_watch_uuid, application_datastruct, ephemeral_data): | ||||
|     """Add word count data for conditions""" | ||||
|     result = {} | ||||
|     watch = application_datastruct['watching'].get(current_watch_uuid) | ||||
|      | ||||
|     if watch and 'text' in ephemeral_data: | ||||
|         word_count = count_words_in_history(watch, ephemeral_data['text']) | ||||
|         result['word_count'] = word_count | ||||
|      | ||||
|     return result | ||||
|  | ||||
| def _generate_stats_html(watch): | ||||
|     """Generate the HTML content for the stats tab | ||||
|  | ||||
|     Builds a one-row table showing the word count that count_words_in_history(watch) | ||||
|     reports when called without incoming text. | ||||
|  | ||||
|     :param watch: Watch object passed straight through to count_words_in_history. | ||||
|     :return: HTML snippet as a string. | ||||
|     """ | ||||
|     word_count = count_words_in_history(watch) | ||||
|      | ||||
|     # word_count is an int, so it is interpolated into the markup as-is | ||||
|     html = f""" | ||||
|     <div class="word-count-stats"> | ||||
|         <h4>Content Analysis</h4> | ||||
|         <table class="pure-table"> | ||||
|             <tbody> | ||||
|                 <tr> | ||||
|                     <td>Word count (latest snapshot)</td> | ||||
|                     <td>{word_count}</td> | ||||
|                 </tr> | ||||
|             </tbody> | ||||
|         </table> | ||||
|         <p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p> | ||||
|     </div> | ||||
|     """ | ||||
|     return html | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add word count stats to the UI through conditions plugin system""" | ||||
|     return _generate_stats_html(watch) | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add word count stats to the UI using the global plugin system""" | ||||
|     return _generate_stats_html(watch) | ||||
| @@ -7,11 +7,30 @@ import os | ||||
| # Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>. | ||||
| visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button' | ||||
|  | ||||
| SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000 | ||||
| SCREENSHOT_DEFAULT_QUALITY = 40 | ||||
|  | ||||
| # Maximum total height for the final image (When in stitch mode). | ||||
| # Defaults to SCREENSHOT_MAX_HEIGHT_DEFAULT and can be overridden with the SCREENSHOT_MAX_HEIGHT env var; | ||||
| # it is capped because of the huge amount of RAM that very tall screenshots were using. | ||||
| # Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc) | ||||
| SCREENSHOT_MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|  | ||||
| # The size at which we will switch to stitching method, when below this (and | ||||
| # MAX_TOTAL_HEIGHT which can be set by a user) we will use the default | ||||
| # screenshot method. | ||||
| SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000 | ||||
|  | ||||
| # available_fetchers() will scan this implementation looking for anything starting with html_ | ||||
| # this information is used in the form selections | ||||
| from changedetectionio.content_fetchers.requests import fetcher as html_requests | ||||
|  | ||||
|  | ||||
| import importlib.resources | ||||
| XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8') | ||||
| INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8') | ||||
| FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8') | ||||
|  | ||||
|  | ||||
| def available_fetchers(): | ||||
|     # See the if statement at the bottom of this file for how we switch between playwright and webdriver | ||||
|     import inspect | ||||
|   | ||||
| @@ -48,6 +48,7 @@ class Fetcher(): | ||||
|     error = None | ||||
|     fetcher_description = "No description" | ||||
|     headers = {} | ||||
|     favicon_blob = None | ||||
|     instock_data = None | ||||
|     instock_data_js = "" | ||||
|     status_code = None | ||||
| @@ -63,31 +64,28 @@ class Fetcher(): | ||||
|     # Time ONTOP of the system defined env minimum time | ||||
|     render_extract_delay = 0 | ||||
|  | ||||
|     def __init__(self): | ||||
|         import importlib.resources | ||||
|         self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8') | ||||
|         self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8') | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_error(self): | ||||
|         return self.error | ||||
|  | ||||
|     @abstractmethod | ||||
|     def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|     async def run(self, | ||||
|                   fetch_favicon=True, | ||||
|                   current_include_filters=None, | ||||
|                   empty_pages_are_a_change=False, | ||||
|                   ignore_status_codes=False, | ||||
|                   is_binary=False, | ||||
|                   request_body=None, | ||||
|                   request_headers=None, | ||||
|                   request_method=None, | ||||
|                   timeout=None, | ||||
|                   url=None, | ||||
|                   ): | ||||
|         # Should set self.error, self.status_code and self.content | ||||
|         pass | ||||
|  | ||||
|     @abstractmethod | ||||
|     def quit(self): | ||||
|     def quit(self, watch=None): | ||||
|         return | ||||
|  | ||||
|     @abstractmethod | ||||
| @@ -127,7 +125,7 @@ class Fetcher(): | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def iterate_browser_steps(self, start_url=None): | ||||
|     async def iterate_browser_steps(self, start_url=None): | ||||
|         from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|         from playwright._impl._errors import TimeoutError, Error | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
| @@ -141,8 +139,9 @@ class Fetcher(): | ||||
|             for step in valid_steps: | ||||
|                 step_n += 1 | ||||
|                 logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...") | ||||
|                 self.screenshot_step("before-" + str(step_n)) | ||||
|                 self.save_step_html("before-" + str(step_n)) | ||||
|                 await self.screenshot_step("before-" + str(step_n)) | ||||
|                 await self.save_step_html("before-" + str(step_n)) | ||||
|  | ||||
|                 try: | ||||
|                     optional_value = step['optional_value'] | ||||
|                     selector = step['selector'] | ||||
| @@ -152,11 +151,11 @@ class Fetcher(): | ||||
|                     if '{%' in step['selector'] or '{{' in step['selector']: | ||||
|                         selector = jinja_render(template_str=step['selector']) | ||||
|  | ||||
|                     getattr(interface, "call_action")(action_name=step['operation'], | ||||
|                     await getattr(interface, "call_action")(action_name=step['operation'], | ||||
|                                                       selector=selector, | ||||
|                                                       optional_value=optional_value) | ||||
|                     self.screenshot_step(step_n) | ||||
|                     self.save_step_html(step_n) | ||||
|                     await self.screenshot_step(step_n) | ||||
|                     await self.save_step_html(step_n) | ||||
|                 except (Error, TimeoutError) as e: | ||||
|                     logger.debug(str(e)) | ||||
|                     # Stop processing here | ||||
|   | ||||
| @@ -1,104 +0,0 @@ | ||||
|  | ||||
| # Pages with a vertical height longer than this will use the 'stitch together' method. | ||||
|  | ||||
| # - Many GPUs have a max texture size of 16384x16384px (or lower on older devices). | ||||
| # - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits. | ||||
| # - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer. | ||||
|  | ||||
|  | ||||
| # The size at which we will switch to stitching method | ||||
| SCREENSHOT_SIZE_STITCH_THRESHOLD=8000 | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| def capture_stitched_together_full_page(page): | ||||
|     import io | ||||
|     import os | ||||
|     import time | ||||
|     from PIL import Image, ImageDraw, ImageFont | ||||
|  | ||||
|     MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4  # Maximum total height for the final image (When in stitch mode) | ||||
|     MAX_CHUNK_HEIGHT = 4000  # Height per screenshot chunk | ||||
|     WARNING_TEXT_HEIGHT = 20  # Height of the warning text overlay | ||||
|  | ||||
|     # Save the original viewport size | ||||
|     original_viewport = page.viewport_size | ||||
|     now = time.time() | ||||
|  | ||||
|     try: | ||||
|         viewport = page.viewport_size | ||||
|         page_height = page.evaluate("document.documentElement.scrollHeight") | ||||
|  | ||||
|         # Limit the total capture height | ||||
|         capture_height = min(page_height, MAX_TOTAL_HEIGHT) | ||||
|  | ||||
|         images = [] | ||||
|         total_captured_height = 0 | ||||
|  | ||||
|         for offset in range(0, capture_height, MAX_CHUNK_HEIGHT): | ||||
|             # Ensure we do not exceed the total height limit | ||||
|             chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height) | ||||
|  | ||||
|             # Adjust viewport size for this chunk | ||||
|             page.set_viewport_size({"width": viewport["width"], "height": chunk_height}) | ||||
|  | ||||
|             # Scroll to the correct position | ||||
|             page.evaluate(f"window.scrollTo(0, {offset})") | ||||
|  | ||||
|             # Capture screenshot chunk | ||||
|             screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30))) | ||||
|             images.append(Image.open(io.BytesIO(screenshot_bytes))) | ||||
|  | ||||
|             total_captured_height += chunk_height | ||||
|  | ||||
|             # Stop if we reached the maximum total height | ||||
|             if total_captured_height >= MAX_TOTAL_HEIGHT: | ||||
|                 break | ||||
|  | ||||
|         # Create the final stitched image | ||||
|         stitched_image = Image.new('RGB', (viewport["width"], total_captured_height)) | ||||
|         y_offset = 0 | ||||
|  | ||||
|         # Stitch the screenshot chunks together | ||||
|         for img in images: | ||||
|             stitched_image.paste(img, (0, y_offset)) | ||||
|             y_offset += img.height | ||||
|  | ||||
|         logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s") | ||||
|  | ||||
|         # Overlay warning text if the screenshot was trimmed | ||||
|         if page_height > MAX_TOTAL_HEIGHT: | ||||
|             draw = ImageDraw.Draw(stitched_image) | ||||
|             warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long" | ||||
|  | ||||
|             # Load font (default system font if Arial is unavailable) | ||||
|             try: | ||||
|                 font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT)  # Arial (Windows/Mac) | ||||
|             except IOError: | ||||
|                 font = ImageFont.load_default()  # Default font if Arial not found | ||||
|  | ||||
|             # Get text bounding box (correct method for newer Pillow versions) | ||||
|             text_bbox = draw.textbbox((0, 0), warning_text, font=font) | ||||
|             text_width = text_bbox[2] - text_bbox[0]  # Calculate text width | ||||
|             text_height = text_bbox[3] - text_bbox[1]  # Calculate text height | ||||
|  | ||||
|             # Define background rectangle (top of the image) | ||||
|             draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white") | ||||
|  | ||||
|             # Center text horizontally within the warning area | ||||
|             text_x = (viewport["width"] - text_width) // 2 | ||||
|             text_y = (WARNING_TEXT_HEIGHT - text_height) // 2 | ||||
|  | ||||
|             # Draw the warning text in red | ||||
|             draw.text((text_x, text_y), warning_text, fill="red", font=font) | ||||
|  | ||||
|         # Save or return the final image | ||||
|         output = io.BytesIO() | ||||
|         stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30))) | ||||
|         screenshot = output.getvalue() | ||||
|  | ||||
|     finally: | ||||
|         # Restore the original viewport size | ||||
|         page.set_viewport_size(original_viewport) | ||||
|  | ||||
|     return screenshot | ||||
| @@ -4,10 +4,75 @@ from urllib.parse import urlparse | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \ | ||||
|     SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS | ||||
| from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent | ||||
| from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable | ||||
|  | ||||
| async def capture_full_page_async(page): | ||||
|     import os | ||||
|     import time | ||||
|     from multiprocessing import Process, Pipe | ||||
|  | ||||
|     start = time.time() | ||||
|  | ||||
|     page_height = await page.evaluate("document.documentElement.scrollHeight") | ||||
|     page_width = await page.evaluate("document.documentElement.scrollWidth") | ||||
|     original_viewport = page.viewport_size | ||||
|  | ||||
|     logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}") | ||||
|  | ||||
|     # Use an approach similar to puppeteer: set a larger viewport and take screenshots in chunks | ||||
|     step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow | ||||
|     screenshot_chunks = [] | ||||
|     y = 0 | ||||
|  | ||||
|     if page_height > page.viewport_size['height']: | ||||
|         if page_height < step_size: | ||||
|             step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size | ||||
|         logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size") | ||||
|         # Set viewport to a larger size to capture more content at once | ||||
|         await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size}) | ||||
|  | ||||
|     # Capture screenshots in chunks up to the max total height | ||||
|     while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT): | ||||
|         await page.request_gc() | ||||
|         await page.evaluate(f"window.scrollTo(0, {y})") | ||||
|         await page.request_gc() | ||||
|         screenshot_chunks.append(await page.screenshot( | ||||
|             type="jpeg", | ||||
|             full_page=False, | ||||
|             quality=int(os.getenv("SCREENSHOT_QUALITY", 72)) | ||||
|         )) | ||||
|         y += step_size | ||||
|         await page.request_gc() | ||||
|  | ||||
|     # Restore original viewport size | ||||
|     await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']}) | ||||
|  | ||||
|     # If we have multiple chunks, stitch them together | ||||
|     if len(screenshot_chunks) > 1: | ||||
|         from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker | ||||
|         logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together") | ||||
|         parent_conn, child_conn = Pipe() | ||||
|         p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)) | ||||
|         p.start() | ||||
|         screenshot = parent_conn.recv_bytes() | ||||
|         p.join() | ||||
|         logger.debug( | ||||
|             f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s") | ||||
|         # Explicit cleanup | ||||
|         del screenshot_chunks | ||||
|         del p | ||||
|         del parent_conn, child_conn | ||||
|         screenshot_chunks = None | ||||
|         return screenshot | ||||
|  | ||||
|     logger.debug( | ||||
|         f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s") | ||||
|  | ||||
|     return screenshot_chunks[0] | ||||
|  | ||||
| class fetcher(Fetcher): | ||||
|     fetcher_description = "Playwright {}/Javascript".format( | ||||
|         os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize() | ||||
| @@ -58,9 +123,10 @@ class fetcher(Fetcher): | ||||
|                 self.proxy['username'] = parsed.username | ||||
|                 self.proxy['password'] = parsed.password | ||||
|  | ||||
|     def screenshot_step(self, step_n=''): | ||||
|     async def screenshot_step(self, step_n=''): | ||||
|         super().screenshot_step(step_n=step_n) | ||||
|         screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|         screenshot = await capture_full_page_async(page=self.page) | ||||
|  | ||||
|  | ||||
|         if self.browser_steps_screenshot_path is not None: | ||||
|             destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n)) | ||||
| @@ -68,46 +134,47 @@ class fetcher(Fetcher): | ||||
|             with open(destination, 'wb') as f: | ||||
|                 f.write(screenshot) | ||||
|  | ||||
|     def save_step_html(self, step_n): | ||||
|     async def save_step_html(self, step_n): | ||||
|         super().save_step_html(step_n=step_n) | ||||
|         content = self.page.content() | ||||
|         content = await self.page.content() | ||||
|         destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n)) | ||||
|         logger.debug(f"Saving step HTML to {destination}") | ||||
|         with open(destination, 'w') as f: | ||||
|             f.write(content) | ||||
|  | ||||
|     def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|     async def run(self, | ||||
|                   fetch_favicon=True, | ||||
|                   current_include_filters=None, | ||||
|                   empty_pages_are_a_change=False, | ||||
|                   ignore_status_codes=False, | ||||
|                   is_binary=False, | ||||
|                   request_body=None, | ||||
|                   request_headers=None, | ||||
|                   request_method=None, | ||||
|                   timeout=None, | ||||
|                   url=None, | ||||
|                   ): | ||||
|  | ||||
|         from playwright.sync_api import sync_playwright | ||||
|         from playwright.async_api import async_playwright | ||||
|         import playwright._impl._errors | ||||
|         from changedetectionio.content_fetchers import visualselector_xpath_selectors | ||||
|         import time | ||||
|         self.delete_browser_steps_screenshots() | ||||
|         response = None | ||||
|  | ||||
|         with sync_playwright() as p: | ||||
|         async with async_playwright() as p: | ||||
|             browser_type = getattr(p, self.browser_type) | ||||
|  | ||||
|             # Seemed to cause a connection Exception even tho I can see it connect | ||||
|             # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000) | ||||
|             # 60,000 connection timeout only | ||||
|             browser = browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000) | ||||
|             browser = await browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000) | ||||
|  | ||||
|             # SOCKS5 with authentication is not supported (yet) | ||||
|             # https://github.com/microsoft/playwright/issues/10567 | ||||
|  | ||||
|             # Set user agent to prevent Cloudflare from blocking the browser | ||||
|             # Use the default one configured in the App.py model that's passed from fetch_site_status.py | ||||
|             context = browser.new_context( | ||||
|             context = await browser.new_context( | ||||
|                 accept_downloads=False,  # Should never be needed | ||||
|                 bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others | ||||
|                 extra_http_headers=request_headers, | ||||
| @@ -117,41 +184,47 @@ class fetcher(Fetcher): | ||||
|                 user_agent=manage_user_agent(headers=request_headers), | ||||
|             ) | ||||
|  | ||||
|             self.page = context.new_page() | ||||
|             self.page = await context.new_page() | ||||
|  | ||||
|             # Listen for all console events and handle errors | ||||
|             self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) | ||||
|             self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|             # Re-use as much code from browser steps as possible so its the same | ||||
|             from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|             browsersteps_interface = steppable_browser_interface(start_url=url) | ||||
|             browsersteps_interface.page = self.page | ||||
|  | ||||
|             response = browsersteps_interface.action_goto_url(value=url) | ||||
|             self.headers = response.all_headers() | ||||
|             response = await browsersteps_interface.action_goto_url(value=url) | ||||
|  | ||||
|             if response is None: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 await context.close() | ||||
|                 await browser.close() | ||||
|                 logger.debug("Content Fetcher > Response object from the browser communication was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             # In async_playwright, all_headers() returns a coroutine | ||||
|             try: | ||||
|                 self.headers = await response.all_headers() | ||||
|             except TypeError: | ||||
|                 # Fallback for sync version | ||||
|                 self.headers = response.all_headers() | ||||
|  | ||||
|             try: | ||||
|                 if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code): | ||||
|                     browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None) | ||||
|                     await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None) | ||||
|             except playwright._impl._errors.TimeoutError as e: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 await context.close() | ||||
|                 await browser.close() | ||||
|                 # This can be ok, we will try to grab what we could retrieve | ||||
|                 pass | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}") | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 await context.close() | ||||
|                 await browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|             self.page.wait_for_timeout(extra_wait * 1000) | ||||
|             await self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|             try: | ||||
|                 self.status_code = response.status | ||||
| @@ -159,41 +232,57 @@ class fetcher(Fetcher): | ||||
|                 # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962 | ||||
|                 logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.") | ||||
|                 logger.critical(response) | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 await context.close() | ||||
|                 await browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             if self.status_code != 200 and not ignore_status_codes: | ||||
|                 screenshot = self.page.screenshot(type='jpeg', full_page=True, | ||||
|                                                   quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|             if fetch_favicon: | ||||
|                 try: | ||||
|                     self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS) | ||||
|                     await self.page.request_gc() | ||||
|                 except Exception as e: | ||||
|                     logger.error(f"Error fetching FavIcon info {str(e)}, continuing.") | ||||
|  | ||||
|             if self.status_code != 200 and not ignore_status_codes: | ||||
|                 screenshot = await capture_full_page_async(self.page) | ||||
|                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) | ||||
|  | ||||
|             if not empty_pages_are_a_change and len(self.page.content().strip()) == 0: | ||||
|             if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0: | ||||
|                 logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False") | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 await context.close() | ||||
|                 await browser.close() | ||||
|                 raise EmptyReply(url=url, status_code=response.status) | ||||
|  | ||||
|             # Run Browser Steps here | ||||
|             if self.browser_steps_get_valid_steps(): | ||||
|                 self.iterate_browser_steps(start_url=url) | ||||
|                 await self.iterate_browser_steps(start_url=url) | ||||
|  | ||||
|             self.page.wait_for_timeout(extra_wait * 1000) | ||||
|             await self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|             now = time.time() | ||||
|             # So we can find an element on the page where its selector was entered manually (maybe not xPath etc) | ||||
|             if current_include_filters is not None: | ||||
|                 self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters))) | ||||
|                 await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters))) | ||||
|             else: | ||||
|                 self.page.evaluate("var include_filters=''") | ||||
|                 await self.page.evaluate("var include_filters=''") | ||||
|             await self.page.request_gc() | ||||
|  | ||||
|             self.xpath_data = self.page.evaluate( | ||||
|                 "async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}") | ||||
|             self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}") | ||||
|             # request_gc before and after evaluate to free up memory | ||||
|             # @todo browsersteps etc | ||||
|             MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|             self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, { | ||||
|                 "visualselector_xpath_selectors": visualselector_xpath_selectors, | ||||
|                 "max_height": MAX_TOTAL_HEIGHT | ||||
|             }) | ||||
|             await self.page.request_gc() | ||||
|  | ||||
|             self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS) | ||||
|             await self.page.request_gc() | ||||
|  | ||||
|             self.content = await self.page.content() | ||||
|             await self.page.request_gc() | ||||
|             logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s") | ||||
|  | ||||
|             self.content = self.page.content() | ||||
|             logger.debug(f"Time to scrape xpath element data in browser {time.time() - now:.2f}s") | ||||
|  | ||||
|             # Bug 3 in Playwright screenshot handling | ||||
|             # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
| @@ -204,18 +293,41 @@ class fetcher(Fetcher): | ||||
|             # acceptable screenshot quality here | ||||
|             try: | ||||
|                 # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage | ||||
|                 full_height = self.page.evaluate("document.documentElement.scrollHeight") | ||||
|  | ||||
|                 if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD: | ||||
|                     logger.warning( | ||||
|                         f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.") | ||||
|                     self.screenshot = capture_stitched_together_full_page(self.page) | ||||
|                 else: | ||||
|                     self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30))) | ||||
|                 self.screenshot = await capture_full_page_async(page=self.page) | ||||
|  | ||||
|             except Exception as e: | ||||
|                 # It's likely the screenshot was too long/big and something crashed | ||||
|                 raise ScreenshotUnavailable(url=url, status_code=self.status_code) | ||||
|             finally: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 # Request garbage collection one more time before closing | ||||
|                 try: | ||||
|                     await self.page.request_gc() | ||||
|                 except: | ||||
|                     pass | ||||
|                  | ||||
|                 # Clean up resources properly | ||||
|                 try: | ||||
|                     await self.page.request_gc() | ||||
|                 except: | ||||
|                     pass | ||||
|  | ||||
|                 try: | ||||
|                     await self.page.close() | ||||
|                 except: | ||||
|                     pass | ||||
|                 self.page = None | ||||
|  | ||||
|                 try: | ||||
|                     await context.close() | ||||
|                 except: | ||||
|                     pass | ||||
|                 context = None | ||||
|  | ||||
|                 try: | ||||
|                     await browser.close() | ||||
|                 except: | ||||
|                     pass | ||||
|                 browser = None | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -6,8 +6,85 @@ from urllib.parse import urlparse | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \ | ||||
|     SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \ | ||||
|     SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS | ||||
| from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent | ||||
| from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError | ||||
| from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, \ | ||||
|     BrowserConnectError | ||||
|  | ||||
|  | ||||
| # Bug 3 in Playwright screenshot handling | ||||
| # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|  | ||||
| # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded | ||||
| # which will significantly increase the IO size between the server and client, it's recommended to use the lowest | ||||
| # acceptable screenshot quality here | ||||
| async def capture_full_page(page): | ||||
|     import os | ||||
|     import time | ||||
|     from multiprocessing import Process, Pipe | ||||
|  | ||||
|     start = time.time() | ||||
|  | ||||
|     page_height = await page.evaluate("document.documentElement.scrollHeight") | ||||
|     page_width = await page.evaluate("document.documentElement.scrollWidth") | ||||
|     original_viewport = page.viewport | ||||
|  | ||||
|     logger.debug(f"Puppeteer viewport size {page.viewport} page height {page_height} page width {page_width}") | ||||
|  | ||||
|     # Bug 3 in Playwright screenshot handling | ||||
|     # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|     # JPEG is better here because the screenshots can be very very large | ||||
|  | ||||
|     # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded | ||||
|     # which will significantly increase the IO size between the server and client, it's recommended to use the lowest | ||||
|     # acceptable screenshot quality here | ||||
|  | ||||
|  | ||||
|     step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Something that will not cause the GPU to overflow when taking the screenshot | ||||
|     screenshot_chunks = [] | ||||
|     y = 0 | ||||
|     if page_height > page.viewport['height']: | ||||
|         if page_height < step_size: | ||||
|             step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size | ||||
|         await page.setViewport({'width': page.viewport['width'], 'height': step_size}) | ||||
|  | ||||
|     while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT): | ||||
|         # better than scrollTo incase they override it in the page | ||||
|         await page.evaluate( | ||||
|             """(y) => { | ||||
|                 document.documentElement.scrollTop = y; | ||||
|                 document.body.scrollTop = y; | ||||
|             }""", | ||||
|             y | ||||
|         ) | ||||
|  | ||||
|         screenshot_chunks.append(await page.screenshot(type_='jpeg', | ||||
|                                                        fullPage=False, | ||||
|                                                        quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))) | ||||
|         y += step_size | ||||
|  | ||||
|     await page.setViewport({'width': original_viewport['width'], 'height': original_viewport['height']}) | ||||
|  | ||||
|     if len(screenshot_chunks) > 1: | ||||
|         from changedetectionio.content_fetchers.screenshot_handler import stitch_images_worker | ||||
|         logger.debug(f"Screenshot stitching {len(screenshot_chunks)} chunks together") | ||||
|         parent_conn, child_conn = Pipe() | ||||
|         p = Process(target=stitch_images_worker, args=(child_conn, screenshot_chunks, page_height, SCREENSHOT_MAX_TOTAL_HEIGHT)) | ||||
|         p.start() | ||||
|         screenshot = parent_conn.recv_bytes() | ||||
|         p.join() | ||||
|         logger.debug( | ||||
|             f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s") | ||||
|  | ||||
|         screenshot_chunks = None | ||||
|         return screenshot | ||||
|  | ||||
|     logger.debug( | ||||
|         f"Screenshot Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s") | ||||
|     return screenshot_chunks[0] | ||||
|  | ||||
|  | ||||
| class fetcher(Fetcher): | ||||
|     fetcher_description = "Puppeteer/direct {}/Javascript".format( | ||||
| @@ -68,20 +145,24 @@ class fetcher(Fetcher): | ||||
|     #         f.write(content) | ||||
|  | ||||
|     async def fetch_page(self, | ||||
|                          url, | ||||
|                          timeout, | ||||
|                          request_headers, | ||||
|                          request_body, | ||||
|                          request_method, | ||||
|                          ignore_status_codes, | ||||
|                          current_include_filters, | ||||
|                          empty_pages_are_a_change, | ||||
|                          fetch_favicon, | ||||
|                          ignore_status_codes, | ||||
|                          is_binary, | ||||
|                          empty_pages_are_a_change | ||||
|                          request_body, | ||||
|                          request_headers, | ||||
|                          request_method, | ||||
|                          timeout, | ||||
|                          url, | ||||
|                          ): | ||||
|  | ||||
|         from changedetectionio.content_fetchers import visualselector_xpath_selectors | ||||
|         import re | ||||
|         self.delete_browser_steps_screenshots() | ||||
|         extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|  | ||||
|         n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|         extra_wait = min(n, 15) | ||||
|  | ||||
|         logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.") | ||||
|  | ||||
|         from pyppeteer import Pyppeteer | ||||
|         pyppeteer_instance = Pyppeteer() | ||||
| @@ -97,12 +178,24 @@ class fetcher(Fetcher): | ||||
|         except websockets.exceptions.InvalidURI: | ||||
|             raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://") | ||||
|         except Exception as e: | ||||
|             raise BrowserConnectError(msg=f"Error connecting to the browser {str(e)}") | ||||
|             raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'") | ||||
|  | ||||
|         # Better is to launch chrome with the URL as arg | ||||
|         # non-headless - newPage() will launch an extra tab/window, .browser should already contain 1 page/tab | ||||
|         # headless - ask a new page | ||||
|         self.page = (pages := await browser.pages) and len(pages) or await browser.newPage() | ||||
|         # more reliable is to just request a new page | ||||
|         self.page = await browser.newPage() | ||||
|          | ||||
|         # Add console handler to capture console.log from favicon fetcher | ||||
|         #self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}")) | ||||
|  | ||||
|         if '--window-size' in self.browser_connection_url: | ||||
|             # Be sure the viewport is always the window-size, this is often not the same thing | ||||
|             match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url) | ||||
|             if match: | ||||
|                 logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}") | ||||
|                 await self.page.setViewport({ | ||||
|                     "width": int(match.group(1)), | ||||
|                     "height": int(match.group(2)) | ||||
|                 }) | ||||
|                 logger.debug(f"Puppeteer viewport size {self.page.viewport}") | ||||
|  | ||||
|         try: | ||||
|             from pyppeteerstealth import inject_evasions_into_page | ||||
| @@ -148,14 +241,35 @@ class fetcher(Fetcher): | ||||
|         #            browsersteps_interface = steppable_browser_interface() | ||||
|         #            browsersteps_interface.page = self.page | ||||
|  | ||||
|         response = await self.page.goto(url, waitUntil="load") | ||||
|         async def handle_frame_navigation(event): | ||||
|             logger.debug(f"Frame navigated: {event}") | ||||
|             w = extra_wait - 2 if extra_wait > 4 else 2 | ||||
|             logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...") | ||||
|             await asyncio.sleep(w) | ||||
|             logger.debug("Issuing stopLoading command...") | ||||
|             await self.page._client.send('Page.stopLoading') | ||||
|             logger.debug("stopLoading command sent!") | ||||
|  | ||||
|         self.page._client.on('Page.frameStartedNavigating', lambda event: asyncio.create_task(handle_frame_navigation(event))) | ||||
|         self.page._client.on('Page.frameStartedLoading', lambda event: asyncio.create_task(handle_frame_navigation(event))) | ||||
|         self.page._client.on('Page.frameStoppedLoading', lambda event: logger.debug(f"Frame stopped loading: {event}")) | ||||
|  | ||||
|         if response is None: | ||||
|             await self.page.close() | ||||
|             await browser.close() | ||||
|             logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)") | ||||
|             raise EmptyReply(url=url, status_code=None) | ||||
|         response = None | ||||
|         attempt=0 | ||||
|         while not response: | ||||
|             logger.debug(f"Attempting page fetch {url} attempt {attempt}") | ||||
|             response = await self.page.goto(url) | ||||
|             await asyncio.sleep(1 + extra_wait) | ||||
|             if response: | ||||
|                 break | ||||
|             if not response: | ||||
|                 logger.warning("Page did not fetch! trying again!") | ||||
|             if response is None and attempt>=2: | ||||
|                 await self.page.close() | ||||
|                 await browser.close() | ||||
|                 logger.warning(f"Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content) exiting attmpt {attempt}") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|             attempt+=1 | ||||
|  | ||||
|         self.headers = response.headers | ||||
|  | ||||
| @@ -180,12 +294,17 @@ class fetcher(Fetcher): | ||||
|             await browser.close() | ||||
|             raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|         if fetch_favicon: | ||||
|             try: | ||||
|                 self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS) | ||||
|             except Exception as e: | ||||
|                 logger.error(f"Error fetching FavIcon info {str(e)}, continuing.") | ||||
|  | ||||
|         if self.status_code != 200 and not ignore_status_codes: | ||||
|             screenshot = await self.page.screenshot(type_='jpeg', | ||||
|                                                     fullPage=True, | ||||
|                                                     quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|             screenshot = await capture_full_page(page=self.page) | ||||
|  | ||||
|             raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) | ||||
|  | ||||
|         content = await self.page.content | ||||
|  | ||||
|         if not empty_pages_are_a_change and len(content.strip()) == 0: | ||||
| @@ -199,74 +318,69 @@ class fetcher(Fetcher): | ||||
|         #            if self.browser_steps_get_valid_steps(): | ||||
|         #                self.iterate_browser_steps() | ||||
|  | ||||
|         await asyncio.sleep(1 + extra_wait) | ||||
|  | ||||
|         # So we can find an element on the page where its selector was entered manually (maybe not xPath etc) | ||||
|         # Setup the xPath/VisualSelector scraper | ||||
|         if current_include_filters is not None: | ||||
|         if current_include_filters: | ||||
|             js = json.dumps(current_include_filters) | ||||
|             await self.page.evaluate(f"var include_filters={js}") | ||||
|         else: | ||||
|             await self.page.evaluate(f"var include_filters=''") | ||||
|  | ||||
|         self.xpath_data = await self.page.evaluate( | ||||
|             "async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}") | ||||
|         self.instock_data = await self.page.evaluate("async () => {" + self.instock_data_js + "}") | ||||
|         MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|         self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, { | ||||
|             "visualselector_xpath_selectors": visualselector_xpath_selectors, | ||||
|             "max_height": MAX_TOTAL_HEIGHT | ||||
|         }) | ||||
|         if not self.xpath_data: | ||||
|             raise Exception(f"Content Fetcher > xPath scraper failed. Please report this URL so we can fix it :)") | ||||
|  | ||||
|         self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS) | ||||
|  | ||||
|         self.content = await self.page.content | ||||
|         # Bug 3 in Playwright screenshot handling | ||||
|         # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|         # JPEG is better here because the screenshots can be very very large | ||||
|  | ||||
|         # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded | ||||
|         # which will significantly increase the IO size between the server and client, it's recommended to use the lowest | ||||
|         # acceptable screenshot quality here | ||||
|         try: | ||||
|             self.screenshot = await self.page.screenshot(type_='jpeg', | ||||
|                                                          fullPage=True, | ||||
|                                                          quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|         except Exception as e: | ||||
|             logger.error("Error fetching screenshot") | ||||
|             # // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw' | ||||
|             # // @ todo after text extract, we can place some overlay text with red background to say 'croppped' | ||||
|             logger.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot') | ||||
|             try: | ||||
|                 self.screenshot = await self.page.screenshot(type_='jpeg', | ||||
|                                                              fullPage=False, | ||||
|                                                              quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|             except Exception as e: | ||||
|                 logger.error('ERROR: Failed to get viewport-only reduced screenshot :(') | ||||
|                 pass | ||||
|         finally: | ||||
|             # It's good to log here in the case that the browser crashes on shutting down but we still get the data we need | ||||
|             logger.success(f"Fetching '{url}' complete, closing page") | ||||
|             await self.page.close() | ||||
|             logger.success(f"Fetching '{url}' complete, closing browser") | ||||
|             await browser.close() | ||||
|         self.screenshot = await capture_full_page(page=self.page) | ||||
|  | ||||
|         # It's good to log here in the case that the browser crashes on shutting down but we still get the data we need | ||||
|         logger.success(f"Fetching '{url}' complete, closing page") | ||||
|         await self.page.close() | ||||
|         logger.success(f"Fetching '{url}' complete, closing browser") | ||||
|         await browser.close() | ||||
|         logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.") | ||||
|  | ||||
|     async def main(self, **kwargs): | ||||
|         """Await ``fetch_page``, forwarding every keyword argument to it unchanged.""" | ||||
|         await self.fetch_page(**kwargs) | ||||
|  | ||||
|     def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False, | ||||
|             current_include_filters=None, is_binary=False, empty_pages_are_a_change=False): | ||||
|     async def run(self, | ||||
|                   fetch_favicon=True, | ||||
|                   current_include_filters=None, | ||||
|                   empty_pages_are_a_change=False, | ||||
|                   ignore_status_codes=False, | ||||
|                   is_binary=False, | ||||
|                   request_body=None, | ||||
|                   request_headers=None, | ||||
|                   request_method=None, | ||||
|                   timeout=None, | ||||
|                   url=None, | ||||
|                   ): | ||||
|  | ||||
|         #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints | ||||
|         max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180) | ||||
|         max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)) | ||||
|  | ||||
|         # This will work in 3.10 but not >= 3.11 because 3.11 wants tasks only | ||||
|         # Now we run this properly in async context since we're called from async worker | ||||
|         try: | ||||
|             asyncio.run(asyncio.wait_for(self.main( | ||||
|                 url=url, | ||||
|                 timeout=timeout, | ||||
|                 request_headers=request_headers, | ||||
|                 request_body=request_body, | ||||
|                 request_method=request_method, | ||||
|                 ignore_status_codes=ignore_status_codes, | ||||
|             await asyncio.wait_for(self.main( | ||||
|                 current_include_filters=current_include_filters, | ||||
|                 empty_pages_are_a_change=empty_pages_are_a_change, | ||||
|                 fetch_favicon=fetch_favicon, | ||||
|                 ignore_status_codes=ignore_status_codes, | ||||
|                 is_binary=is_binary, | ||||
|                 empty_pages_are_a_change=empty_pages_are_a_change | ||||
|             ), timeout=max_time)) | ||||
|                 request_body=request_body, | ||||
|                 request_headers=request_headers, | ||||
|                 request_method=request_method, | ||||
|                 timeout=timeout, | ||||
|                 url=url, | ||||
|             ), timeout=max_time | ||||
|             ) | ||||
|         except asyncio.TimeoutError: | ||||
|             raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds.")) | ||||
|  | ||||
|             raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds.")) | ||||
|   | ||||
| @@ -1,6 +1,7 @@ | ||||
| from loguru import logger | ||||
| import hashlib | ||||
| import os | ||||
| import asyncio | ||||
| from changedetectionio import strtobool | ||||
| from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived | ||||
| from changedetectionio.content_fetchers.base import Fetcher | ||||
| @@ -15,7 +16,7 @@ class fetcher(Fetcher): | ||||
|         self.proxy_override = proxy_override | ||||
|         # browser_connection_url is none because its always 'launched locally' | ||||
|  | ||||
|     def run(self, | ||||
|     def _run_sync(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
| @@ -25,9 +26,11 @@ class fetcher(Fetcher): | ||||
|             current_include_filters=None, | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|         """Synchronous version of run - the original requests implementation""" | ||||
|  | ||||
|         import chardet | ||||
|         import requests | ||||
|         from requests.exceptions import ProxyError, ConnectionError, RequestException | ||||
|  | ||||
|         if self.browser_steps_get_valid_steps(): | ||||
|             raise BrowserStepsInUnsupportedFetcher(url=url) | ||||
| @@ -35,7 +38,6 @@ class fetcher(Fetcher): | ||||
|         proxies = {} | ||||
|  | ||||
|         # Allows override the proxy on a per-request basis | ||||
|  | ||||
|         # https://requests.readthedocs.io/en/latest/user/advanced/#socks | ||||
|         # Should also work with `socks5://user:pass@host:port` type syntax. | ||||
|  | ||||
| @@ -52,14 +54,19 @@ class fetcher(Fetcher): | ||||
|         if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'): | ||||
|             from requests_file import FileAdapter | ||||
|             session.mount('file://', FileAdapter()) | ||||
|  | ||||
|         r = session.request(method=request_method, | ||||
|                             data=request_body.encode('utf-8') if type(request_body) is str else request_body, | ||||
|                             url=url, | ||||
|                             headers=request_headers, | ||||
|                             timeout=timeout, | ||||
|                             proxies=proxies, | ||||
|                             verify=False) | ||||
|         try: | ||||
|             r = session.request(method=request_method, | ||||
|                                 data=request_body.encode('utf-8') if type(request_body) is str else request_body, | ||||
|                                 url=url, | ||||
|                                 headers=request_headers, | ||||
|                                 timeout=timeout, | ||||
|                                 proxies=proxies, | ||||
|                                 verify=False) | ||||
|         except Exception as e: | ||||
|             msg = str(e) | ||||
|             if proxies and 'SOCKSHTTPSConnectionPool' in msg: | ||||
|                 msg = f"Proxy connection failed? {msg}" | ||||
|             raise Exception(msg) from e | ||||
|  | ||||
|         # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks. | ||||
|         # For example - some sites don't tell us it's utf-8, but return utf-8 content | ||||
| @@ -94,5 +101,50 @@ class fetcher(Fetcher): | ||||
|         else: | ||||
|             self.content = r.text | ||||
|  | ||||
|  | ||||
|         self.raw_content = r.content | ||||
|  | ||||
|     async def run(self, | ||||
|                   fetch_favicon=True, | ||||
|                   current_include_filters=None, | ||||
|                   empty_pages_are_a_change=False, | ||||
|                   ignore_status_codes=False, | ||||
|                   is_binary=False, | ||||
|                   request_body=None, | ||||
|                   request_headers=None, | ||||
|                   request_method=None, | ||||
|                   timeout=None, | ||||
|                   url=None, | ||||
|                   ): | ||||
|         """Async wrapper that runs the synchronous requests code in a thread pool""" | ||||
|          | ||||
|         loop = asyncio.get_event_loop() | ||||
|          | ||||
|         # Run the synchronous _run_sync in a thread pool to avoid blocking the event loop | ||||
|         await loop.run_in_executor( | ||||
|             None,  # Use default ThreadPoolExecutor | ||||
|             lambda: self._run_sync( | ||||
|                 url=url, | ||||
|                 timeout=timeout, | ||||
|                 request_headers=request_headers, | ||||
|                 request_body=request_body, | ||||
|                 request_method=request_method, | ||||
|                 ignore_status_codes=ignore_status_codes, | ||||
|                 current_include_filters=current_include_filters, | ||||
|                 is_binary=is_binary, | ||||
|                 empty_pages_are_a_change=empty_pages_are_a_change | ||||
|             ) | ||||
|         ) | ||||
|  | ||||
|     def quit(self, watch=None): | ||||
|  | ||||
|         # In case they switched to `requests` fetcher from something else | ||||
|         # Then the screenshot could be old, in any case, it's not used here. | ||||
|         # REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing | ||||
|         if strtobool(os.getenv("REMOVE_REQUESTS_OLD_SCREENSHOTS", 'true')): | ||||
|             screenshot = watch.get_screenshot() | ||||
|             if screenshot: | ||||
|                 try: | ||||
|                     os.unlink(screenshot) | ||||
|                 except Exception as e: | ||||
|                     logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}") | ||||
|  | ||||
|   | ||||
							
								
								
									
										101
									
								
								changedetectionio/content_fetchers/res/favicon-fetcher.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								changedetectionio/content_fetchers/res/favicon-fetcher.js
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,101 @@ | ||||
| (async () => { | ||||
|   // Define the function inside the IIFE for console testing | ||||
|   window.getFaviconAsBlob = async function() { | ||||
|     const links = Array.from(document.querySelectorAll( | ||||
|       'link[rel~="apple-touch-icon"], link[rel~="icon"]' | ||||
|     )); | ||||
|  | ||||
|     const icons = links.map(link => { | ||||
|       const sizesStr = link.getAttribute('sizes'); | ||||
|       let size = 0; | ||||
|       if (sizesStr) { | ||||
|         const [w] = sizesStr.split('x').map(Number); | ||||
|         if (!isNaN(w)) size = w; | ||||
|       } else { | ||||
|         size = 16; | ||||
|       } | ||||
|       return { | ||||
|         size, | ||||
|         rel: link.getAttribute('rel'), | ||||
|         href: link.href, | ||||
|         hasSizes: !!sizesStr | ||||
|       }; | ||||
|     }); | ||||
|  | ||||
|     // If no icons found, add fallback favicon.ico | ||||
|     if (icons.length === 0) { | ||||
|       icons.push({ | ||||
|         size: 16, | ||||
|         rel: 'icon', | ||||
|         href: '/favicon.ico', | ||||
|         hasSizes: false | ||||
|       }); | ||||
|     } | ||||
|  | ||||
|     // sort preference: highest resolution first, then apple-touch-icon, then regular icons | ||||
|     icons.sort((a, b) => { | ||||
|       // First priority: actual size (highest first) | ||||
|       if (a.size !== b.size) { | ||||
|         return b.size - a.size; | ||||
|       } | ||||
|        | ||||
|       // Second priority: apple-touch-icon over regular icon | ||||
|       const isAppleA = /apple-touch-icon/.test(a.rel); | ||||
|       const isAppleB = /apple-touch-icon/.test(b.rel); | ||||
|       if (isAppleA && !isAppleB) return -1; | ||||
|       if (!isAppleA && isAppleB) return 1; | ||||
|        | ||||
|       // Third priority: icons with no size attribute (fallback icons) last | ||||
|       const hasNoSizeA = !a.hasSizes; | ||||
|       const hasNoSizeB = !b.hasSizes; | ||||
|       if (hasNoSizeA && !hasNoSizeB) return 1; | ||||
|       if (!hasNoSizeA && hasNoSizeB) return -1; | ||||
|        | ||||
|       return 0; | ||||
|     }); | ||||
|  | ||||
|     const timeoutMs = 2000; | ||||
|  | ||||
|     for (const icon of icons) { | ||||
|       try { | ||||
|         const controller = new AbortController(); | ||||
|         const timeout = setTimeout(() => controller.abort(), timeoutMs); | ||||
|  | ||||
|         const resp = await fetch(icon.href, { | ||||
|           signal: controller.signal, | ||||
|           redirect: 'follow' | ||||
|         }); | ||||
|  | ||||
|         clearTimeout(timeout); | ||||
|  | ||||
|         if (!resp.ok) { | ||||
|           continue; | ||||
|         } | ||||
|  | ||||
|         const blob = await resp.blob(); | ||||
|  | ||||
|         // Convert blob to base64 | ||||
|         const reader = new FileReader(); | ||||
|         return await new Promise(resolve => { | ||||
|           reader.onloadend = () => { | ||||
|             resolve({ | ||||
|               url: icon.href, | ||||
|               base64: reader.result.split(",")[1] | ||||
|             }); | ||||
|           }; | ||||
|           reader.readAsDataURL(blob); | ||||
|         }); | ||||
|  | ||||
|       } catch (e) { | ||||
|         continue; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // nothing found | ||||
|     return null; | ||||
|   }; | ||||
|  | ||||
|   // Auto-execute and return result for page.evaluate() | ||||
|   return await window.getFaviconAsBlob(); | ||||
| })(); | ||||
|  | ||||
| @@ -1,190 +0,0 @@ | ||||
| // Page-fetch routine: configures the given page from `context`, loads `url`, runs the xPath and | ||||
| // in-stock scrapers, takes a JPEG screenshot and returns everything as one JSON payload. | ||||
| // `%xpath_scrape_code%` and `%instock_scrape_code%` below are placeholders - presumably replaced | ||||
| // with the real scraper source before this file is executed (TODO confirm against the caller). | ||||
| module.exports = async ({page, context}) => { | ||||
|  | ||||
|     var { | ||||
|         url, | ||||
|         execute_js, | ||||
|         user_agent, | ||||
|         extra_wait_ms, | ||||
|         req_headers, | ||||
|         include_filters, | ||||
|         xpath_element_js, | ||||
|         screenshot_quality, | ||||
|         proxy_username, | ||||
|         proxy_password, | ||||
|         disk_cache_dir, | ||||
|         no_cache_list, | ||||
|         block_url_list, | ||||
|     } = context; | ||||
|  | ||||
|     await page.setBypassCSP(true) | ||||
|     await page.setExtraHTTPHeaders(req_headers); | ||||
|  | ||||
|     if (user_agent) { | ||||
|         await page.setUserAgent(user_agent); | ||||
|     } | ||||
|     // https://ourcodeworld.com/articles/read/1106/how-to-solve-puppeteer-timeouterror-navigation-timeout-of-30000-ms-exceeded | ||||
|  | ||||
|     // 0 is passed as the navigation timeout - see the link above (presumably this disables it) | ||||
|     await page.setDefaultNavigationTimeout(0); | ||||
|  | ||||
|     if (proxy_username) { | ||||
|         // Setting Proxy-Authentication header is deprecated, and doing so can trigger header change errors from Puppeteer | ||||
|         // https://github.com/puppeteer/puppeteer/issues/676 ? | ||||
|         // https://help.brightdata.com/hc/en-us/articles/12632549957649-Proxy-Manager-How-to-Guides#h_01HAKWR4Q0AFS8RZTNYWRDFJC2 | ||||
|         // https://cri.dev/posts/2020-03-30-How-to-solve-Puppeteer-Chrome-Error-ERR_INVALID_ARGUMENT/ | ||||
|         await page.authenticate({ | ||||
|             username: proxy_username, | ||||
|             password: proxy_password | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     await page.setViewport({ | ||||
|         width: 1024, | ||||
|         height: 768, | ||||
|         deviceScaleFactor: 1, | ||||
|     }); | ||||
|  | ||||
|     // Interception is always switched on: the 'request' handler below must therefore | ||||
|     // abort, respond to, or continue every single request. | ||||
|     await page.setRequestInterception(true); | ||||
|     if (disk_cache_dir) { | ||||
|         console.log(">>>>>>>>>>>>>>> LOCAL DISK CACHE ENABLED <<<<<<<<<<<<<<<<<<<<<"); | ||||
|     } | ||||
|     const fs = require('fs'); | ||||
|     const crypto = require('crypto'); | ||||
|  | ||||
|     // True when the file is missing or its mtime is more than 300 seconds in the past. | ||||
|     function file_is_expired(file_path) { | ||||
|         if (!fs.existsSync(file_path)) { | ||||
|             return true; | ||||
|         } | ||||
|         var stats = fs.statSync(file_path); | ||||
|         const now_date = new Date(); | ||||
|         const expire_seconds = 300; | ||||
|         if ((now_date / 1000) - (stats.mtime.getTime() / 1000) > expire_seconds) { | ||||
|             console.log("CACHE EXPIRED: " + file_path); | ||||
|             return true; | ||||
|         } | ||||
|         return false; | ||||
|  | ||||
|     } | ||||
|  | ||||
|     page.on('request', async (request) => { | ||||
|         // General blocking of requests that waste traffic | ||||
|         if (block_url_list.some(substring => request.url().toLowerCase().includes(substring))) return request.abort(); | ||||
|  | ||||
|         if (disk_cache_dir) { | ||||
|             // Cache file name is the md5 of the URL, stored three directory levels deep | ||||
|             // using the first three hex characters of that hash. | ||||
|             const url = request.url(); | ||||
|             const key = crypto.createHash('md5').update(url).digest("hex"); | ||||
|             const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/'; | ||||
|  | ||||
|             // https://stackoverflow.com/questions/4482686/check-synchronously-if-file-directory-exists-in-node-js | ||||
|  | ||||
|             // NOTE(review): a hit is served whenever the file exists; file_is_expired() is only | ||||
|             // consulted on the write path below, so an expired entry is still served here. | ||||
|             if (fs.existsSync(dir_path + key)) { | ||||
|                 console.log("* CACHE HIT , using - " + dir_path + key + " - " + url); | ||||
|                 const cached_data = fs.readFileSync(dir_path + key); | ||||
|                 // @todo headers can come from dir_path+key+".meta" json file | ||||
|                 request.respond({ | ||||
|                     status: 200, | ||||
|                     //contentType: 'text/html', //@todo | ||||
|                     body: cached_data | ||||
|                 }); | ||||
|                 return; | ||||
|             } | ||||
|         } | ||||
|         request.continue(); | ||||
|     }); | ||||
|  | ||||
|  | ||||
|     if (disk_cache_dir) { | ||||
|         // Write path of the disk cache: store bodies of successful GET sub-resource responses. | ||||
|         page.on('response', async (response) => { | ||||
|             const url = response.url(); | ||||
|             // Basic filtering for sane responses | ||||
|             if (response.request().method() != 'GET' || response.request().resourceType() == 'xhr' || response.request().resourceType() == 'document' || response.status() != 200) { | ||||
|                 console.log("Skipping (not useful) - Status:" + response.status() + " Method:" + response.request().method() + " ResourceType:" + response.request().resourceType() + " " + url); | ||||
|                 return; | ||||
|             } | ||||
|             if (no_cache_list.some(substring => url.toLowerCase().includes(substring))) { | ||||
|                 console.log("Skipping (no_cache_list) - " + url); | ||||
|                 return; | ||||
|             } | ||||
|             if (url.toLowerCase().includes('data:')) { | ||||
|                 console.log("Skipping (embedded-data) - " + url); | ||||
|                 return; | ||||
|             } | ||||
|             response.buffer().then(buffer => { | ||||
|                 // Bodies of 100 bytes or fewer are not cached | ||||
|                 if (buffer.length > 100) { | ||||
|                     console.log("Cache - Saving " + response.request().method() + " - " + url + " - " + response.request().resourceType()); | ||||
|  | ||||
|                     const key = crypto.createHash('md5').update(url).digest("hex"); | ||||
|                     const dir_path = disk_cache_dir + key.slice(0, 1) + '/' + key.slice(1, 2) + '/' + key.slice(2, 3) + '/'; | ||||
|  | ||||
|                     if (!fs.existsSync(dir_path)) { | ||||
|                         fs.mkdirSync(dir_path, {recursive: true}) | ||||
|                     } | ||||
|  | ||||
|                     // An existing entry is only overwritten once it has expired | ||||
|                     if (fs.existsSync(dir_path + key)) { | ||||
|                         if (file_is_expired(dir_path + key)) { | ||||
|                             fs.writeFileSync(dir_path + key, buffer); | ||||
|                         } | ||||
|                     } else { | ||||
|                         fs.writeFileSync(dir_path + key, buffer); | ||||
|                     } | ||||
|                 } | ||||
|             }); | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     const r = await page.goto(url, { | ||||
|         waitUntil: 'load' | ||||
|     }); | ||||
|  | ||||
|     // Fixed one second settle time, followed by the caller-supplied extra wait | ||||
|     await page.waitForTimeout(1000); | ||||
|     await page.waitForTimeout(extra_wait_ms); | ||||
|  | ||||
|     if (execute_js) { | ||||
|         await page.evaluate(execute_js); | ||||
|         await page.waitForTimeout(200); | ||||
|     } | ||||
|  | ||||
|     var xpath_data; | ||||
|     var instock_data; | ||||
|     try { | ||||
|         // Not sure the best way here, in the future this should be a new package added to npm then run in evaluatedCode | ||||
|         // (Once the old playwright is removed) | ||||
|         xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters); | ||||
|         instock_data = await page.evaluate(() => {%instock_scrape_code%}); | ||||
|     } catch (e) { | ||||
|         // A scraper failure is only logged; both values then stay undefined in the result | ||||
|         console.log(e); | ||||
|     } | ||||
|  | ||||
|     // Protocol error (Page.captureScreenshot): Cannot take screenshot with 0 width can come from a proxy auth failure | ||||
|     // Wrap it here (for now) | ||||
|  | ||||
|     var b64s = false; | ||||
|     try { | ||||
|         b64s = await page.screenshot({encoding: "base64", fullPage: true, quality: screenshot_quality, type: 'jpeg'}); | ||||
|     } catch (e) { | ||||
|         console.log(e); | ||||
|     } | ||||
|  | ||||
|     // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw' | ||||
|     if (!b64s) { | ||||
|         // @todo after text extract, we can place some overlay text with red background to say 'croppped' | ||||
|         console.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot'); | ||||
|         try { | ||||
|             b64s = await page.screenshot({encoding: "base64", quality: screenshot_quality, type: 'jpeg'}); | ||||
|         } catch (e) { | ||||
|             console.log(e); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     var html = await page.content(); | ||||
|     // NOTE(review): `r` is used without a null check - if goto() can resolve to null, | ||||
|     // r.headers() / r.status() below would throw. Confirm against the Puppeteer docs. | ||||
|     return { | ||||
|         data: { | ||||
|             'content': html, | ||||
|             'headers': r.headers(), | ||||
|             'instock_data': instock_data, | ||||
|             'screenshot': b64s, | ||||
|             'status_code': r.status(), | ||||
|             'xpath_data': xpath_data | ||||
|         }, | ||||
|         type: 'application/json', | ||||
|     }; | ||||
| }; | ||||
| @@ -1,223 +1,239 @@ | ||||
| // Restock Detector | ||||
| // (c) Leigh Morresi dgtlmoon@gmail.com | ||||
| // | ||||
| // Assumes the product is in stock to begin with, unless the following appears above the fold ; | ||||
| // - outOfStockTexts appears above the fold (out of stock) | ||||
| // - negateOutOfStockRegex (really is in stock) | ||||
| async () => { | ||||
|  | ||||
| function isItemInStock() { | ||||
|     // @todo Pass these in so the same list can be used in non-JS fetchers | ||||
|     const outOfStockTexts = [ | ||||
|         ' أخبرني عندما يتوفر', | ||||
|         '0 in stock', | ||||
|         'actuellement indisponible', | ||||
|         'agotado', | ||||
|         'article épuisé', | ||||
|         'artikel zurzeit vergriffen', | ||||
|         'as soon as stock is available', | ||||
|         'ausverkauft', // sold out | ||||
|         'available for back order', | ||||
|         'awaiting stock', | ||||
|         'back in stock soon', | ||||
|         'back-order or out of stock', | ||||
|         'backordered', | ||||
|         'benachrichtigt mich', // notify me | ||||
|         'brak na stanie', | ||||
|         'brak w magazynie', | ||||
|         'coming soon', | ||||
|         'currently have any tickets for this', | ||||
|         'currently unavailable', | ||||
|         'dieser artikel ist bald wieder verfügbar', | ||||
|         'dostępne wkrótce', | ||||
|         'en rupture', | ||||
|         'en rupture de stock', | ||||
|         'épuisé', | ||||
|         'esgotado', | ||||
|         'indisponible', | ||||
|         'indisponível', | ||||
|         'isn\'t in stock right now', | ||||
|         'isnt in stock right now', | ||||
|         'isn’t in stock right now', | ||||
|         'item is no longer available', | ||||
|         'let me know when it\'s available', | ||||
|         'mail me when available', | ||||
|         'message if back in stock', | ||||
|         'mevcut değil', | ||||
|         'nachricht bei', | ||||
|         'nicht auf lager', | ||||
|         'nicht lagernd', | ||||
|         'nicht lieferbar', | ||||
|         'nicht verfügbar', | ||||
|         'nicht vorrätig', | ||||
|         'nicht zur verfügung', | ||||
|         'nie znaleziono produktów', | ||||
|         'niet beschikbaar', | ||||
|         'niet leverbaar', | ||||
|         'niet op voorraad', | ||||
|         'no disponible', | ||||
|         'non disponibile', | ||||
|         'non disponible', | ||||
|         'no longer in stock', | ||||
|         'no tickets available', | ||||
|         'not available', | ||||
|         'not currently available', | ||||
|         'not in stock', | ||||
|         'notify me when available', | ||||
|         'notify me', | ||||
|         'notify when available', | ||||
|         'não disponível', | ||||
|         'não estamos a aceitar encomendas', | ||||
|         'out of stock', | ||||
|         'out-of-stock', | ||||
|         'plus disponible', | ||||
|         'prodotto esaurito', | ||||
|         'produkt niedostępny', | ||||
|         'rupture', | ||||
|         'sold out', | ||||
|         'sold-out', | ||||
|         'stokta yok', | ||||
|         'temporarily out of stock', | ||||
|         'temporarily unavailable', | ||||
|         'there were no search results for', | ||||
|         'this item is currently unavailable', | ||||
|         'tickets unavailable', | ||||
|         'tijdelijk uitverkocht', | ||||
|         'tükendi', | ||||
|         'unavailable nearby', | ||||
|         'unavailable tickets', | ||||
|         'vergriffen', | ||||
|         'vorbestellen', | ||||
|         'vorbestellung ist bald möglich', | ||||
|         'we don\'t currently have any', | ||||
|         'we couldn\'t find any products that match', | ||||
|         'we do not currently have an estimate of when this product will be back in stock.', | ||||
|         'we don\'t know when or if this item will be back in stock.', | ||||
|         'we were not able to find a match', | ||||
|         'when this arrives in stock', | ||||
|         'zur zeit nicht an lager', | ||||
|         '品切れ', | ||||
|         '已售', | ||||
|         '已售完', | ||||
|         '품절' | ||||
|     ]; | ||||
|     function isItemInStock() { | ||||
|         // @todo Pass these in so the same list can be used in non-JS fetchers | ||||
|         const outOfStockTexts = [ | ||||
|             ' أخبرني عندما يتوفر', | ||||
|             '0 in stock', | ||||
|             'actuellement indisponible', | ||||
|             'agotado', | ||||
|             'article épuisé', | ||||
|             'artikel zurzeit vergriffen', | ||||
|             'as soon as stock is available', | ||||
|             'aucune offre n\'est disponible', | ||||
|             'ausverkauft', // sold out | ||||
|             'available for back order', | ||||
|             'awaiting stock', | ||||
|             'back in stock soon', | ||||
|             'back-order or out of stock', | ||||
|             'backordered', | ||||
|             'backorder', | ||||
|             'benachrichtigt mich', // notify me | ||||
|             'binnenkort leverbaar', // coming soon | ||||
|             'brak na stanie', | ||||
|             'brak w magazynie', | ||||
|             'coming soon', | ||||
|             'currently have any tickets for this', | ||||
|             'currently unavailable', | ||||
|             'dieser artikel ist bald wieder verfügbar', | ||||
|             'dostępne wkrótce', | ||||
|             'en rupture', | ||||
|             'esgotado', | ||||
|             'in kürze lieferbar', | ||||
|             'indisponible', | ||||
|             'indisponível', | ||||
|             'isn\'t in stock right now', | ||||
|             'isnt in stock right now', | ||||
|             'isn’t in stock right now', | ||||
|             'item is no longer available', | ||||
|             'let me know when it\'s available', | ||||
|             'mail me when available', | ||||
|             'message if back in stock', | ||||
|             'mevcut değil', | ||||
|             'more on order', | ||||
|             'nachricht bei', | ||||
|             'nicht auf lager', | ||||
|             'nicht lagernd', | ||||
|             'nicht lieferbar', | ||||
|             'nicht verfügbar', | ||||
|             'nicht vorrätig', | ||||
|             'nicht mehr lieferbar', | ||||
|             'nicht zur verfügung', | ||||
|             'nie znaleziono produktów', | ||||
|             'niet beschikbaar', | ||||
|             'niet leverbaar', | ||||
|             'niet op voorraad', | ||||
|             'no disponible', | ||||
|             'no featured offers available', | ||||
|             'no longer available', | ||||
|             'no longer in stock', | ||||
|             'no tickets available', | ||||
|             'non disponibile', | ||||
|             'non disponible', | ||||
|             'not available', | ||||
|             'not currently available', | ||||
|             'not in stock', | ||||
|             'notify me when available', | ||||
|             'notify me', | ||||
|             'notify when available', | ||||
|             'não disponível', | ||||
|             'não estamos a aceitar encomendas', | ||||
|             'out of stock', | ||||
|             'out-of-stock', | ||||
|             'plus disponible', | ||||
|             'prodotto esaurito', | ||||
|             'produkt niedostępny', | ||||
|             'rupture', | ||||
|             'sold out', | ||||
|             'sold-out', | ||||
|             'stok habis', | ||||
|             'stok kosong', | ||||
|             'stok varian ini habis', | ||||
|             'stokta yok', | ||||
|             'temporarily out of stock', | ||||
|             'temporarily unavailable', | ||||
|             'there were no search results for', | ||||
|             'this item is currently unavailable', | ||||
|             'tickets unavailable', | ||||
|             'tidak dijual', | ||||
|             'tidak tersedia', | ||||
|             'tijdelijk uitverkocht', | ||||
|             'tiket tidak tersedia', | ||||
|             'to subscribe to back in stock', | ||||
|             'tükendi', | ||||
|             'unavailable nearby', | ||||
|             'unavailable tickets', | ||||
|             'vergriffen', | ||||
|             'vorbestellen', | ||||
|             'vorbestellung ist bald möglich', | ||||
|             'we couldn\'t find any products that match', | ||||
|             'we do not currently have an estimate of when this product will be back in stock.', | ||||
|             'we don\'t currently have any', | ||||
|             'we don\'t know when or if this item will be back in stock.', | ||||
|             'we were not able to find a match', | ||||
|             'when this arrives in stock', | ||||
|             'when this item is available to order', | ||||
|             'zur zeit nicht an lager', | ||||
|             'épuisé', | ||||
|             '品切れ', | ||||
|             '已售', | ||||
|             '已售完', | ||||
|             '품절' | ||||
|         ]; | ||||
|  | ||||
|  | ||||
|     const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0); | ||||
|         const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0); | ||||
|  | ||||
|     function getElementBaseText(element) { | ||||
|         // Returns only this element's own text, lower-cased and trimmed; .textContent would also include text from children, which may give the wrong results | ||||
|         // so only the element's immediate TEXT_NODE children are concatenated | ||||
|         var text = ""; | ||||
|         for (var i = 0; i < element.childNodes.length; ++i) | ||||
|             if (element.childNodes[i].nodeType === Node.TEXT_NODE) | ||||
|                 text += element.childNodes[i].textContent; | ||||
|         return text.toLowerCase().trim(); | ||||
|     } | ||||
|         function getElementBaseText(element) { | ||||
|             // Returns only this element's own text, lower-cased and trimmed; .textContent would also include text from children, which may give the wrong results | ||||
|             // so only the element's immediate TEXT_NODE children are concatenated | ||||
|             var text = ""; | ||||
|             for (var i = 0; i < element.childNodes.length; ++i) | ||||
|                 if (element.childNodes[i].nodeType === Node.TEXT_NODE) | ||||
|                     text += element.childNodes[i].textContent; | ||||
|             return text.toLowerCase().trim(); | ||||
|         } | ||||
|  | ||||
|     const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig'); | ||||
|  | ||||
|     // The out-of-stock or in-stock-text is generally always above-the-fold | ||||
|     // and often below-the-fold is a list of related products that may or may not contain trigger text | ||||
|     // so it's good to filter to just the 'above the fold' elements | ||||
|     // and it should be at least 100px from the top to ignore items in the toolbar; sometimes menu items like "Coming soon" exist there | ||||
|         const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock|arrives approximately)', 'ig'); | ||||
|         // The out-of-stock or in-stock-text is generally always above-the-fold | ||||
|         // and often below-the-fold is a list of related products that may or may not contain trigger text | ||||
|         // so it's good to filter to just the 'above the fold' elements | ||||
|         // and it should be at least 100px from the top to ignore items in the toolbar; sometimes menu items like "Coming soon" exist there (NOTE: elementIsInEyeBallRange() currently uses 300 as its upper cut-off) | ||||
|  | ||||
|         function elementIsInEyeBallRange(element) { | ||||
|             // Returns true only when the element's bottom edge in page coordinates (rect.bottom + window.scrollY) is strictly between 300 and 1300, i.e. not in the heading area and not far outside the 'fold' | ||||
|             // .getBoundingClientRect() was causing a crash in Chrome 119; it can only be run on elements whose contentVisibility is not 'hidden' | ||||
|             // Note: there's also an automated test that places the 'out of stock' text fairly low down | ||||
|             // Skip text that could be in the header area (bottom edge at or above 300) | ||||
|             if (element.getBoundingClientRect().bottom + window.scrollY <= 300 ) { | ||||
|                 return false; | ||||
|             } | ||||
|             // Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in them) | ||||
|             if (element.getBoundingClientRect().bottom + window.scrollY >= 1300 ) { | ||||
|                 return false; | ||||
|             } | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
| // @todo - if it's SVG or IMG, go into image diff mode | ||||
| // %ELEMENTS% replaced at injection time because different interfaces use it with different settings | ||||
|  | ||||
|     console.log("Scanning %ELEMENTS%"); | ||||
|         function collectVisibleElements(parent, visibleElements) { | ||||
|             if (!parent) return; // Base case: if parent is null or undefined, return | ||||
|  | ||||
|     function collectVisibleElements(parent, visibleElements) { | ||||
|         if (!parent) return; // Base case: if parent is null or undefined, return | ||||
|             // Add the parent itself to the visible elements array if it's of the specified types | ||||
|             visibleElements.push(parent); | ||||
|  | ||||
|         // Add the parent itself to the visible elements array if it's of the specified types | ||||
|         visibleElements.push(parent); | ||||
|  | ||||
|         // Iterate over the parent's children | ||||
|         const children = parent.children; | ||||
|         for (let i = 0; i < children.length; i++) { | ||||
|             const child = children[i]; | ||||
|             if ( | ||||
|                 child.nodeType === Node.ELEMENT_NODE && | ||||
|                 window.getComputedStyle(child).display !== 'none' && | ||||
|                 window.getComputedStyle(child).visibility !== 'hidden' && | ||||
|                 child.offsetWidth >= 0 && | ||||
|                 child.offsetHeight >= 0 && | ||||
|                 window.getComputedStyle(child).contentVisibility !== 'hidden' | ||||
|             ) { | ||||
|                 // If the child is an element and is visible, recursively collect visible elements | ||||
|                 collectVisibleElements(child, visibleElements); | ||||
|             // Iterate over the parent's children | ||||
|             const children = parent.children; | ||||
|             for (let i = 0; i < children.length; i++) { | ||||
|                 const child = children[i]; | ||||
|                 if ( | ||||
|                     child.nodeType === Node.ELEMENT_NODE && | ||||
|                     window.getComputedStyle(child).display !== 'none' && | ||||
|                     window.getComputedStyle(child).visibility !== 'hidden' && | ||||
|                     child.offsetWidth >= 0 && | ||||
|                     child.offsetHeight >= 0 && | ||||
|                     window.getComputedStyle(child).contentVisibility !== 'hidden' | ||||
|                 ) { | ||||
|                     // If the child is an element and is visible, recursively collect visible elements | ||||
|                     collectVisibleElements(child, visibleElements); | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     const elementsToScan = []; | ||||
|     collectVisibleElements(document.body, elementsToScan); | ||||
|         const elementsToScan = []; | ||||
|         collectVisibleElements(document.body, elementsToScan); | ||||
|  | ||||
|     var elementText = ""; | ||||
|         var elementText = ""; | ||||
|  | ||||
|     // REGEXS THAT REALLY MEAN IT'S IN STOCK | ||||
|     for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|         const element = elementsToScan[i]; | ||||
|         // REGEXS THAT REALLY MEAN IT'S IN STOCK | ||||
|         for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|             const element = elementsToScan[i]; | ||||
|  | ||||
|         // outside the 'fold' or some weird text in the heading area | ||||
|         // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|         if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|             continue | ||||
|             if (!elementIsInEyeBallRange(element)) { | ||||
|                 continue | ||||
|             } | ||||
|  | ||||
|             elementText = ""; | ||||
|             try { | ||||
|                 if (element.tagName.toLowerCase() === "input") { | ||||
|                     elementText = element.value.toLowerCase().trim(); | ||||
|                 } else { | ||||
|                     elementText = getElementBaseText(element); | ||||
|                 } | ||||
|             } catch (e) { | ||||
|                 console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e); | ||||
|             } | ||||
|             if (elementText.length) { | ||||
|                 // try which ones could mean its in stock | ||||
|                 if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) { | ||||
|                     console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`) | ||||
|                     element.style.border = "2px solid green"; // highlight the element that was detected as in stock | ||||
|                     return 'Possibly in stock'; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         elementText = ""; | ||||
|         try { | ||||
|         // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK | ||||
|         for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|             const element = elementsToScan[i]; | ||||
|  | ||||
|             if (!elementIsInEyeBallRange(element)) { | ||||
|                 continue | ||||
|             } | ||||
|             elementText = ""; | ||||
|             if (element.tagName.toLowerCase() === "input") { | ||||
|                 elementText = element.value.toLowerCase().trim(); | ||||
|             } else { | ||||
|                 elementText = getElementBaseText(element); | ||||
|             } | ||||
|         } catch (e) { | ||||
|             console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e); | ||||
|         } | ||||
|  | ||||
|         if (elementText.length) { | ||||
|             // try which ones could mean its in stock | ||||
|             if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) { | ||||
|                 console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`) | ||||
|                 return 'Possibly in stock'; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK | ||||
|     for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|         const element = elementsToScan[i]; | ||||
|         // outside the 'fold' or some weird text in the heading area | ||||
|         // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|         // Note: theres also an automated test that places the 'out of stock' text fairly low down | ||||
|         if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|             continue | ||||
|         } | ||||
|         elementText = ""; | ||||
|         if (element.tagName.toLowerCase() === "input") { | ||||
|             elementText = element.value.toLowerCase().trim(); | ||||
|         } else { | ||||
|             elementText = getElementBaseText(element); | ||||
|         } | ||||
|  | ||||
|         if (elementText.length) { | ||||
|             // and these mean its out of stock | ||||
|             for (const outOfStockText of outOfStockTexts) { | ||||
|                 if (elementText.includes(outOfStockText)) { | ||||
|                     console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`) | ||||
|                     return outOfStockText; // item is out of stock | ||||
|             if (elementText.length) { | ||||
|                 // and these mean its out of stock | ||||
|                 for (const outOfStockText of outOfStockTexts) { | ||||
|                     if (elementText.includes(outOfStockText)) { | ||||
|                         console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`) | ||||
|                         element.style.border = "2px solid red"; // highlight the element that was detected as out of stock | ||||
|                         return outOfStockText; // item is out of stock | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`) | ||||
|         return 'Possibly in stock'; // possibly in stock, cant decide otherwise. | ||||
|     } | ||||
|  | ||||
|     console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`) | ||||
|     return 'Possibly in stock'; // possibly in stock, cant decide otherwise. | ||||
| } | ||||
|  | ||||
| // returns the element text that makes it think it's out of stock | ||||
| return isItemInStock().trim() | ||||
|  | ||||
|  | ||||
|     return isItemInStock().trim() | ||||
| } | ||||
|   | ||||
| @@ -1,285 +1,284 @@ | ||||
| // Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com) | ||||
| // All rights reserved. | ||||
| async (options) => { | ||||
|  | ||||
| // @file Scrape the page looking for elements of concern (%ELEMENTS%) | ||||
| // http://matatk.agrip.org.uk/tests/position-and-width/ | ||||
| // https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate | ||||
| // | ||||
| // Some pages like https://www.londonstockexchange.com/stock/NCCL/ncondezi-energy-limited/analysis | ||||
| // will automatically force a scroll somewhere, so include the position offset | ||||
| // Let's hope the position doesn't change while we iterate over the bounding boxes, but this is better than nothing | ||||
| var scroll_y = 0; | ||||
| try { | ||||
|     scroll_y = +document.documentElement.scrollTop || document.body.scrollTop | ||||
| } catch (e) { | ||||
|     console.log(e); | ||||
| } | ||||
|     let visualselector_xpath_selectors = options.visualselector_xpath_selectors | ||||
|     let max_height = options.max_height | ||||
|  | ||||
|  | ||||
| // getxpath(e): included directly (easier than fetching). Returns '//*[@id="..."]' when e has an id; otherwise walks parentNode upward and returns an absolute '/tag/tag[n]' path (a 1-based [n] is added only when a same-named sibling exists), or "" when no element steps were collected | ||||
| function getxpath(e) { | ||||
|     var n = e; | ||||
|     if (n && n.id) return '//*[@id="' + n.id + '"]'; | ||||
|     for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) { | ||||
|         for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling; | ||||
|         for (d = n.nextSibling; d;) { | ||||
|             if (d.nodeName === n.nodeName) { | ||||
|                 r = !0; | ||||
|                 break | ||||
|             } | ||||
|             d = d.nextSibling | ||||
|         } | ||||
|         o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode | ||||
|     } | ||||
|     return o.length ? "/" + o.reverse().join("/") : "" | ||||
| } | ||||
|  | ||||
| const findUpTag = (el) => { | ||||
|     let r = el | ||||
|     chained_css = []; | ||||
|     depth = 0; | ||||
|  | ||||
|     // Strategy 1: if the element has a non-empty name and TAG[name="..."] matches exactly one element, prefer that selector. NOTE(review): chained_css, depth and final_selector are assigned in this function without a declaration (implicit globals) - confirm that is intended | ||||
|     if (el.name !== undefined && el.name.length) { | ||||
|         var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]"; | ||||
|         var proposed_element = window.document.querySelectorAll(proposed); | ||||
|         if (proposed_element.length) { | ||||
|             if (proposed_element.length === 1) { | ||||
|                 return proposed; | ||||
|             } else { | ||||
|                 // Several elements share this name: some sites change the ID while name= stays the same, so we can still hit it if we know the index | ||||
|                 // Work out this element's 0-based position (indexOf) among all the matching elements | ||||
|                 var n = Array.from(proposed_element).indexOf(el); | ||||
|                 // Return a Playwright selector with the index appended, e.g. TAG[name="zipcode"] >> nth=1 | ||||
|                 return proposed + " >> nth=" + n; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // Strategy 2: keep going up (at most 5 levels) until we hit an element with an id, building a chain like "#list-widget > div > h4" | ||||
|     while (r.parentNode) { | ||||
|         if (depth === 5) { | ||||
|             break; | ||||
|         } | ||||
|         if ('' !== r.id) { | ||||
|             chained_css.unshift("#" + CSS.escape(r.id)); | ||||
|             final_selector = chained_css.join(' > '); | ||||
|             // Be sure there's only one match, some sites have multiples of the same ID :-( ; when the selector is not unique, null is returned below | ||||
|             if (window.document.querySelectorAll(final_selector).length === 1) { | ||||
|                 return final_selector; | ||||
|             } | ||||
|             return null; | ||||
|         } else { | ||||
|             chained_css.unshift(r.tagName.toLowerCase()); | ||||
|         } | ||||
|         r = r.parentNode; | ||||
|         depth += 1; | ||||
|     } | ||||
|     return null; | ||||
| } | ||||
|  | ||||
|  | ||||
| // @todo - if it's SVG or IMG, go into image diff mode | ||||
| // %ELEMENTS% replaced at injection time because different interfaces use it with different settings | ||||
|  | ||||
| var size_pos = []; | ||||
| // after page fetch, inject this JS | ||||
| // build a map of all elements and their positions (maybe that only include text?) | ||||
| var bbox; | ||||
| console.log("Scanning %ELEMENTS%"); | ||||
|  | ||||
| function collectVisibleElements(parent, visibleElements) { | ||||
|     if (!parent) return; // Base case: nothing to do when parent is null or undefined | ||||
|  | ||||
|  | ||||
|     // Append the parent to visibleElements (mutated in place) only when its lower-cased tag name is one of the comma-separated entries of "%ELEMENTS%" | ||||
|     const tagName = parent.tagName.toLowerCase(); | ||||
|     if ("%ELEMENTS%".split(',').includes(tagName)) { | ||||
|         visibleElements.push(parent); | ||||
|     } | ||||
|  | ||||
|     // Iterate over the parent's children and recurse into those that pass the visibility checks below | ||||
|     const children = parent.children; | ||||
|     for (let i = 0; i < children.length; i++) { | ||||
|         const child = children[i]; | ||||
|         const computedStyle = window.getComputedStyle(child); | ||||
|  | ||||
|         if ( | ||||
|             child.nodeType === Node.ELEMENT_NODE && | ||||
|             computedStyle.display !== 'none' && | ||||
|             computedStyle.visibility !== 'hidden' && | ||||
|             child.offsetWidth >= 0 && | ||||
|             child.offsetHeight >= 0 && | ||||
|             computedStyle.contentVisibility !== 'hidden' | ||||
|         ) { | ||||
|             // The child is an element and is not hidden via display, visibility or contentVisibility, so recursively collect from it. NOTE(review): the offsetWidth/offsetHeight >= 0 tests also accept zero-sized elements - confirm whether > 0 was intended | ||||
|             collectVisibleElements(child, visibleElements); | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Create an array to hold the visible elements | ||||
| const visibleElementsArray = []; | ||||
|  | ||||
| // Call collectVisibleElements with the starting parent element | ||||
| collectVisibleElements(document.body, visibleElementsArray); | ||||
|  | ||||
|  | ||||
| visibleElementsArray.forEach(function (element) { | ||||
|  | ||||
|     bbox = element.getBoundingClientRect(); | ||||
|  | ||||
|     // Skip really small ones, and where width or height ==0 | ||||
|     if (bbox['width'] * bbox['height'] < 10) { | ||||
|         return | ||||
|     } | ||||
|  | ||||
|     // Don't include elements that are offset from canvas | ||||
|     if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) { | ||||
|         return | ||||
|     } | ||||
|  | ||||
|     // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes | ||||
|     // it should not traverse when we know we can anchor off just an ID one level up etc.. | ||||
|     // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match | ||||
|  | ||||
|     // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us. | ||||
|     xpath_result = false; | ||||
|     var scroll_y = 0; | ||||
|     try { | ||||
|         var d = findUpTag(element); | ||||
|         if (d) { | ||||
|             xpath_result = d; | ||||
|         } | ||||
|         scroll_y = +document.documentElement.scrollTop || document.body.scrollTop | ||||
|     } catch (e) { | ||||
|         console.log(e); | ||||
|     } | ||||
|     // You could swap it and default to getXpath and then try the smarter one | ||||
|     // default back to the less intelligent one | ||||
|     if (!xpath_result) { | ||||
|         try { | ||||
|             // I've seen on FB and eBay that this doesnt work | ||||
|             // ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44) | ||||
|             xpath_result = getxpath(element); | ||||
|         } catch (e) { | ||||
|             console.log(e); | ||||
|             return | ||||
|  | ||||
| // getxpath(e): included directly (easier than fetching). Returns '//*[@id="..."]' when e has an id; otherwise walks parentNode upward and returns an absolute '/tag/tag[n]' path (a 1-based [n] is added only when a same-named sibling exists), or "" when no element steps were collected | ||||
|     function getxpath(e) { | ||||
|         var n = e; | ||||
|         if (n && n.id) return '//*[@id="' + n.id + '"]'; | ||||
|         for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) { | ||||
|             for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling; | ||||
|             for (d = n.nextSibling; d;) { | ||||
|                 if (d.nodeName === n.nodeName) { | ||||
|                     r = !0; | ||||
|                     break | ||||
|                 } | ||||
|                 d = d.nextSibling | ||||
|             } | ||||
|             o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode | ||||
|         } | ||||
|         return o.length ? "/" + o.reverse().join("/") : "" | ||||
|     } | ||||
|  | ||||
|     const findUpTag = (el) => { | ||||
|         let r = el | ||||
|         chained_css = []; | ||||
|         depth = 0; | ||||
|  | ||||
|         // Strategy 1: if the element has a non-empty name and TAG[name="..."] matches exactly one element, prefer that selector. NOTE(review): chained_css, depth and final_selector are assigned in this function without a declaration (implicit globals) - confirm that is intended | ||||
|         if (el.name !== undefined && el.name.length) { | ||||
|             var proposed = el.tagName + "[name=\"" + CSS.escape(el.name) + "\"]"; | ||||
|             var proposed_element = window.document.querySelectorAll(proposed); | ||||
|             if (proposed_element.length) { | ||||
|                 if (proposed_element.length === 1) { | ||||
|                     return proposed; | ||||
|                 } else { | ||||
|                     // Several elements share this name: some sites change the ID while name= stays the same, so we can still hit it if we know the index | ||||
|                     // Work out this element's 0-based position (indexOf) among all the matching elements | ||||
|                     var n = Array.from(proposed_element).indexOf(el); | ||||
|                     // Return a Playwright selector with the index appended, e.g. TAG[name="zipcode"] >> nth=1 | ||||
|                     return proposed + " >> nth=" + n; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         // Strategy 2: keep going up (at most 5 levels) until we hit an element with an id, building a chain like "#list-widget > div > h4" | ||||
|         while (r.parentNode) { | ||||
|             if (depth === 5) { | ||||
|                 break; | ||||
|             } | ||||
|             if ('' !== r.id) { | ||||
|                 chained_css.unshift("#" + CSS.escape(r.id)); | ||||
|                 final_selector = chained_css.join(' > '); | ||||
|                 // Be sure there's only one match, some sites have multiples of the same ID :-( ; when the selector is not unique, null is returned below | ||||
|                 if (window.document.querySelectorAll(final_selector).length === 1) { | ||||
|                     return final_selector; | ||||
|                 } | ||||
|                 return null; | ||||
|             } else { | ||||
|                 chained_css.unshift(r.tagName.toLowerCase()); | ||||
|             } | ||||
|             r = r.parentNode; | ||||
|             depth += 1; | ||||
|         } | ||||
|         return null; | ||||
|     } | ||||
|  | ||||
|  | ||||
| // @todo - if it's SVG or IMG, go into image diff mode | ||||
|  | ||||
|     var size_pos = []; | ||||
| // after page fetch, inject this JS | ||||
| // build a map of all elements and their positions (maybe that only include text?) | ||||
|     var bbox; | ||||
|     console.log(`Scanning for "${visualselector_xpath_selectors}"`); | ||||
|  | ||||
|     function collectVisibleElements(parent, visibleElements) { | ||||
|         if (!parent) return; // Base case: nothing to do when parent is null or undefined | ||||
|  | ||||
|  | ||||
|         // Append the parent to visibleElements (mutated in place) only when its lower-cased tag name is one of the comma-separated entries of visualselector_xpath_selectors | ||||
|         const tagName = parent.tagName.toLowerCase(); | ||||
|         if (visualselector_xpath_selectors.split(',').includes(tagName)) { | ||||
|             visibleElements.push(parent); | ||||
|         } | ||||
|  | ||||
|         // Iterate over the parent's children and recurse into those that pass the visibility checks below | ||||
|         const children = parent.children; | ||||
|         for (let i = 0; i < children.length; i++) { | ||||
|             const child = children[i]; | ||||
|             const computedStyle = window.getComputedStyle(child); | ||||
|  | ||||
|             if ( | ||||
|                 child.nodeType === Node.ELEMENT_NODE && | ||||
|                 computedStyle.display !== 'none' && | ||||
|                 computedStyle.visibility !== 'hidden' && | ||||
|                 child.offsetWidth >= 0 && | ||||
|                 child.offsetHeight >= 0 && | ||||
|                 computedStyle.contentVisibility !== 'hidden' | ||||
|             ) { | ||||
|                 // The child is an element and is not hidden via display, visibility or contentVisibility, so recursively collect from it. NOTE(review): the offsetWidth/offsetHeight >= 0 tests also accept zero-sized elements - confirm whether > 0 was intended | ||||
|                 collectVisibleElements(child, visibleElements); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now | ||||
| // Create an array to hold the visible elements | ||||
|     const visibleElementsArray = []; | ||||
|  | ||||
|     let text = element.textContent.trim().slice(0, 30).trim(); | ||||
|     while (/\n{2,}|\t{2,}/.test(text)) { | ||||
|         text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t') | ||||
|     } | ||||
| // Call collectVisibleElements with the starting parent element | ||||
|     collectVisibleElements(document.body, visibleElementsArray); | ||||
|  | ||||
|     // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training. | ||||
|     const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6)) ) &&  /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text) ; | ||||
|     const computedStyle = window.getComputedStyle(element); | ||||
|  | ||||
|     size_pos.push({ | ||||
|         xpath: xpath_result, | ||||
|         width: Math.round(bbox['width']), | ||||
|         height: Math.round(bbox['height']), | ||||
|         left: Math.floor(bbox['left']), | ||||
|         top: Math.floor(bbox['top']) + scroll_y, | ||||
|         // tagName used by Browser Steps | ||||
|         tagName: (element.tagName) ? element.tagName.toLowerCase() : '', | ||||
|         // tagtype used by Browser Steps | ||||
|         tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '', | ||||
|         isClickable: computedStyle.cursor === "pointer", | ||||
|         // Used by the keras trainer | ||||
|         fontSize: computedStyle.getPropertyValue('font-size'), | ||||
|         fontWeight: computedStyle.getPropertyValue('font-weight'), | ||||
|         hasDigitCurrency: hasDigitCurrency, | ||||
|         label: label, | ||||
|     visibleElementsArray.forEach(function (element) { | ||||
|  | ||||
|         bbox = element.getBoundingClientRect(); | ||||
|  | ||||
|         // Skip really small ones, and where width or height ==0 | ||||
|         if (bbox['width'] * bbox['height'] < 10) { | ||||
|             return | ||||
|         } | ||||
|  | ||||
|         // Don't include elements that are offset from canvas | ||||
|         if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) { | ||||
|             return | ||||
|         } | ||||
|  | ||||
|         // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes | ||||
|         // it should not traverse when we know we can anchor off just an ID one level up etc.. | ||||
|         // maybe, get current class or id, keep traversing up looking for only class or id until there is just one match | ||||
|  | ||||
|         // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us. | ||||
|         xpath_result = false; | ||||
|         try { | ||||
|             var d = findUpTag(element); | ||||
|             if (d) { | ||||
|                 xpath_result = d; | ||||
|             } | ||||
|         } catch (e) { | ||||
|             console.log(e); | ||||
|         } | ||||
|         // You could swap it and default to getXpath and then try the smarter one | ||||
|         // default back to the less intelligent one | ||||
|         if (!xpath_result) { | ||||
|             try { | ||||
|                 // I've seen on FB and eBay that this doesnt work | ||||
|                 // ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44) | ||||
|                 xpath_result = getxpath(element); | ||||
|             } catch (e) { | ||||
|                 console.log(e); | ||||
|                 return | ||||
|             } | ||||
|         } | ||||
|  | ||||
|         let label = "not-interesting" // A placeholder, the actual labels for training are done by hand for now | ||||
|  | ||||
|         let text = element.textContent.trim().slice(0, 30).trim(); | ||||
|         while (/\n{2,}|\t{2,}/.test(text)) { | ||||
|             text = text.replace(/\n{2,}/g, '\n').replace(/\t{2,}/g, '\t') | ||||
|         } | ||||
|  | ||||
|         // Try to identify any possible currency amounts "Sale: 4000" or "Sale now 3000 Kc", can help with the training. | ||||
|         const hasDigitCurrency = (/\d/.test(text.slice(0, 6)) || /\d/.test(text.slice(-6))) && /([€£$¥₩₹]|USD|AUD|EUR|Kč|kr|SEK|,–)/.test(text); | ||||
|         const computedStyle = window.getComputedStyle(element); | ||||
|  | ||||
|         if (Math.floor(bbox['top']) + scroll_y > max_height) { | ||||
|             return | ||||
|         } | ||||
|  | ||||
|         size_pos.push({ | ||||
|             xpath: xpath_result, | ||||
|             width: Math.round(bbox['width']), | ||||
|             height: Math.round(bbox['height']), | ||||
|             left: Math.floor(bbox['left']), | ||||
|             top: Math.floor(bbox['top']) + scroll_y, | ||||
|             // tagName used by Browser Steps | ||||
|             tagName: (element.tagName) ? element.tagName.toLowerCase() : '', | ||||
|             // tagtype used by Browser Steps | ||||
|             tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '', | ||||
|             isClickable: computedStyle.cursor === "pointer", | ||||
|             // Used by the keras trainer | ||||
|             fontSize: computedStyle.getPropertyValue('font-size'), | ||||
|             fontWeight: computedStyle.getPropertyValue('font-weight'), | ||||
|             hasDigitCurrency: hasDigitCurrency, | ||||
|             label: label, | ||||
|         }); | ||||
|  | ||||
|     }); | ||||
|  | ||||
| }); | ||||
|  | ||||
|  | ||||
| // Inject the current one set in the include_filters, which may be a CSS rule | ||||
| // used for displaying the current one in VisualSelector, where its not one we generated. | ||||
| if (include_filters.length) { | ||||
|     let results; | ||||
|     // Foreach filter, go and find it on the page and add it to the results so we can visualise it again | ||||
|     for (const f of include_filters) { | ||||
|         bbox = false; | ||||
|         q = false; | ||||
|     if (include_filters.length) { | ||||
|         let results; | ||||
|         // Foreach filter, go and find it on the page and add it to the results so we can visualise it again | ||||
|         for (const f of include_filters) { | ||||
|             bbox = false; | ||||
|  | ||||
|         if (!f.length) { | ||||
|             console.log("xpath_element_scraper: Empty filter, skipping"); | ||||
|             continue; | ||||
|         } | ||||
|  | ||||
|         try { | ||||
|             // is it xpath? | ||||
|             if (f.startsWith('/') || f.startsWith('xpath')) { | ||||
|                 var qry_f = f.replace(/xpath(:|\d:)/, '') | ||||
|                 console.log("[xpath] Scanning for included filter " + qry_f) | ||||
|                 let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); | ||||
|                 results = []; | ||||
|                 for (let i = 0; i < xpathResult.snapshotLength; i++) { | ||||
|                     results.push(xpathResult.snapshotItem(i)); | ||||
|                 } | ||||
|             } else { | ||||
|                 console.log("[css] Scanning for included filter " + f) | ||||
|                 console.log("[css] Scanning for included filter " + f); | ||||
|                 results = document.querySelectorAll(f); | ||||
|             if (!f.length) { | ||||
|                 console.log("xpath_element_scraper: Empty filter, skipping"); | ||||
|                 continue; | ||||
|             } | ||||
|         } catch (e) { | ||||
|             // Maybe catch DOMException and alert? | ||||
|             console.log("xpath_element_scraper: Exception selecting element from filter " + f); | ||||
|             console.log(e); | ||||
|         } | ||||
|  | ||||
|         if (results != null && results.length) { | ||||
|  | ||||
|             // Iterate over the results | ||||
|             results.forEach(node => { | ||||
|                 // Try to resolve //something/text() back to its /something so we can atleast get the bounding box | ||||
|                 try { | ||||
|                     if (typeof node.nodeName == 'string' && node.nodeName === '#text') { | ||||
|                         node = node.parentElement | ||||
|             try { | ||||
|                 // is it xpath? | ||||
|                 if (f.startsWith('/') || f.startsWith('xpath')) { | ||||
|                     var qry_f = f.replace(/xpath(:|\d:)/, '') | ||||
|                     console.log("[xpath] Scanning for included filter " + qry_f) | ||||
|                     let xpathResult = document.evaluate(qry_f, document, null, XPathResult.ORDERED_NODE_SNAPSHOT_TYPE, null); | ||||
|                     results = []; | ||||
|                     for (let i = 0; i < xpathResult.snapshotLength; i++) { | ||||
|                         results.push(xpathResult.snapshotItem(i)); | ||||
|                     } | ||||
|                 } catch (e) { | ||||
|                     console.log(e) | ||||
|                     console.log("xpath_element_scraper: #text resolver") | ||||
|                 } | ||||
|  | ||||
|                 // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element. | ||||
|                 if (typeof node.getBoundingClientRect == 'function') { | ||||
|                     bbox = node.getBoundingClientRect(); | ||||
|                     console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y) | ||||
|                 } else { | ||||
|                     console.log("[css] Scanning for included filter " + f) | ||||
|                     console.log("[css] Scanning for included filter " + f); | ||||
|                     results = document.querySelectorAll(f); | ||||
|                 } | ||||
|             } catch (e) { | ||||
|                 // Maybe catch DOMException and alert? | ||||
|                 console.log("xpath_element_scraper: Exception selecting element from filter " + f); | ||||
|                 console.log(e); | ||||
|             } | ||||
|  | ||||
|             if (results != null && results.length) { | ||||
|  | ||||
|                 // Iterate over the results | ||||
|                 results.forEach(node => { | ||||
|                     // Try to resolve //something/text() back to its /something so we can atleast get the bounding box | ||||
|                     try { | ||||
|                         // Try and see we can find its ownerElement | ||||
|                         bbox = node.ownerElement.getBoundingClientRect(); | ||||
|                         console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) | ||||
|                         if (typeof node.nodeName == 'string' && node.nodeName === '#text') { | ||||
|                             node = node.parentElement | ||||
|                         } | ||||
|                     } catch (e) { | ||||
|                         console.log(e) | ||||
|                         console.log("xpath_element_scraper: error looking up q.ownerElement") | ||||
|                         console.log("xpath_element_scraper: #text resolver") | ||||
|                     } | ||||
|                 } | ||||
|  | ||||
|                 if (bbox && bbox['width'] > 0 && bbox['height'] > 0) { | ||||
|                     size_pos.push({ | ||||
|                         xpath: f, | ||||
|                         width: parseInt(bbox['width']), | ||||
|                         height: parseInt(bbox['height']), | ||||
|                         left: parseInt(bbox['left']), | ||||
|                         top: parseInt(bbox['top']) + scroll_y, | ||||
|                         highlight_as_custom_filter: true | ||||
|                     }); | ||||
|                 } | ||||
|             }); | ||||
|                     // #1231 - IN the case XPath attribute filter is applied, we will have to traverse up and find the element. | ||||
|                     if (typeof node.getBoundingClientRect == 'function') { | ||||
|                         bbox = node.getBoundingClientRect(); | ||||
|                         console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y) | ||||
|                     } else { | ||||
|                         try { | ||||
|                             // Try and see we can find its ownerElement | ||||
|                             bbox = node.ownerElement.getBoundingClientRect(); | ||||
|                             console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) | ||||
|                         } catch (e) { | ||||
|                             console.log(e) | ||||
|                             console.log("xpath_element_scraper: error looking up node.ownerElement") | ||||
|                         } | ||||
|                     } | ||||
|  | ||||
|                     if (bbox && bbox['width'] > 0 && bbox['height'] > 0) { | ||||
|                         size_pos.push({ | ||||
|                             xpath: f, | ||||
|                             width: parseInt(bbox['width']), | ||||
|                             height: parseInt(bbox['height']), | ||||
|                             left: parseInt(bbox['left']), | ||||
|                             top: parseInt(bbox['top']) + scroll_y, | ||||
|                             highlight_as_custom_filter: true | ||||
|                         }); | ||||
|                     } | ||||
|                 }); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area | ||||
| // so that we dont select the wrapping element by mistake and be unable to select what we want | ||||
| size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1) | ||||
|     size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1) | ||||
|  | ||||
| // browser_width required for proper scaling in the frontend | ||||
|     // Return as a string to save playwright for juggling thousands of objects | ||||
|     return JSON.stringify({'size_pos': size_pos, 'browser_width': window.innerWidth}); | ||||
| } | ||||
|  | ||||
| // Window.width required for proper scaling in the frontend | ||||
| return {'size_pos': size_pos, 'browser_width': window.innerWidth}; | ||||
|   | ||||
							
								
								
									
										73
									
								
								changedetectionio/content_fetchers/screenshot_handler.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										73
									
								
								changedetectionio/content_fetchers/screenshot_handler.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,73 @@ | ||||
| # Pages with a vertical height longer than this will use the 'stitch together' method. | ||||
|  | ||||
| # - Many GPUs have a max texture size of 16384x16384px (or lower on older devices). | ||||
| # - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits. | ||||
| # - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer. | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, SCREENSHOT_DEFAULT_QUALITY | ||||
|  | ||||
|  | ||||
| def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_height): | ||||
|     import os | ||||
|     import io | ||||
|     from PIL import Image, ImageDraw, ImageFont | ||||
|  | ||||
|     try: | ||||
|  | ||||
|         # Load images from byte chunks | ||||
|         images = [Image.open(io.BytesIO(b)) for b in chunks_bytes] | ||||
|         total_height = sum(im.height for im in images) | ||||
|         max_width = max(im.width for im in images) | ||||
|  | ||||
|         # Create stitched image | ||||
|         stitched = Image.new('RGB', (max_width, total_height)) | ||||
|         y_offset = 0 | ||||
|         for im in images: | ||||
|             stitched.paste(im, (0, y_offset)) | ||||
|             y_offset += im.height | ||||
|  | ||||
|         # Draw caption on top (overlaid, not extending canvas) | ||||
|         draw = ImageDraw.Draw(stitched) | ||||
|  | ||||
|         if original_page_height > capture_height: | ||||
|             caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long" | ||||
|             padding = 10 | ||||
|             font_size = 35 | ||||
|             font_color = (255, 0, 0) | ||||
|             background_color = (255, 255, 255) | ||||
|  | ||||
|  | ||||
|             # Try to load a proper font | ||||
|             try: | ||||
|                 font = ImageFont.truetype("arial.ttf", font_size) | ||||
|             except IOError: | ||||
|                 font = ImageFont.load_default() | ||||
|  | ||||
|             bbox = draw.textbbox((0, 0), caption_text, font=font) | ||||
|             text_width = bbox[2] - bbox[0] | ||||
|             text_height = bbox[3] - bbox[1] | ||||
|  | ||||
|             # Draw white rectangle background behind text | ||||
|             rect_top = 0 | ||||
|             rect_bottom = text_height + 2 * padding | ||||
|             draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color) | ||||
|  | ||||
|             # Draw text centered horizontally, 10px padding from top of the rectangle | ||||
|             text_x = (max_width - text_width) // 2 | ||||
|             text_y = padding | ||||
|             draw.text((text_x, text_y), caption_text, font=font, fill=font_color) | ||||
|  | ||||
|         # Encode and send image | ||||
|         output = io.BytesIO() | ||||
|         stitched.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", SCREENSHOT_DEFAULT_QUALITY))) | ||||
|         pipe_conn.send_bytes(output.getvalue()) | ||||
|  | ||||
|         stitched.close() | ||||
|     except Exception as e: | ||||
|         pipe_conn.send(f"error:{e}") | ||||
|     finally: | ||||
|         pipe_conn.close() | ||||
|  | ||||
|  | ||||
| @@ -4,22 +4,20 @@ import time | ||||
| from loguru import logger | ||||
| from changedetectionio.content_fetchers.base import Fetcher | ||||
|  | ||||
|  | ||||
| class fetcher(Fetcher): | ||||
|     if os.getenv("WEBDRIVER_URL"): | ||||
|         fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL")) | ||||
|         fetcher_description = f"WebDriver Chrome/Javascript via \"{os.getenv('WEBDRIVER_URL', '')}\"" | ||||
|     else: | ||||
|         fetcher_description = "WebDriver Chrome/Javascript" | ||||
|  | ||||
|     # Configs for Proxy setup | ||||
|     # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy" | ||||
|     selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy', | ||||
|                                         'proxyAutoconfigUrl', 'sslProxy', 'autodetect', | ||||
|                                         'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword'] | ||||
|     proxy = None | ||||
|     proxy_url = None | ||||
|  | ||||
|     def __init__(self, proxy_override=None, custom_browser_connection_url=None): | ||||
|         super().__init__() | ||||
|         from selenium.webdriver.common.proxy import Proxy as SeleniumProxy | ||||
|         from urllib.parse import urlparse | ||||
|         from selenium.webdriver.common.proxy import Proxy | ||||
|  | ||||
|         # .strip('"') is going to save someone a lot of time when they accidently wrap the env value | ||||
|         if not custom_browser_connection_url: | ||||
| @@ -28,93 +26,118 @@ class fetcher(Fetcher): | ||||
|             self.browser_connection_is_custom = True | ||||
|             self.browser_connection_url = custom_browser_connection_url | ||||
|  | ||||
|         # If any proxy settings are enabled, then we should setup the proxy object | ||||
|         proxy_args = {} | ||||
|         for k in self.selenium_proxy_settings_mappings: | ||||
|             v = os.getenv('webdriver_' + k, False) | ||||
|             if v: | ||||
|                 proxy_args[k] = v.strip('"') | ||||
|         ##### PROXY SETUP ##### | ||||
|  | ||||
|         # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy | ||||
|         if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy: | ||||
|             proxy_args['httpProxy'] = self.system_http_proxy | ||||
|         if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy: | ||||
|             proxy_args['httpsProxy'] = self.system_https_proxy | ||||
|         proxy_sources = [ | ||||
|             self.system_http_proxy, | ||||
|             self.system_https_proxy, | ||||
|             os.getenv('webdriver_proxySocks'), | ||||
|             os.getenv('webdriver_socksProxy'), | ||||
|             os.getenv('webdriver_proxyHttp'), | ||||
|             os.getenv('webdriver_httpProxy'), | ||||
|             os.getenv('webdriver_proxyHttps'), | ||||
|             os.getenv('webdriver_httpsProxy'), | ||||
|             os.getenv('webdriver_sslProxy'), | ||||
|             proxy_override,  # last one should override | ||||
|         ] | ||||
|         # The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server= | ||||
|         for k in filter(None, proxy_sources): | ||||
|             if not k: | ||||
|                 continue | ||||
|             self.proxy_url = k.strip() | ||||
|  | ||||
|         # Allows override the proxy on a per-request basis | ||||
|         if proxy_override is not None: | ||||
|             proxy_args['httpProxy'] = proxy_override | ||||
|     async def run(self, | ||||
|                   fetch_favicon=True, | ||||
|                   current_include_filters=None, | ||||
|                   empty_pages_are_a_change=False, | ||||
|                   ignore_status_codes=False, | ||||
|                   is_binary=False, | ||||
|                   request_body=None, | ||||
|                   request_headers=None, | ||||
|                   request_method=None, | ||||
|                   timeout=None, | ||||
|                   url=None, | ||||
|                   ): | ||||
|  | ||||
|         if proxy_args: | ||||
|             self.proxy = SeleniumProxy(raw=proxy_args) | ||||
|         import asyncio | ||||
|  | ||||
|     def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|         # Wrap the entire selenium operation in a thread executor | ||||
|         def _run_sync(): | ||||
|             from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|             # request_body, request_method unused for now, until some magic in the future happens. | ||||
|  | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|         from selenium.common.exceptions import WebDriverException | ||||
|         # request_body, request_method unused for now, until some magic in the future happens. | ||||
|             options = ChromeOptions() | ||||
|  | ||||
|         options = ChromeOptions() | ||||
|         if self.proxy: | ||||
|             options.proxy = self.proxy | ||||
|             # Load Chrome options from env | ||||
|             CHROME_OPTIONS = [ | ||||
|                 line.strip() | ||||
|                 for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines() | ||||
|                 if line.strip() | ||||
|             ] | ||||
|  | ||||
|         self.driver = webdriver.Remote( | ||||
|             command_executor=self.browser_connection_url, | ||||
|             options=options) | ||||
|             for opt in CHROME_OPTIONS: | ||||
|                 options.add_argument(opt) | ||||
|  | ||||
|         try: | ||||
|             self.driver.get(url) | ||||
|         except WebDriverException as e: | ||||
|             # Be sure we close the session window | ||||
|             self.quit() | ||||
|             raise | ||||
|             # 1. proxy_config /Proxy(proxy_config) selenium object is REALLY unreliable | ||||
|             # 2. selenium-wire cant be used because the websocket version conflicts with pypeteer-ng | ||||
|             # 3. selenium only allows ONE runner at a time by default! | ||||
|             # 4. driver must use quit() or it will continue to block/hold the selenium process!! | ||||
|  | ||||
|         self.driver.set_window_size(1280, 1024) | ||||
|         self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|             if self.proxy_url: | ||||
|                 options.add_argument(f'--proxy-server={self.proxy_url}') | ||||
|  | ||||
|         if self.webdriver_js_execute_code is not None: | ||||
|             self.driver.execute_script(self.webdriver_js_execute_code) | ||||
|             # Selenium doesn't automatically wait for actions as good as Playwright, so wait again | ||||
|             self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|         # @todo - how to check this? is it possible? | ||||
|         self.status_code = 200 | ||||
|         # @todo somehow we should try to get this working for WebDriver | ||||
|         # raise EmptyReply(url=url, status_code=r.status_code) | ||||
|  | ||||
|         # @todo - dom wait loaded? | ||||
|         time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) | ||||
|         self.content = self.driver.page_source | ||||
|         self.headers = {} | ||||
|  | ||||
|         self.screenshot = self.driver.get_screenshot_as_png() | ||||
|  | ||||
|     # Does the connection to the webdriver work? run a test connection. | ||||
|     def is_ready(self): | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|  | ||||
|         self.driver = webdriver.Remote( | ||||
|             command_executor=self.command_executor, | ||||
|             options=ChromeOptions()) | ||||
|  | ||||
|         # driver.quit() seems to cause better exceptions | ||||
|         self.quit() | ||||
|         return True | ||||
|  | ||||
|     def quit(self): | ||||
|         if self.driver: | ||||
|             from selenium.webdriver.remote.remote_connection import RemoteConnection | ||||
|             from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver | ||||
|             driver = None | ||||
|             try: | ||||
|                 self.driver.quit() | ||||
|                 # Create the RemoteConnection and set timeout (e.g., 30 seconds) | ||||
|                 remote_connection = RemoteConnection( | ||||
|                     self.browser_connection_url, | ||||
|                 ) | ||||
|                 remote_connection.set_timeout(30)  # seconds | ||||
|  | ||||
|                 # Now create the driver with the RemoteConnection | ||||
|                 driver = RemoteWebDriver( | ||||
|                     command_executor=remote_connection, | ||||
|                     options=options | ||||
|                 ) | ||||
|  | ||||
|                 driver.set_page_load_timeout(int(os.getenv("WEBDRIVER_PAGELOAD_TIMEOUT", 45))) | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}") | ||||
|                 if driver: | ||||
|                     driver.quit() | ||||
|                 raise e | ||||
|  | ||||
|             try: | ||||
|                 driver.get(url) | ||||
|  | ||||
|                 if not "--window-size" in os.getenv("CHROME_OPTIONS", ""): | ||||
|                     driver.set_window_size(1280, 1024) | ||||
|  | ||||
|                 driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|                 if self.webdriver_js_execute_code is not None: | ||||
|                     driver.execute_script(self.webdriver_js_execute_code) | ||||
|                     # Selenium doesn't automatically wait for actions as good as Playwright, so wait again | ||||
|                     driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|                 # @todo - how to check this? is it possible? | ||||
|                 self.status_code = 200 | ||||
|                 # @todo somehow we should try to get this working for WebDriver | ||||
|                 # raise EmptyReply(url=url, status_code=r.status_code) | ||||
|  | ||||
|                 # @todo - dom wait loaded? | ||||
|                 import time | ||||
|                 time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) | ||||
|                 self.content = driver.page_source | ||||
|                 self.headers = {} | ||||
|                 self.screenshot = driver.get_screenshot_as_png() | ||||
|             except Exception as e: | ||||
|                 driver.quit() | ||||
|                 raise e | ||||
|  | ||||
|             driver.quit() | ||||
|  | ||||
|         # Run the selenium operations in a thread pool to avoid blocking the event loop | ||||
|         loop = asyncio.get_event_loop() | ||||
|         await loop.run_in_executor(None, _run_sync) | ||||
|   | ||||
							
								
								
									
										535
									
								
								changedetectionio/custom_queue.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										535
									
								
								changedetectionio/custom_queue.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,535 @@ | ||||
| import queue | ||||
| import asyncio | ||||
| from blinker import signal | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
| class NotificationQueue(queue.Queue): | ||||
|     """ | ||||
|     Extended Queue that sends a 'notification_event' signal when notifications are added. | ||||
|      | ||||
|     This class extends the standard Queue and adds a signal emission after a notification | ||||
|     is put into the queue. The signal includes the watch UUID if available. | ||||
|  | ||||
|     Only dict items trigger the signal; items of any other type are enqueued | ||||
|     without emitting anything. | ||||
|     """ | ||||
|      | ||||
|     def __init__(self, maxsize=0): | ||||
|         """ | ||||
|         Create the queue and look up the 'notification_event' blinker signal. | ||||
|  | ||||
|         Args: | ||||
|             maxsize: Passed straight through to queue.Queue. | ||||
|         """ | ||||
|         super().__init__(maxsize) | ||||
|         try: | ||||
|             self.notification_event_signal = signal('notification_event') | ||||
|         except Exception as e: | ||||
|             # The attribute is left unset on failure; put() reads it inside its own | ||||
|             # try block, so that case ends up as a logged error there, not a crash. | ||||
|             logger.critical(f"Exception creating notification_event signal: {e}") | ||||
|  | ||||
|     def put(self, item, block=True, timeout=None): | ||||
|         """ | ||||
|         Enqueue a notification, then emit the 'notification_event' signal. | ||||
|  | ||||
|         Args: | ||||
|             item: The notification. When it is a dict, its optional 'uuid' key | ||||
|                 is sent along with the signal as watch_uuid. | ||||
|             block: Forwarded to queue.Queue.put. | ||||
|             timeout: Forwarded to queue.Queue.put. | ||||
|  | ||||
|         Exceptions from the underlying put propagate because it runs outside the | ||||
|         try block; exceptions raised while signalling are logged and swallowed. | ||||
|         """ | ||||
|         # Call the parent's put method first | ||||
|         super().put(item, block, timeout) | ||||
|          | ||||
|         # After putting the notification in the queue, emit signal with watch UUID | ||||
|         try: | ||||
|             # NOTE(review): this relies on the truthiness of the blinker signal | ||||
|             # object - confirm it does not depend on receivers being connected. | ||||
|             if self.notification_event_signal and isinstance(item, dict): | ||||
|                 watch_uuid = item.get('uuid') | ||||
|                 if watch_uuid: | ||||
|                     # Send the notification_event signal with the watch UUID | ||||
|                     self.notification_event_signal.send(watch_uuid=watch_uuid) | ||||
|                     logger.trace(f"NotificationQueue: Emitted notification_event signal for watch UUID {watch_uuid}") | ||||
|                 else: | ||||
|                     # Send signal without UUID for system notifications | ||||
|                     self.notification_event_signal.send() | ||||
|                     logger.trace("NotificationQueue: Emitted notification_event signal for system notification") | ||||
|         except Exception as e: | ||||
|             logger.error(f"Exception emitting notification_event signal: {e}") | ||||
|  | ||||
| class SignalPriorityQueue(queue.PriorityQueue): | ||||
|     """ | ||||
|     Extended PriorityQueue that sends a signal when items with a UUID are added. | ||||
|      | ||||
|     This class extends the standard PriorityQueue and adds a signal emission | ||||
|     after an item is put into the queue. If the item contains a UUID, the signal | ||||
|     is sent with that UUID as a parameter. | ||||
|  | ||||
|     In addition, every put() and get() emits the 'queue_length' signal with the | ||||
|     current qsize(). The inspection helpers below read `item.priority` and | ||||
|     `item.item` on each queued entry. | ||||
|     """ | ||||
|      | ||||
|     def __init__(self, maxsize=0): | ||||
|         """ | ||||
|         Create the queue and look up the 'queue_length' blinker signal. | ||||
|  | ||||
|         Args: | ||||
|             maxsize: Passed straight through to queue.PriorityQueue. | ||||
|         """ | ||||
|         super().__init__(maxsize) | ||||
|         try: | ||||
|             self.queue_length_signal = signal('queue_length') | ||||
|         except Exception as e: | ||||
|             # The attribute stays unset on failure; put()/get() read it inside | ||||
|             # their own try blocks and log the resulting error. | ||||
|             logger.critical(f"Exception: {e}") | ||||
|  | ||||
|     def put(self, item, block=True, timeout=None): | ||||
|         """ | ||||
|         Enqueue an item, then emit 'watch_check_update' (when the item wraps a | ||||
|         dict containing 'uuid') followed by 'queue_length'. | ||||
|  | ||||
|         Args: | ||||
|             item: Entry to enqueue; its `.item` attribute is inspected for a dict | ||||
|                 with a 'uuid' key. | ||||
|             block: Forwarded to queue.PriorityQueue.put. | ||||
|             timeout: Forwarded to queue.PriorityQueue.put. | ||||
|         """ | ||||
|         # Call the parent's put method first | ||||
|         super().put(item, block, timeout) | ||||
|          | ||||
|         # After putting the item in the queue, check if it has a UUID and emit signal | ||||
|         # NOTE(review): unlike the queue_length emission below, this send is not | ||||
|         # wrapped in try/except, so an exception raised while sending propagates | ||||
|         # to the caller after the item has already been enqueued. | ||||
|         if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item: | ||||
|             uuid = item.item['uuid'] | ||||
|             # Get the signal and send it if it exists | ||||
|             watch_check_update = signal('watch_check_update') | ||||
|             if watch_check_update: | ||||
|                 # Send the watch_uuid parameter | ||||
|                 watch_check_update.send(watch_uuid=uuid) | ||||
|          | ||||
|         # Send queue_length signal with current queue size | ||||
|         try: | ||||
|  | ||||
|             if self.queue_length_signal: | ||||
|                 self.queue_length_signal.send(length=self.qsize()) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception: {e}") | ||||
|  | ||||
|     def get(self, block=True, timeout=None): | ||||
|         """ | ||||
|         Remove and return the next item, then emit 'queue_length' with the new size. | ||||
|  | ||||
|         Args: | ||||
|             block: Forwarded to queue.PriorityQueue.get. | ||||
|             timeout: Forwarded to queue.PriorityQueue.get. | ||||
|  | ||||
|         Returns: | ||||
|             The item returned by the parent get(). Signal failures are logged | ||||
|             and do not prevent the item from being returned. | ||||
|         """ | ||||
|         # Call the parent's get method first | ||||
|         item = super().get(block, timeout) | ||||
|          | ||||
|         # Send queue_length signal with current queue size | ||||
|         try: | ||||
|             if self.queue_length_signal: | ||||
|                 self.queue_length_signal.send(length=self.qsize()) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception: {e}") | ||||
|         return item | ||||
|      | ||||
|     def get_uuid_position(self, target_uuid): | ||||
|         """ | ||||
|         Find the position of a watch UUID in the priority queue. | ||||
|         Optimized for large queues - O(n) complexity instead of O(n log n). | ||||
|          | ||||
|         Args: | ||||
|             target_uuid: The UUID to search for | ||||
|              | ||||
|         Returns: | ||||
|             dict: Always a dict (never None), with keys: | ||||
|                 - position: 0-based position in queue (0 = next to be processed), | ||||
|                   or None when the UUID is not queued | ||||
|                 - total_items: total number of items in queue | ||||
|                 - priority: the priority value of the found item, or None | ||||
|                 - found: True when the UUID was located, otherwise False | ||||
|         """ | ||||
|         # The whole scan runs on a snapshot taken while holding the queue's mutex. | ||||
|         with self.mutex: | ||||
|             queue_list = list(self.queue) | ||||
|             total_items = len(queue_list) | ||||
|              | ||||
|             if total_items == 0: | ||||
|                 return { | ||||
|                     'position': None, | ||||
|                     'total_items': 0, | ||||
|                     'priority': None, | ||||
|                     'found': False | ||||
|                 } | ||||
|              | ||||
|             # Find the target item and its priority first - O(n) | ||||
|             # Only the first matching entry (in heap-list order) is considered. | ||||
|             target_item = None | ||||
|             target_priority = None | ||||
|              | ||||
|             for item in queue_list: | ||||
|                 if (hasattr(item, 'item') and  | ||||
|                     isinstance(item.item, dict) and  | ||||
|                     item.item.get('uuid') == target_uuid): | ||||
|                     target_item = item | ||||
|                     target_priority = item.priority | ||||
|                     break | ||||
|              | ||||
|             if target_item is None: | ||||
|                 return { | ||||
|                     'position': None, | ||||
|                     'total_items': total_items, | ||||
|                     'priority': None, | ||||
|                     'found': False | ||||
|                 } | ||||
|              | ||||
|             # Count how many items have higher priority (lower numbers) - O(n) | ||||
|             position = 0 | ||||
|             for item in queue_list: | ||||
|                 # Items with lower priority numbers are processed first | ||||
|                 if item.priority < target_priority: | ||||
|                     position += 1 | ||||
|                 elif item.priority == target_priority and item != target_item: | ||||
|                     # Every same-priority entry that compares unequal to the target is | ||||
|                     # counted as being ahead of it, regardless of actual heap order. | ||||
|                     # (Note: this is approximate since heap order isn't guaranteed for equal priorities) | ||||
|                     # NOTE(review): `!=` uses the queued item's own equality. If that | ||||
|                     # type compares by priority alone (TODO confirm against its | ||||
|                     # definition), this branch can never be taken; an identity | ||||
|                     # check may have been intended. | ||||
|                     position += 1 | ||||
|              | ||||
|             return { | ||||
|                 'position': position, | ||||
|                 'total_items': total_items, | ||||
|                 'priority': target_priority, | ||||
|                 'found': True | ||||
|             } | ||||
|      | ||||
|     def get_all_queued_uuids(self, limit=None, offset=0): | ||||
|         """ | ||||
|         Get UUIDs currently in the queue with their positions. | ||||
|         For large queues, use limit/offset for pagination. | ||||
|          | ||||
|         Args: | ||||
|             limit: Maximum number of items to return (None = all) | ||||
|             offset: Number of items to skip (for pagination) | ||||
|          | ||||
|         Returns: | ||||
|             dict: Contains items and metadata | ||||
|                 - items: List of dicts with uuid, position, and priority | ||||
|                 - total_items: Total number of items in queue | ||||
|                 - returned_items: Number of items returned | ||||
|                 - has_more: Whether there are more items after this page | ||||
|                 - note: Only present when positions are approximate | ||||
|                   (limit is None or greater than 100) | ||||
|         """ | ||||
|         with self.mutex: | ||||
|             queue_list = list(self.queue) | ||||
|             total_items = len(queue_list) | ||||
|              | ||||
|             if total_items == 0: | ||||
|                 return { | ||||
|                     'items': [], | ||||
|                     'total_items': 0, | ||||
|                     'returned_items': 0, | ||||
|                     'has_more': False | ||||
|                 } | ||||
|              | ||||
|             # For very large queues, warn about performance | ||||
|             if total_items > 1000 and limit is None: | ||||
|                 logger.warning(f"Getting all {total_items} queued items without limit - this may be slow") | ||||
|              | ||||
|             # Sort only if we need exact positions (expensive for large queues) | ||||
|             if limit is not None and limit <= 100: | ||||
|                 # For small requests, we can afford to sort | ||||
|                 # sorted() relies on the queued items being orderable against each other. | ||||
|                 queue_items = sorted(queue_list) | ||||
|                 # NOTE(review): limit == 0 enters this branch but is falsy here, so the | ||||
|                 # slice runs to the end of the queue instead of returning nothing. | ||||
|                 end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items) | ||||
|                 items_to_process = queue_items[offset:end_idx] | ||||
|                  | ||||
|                 result = [] | ||||
|                 # Positions are indexes into the sorted list, so entries without a | ||||
|                 # uuid still occupy a position even though they are not returned. | ||||
|                 for position, item in enumerate(items_to_process, start=offset): | ||||
|                     if (hasattr(item, 'item') and  | ||||
|                         isinstance(item.item, dict) and  | ||||
|                         'uuid' in item.item): | ||||
|                          | ||||
|                         result.append({ | ||||
|                             'uuid': item.item['uuid'], | ||||
|                             'position': position, | ||||
|                             'priority': item.priority | ||||
|                         }) | ||||
|                  | ||||
|                 # NOTE(review): has_more compares the count of uuid-bearing results with | ||||
|                 # the unfiltered total, so it can stay True once only non-uuid entries remain. | ||||
|                 return { | ||||
|                     'items': result, | ||||
|                     'total_items': total_items, | ||||
|                     'returned_items': len(result), | ||||
|                     'has_more': (offset + len(result)) < total_items | ||||
|                 } | ||||
|             else: | ||||
|                 # For large requests, return items with approximate positions | ||||
|                 # This is much faster O(n) instead of O(n log n) | ||||
|                 # NOTE(review): the per-item sum() below rescans queue_list for every | ||||
|                 # returned entry, so this path is O(n * returned items), not O(n). | ||||
|                 result = [] | ||||
|                 processed = 0 | ||||
|                 skipped = 0 | ||||
|                  | ||||
|                 # Iterates in heap-list order; offset/limit count only uuid-bearing entries. | ||||
|                 for item in queue_list: | ||||
|                     if (hasattr(item, 'item') and  | ||||
|                         isinstance(item.item, dict) and  | ||||
|                         'uuid' in item.item): | ||||
|                          | ||||
|                         if skipped < offset: | ||||
|                             skipped += 1 | ||||
|                             continue | ||||
|                          | ||||
|                         if limit and processed >= limit: | ||||
|                             break | ||||
|                          | ||||
|                         # Approximate position based on priority comparison | ||||
|                         # (entries sharing a priority all receive the same position) | ||||
|                         approx_position = sum(1 for other in queue_list if other.priority < item.priority) | ||||
|                          | ||||
|                         result.append({ | ||||
|                             'uuid': item.item['uuid'], | ||||
|                             'position': approx_position,  # Approximate | ||||
|                             'priority': item.priority | ||||
|                         }) | ||||
|                         processed += 1 | ||||
|                  | ||||
|                 return { | ||||
|                     'items': result, | ||||
|                     'total_items': total_items, | ||||
|                     'returned_items': len(result), | ||||
|                     'has_more': (offset + len(result)) < total_items, | ||||
|                     'note': 'Positions are approximate for performance with large queues' | ||||
|                 } | ||||
|      | ||||
|     def get_queue_summary(self): | ||||
|         """ | ||||
|         Get a quick summary of queue state without expensive operations. | ||||
|         O(n) complexity - fast even for large queues. | ||||
|          | ||||
|         Returns: | ||||
|             dict: Queue summary statistics | ||||
|                 - total_items: number of queued entries | ||||
|                 - priority_breakdown: mapping of priority value -> entry count | ||||
|                 - immediate_items / clone_items / scheduled_items: counts of | ||||
|                   entries with priority 1, priority 5 and priority > 100 | ||||
|                 - min_priority / max_priority: only present when the queue | ||||
|                   is not empty | ||||
|         """ | ||||
|         with self.mutex: | ||||
|             queue_list = list(self.queue) | ||||
|             total_items = len(queue_list) | ||||
|              | ||||
|             if total_items == 0: | ||||
|                 return { | ||||
|                     'total_items': 0, | ||||
|                     'priority_breakdown': {}, | ||||
|                     'immediate_items': 0, | ||||
|                     'clone_items': 0, | ||||
|                     'scheduled_items': 0 | ||||
|                 } | ||||
|              | ||||
|             # Count items by priority type - O(n) | ||||
|             # Priorities other than 1, 5 or > 100 appear only in priority_breakdown. | ||||
|             immediate_items = 0  # priority 1 | ||||
|             clone_items = 0      # priority 5   | ||||
|             scheduled_items = 0  # priority > 100 (timestamps) | ||||
|             priority_counts = {} | ||||
|              | ||||
|             for item in queue_list: | ||||
|                 priority = item.priority | ||||
|                 priority_counts[priority] = priority_counts.get(priority, 0) + 1 | ||||
|                  | ||||
|                 if priority == 1: | ||||
|                     immediate_items += 1 | ||||
|                 elif priority == 5: | ||||
|                     clone_items += 1 | ||||
|                 elif priority > 100: | ||||
|                     scheduled_items += 1 | ||||
|              | ||||
|             return { | ||||
|                 'total_items': total_items, | ||||
|                 'priority_breakdown': priority_counts, | ||||
|                 'immediate_items': immediate_items, | ||||
|                 'clone_items': clone_items, | ||||
|                 'scheduled_items': scheduled_items, | ||||
|                 'min_priority': min(priority_counts.keys()) if priority_counts else None, | ||||
|                 'max_priority': max(priority_counts.keys()) if priority_counts else None | ||||
|             } | ||||
|  | ||||
|  | ||||
| class AsyncSignalPriorityQueue(asyncio.PriorityQueue): | ||||
|     """ | ||||
|     Async version of SignalPriorityQueue that sends signals when items are added/removed. | ||||
|      | ||||
|     This class extends asyncio.PriorityQueue and maintains the same signal behavior | ||||
|     as the synchronous version for real-time UI updates. | ||||
|  | ||||
|     The inspection helpers are plain (non-async) methods that read the internal | ||||
|     `_queue` attribute directly and take no lock. | ||||
|     """ | ||||
|      | ||||
|     def __init__(self, maxsize=0): | ||||
|         """ | ||||
|         Create the queue and look up the 'queue_length' blinker signal. | ||||
|  | ||||
|         Args: | ||||
|             maxsize: Passed straight through to asyncio.PriorityQueue. | ||||
|         """ | ||||
|         super().__init__(maxsize) | ||||
|         try: | ||||
|             self.queue_length_signal = signal('queue_length') | ||||
|         except Exception as e: | ||||
|             # The attribute stays unset on failure; put()/get() read it inside | ||||
|             # their own try blocks and log the resulting error. | ||||
|             logger.critical(f"Exception: {e}") | ||||
|  | ||||
|     async def put(self, item): | ||||
|         """ | ||||
|         Await the parent put(), then emit 'watch_check_update' (when the item | ||||
|         wraps a dict containing 'uuid') followed by 'queue_length'. | ||||
|  | ||||
|         Args: | ||||
|             item: Entry to enqueue; its `.item` attribute is inspected for a dict | ||||
|                 with a 'uuid' key. | ||||
|         """ | ||||
|         # Call the parent's put method first | ||||
|         await super().put(item) | ||||
|          | ||||
|         # After putting the item in the queue, check if it has a UUID and emit signal | ||||
|         # NOTE(review): this send is a plain synchronous call and is not wrapped in | ||||
|         # try/except, so an exception raised while sending propagates to the caller | ||||
|         # after the item has already been enqueued. | ||||
|         if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item: | ||||
|             uuid = item.item['uuid'] | ||||
|             # Get the signal and send it if it exists | ||||
|             watch_check_update = signal('watch_check_update') | ||||
|             if watch_check_update: | ||||
|                 # Send the watch_uuid parameter | ||||
|                 watch_check_update.send(watch_uuid=uuid) | ||||
|          | ||||
|         # Send queue_length signal with current queue size | ||||
|         try: | ||||
|             if self.queue_length_signal: | ||||
|                 self.queue_length_signal.send(length=self.qsize()) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception: {e}") | ||||
|  | ||||
|     async def get(self): | ||||
|         """ | ||||
|         Await the next item from the parent queue, emit 'queue_length' with the | ||||
|         new size, and return the item. Signal failures are logged, not raised. | ||||
|         """ | ||||
|         # Call the parent's get method first | ||||
|         item = await super().get() | ||||
|          | ||||
|         # Send queue_length signal with current queue size | ||||
|         try: | ||||
|             if self.queue_length_signal: | ||||
|                 self.queue_length_signal.send(length=self.qsize()) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception: {e}") | ||||
|         return item | ||||
|      | ||||
|     @property | ||||
|     def queue(self): | ||||
|         """ | ||||
|         Provide compatibility with sync PriorityQueue.queue access | ||||
|         Returns the internal queue for template access | ||||
|  | ||||
|         Falls back to an empty list when the `_queue` attribute does not exist. | ||||
|         The object returned is the live internal container, not a copy. | ||||
|         """ | ||||
|         return self._queue if hasattr(self, '_queue') else [] | ||||
|      | ||||
|     def get_uuid_position(self, target_uuid): | ||||
|         """ | ||||
|         Find the position of a watch UUID in the async priority queue. | ||||
|         Optimized for large queues - O(n) complexity instead of O(n log n). | ||||
|          | ||||
|         Args: | ||||
|             target_uuid: The UUID to search for | ||||
|              | ||||
|         Returns: | ||||
|             dict: Always a dict (never None), with keys: | ||||
|                 - position: 0-based position in queue (0 = next to be processed), | ||||
|                   or None when the UUID is not queued | ||||
|                 - total_items: total number of items in queue | ||||
|                 - priority: the priority value of the found item, or None | ||||
|                 - found: True when the UUID was located, otherwise False | ||||
|         """ | ||||
|         # Works on a snapshot copy of the internal list; no lock is taken. | ||||
|         queue_list = list(self._queue) | ||||
|         total_items = len(queue_list) | ||||
|          | ||||
|         if total_items == 0: | ||||
|             return { | ||||
|                 'position': None, | ||||
|                 'total_items': 0, | ||||
|                 'priority': None, | ||||
|                 'found': False | ||||
|             } | ||||
|          | ||||
|         # Find the target item and its priority first - O(n) | ||||
|         # Only the first matching entry (in heap-list order) is considered. | ||||
|         target_item = None | ||||
|         target_priority = None | ||||
|          | ||||
|         for item in queue_list: | ||||
|             if (hasattr(item, 'item') and  | ||||
|                 isinstance(item.item, dict) and  | ||||
|                 item.item.get('uuid') == target_uuid): | ||||
|                 target_item = item | ||||
|                 target_priority = item.priority | ||||
|                 break | ||||
|          | ||||
|         if target_item is None: | ||||
|             return { | ||||
|                 'position': None, | ||||
|                 'total_items': total_items, | ||||
|                 'priority': None, | ||||
|                 'found': False | ||||
|             } | ||||
|          | ||||
|         # Count how many items have higher priority (lower numbers) - O(n) | ||||
|         position = 0 | ||||
|         for item in queue_list: | ||||
|             if item.priority < target_priority: | ||||
|                 position += 1 | ||||
|             elif item.priority == target_priority and item != target_item: | ||||
|                 # Every same-priority entry that compares unequal to the target is | ||||
|                 # counted as ahead of it, so the position is approximate for ties. | ||||
|                 # NOTE(review): `!=` uses the queued item's own equality. If that type | ||||
|                 # compares by priority alone (TODO confirm against its definition), | ||||
|                 # this branch can never be taken; an identity check may be intended. | ||||
|                 position += 1 | ||||
|          | ||||
|         return { | ||||
|             'position': position, | ||||
|             'total_items': total_items, | ||||
|             'priority': target_priority, | ||||
|             'found': True | ||||
|         } | ||||
|      | ||||
|     def get_all_queued_uuids(self, limit=None, offset=0): | ||||
|         """ | ||||
|         Get UUIDs currently in the async queue with their positions. | ||||
|         For large queues, use limit/offset for pagination. | ||||
|          | ||||
|         Args: | ||||
|             limit: Maximum number of items to return (None = all) | ||||
|             offset: Number of items to skip (for pagination) | ||||
|          | ||||
|         Returns: | ||||
|             dict: Contains items and metadata (same structure as sync version) | ||||
|                 - items: List of dicts with uuid, position, and priority | ||||
|                 - total_items: Total number of items in queue | ||||
|                 - returned_items: Number of items returned | ||||
|                 - has_more: Whether there are more items after this page | ||||
|                 - note: Only present when positions are approximate | ||||
|                   (limit is None or greater than 100) | ||||
|         """ | ||||
|         queue_list = list(self._queue) | ||||
|         total_items = len(queue_list) | ||||
|          | ||||
|         if total_items == 0: | ||||
|             return { | ||||
|                 'items': [], | ||||
|                 'total_items': 0, | ||||
|                 'returned_items': 0, | ||||
|                 'has_more': False | ||||
|             } | ||||
|          | ||||
|         # Same logic as sync version but without mutex | ||||
|         if limit is not None and limit <= 100: | ||||
|             # sorted() relies on the queued items being orderable against each other. | ||||
|             queue_items = sorted(queue_list) | ||||
|             # NOTE(review): limit == 0 enters this branch but is falsy here, so the | ||||
|             # slice runs to the end of the queue instead of returning nothing. | ||||
|             end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items) | ||||
|             items_to_process = queue_items[offset:end_idx] | ||||
|              | ||||
|             result = [] | ||||
|             # Positions are indexes into the sorted list, so entries without a | ||||
|             # uuid still occupy a position even though they are not returned. | ||||
|             for position, item in enumerate(items_to_process, start=offset): | ||||
|                 if (hasattr(item, 'item') and  | ||||
|                     isinstance(item.item, dict) and  | ||||
|                     'uuid' in item.item): | ||||
|                      | ||||
|                     result.append({ | ||||
|                         'uuid': item.item['uuid'], | ||||
|                         'position': position, | ||||
|                         'priority': item.priority | ||||
|                     }) | ||||
|              | ||||
|             # NOTE(review): has_more compares the count of uuid-bearing results with | ||||
|             # the unfiltered total, so it can stay True once only non-uuid entries remain. | ||||
|             return { | ||||
|                 'items': result, | ||||
|                 'total_items': total_items, | ||||
|                 'returned_items': len(result), | ||||
|                 'has_more': (offset + len(result)) < total_items | ||||
|             } | ||||
|         else: | ||||
|             # Fast approximate positions for large queues | ||||
|             # NOTE(review): the per-item sum() below rescans queue_list for every | ||||
|             # returned entry, so the cost is O(n * returned items). | ||||
|             result = [] | ||||
|             processed = 0 | ||||
|             skipped = 0 | ||||
|              | ||||
|             # Iterates in heap-list order; offset/limit count only uuid-bearing entries. | ||||
|             for item in queue_list: | ||||
|                 if (hasattr(item, 'item') and  | ||||
|                     isinstance(item.item, dict) and  | ||||
|                     'uuid' in item.item): | ||||
|                      | ||||
|                     if skipped < offset: | ||||
|                         skipped += 1 | ||||
|                         continue | ||||
|                      | ||||
|                     if limit and processed >= limit: | ||||
|                         break | ||||
|                      | ||||
|                     # Number of entries with a strictly lower priority value; entries | ||||
|                     # sharing a priority all receive the same position. | ||||
|                     approx_position = sum(1 for other in queue_list if other.priority < item.priority) | ||||
|                      | ||||
|                     result.append({ | ||||
|                         'uuid': item.item['uuid'], | ||||
|                         'position': approx_position, | ||||
|                         'priority': item.priority | ||||
|                     }) | ||||
|                     processed += 1 | ||||
|              | ||||
|             return { | ||||
|                 'items': result, | ||||
|                 'total_items': total_items, | ||||
|                 'returned_items': len(result), | ||||
|                 'has_more': (offset + len(result)) < total_items, | ||||
|                 'note': 'Positions are approximate for performance with large queues' | ||||
|             } | ||||
|      | ||||
|     def get_queue_summary(self): | ||||
|         """ | ||||
|         Get a quick summary of async queue state. | ||||
|         O(n) complexity - fast even for large queues. | ||||
|  | ||||
|         Returns: | ||||
|             dict: Queue summary statistics | ||||
|                 - total_items: number of queued entries | ||||
|                 - priority_breakdown: mapping of priority value -> entry count | ||||
|                 - immediate_items / clone_items / scheduled_items: counts of | ||||
|                   entries with priority 1, priority 5 and priority > 100 | ||||
|                 - min_priority / max_priority: only present when the queue | ||||
|                   is not empty | ||||
|         """ | ||||
|         queue_list = list(self._queue) | ||||
|         total_items = len(queue_list) | ||||
|          | ||||
|         if total_items == 0: | ||||
|             return { | ||||
|                 'total_items': 0, | ||||
|                 'priority_breakdown': {}, | ||||
|                 'immediate_items': 0, | ||||
|                 'clone_items': 0, | ||||
|                 'scheduled_items': 0 | ||||
|             } | ||||
|          | ||||
|         # Priorities other than 1, 5 or > 100 appear only in priority_breakdown. | ||||
|         immediate_items = 0 | ||||
|         clone_items = 0 | ||||
|         scheduled_items = 0 | ||||
|         priority_counts = {} | ||||
|          | ||||
|         for item in queue_list: | ||||
|             priority = item.priority | ||||
|             priority_counts[priority] = priority_counts.get(priority, 0) + 1 | ||||
|              | ||||
|             if priority == 1: | ||||
|                 immediate_items += 1 | ||||
|             elif priority == 5: | ||||
|                 clone_items += 1 | ||||
|             elif priority > 100: | ||||
|                 scheduled_items += 1 | ||||
|          | ||||
|         return { | ||||
|             'total_items': total_items, | ||||
|             'priority_breakdown': priority_counts, | ||||
|             'immediate_items': immediate_items, | ||||
|             'clone_items': clone_items, | ||||
|             'scheduled_items': scheduled_items, | ||||
|             'min_priority': min(priority_counts.keys()) if priority_counts else None, | ||||
|             'max_priority': max(priority_counts.keys()) if priority_counts else None | ||||
|         } | ||||
| @@ -4,49 +4,53 @@ import flask_login | ||||
| import locale | ||||
| import os | ||||
| import queue | ||||
| import sys | ||||
| import threading | ||||
| import time | ||||
| import timeago | ||||
| from blinker import signal | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from threading import Event | ||||
| from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue | ||||
| from changedetectionio import worker_handler | ||||
|  | ||||
| from flask import ( | ||||
|     Flask, | ||||
|     abort, | ||||
|     flash, | ||||
|     make_response, | ||||
|     redirect, | ||||
|     render_template, | ||||
|     request, | ||||
|     send_from_directory, | ||||
|     session, | ||||
|     url_for, | ||||
| ) | ||||
| from flask_compress import Compress as FlaskCompress | ||||
| from flask_login import current_user | ||||
| from flask_paginate import Pagination, get_page_parameter | ||||
| from flask_restful import abort, Api | ||||
| from flask_cors import CORS | ||||
|  | ||||
| # Create specific signals for application events | ||||
| # Make this a global singleton to avoid multiple signal objects | ||||
| watch_check_update = signal('watch_check_update', doc='Signal sent when a watch check is completed') | ||||
| from flask_wtf import CSRFProtect | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio import __version__ | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags | ||||
| from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon | ||||
| from changedetectionio.api.Search import Search | ||||
| from .time_handler import is_within_schedule | ||||
|  | ||||
| datastore = None | ||||
|  | ||||
| # Local | ||||
| running_update_threads = [] | ||||
| ticker_thread = None | ||||
|  | ||||
| extra_stylesheets = [] | ||||
|  | ||||
| update_q = queue.PriorityQueue() | ||||
| notification_q = queue.Queue() | ||||
| # Use bulletproof janus-based queues for sync/async reliability   | ||||
| update_q = RecheckPriorityQueue() | ||||
| notification_q = NotificationQueue() | ||||
| MAX_QUEUE_SIZE = 2000 | ||||
|  | ||||
| app = Flask(__name__, | ||||
| @@ -54,6 +58,9 @@ app = Flask(__name__, | ||||
|             static_folder="static", | ||||
|             template_folder="templates") | ||||
|  | ||||
| # Will be initialized in changedetection_app | ||||
| socketio_server = None | ||||
|  | ||||
| # Enable CORS, especially useful for the Chrome extension to operate from anywhere | ||||
| CORS(app) | ||||
|  | ||||
| @@ -91,7 +98,7 @@ watch_api = Api(app, decorators=[csrf.exempt]) | ||||
| def init_app_secret(datastore_path): | ||||
|     secret = "" | ||||
|  | ||||
|     path = "{}/secret.txt".format(datastore_path) | ||||
|     path = os.path.join(datastore_path, "secret.txt") | ||||
|  | ||||
|     try: | ||||
|         with open(path, "r") as f: | ||||
| @@ -115,6 +122,18 @@ def get_darkmode_state(): | ||||
| def get_css_version(): | ||||
|     return __version__ | ||||
|  | ||||
| @app.template_global() | ||||
| def get_socketio_path(): | ||||
|     """Generate the correct Socket.IO path prefix for the client | ||||
|  | ||||
|     Returns the request's X-Forwarded-Prefix header value when the | ||||
|     USE_X_SETTINGS environment variable is set to any non-empty string and the | ||||
|     header is present; otherwise returns an empty string. | ||||
|     """ | ||||
|     # If behind a proxy with a sub-path, we need to respect that path | ||||
|     prefix = "" | ||||
|     # NOTE(review): the header value is returned as-is, with no validation or | ||||
|     # normalisation here - confirm the consuming template handles it safely. | ||||
|     if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers: | ||||
|         prefix = request.headers['X-Forwarded-Prefix'] | ||||
|  | ||||
|     # Socket.IO will be available at {prefix}/socket.io/ | ||||
|     return prefix | ||||
|  | ||||
|  | ||||
| @app.template_filter('format_number_locale') | ||||
| def _jinja2_filter_format_number_locale(value: float) -> str: | ||||
|     "Formats for example 4000.10 to the local locale default of 4,000.10" | ||||
| @@ -125,10 +144,32 @@ def _jinja2_filter_format_number_locale(value: float) -> str: | ||||
|  | ||||
| @app.template_global('is_checking_now') | ||||
| def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"): | ||||
|     # Template helper: reports whether the watch identified by watch_obj['uuid'] is | ||||
|     # currently being processed. The `format` parameter is not used in the lines shown. | ||||
|     # NOTE(review): this sits inside a rendered diff hunk; the thread loop and the | ||||
|     # worker_handler call below look like the old and new bodies shown together - | ||||
|     # confirm against the real file which of the two is live. | ||||
|     # Worker thread tells us which UUID it is currently processing. | ||||
|     for t in running_update_threads: | ||||
|         if t.current_uuid == watch_obj['uuid']: | ||||
|             return True | ||||
|     return worker_handler.is_watch_running(watch_obj['uuid']) | ||||
|  | ||||
| @app.template_global('get_watch_queue_position') | ||||
| def _get_watch_queue_position(watch_obj): | ||||
|     """Get the position of a watch in the queue | ||||
|  | ||||
|     Looks up watch_obj['uuid'] in the module-level update_q and returns whatever | ||||
|     update_q.get_uuid_position() returns for it. | ||||
|     """ | ||||
|     uuid = watch_obj['uuid'] | ||||
|     return update_q.get_uuid_position(uuid) | ||||
|  | ||||
| @app.template_global('get_current_worker_count') | ||||
| def _get_current_worker_count(): | ||||
|     """Get the current number of operational workers | ||||
|  | ||||
|     Thin template-facing wrapper around worker_handler.get_worker_count(). | ||||
|     """ | ||||
|     return worker_handler.get_worker_count() | ||||
|  | ||||
| @app.template_global('get_worker_status_info') | ||||
| def _get_worker_status_info(): | ||||
|     """Get detailed worker status information for display | ||||
|  | ||||
|     Returns: | ||||
|         dict with keys: | ||||
|             - count: status['worker_count'] | ||||
|             - type: status['worker_type'] | ||||
|             - active_workers: number of entries in the running-UUID collection | ||||
|             - processing_watches: the running-UUID collection itself | ||||
|             - loop_running: status['async_loop_running'], or None when absent | ||||
|     """ | ||||
|     status = worker_handler.get_worker_status() | ||||
|     running_uuids = worker_handler.get_running_uuids() | ||||
|      | ||||
|     # 'worker_count' and 'worker_type' are required keys (a missing one raises | ||||
|     # KeyError); 'async_loop_running' is optional. | ||||
|     return { | ||||
|         'count': status['worker_count'], | ||||
|         'type': status['worker_type'], | ||||
|         'active_workers': len(running_uuids), | ||||
|         'processing_watches': running_uuids, | ||||
|         'loop_running': status.get('async_loop_running', None) | ||||
|     } | ||||
|  | ||||
|  | ||||
| # We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread | ||||
| @@ -215,12 +256,15 @@ class User(flask_login.UserMixin): | ||||
| def changedetection_app(config=None, datastore_o=None): | ||||
|     logger.trace("TRACE log is enabled") | ||||
|  | ||||
|     global datastore | ||||
|     global datastore, socketio_server | ||||
|     datastore = datastore_o | ||||
|  | ||||
|     # so far just for read-only via tests, but this will be moved eventually to be the main source | ||||
|     # (instead of the global var) | ||||
|     app.config['DATASTORE'] = datastore_o | ||||
|      | ||||
|     # Store the signal in the app config to ensure it's accessible everywhere | ||||
|     app.config['watch_check_update_SIGNAL'] = watch_check_update | ||||
|  | ||||
|     login_manager = flask_login.LoginManager(app) | ||||
|     login_manager.login_view = 'login' | ||||
| @@ -233,7 +277,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         if has_password_enabled and not flask_login.current_user.is_authenticated: | ||||
|             # Permitted | ||||
|             if request.endpoint and request.endpoint == 'static_content' and request.view_args and request.view_args.get('group') in ['styles', 'js', 'images', 'favicons']: | ||||
|             if request.endpoint and request.endpoint == 'static_content' and request.view_args: | ||||
|                 # Handled by static_content handler | ||||
|                 return None | ||||
|             # Permitted | ||||
|             elif request.endpoint and 'login' in request.endpoint: | ||||
| @@ -247,6 +292,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # RSS access with token is allowed | ||||
|             elif request.endpoint and 'rss.feed' in request.endpoint: | ||||
|                 return None | ||||
|             # Socket.IO routes - need separate handling | ||||
|             elif request.path.startswith('/socket.io/'): | ||||
|                 return None | ||||
|             # API routes - use their own auth mechanism (@auth.check_token) | ||||
|             elif request.path.startswith('/api/'): | ||||
|                 return None | ||||
| @@ -257,7 +305,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     watch_api.add_resource(WatchSingleHistory, | ||||
|                            '/api/v1/watch/<string:uuid>/history/<string:timestamp>', | ||||
|                            resource_class_kwargs={'datastore': datastore, 'update_q': update_q}) | ||||
|  | ||||
|     watch_api.add_resource(WatchFavicon, | ||||
|                            '/api/v1/watch/<string:uuid>/favicon', | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
|     watch_api.add_resource(WatchHistory, | ||||
|                            '/api/v1/watch/<string:uuid>/history', | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
| @@ -279,12 +329,13 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
|  | ||||
|     watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>', | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
|                            resource_class_kwargs={'datastore': datastore, 'update_q': update_q}) | ||||
|                             | ||||
|     watch_api.add_resource(Search, '/api/v1/search', | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
|  | ||||
|  | ||||
|     watch_api.add_resource(Notifications, '/api/v1/notifications', | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
|  | ||||
|     @login_manager.user_loader | ||||
|     def user_loader(email): | ||||
| @@ -351,11 +402,15 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     @app.route("/static/<string:group>/<string:filename>", methods=['GET']) | ||||
|     def static_content(group, filename): | ||||
|         from flask import make_response | ||||
|         import re | ||||
|         group = re.sub(r'[^\w.-]+', '', group.lower()) | ||||
|         filename = re.sub(r'[^\w.-]+', '', filename.lower()) | ||||
|  | ||||
|         if group == 'screenshot': | ||||
|             # Could be sensitive, follow password requirements | ||||
|             if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated: | ||||
|                 abort(403) | ||||
|                 if not datastore.data['settings']['application'].get('shared_diff_access'): | ||||
|                     abort(403) | ||||
|  | ||||
|             screenshot_filename = "last-screenshot.png" if not request.args.get('error_screenshot') else "last-error-screenshot.png" | ||||
|  | ||||
| @@ -372,6 +427,32 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             except FileNotFoundError: | ||||
|                 abort(404) | ||||
|  | ||||
|         if group == 'favicon': | ||||
|             # Could be sensitive, follow password requirements | ||||
|             if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated: | ||||
|                 abort(403) | ||||
|             # Get the watch object | ||||
|             watch = datastore.data['watching'].get(filename) | ||||
|             if not watch: | ||||
|                 abort(404) | ||||
|  | ||||
|             favicon_filename = watch.get_favicon_filename() | ||||
|             if favicon_filename: | ||||
|                 try: | ||||
|                     import magic | ||||
|                     mime = magic.from_file( | ||||
|                         os.path.join(watch.watch_data_dir, favicon_filename), | ||||
|                         mime=True | ||||
|                     ) | ||||
|                 except ImportError: | ||||
|                     # Fallback, no python-magic | ||||
|                     import mimetypes | ||||
|                     mime, encoding = mimetypes.guess_type(favicon_filename) | ||||
|  | ||||
|                 response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename)) | ||||
|                 response.headers['Content-type'] = mime | ||||
|                 response.headers['Cache-Control'] = 'max-age=300, must-revalidate'  # Cache for 5 minutes, then revalidate | ||||
|                 return response | ||||
|  | ||||
|         if group == 'visual_selector_data': | ||||
|             # Could be sensitive, follow password requirements | ||||
| @@ -389,7 +470,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                     response.headers['Content-Type'] = 'application/json' | ||||
|                     response.headers['Content-Encoding'] = 'deflate' | ||||
|                 else: | ||||
|                     logger.error(f'Request elements.deflate at "{watch_directory}" but was notfound.') | ||||
|                     logger.error(f'Request elements.deflate at "{watch_directory}" but was not found.') | ||||
|                     abort(404) | ||||
|  | ||||
|                 if response: | ||||
| @@ -404,7 +485,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         # These files should be in our subdirectory | ||||
|         try: | ||||
|             return send_from_directory("static/{}".format(group), path=filename) | ||||
|             return send_from_directory(f"static/{group}", path=filename) | ||||
|         except FileNotFoundError: | ||||
|             abort(404) | ||||
|  | ||||
| @@ -438,19 +519,121 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|     # watchlist UI buttons etc | ||||
|     import changedetectionio.blueprint.ui as ui | ||||
|     app.register_blueprint(ui.construct_blueprint(datastore, update_q, running_update_threads, queuedWatchMetaData)) | ||||
|     app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_handler, queuedWatchMetaData, watch_check_update)) | ||||
|  | ||||
|     import changedetectionio.blueprint.watchlist as watchlist | ||||
|     app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='') | ||||
|  | ||||
|     # Initialize Socket.IO server conditionally based on settings | ||||
|     socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True) | ||||
|     if socket_io_enabled: | ||||
|         from changedetectionio.realtime.socket_server import init_socketio | ||||
|         global socketio_server | ||||
|         socketio_server = init_socketio(app, datastore) | ||||
|         logger.info("Socket.IO server initialized") | ||||
|     else: | ||||
|         logger.info("Socket.IO server disabled via settings") | ||||
|         socketio_server = None | ||||
|  | ||||
|     # Memory cleanup endpoint | ||||
|     @app.route('/gc-cleanup', methods=['GET']) | ||||
|     @login_optionally_required | ||||
|     def gc_cleanup(): | ||||
|         from changedetectionio.gc_cleanup import memory_cleanup | ||||
|         from flask import jsonify | ||||
|  | ||||
|         result = memory_cleanup(app) | ||||
|         return jsonify({"status": "success", "message": "Memory cleanup completed", "result": result}) | ||||
|  | ||||
|     # Worker health check endpoint | ||||
|     @app.route('/worker-health', methods=['GET']) | ||||
|     @login_optionally_required | ||||
|     def worker_health(): | ||||
|         from flask import jsonify | ||||
|          | ||||
|         expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers'])) | ||||
|          | ||||
|         # Get basic status | ||||
|         status = worker_handler.get_worker_status() | ||||
|          | ||||
|         # Perform health check | ||||
|         health_result = worker_handler.check_worker_health( | ||||
|             expected_count=expected_workers, | ||||
|             update_q=update_q, | ||||
|             notification_q=notification_q, | ||||
|             app=app, | ||||
|             datastore=datastore | ||||
|         ) | ||||
|          | ||||
|         return jsonify({ | ||||
|             "status": "success", | ||||
|             "worker_status": status, | ||||
|             "health_check": health_result, | ||||
|             "expected_workers": expected_workers | ||||
|         }) | ||||
|  | ||||
|     # Queue status endpoint | ||||
|     @app.route('/queue-status', methods=['GET']) | ||||
|     @login_optionally_required | ||||
|     def queue_status(): | ||||
|         from flask import jsonify, request | ||||
|          | ||||
|         # Get specific UUID position if requested | ||||
|         target_uuid = request.args.get('uuid') | ||||
|          | ||||
|         if target_uuid: | ||||
|             position_info = update_q.get_uuid_position(target_uuid) | ||||
|             return jsonify({ | ||||
|                 "status": "success", | ||||
|                 "uuid": target_uuid, | ||||
|                 "queue_position": position_info | ||||
|             }) | ||||
|         else: | ||||
|             # Get pagination parameters | ||||
|             limit = request.args.get('limit', type=int) | ||||
|             offset = request.args.get('offset', type=int, default=0) | ||||
|             summary_only = request.args.get('summary', type=bool, default=False) | ||||
|              | ||||
|             if summary_only: | ||||
|                 # Fast summary for large queues | ||||
|                 summary = update_q.get_queue_summary() | ||||
|                 return jsonify({ | ||||
|                     "status": "success", | ||||
|                     "queue_summary": summary | ||||
|                 }) | ||||
|             else: | ||||
|                 # Get queued items with pagination support | ||||
|                 if limit is None: | ||||
|                     # Default limit for large queues to prevent performance issues | ||||
|                     queue_size = update_q.qsize() | ||||
|                     if queue_size > 100: | ||||
|                         limit = 50 | ||||
|                         logger.warning(f"Large queue ({queue_size} items) detected, limiting to {limit} items. Use ?limit=N for more.") | ||||
|                  | ||||
|                 all_queued = update_q.get_all_queued_uuids(limit=limit, offset=offset) | ||||
|                 return jsonify({ | ||||
|                     "status": "success", | ||||
|                     "queue_size": update_q.qsize(), | ||||
|                     "queued_data": all_queued | ||||
|                 }) | ||||
|  | ||||
|     # Start the async workers during app initialization | ||||
|     # Can be overridden by ENV or use the default settings | ||||
|     n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers'])) | ||||
|     logger.info(f"Starting {n_workers} workers during app initialization") | ||||
|     worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore) | ||||
|  | ||||
|     # @todo handle ctrl break | ||||
|     ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() | ||||
|     threading.Thread(target=notification_runner).start() | ||||
|  | ||||
|     in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ | ||||
|     # Check for new release version, but not when running in test/build or pytest | ||||
|     if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')): | ||||
|     if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest: | ||||
|         threading.Thread(target=check_for_new_version).start() | ||||
|  | ||||
|     # Return the Flask app - the Socket.IO will be attached to it but initialized separately | ||||
|     # This avoids circular dependencies | ||||
|     return app | ||||
|  | ||||
|  | ||||
| @@ -486,72 +669,87 @@ def notification_runner(): | ||||
|     global notification_debug_log | ||||
|     from datetime import datetime | ||||
|     import json | ||||
|     while not app.config.exit.is_set(): | ||||
|         try: | ||||
|             # At the moment only one thread runs (single runner) | ||||
|             n_object = notification_q.get(block=False) | ||||
|         except queue.Empty: | ||||
|             time.sleep(1) | ||||
|  | ||||
|         else: | ||||
|  | ||||
|             now = datetime.now() | ||||
|             sent_obj = None | ||||
|  | ||||
|     with app.app_context(): | ||||
|         while not app.config.exit.is_set(): | ||||
|             try: | ||||
|                 from changedetectionio import notification | ||||
|                 # Fallback to system config if not set | ||||
|                 if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'): | ||||
|                     n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body') | ||||
|                 # At the moment only one thread runs (single runner) | ||||
|                 n_object = notification_q.get(block=False) | ||||
|             except queue.Empty: | ||||
|                 time.sleep(1) | ||||
|  | ||||
|                 if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'): | ||||
|                     n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title') | ||||
|             else: | ||||
|  | ||||
|                 if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'): | ||||
|                     n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format') | ||||
|                 now = datetime.now() | ||||
|                 sent_obj = None | ||||
|  | ||||
|                 sent_obj = notification.process_notification(n_object, datastore) | ||||
|                 try: | ||||
|                     from changedetectionio.notification.handler import process_notification | ||||
|  | ||||
|             except Exception as e: | ||||
|                 logger.error(f"Watch URL: {n_object['watch_url']}  Error {str(e)}") | ||||
|                     # Fallback to system config if not set | ||||
|                     if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'): | ||||
|                         n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body') | ||||
|  | ||||
|                 # UUID wont be present when we submit a 'test' from the global settings | ||||
|                 if 'uuid' in n_object: | ||||
|                     datastore.update_watch(uuid=n_object['uuid'], | ||||
|                                            update_obj={'last_notification_error': "Notification error detected, goto notification log."}) | ||||
|                     if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'): | ||||
|                         n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title') | ||||
|  | ||||
|                     if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'): | ||||
|                         n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format') | ||||
|                     if n_object.get('notification_urls', {}): | ||||
|                         sent_obj = process_notification(n_object, datastore) | ||||
|  | ||||
|                 except Exception as e: | ||||
|                     logger.error(f"Watch URL: {n_object['watch_url']}  Error {str(e)}") | ||||
|  | ||||
|                     # UUID wont be present when we submit a 'test' from the global settings | ||||
|                     if 'uuid' in n_object: | ||||
|                         datastore.update_watch(uuid=n_object['uuid'], | ||||
|                                                update_obj={'last_notification_error': "Notification error detected, goto notification log."}) | ||||
|  | ||||
|                     log_lines = str(e).splitlines() | ||||
|                     notification_debug_log += log_lines | ||||
|  | ||||
|                     with app.app_context(): | ||||
|                         app.config['watch_check_update_SIGNAL'].send(app_context=app, watch_uuid=n_object.get('uuid')) | ||||
|  | ||||
|                 # Process notifications | ||||
|                 notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))] | ||||
|                 # Trim the log length | ||||
|                 notification_debug_log = notification_debug_log[-100:] | ||||
|  | ||||
|                 log_lines = str(e).splitlines() | ||||
|                 notification_debug_log += log_lines | ||||
|  | ||||
|             # Process notifications | ||||
|             notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))] | ||||
|             # Trim the log length | ||||
|             notification_debug_log = notification_debug_log[-100:] | ||||
|  | ||||
| # Threaded runner, look for new watches to feed into the Queue. | ||||
| def ticker_thread_check_time_launch_checks(): | ||||
|     import random | ||||
|     from changedetectionio import update_worker | ||||
|     proxy_last_called_time = {} | ||||
|     last_health_check = 0 | ||||
|  | ||||
|     recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)) | ||||
|     logger.debug(f"System env MINIMUM_SECONDS_RECHECK_TIME {recheck_time_minimum_seconds}") | ||||
|  | ||||
|     # Spin up Workers that do the fetching | ||||
|     # Can be overriden by ENV or use the default settings | ||||
|     n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers'])) | ||||
|     for _ in range(n_workers): | ||||
|         new_worker = update_worker.update_worker(update_q, notification_q, app, datastore) | ||||
|         running_update_threads.append(new_worker) | ||||
|         new_worker.start() | ||||
|     # Workers are now started during app initialization, not here | ||||
|  | ||||
|     while not app.config.exit.is_set(): | ||||
|  | ||||
|         # Periodic worker health check (every 60 seconds) | ||||
|         now = time.time() | ||||
|         if now - last_health_check > 60: | ||||
|             expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers'])) | ||||
|             health_result = worker_handler.check_worker_health( | ||||
|                 expected_count=expected_workers, | ||||
|                 update_q=update_q, | ||||
|                 notification_q=notification_q, | ||||
|                 app=app, | ||||
|                 datastore=datastore | ||||
|             ) | ||||
|              | ||||
|             if health_result['status'] != 'healthy': | ||||
|                 logger.warning(f"Worker health check: {health_result['message']}") | ||||
|                  | ||||
|             last_health_check = now | ||||
|  | ||||
|         # Get a list of watches by UUID that are currently fetching data | ||||
|         running_uuids = [] | ||||
|         for t in running_update_threads: | ||||
|             if t.current_uuid: | ||||
|                 running_uuids.append(t.current_uuid) | ||||
|         running_uuids = worker_handler.get_running_uuids() | ||||
|  | ||||
|         # Re #232 - Deepcopy the data incase it changes while we're iterating through it all | ||||
|         watch_uuid_list = [] | ||||
| @@ -646,16 +844,22 @@ def ticker_thread_check_time_launch_checks(): | ||||
|  | ||||
|                     # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it. | ||||
|                     priority = int(time.time()) | ||||
|                     logger.debug( | ||||
|                         f"> Queued watch UUID {uuid} " | ||||
|                         f"last checked at {watch['last_checked']} " | ||||
|                         f"queued at {now:0.2f} priority {priority} " | ||||
|                         f"jitter {watch.jitter_seconds:0.2f}s, " | ||||
|                         f"{now - watch['last_checked']:0.2f}s since last checked") | ||||
|  | ||||
|                     # Into the queue with you | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid})) | ||||
|  | ||||
|                     queued_successfully = worker_handler.queue_item_async_safe(update_q, | ||||
|                                                                                queuedWatchMetaData.PrioritizedItem(priority=priority, | ||||
|                                                                                                                    item={'uuid': uuid}) | ||||
|                                                                                ) | ||||
|                     if queued_successfully: | ||||
|                         logger.debug( | ||||
|                             f"> Queued watch UUID {uuid} " | ||||
|                             f"last checked at {watch['last_checked']} " | ||||
|                             f"queued at {now:0.2f} priority {priority} " | ||||
|                             f"jitter {watch.jitter_seconds:0.2f}s, " | ||||
|                             f"{now - watch['last_checked']:0.2f}s since last checked") | ||||
|                     else: | ||||
|                         logger.critical(f"CRITICAL: Failed to queue watch UUID {uuid} in ticker thread!") | ||||
|                          | ||||
|                     # Reset for next time | ||||
|                     watch.jitter_seconds = 0 | ||||
|  | ||||
|   | ||||
| @@ -28,6 +28,8 @@ from wtforms.validators import ValidationError | ||||
|  | ||||
| from validators.url import url as url_validator | ||||
|  | ||||
| from changedetectionio.widgets import TernaryNoneBooleanField | ||||
|  | ||||
|  | ||||
| # default | ||||
| # each select <option data-enabled="enabled-0-0" | ||||
| @@ -224,27 +226,37 @@ class StringDictKeyValue(StringField): | ||||
|  | ||||
|     def _value(self): | ||||
|         if self.data: | ||||
|             output = u'' | ||||
|             for k in self.data.keys(): | ||||
|                 output += "{}: {}\r\n".format(k, self.data[k]) | ||||
|  | ||||
|             output = '' | ||||
|             for k, v in self.data.items(): | ||||
|                 output += f"{k}: {v}\r\n" | ||||
|             return output | ||||
|         else: | ||||
|             return u'' | ||||
|             return '' | ||||
|  | ||||
|     # incoming | ||||
|     # incoming data processing + validation | ||||
|     def process_formdata(self, valuelist): | ||||
|         self.data = {} | ||||
|         errors = [] | ||||
|         if valuelist: | ||||
|             self.data = {} | ||||
|             # Remove empty strings | ||||
|             cleaned = list(filter(None, valuelist[0].split("\n"))) | ||||
|             for s in cleaned: | ||||
|                 parts = s.strip().split(':', 1) | ||||
|                 if len(parts) == 2: | ||||
|                     self.data.update({parts[0].strip(): parts[1].strip()}) | ||||
|             # Remove empty strings (blank lines) | ||||
|             cleaned = [line.strip() for line in valuelist[0].split("\n") if line.strip()] | ||||
|             for idx, s in enumerate(cleaned, start=1): | ||||
|                 if ':' not in s: | ||||
|                     errors.append(f"Line {idx} is missing a ':' separator.") | ||||
|                     continue | ||||
|                 parts = s.split(':', 1) | ||||
|                 key = parts[0].strip() | ||||
|                 value = parts[1].strip() | ||||
|  | ||||
|         else: | ||||
|             self.data = {} | ||||
|                 if not key: | ||||
|                     errors.append(f"Line {idx} has an empty key.") | ||||
|                 if not value: | ||||
|                     errors.append(f"Line {idx} has an empty value.") | ||||
|  | ||||
|                 self.data[key] = value | ||||
|  | ||||
|         if errors: | ||||
|             raise ValidationError("Invalid input:\n" + "\n".join(errors)) | ||||
|  | ||||
| class ValidateContentFetcherIsReady(object): | ||||
|     """ | ||||
| @@ -306,8 +318,8 @@ class ValidateAppRiseServers(object): | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         import apprise | ||||
|         from .apprise_plugin.assets import apprise_asset | ||||
|         from .apprise_plugin.custom_handlers import apprise_http_custom_handler  # noqa: F401 | ||||
|         from .notification.apprise_plugin.assets import apprise_asset | ||||
|         from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler  # noqa: F401 | ||||
|  | ||||
|         apobj = apprise.Apprise(asset=apprise_asset) | ||||
|  | ||||
| @@ -386,6 +398,19 @@ def validate_url(test_url): | ||||
|         # This should be wtforms.validators. | ||||
|         raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format') | ||||
|  | ||||
|  | ||||
| class ValidateSinglePythonRegexString(object): | ||||
|     def __init__(self, message=None): | ||||
|         self.message = message | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         try: | ||||
|             re.compile(field.data) | ||||
|         except re.error: | ||||
|             message = field.gettext('RegEx \'%s\' is not a valid regular expression.') | ||||
|             raise ValidationError(message % (field.data)) | ||||
|  | ||||
|  | ||||
| class ValidateListRegex(object): | ||||
|     """ | ||||
|     Validates that anything that looks like a regex passes as a regex | ||||
| @@ -404,6 +429,7 @@ class ValidateListRegex(object): | ||||
|                     message = field.gettext('RegEx \'%s\' is not a valid regular expression.') | ||||
|                     raise ValidationError(message % (line)) | ||||
|  | ||||
|  | ||||
| class ValidateCSSJSONXPATHInput(object): | ||||
|     """ | ||||
|     Filter validation | ||||
| @@ -524,7 +550,6 @@ class commonSettingsForm(Form): | ||||
|         self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) | ||||
|         self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) | ||||
|  | ||||
|     extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False) | ||||
|     fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()]) | ||||
|     notification_format = SelectField('Notification format', choices=valid_notification_formats.keys()) | ||||
| @@ -586,24 +611,24 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     filter_text_replaced = BooleanField('Replaced/changed lines', default=True) | ||||
|     filter_text_removed = BooleanField('Removed lines', default=True) | ||||
|  | ||||
|     trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) | ||||
|     trigger_text = StringListField('Keyword triggers - Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) | ||||
|     if os.getenv("PLAYWRIGHT_DRIVER_URL"): | ||||
|         browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10) | ||||
|     text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()]) | ||||
|     webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()]) | ||||
|  | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"}) | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|     proxy = RadioField('Proxy') | ||||
|     # filter_failure_notification_send @todo make ternary | ||||
|     filter_failure_notification_send = BooleanField( | ||||
|         'Send a notification when the filter can no longer be found on the page', default=False) | ||||
|  | ||||
|     notification_muted = BooleanField('Notifications Muted / Off', default=False) | ||||
|     notification_muted = TernaryNoneBooleanField('Notifications', default=None, yes_text="Muted", no_text="On") | ||||
|     notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False) | ||||
|  | ||||
|     conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL') | ||||
|     conditions = FieldList(FormField(ConditionFormRow), min_entries=1)  # Add rule logic here | ||||
|  | ||||
|     use_page_title_in_list = TernaryNoneBooleanField('Use page <title> in list', default=None) | ||||
|  | ||||
|     def extra_tab_content(self): | ||||
|         return None | ||||
| @@ -709,6 +734,12 @@ class globalSettingsRequestForm(Form): | ||||
|     jitter_seconds = IntegerField('Random jitter seconds ± check', | ||||
|                                   render_kw={"style": "width: 5em;"}, | ||||
|                                   validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
|      | ||||
|     workers = IntegerField('Number of fetch workers', | ||||
|                           render_kw={"style": "width: 5em;"}, | ||||
|                           validators=[validators.NumberRange(min=1, max=50, | ||||
|                                                              message="Should be between 1 and 50")]) | ||||
|      | ||||
|     extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5) | ||||
|     extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5) | ||||
|  | ||||
| @@ -721,6 +752,11 @@ class globalSettingsRequestForm(Form): | ||||
|                     self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.') | ||||
|                     return False | ||||
|  | ||||
class globalSettingsApplicationUIForm(Form):
    """UI preference toggles, embedded as the ``ui`` sub-form of globalSettingsApplicationForm."""

    open_diff_in_new_tab = BooleanField(
        "Open 'History' page in a new tab",
        default=True,
        validators=[validators.Optional()],
    )
    socket_io_enabled = BooleanField(
        'Realtime UI Updates Enabled',
        default=True,
        validators=[validators.Optional()],
    )
    favicons_enabled = BooleanField(
        'Favicons Enabled',
        default=True,
        validators=[validators.Optional()],
    )
    # Unlike the toggles above, this field declares no explicit default and no validators.
    use_page_title_in_list = BooleanField('Use page <title> in watch overview list')
|  | ||||
| # datastore.data['settings']['application'].. | ||||
| class globalSettingsApplicationForm(commonSettingsForm): | ||||
| @@ -745,13 +781,14 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|  | ||||
|     removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) | ||||
|     shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()]) | ||||
|     shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()]) | ||||
|     rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True, | ||||
|                                       validators=[validators.Optional()]) | ||||
|     filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification', | ||||
|                                                                   render_kw={"style": "width: 5em;"}, | ||||
|                                                                   validators=[validators.NumberRange(min=0, | ||||
|                                                                                                      message="Should contain zero or more attempts")]) | ||||
|     ui = FormField(globalSettingsApplicationUIForm) | ||||
|  | ||||
|  | ||||
| class globalSettingsForm(Form): | ||||
| @@ -766,9 +803,9 @@ class globalSettingsForm(Form): | ||||
|  | ||||
|     requests = FormField(globalSettingsRequestForm) | ||||
|     application = FormField(globalSettingsApplicationForm) | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"}) | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|  | ||||
| class extractDataForm(Form): | ||||
|     extract_regex = StringField('RegEx to extract', validators=[validators.Length(min=1, message="Needs a RegEx")]) | ||||
|     extract_regex = StringField('RegEx to extract', validators=[validators.DataRequired(), ValidateSinglePythonRegexString()]) | ||||
|     extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|   | ||||
							
								
								
									
										162
									
								
								changedetectionio/gc_cleanup.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										162
									
								
								changedetectionio/gc_cleanup.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,162 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import ctypes | ||||
| import gc | ||||
| import re | ||||
| import psutil | ||||
| import sys | ||||
| import threading | ||||
| import importlib | ||||
| from loguru import logger | ||||
|  | ||||
def _clear_tracked_objects(class_names):
    """Call ``clear()`` on every gc-tracked object whose class name is in ``class_names``."""
    for obj in gc.get_objects():
        # type() is used for the name lookup so no per-object attribute hooks run.
        if type(obj).__name__ in class_names and hasattr(obj, 'clear'):
            try:
                obj.clear()
            except (AttributeError, TypeError):
                # Best effort only - some objects expose an unusable clear().
                pass


def memory_cleanup(app=None):
    """
    Perform comprehensive memory cleanup operations and log the process RSS
    (in MB) after each step.

    Every step is best-effort: a step that is unsupported on the current
    platform or interpreter is skipped (with a debug log line) instead of
    aborting the whole cleanup.

    Args:
        app: Optional Flask app instance for clearing Flask-specific caches

    Returns:
        str: Always the status message "cleaned"
    """
    process = psutil.Process()

    def rss_mb():
        # Resident set size of this process, converted from bytes to megabytes.
        return process.memory_info().rss / 1024 / 1024

    logger.debug(f"Memory cleanup started - Current memory usage: {rss_mb():,.2f} MB")

    # 1. Standard garbage collection - force full collection on all generations
    gc.collect(0)  # Collect youngest generation
    gc.collect(1)  # Collect middle generation
    gc.collect(2)  # Collect oldest generation

    # Run full collection again to ensure maximum cleanup
    gc.collect()
    logger.debug(f"After full gc.collect() - Memory usage: {rss_mb():,.2f} MB")

    # 3. Call libc's malloc_trim to release memory back to the OS.
    # "libc.so.6" may be missing (CDLL raises OSError) or may not export
    # malloc_trim (ctypes raises AttributeError); in either case skip the trim
    # instead of letting the whole cleanup fail.
    libc = None
    malloc_trim = None
    try:
        libc = ctypes.CDLL("libc.so.6")
        malloc_trim = libc.malloc_trim
    except (OSError, AttributeError) as e:
        logger.debug(f"malloc_trim not available on this platform: {e}")

    if malloc_trim is not None:
        malloc_trim(0)
        logger.debug(f"After malloc_trim(0) - Memory usage: {rss_mb():,.2f} MB")

    # 4. Clear Python's regex cache
    re.purge()
    logger.debug(f"After re.purge() - Memory usage: {rss_mb():,.2f} MB")

    # 5. Reset thread-local storage
    # NOTE(review): this only creates (and immediately drops) a new thread-local
    # object; it does not touch existing ones - confirm it is still wanted.
    threading.local()
    logger.debug(f"After threading.local() - Memory usage: {rss_mb():,.2f} MB")

    # 6. Clear sys.intern cache if Python version supports it
    # NOTE(review): sys.intern is a plain builtin function, so this is expected
    # to land in the except branch on current CPython.
    try:
        sys.intern.clear()
        logger.debug(f"After sys.intern.clear() - Memory usage: {rss_mb():,.2f} MB")
    except (AttributeError, TypeError):
        logger.debug("sys.intern.clear() not supported in this Python version")

    # 7. Clear XML/lxml caches if available
    # NOTE(review): this clears *every* matching live object the collector knows
    # about, including elements that other code may still be using - verify that
    # no fetch/processing is in flight when this function is called.
    try:
        # Only act when lxml.etree has already been imported by someone else
        lxml_etree = sys.modules.get('lxml.etree')
        if lxml_etree:
            # Clear module-level caches
            if hasattr(lxml_etree, 'clear_error_log'):
                lxml_etree.clear_error_log()

            # Error-log objects, plus Element objects which can hold references to documents
            _clear_tracked_objects(('_ErrorLog', '_RotatingErrorLog', '_DomainErrorLog',
                                    '_Element', 'ElementBase'))
            logger.debug(f"After lxml.etree cleanup - Memory usage: {rss_mb():,.2f} MB")

        # Only act when lxml.html has already been imported
        lxml_html = sys.modules.get('lxml.html')
        if lxml_html:
            # Clear HTML-specific element types
            _clear_tracked_objects(('HtmlElement', 'FormElement', 'InputElement',
                                    'SelectElement', 'TextareaElement', 'CheckboxGroup',
                                    'RadioGroup', 'MultipleSelectOptions', 'FieldsDict'))
            logger.debug(f"After lxml.html cleanup - Memory usage: {rss_mb():,.2f} MB")
    except (ImportError, AttributeError):
        logger.debug("lxml cleanup not applicable")

    # 8. Clear JSON parser caches if applicable
    try:
        json_module = sys.modules.get('json')
        if json_module and hasattr(json_module, '_default_encoder'):
            # Raises AttributeError (handled below) when the encoder has no 'markers'
            json_module._default_encoder.markers.clear()
            logger.debug(f"After JSON parser cleanup - Memory usage: {rss_mb():,.2f} MB")
    except (AttributeError, KeyError):
        logger.debug("JSON cleanup not applicable")

    # 9. Force Python's memory allocator to release unused memory
    try:
        if hasattr(sys, 'pypy_version_info'):
            # PyPy has different memory management
            gc.collect()
        else:
            # CPython - run the collector through the C API
            ctypes.pythonapi.PyGC_Collect()
            logger.debug(f"After PyGC_Collect - Memory usage: {rss_mb():,.2f} MB")
    except (AttributeError, TypeError):
        logger.debug("PyGC_Collect not supported")

    # 10. Clear Flask-specific caches if applicable
    if app:
        try:
            # Clear Flask caches if they exist
            for key in list(app.config.get('_cache', {}).keys()):
                app.config['_cache'].pop(key, None)

            # Clear Jinja2 template cache if available
            if hasattr(app, 'jinja_env') and hasattr(app.jinja_env, 'cache'):
                app.jinja_env.cache.clear()

            logger.debug(f"After Flask cache clear - Memory usage: {rss_mb():,.2f} MB")
        except (AttributeError, KeyError):
            logger.debug("No Flask cache to clear")

    # Final garbage collection pass, then hand freed pages back to the OS if we can
    gc.collect()
    if malloc_trim is not None:
        malloc_trim(0)

    # Log final memory usage
    logger.info(f"Memory cleanup completed - Final memory usage: {rss_mb():,.2f} MB")
    return "cleaned"
| @@ -1,6 +1,7 @@ | ||||
| from loguru import logger | ||||
| from lxml import etree | ||||
| from typing import List | ||||
| import html | ||||
| import json | ||||
| import re | ||||
|  | ||||
| @@ -9,6 +10,11 @@ TEXT_FILTER_LIST_LINE_SUFFIX = "<br>" | ||||
| TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ') | ||||
| PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$' | ||||
|  | ||||
| TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S) | ||||
| META_CS  = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I) | ||||
| META_CT  = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I) | ||||
|  | ||||
|  | ||||
| # 'price' , 'lowPrice', 'highPrice' are usually under here | ||||
| # All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here | ||||
| LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"] | ||||
| @@ -309,10 +315,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None | ||||
|         soup = BeautifulSoup(content, 'html.parser') | ||||
|  | ||||
|         if ensure_is_ldjson_info_type: | ||||
|             bs_result = soup.findAll('script', {"type": "application/ld+json"}) | ||||
|             bs_result = soup.find_all('script', {"type": "application/ld+json"}) | ||||
|         else: | ||||
|             bs_result = soup.findAll('script') | ||||
|         bs_result += soup.findAll('body') | ||||
|             bs_result = soup.find_all('script') | ||||
|         bs_result += soup.find_all('body') | ||||
|  | ||||
|         bs_jsons = [] | ||||
|         for result in bs_result: | ||||
| @@ -366,22 +372,41 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None | ||||
| # wordlist - list of regex's (str) or words (str) | ||||
| # Preserves all linefeeds and other whitespacing, its not the job of this to remove that | ||||
| def strip_ignore_text(content, wordlist, mode="content"): | ||||
|     i = 0 | ||||
|     output = [] | ||||
|     ignore_text = [] | ||||
|     ignore_regex = [] | ||||
|     ignored_line_numbers = [] | ||||
|     ignore_regex_multiline = [] | ||||
|     ignored_lines = [] | ||||
|  | ||||
|     for k in wordlist: | ||||
|         # Is it a regex? | ||||
|         res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE) | ||||
|         if res: | ||||
|             ignore_regex.append(re.compile(perl_style_slash_enclosed_regex_to_options(k))) | ||||
|             res = re.compile(perl_style_slash_enclosed_regex_to_options(k)) | ||||
|             if res.flags & re.DOTALL or res.flags & re.MULTILINE: | ||||
|                 ignore_regex_multiline.append(res) | ||||
|             else: | ||||
|                 ignore_regex.append(res) | ||||
|         else: | ||||
|             ignore_text.append(k.strip()) | ||||
|  | ||||
|     for line in content.splitlines(keepends=True): | ||||
|         i += 1 | ||||
|     for r in ignore_regex_multiline: | ||||
|         for match in r.finditer(content): | ||||
|             content_lines = content[:match.end()].splitlines(keepends=True) | ||||
|             match_lines = content[match.start():match.end()].splitlines(keepends=True) | ||||
|  | ||||
|             end_line = len(content_lines) | ||||
|             start_line = end_line - len(match_lines) | ||||
|  | ||||
|             if end_line - start_line <= 1: | ||||
|                 # Match is empty or in the middle of the line | ||||
|                 ignored_lines.append(start_line) | ||||
|             else: | ||||
|                 for i in range(start_line, end_line): | ||||
|                     ignored_lines.append(i) | ||||
|  | ||||
|     line_index = 0 | ||||
|     lines = content.splitlines(keepends=True) | ||||
|     for line in lines: | ||||
|         # Always ignore blank lines in this mode. (when this function gets called) | ||||
|         got_match = False | ||||
|         for l in ignore_text: | ||||
| @@ -393,17 +418,19 @@ def strip_ignore_text(content, wordlist, mode="content"): | ||||
|                 if r.search(line): | ||||
|                     got_match = True | ||||
|  | ||||
|         if not got_match: | ||||
|             # Not ignored, and should preserve "keepends" | ||||
|             output.append(line) | ||||
|         else: | ||||
|             ignored_line_numbers.append(i) | ||||
|         if got_match: | ||||
|             ignored_lines.append(line_index) | ||||
|  | ||||
|         line_index += 1 | ||||
|  | ||||
|     ignored_lines = set([i for i in ignored_lines if i >= 0 and i < len(lines)]) | ||||
|  | ||||
|     # Used for finding out what to highlight | ||||
|     if mode == "line numbers": | ||||
|         return ignored_line_numbers | ||||
|         return [i + 1 for i in ignored_lines] | ||||
|  | ||||
|     return ''.join(output) | ||||
|     output_lines = set(range(len(lines))) - ignored_lines | ||||
|     return ''.join([lines[i] for i in output_lines]) | ||||
|  | ||||
| def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str: | ||||
|     from xml.sax.saxutils import escape as xml_escape | ||||
| @@ -414,50 +441,36 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False | ||||
|  | ||||
|     return re.sub(pattern, repl, html_content) | ||||
|  | ||||
| def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str: | ||||
|  | ||||
| # NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON | ||||
|  | ||||
|  | ||||
| def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str: | ||||
|     from inscriptis import get_text | ||||
|     from inscriptis.model.config import ParserConfig | ||||
|  | ||||
|     """Converts html string to a string with just the text. If ignoring | ||||
|     rendering anchor tag content is enable, anchor tag content are also | ||||
|     included in the text | ||||
|  | ||||
|     :param html_content: string with html content | ||||
|     :param render_anchor_tag_content: boolean flag indicating whether to extract | ||||
|     hyperlinks (the anchor tag content) together with text. This refers to the | ||||
|     'href' inside 'a' tags. | ||||
|     Anchor tag content is rendered in the following manner: | ||||
|     '[ text ](anchor tag content)' | ||||
|     :return: extracted text from the HTML | ||||
|     """ | ||||
|     #  if anchor tag content flag is set to True define a config for | ||||
|     #  extracting this content | ||||
|     if render_anchor_tag_content: | ||||
|         parser_config = ParserConfig( | ||||
|             annotation_rules={"a": ["hyperlink"]}, | ||||
|             display_links=True | ||||
|         ) | ||||
|     # otherwise set config to None/default | ||||
|     else: | ||||
|         parser_config = None | ||||
|  | ||||
|     # RSS Mode - Inscriptis will treat `title` as something else. | ||||
|     # Make it as a regular block display element (//item/title) | ||||
|     # This is a bit of a hack - the real way it to use XSLT to convert it to HTML #1874 | ||||
|     if is_rss: | ||||
|         html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content) | ||||
|         html_content = re.sub(r'</title>', r'</h1>', html_content) | ||||
|  | ||||
|     text_content = get_text(html_content, config=parser_config) | ||||
|  | ||||
|     return text_content | ||||
|  | ||||
|  | ||||
| # Does LD+JSON exist with a @type=='product' and a .price set anywhere? | ||||
| def has_ldjson_product_info(content): | ||||
|     try: | ||||
|         lc = content.lower() | ||||
|         if 'application/ld+json' in lc and lc.count('"price"') == 1 and '"pricecurrency"' in lc: | ||||
|         # Better than .lower() which can use a lot of ram | ||||
|         if (re.search(r'application/ld\+json', content, re.IGNORECASE) and | ||||
|             re.search(r'"price"', content, re.IGNORECASE) and | ||||
|             re.search(r'"pricecurrency"', content, re.IGNORECASE)): | ||||
|             return True | ||||
|  | ||||
| #       On some pages this is really terribly expensive when they dont really need it | ||||
| @@ -503,3 +516,43 @@ def get_triggered_text(content, trigger_text): | ||||
|         i += 1 | ||||
|  | ||||
|     return triggered_text | ||||
|  | ||||
|  | ||||
| def extract_title(data: bytes | str, sniff_bytes: int = 2048, scan_chars: int = 8192) -> str | None: | ||||
|     try: | ||||
|         # Only decode/process the prefix we need for title extraction | ||||
|         match data: | ||||
|             case bytes() if data.startswith((b"\xff\xfe", b"\xfe\xff")): | ||||
|                 prefix = data[:scan_chars * 2].decode("utf-16", errors="replace") | ||||
|             case bytes() if data.startswith((b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff")): | ||||
|                 prefix = data[:scan_chars * 4].decode("utf-32", errors="replace") | ||||
|             case bytes(): | ||||
|                 try: | ||||
|                     prefix = data[:scan_chars].decode("utf-8") | ||||
|                 except UnicodeDecodeError: | ||||
|                     try: | ||||
|                         head = data[:sniff_bytes].decode("ascii", errors="ignore") | ||||
|                         if m := (META_CS.search(head) or META_CT.search(head)): | ||||
|                             enc = m.group(1).lower() | ||||
|                         else: | ||||
|                             enc = "cp1252" | ||||
|                         prefix = data[:scan_chars * 2].decode(enc, errors="replace") | ||||
|                     except Exception as e: | ||||
|                         logger.error(f"Title extraction encoding detection failed: {e}") | ||||
|                         return None | ||||
|             case str(): | ||||
|                 prefix = data[:scan_chars] if len(data) > scan_chars else data | ||||
|             case _: | ||||
|                 logger.error(f"Title extraction received unsupported data type: {type(data)}") | ||||
|                 return None | ||||
|  | ||||
|         # Search only in the prefix | ||||
|         if m := TITLE_RE.search(prefix): | ||||
|             title = html.unescape(" ".join(m.group(1).split())).strip() | ||||
|             # Some safe limit | ||||
|             return title[:2000] | ||||
|         return None | ||||
|          | ||||
|     except Exception as e: | ||||
|         logger.error(f"Title extraction failed: {e}") | ||||
|         return None | ||||
| @@ -39,12 +39,12 @@ class model(dict): | ||||
|                     'api_access_token_enabled': True, | ||||
|                     'base_url' : None, | ||||
|                     'empty_pages_are_a_change': False, | ||||
|                     'extract_title_as_title': False, | ||||
|                     'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), | ||||
|                     'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT, | ||||
|                     'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum | ||||
|                     'global_subtractive_selectors': [], | ||||
|                     'ignore_whitespace': True, | ||||
|                     'ignore_status_codes': False, #@todo implement, as ternary. | ||||
|                     'notification_body': default_notification_body, | ||||
|                     'notification_format': default_notification_format, | ||||
|                     'notification_title': default_notification_title, | ||||
| @@ -57,9 +57,15 @@ class model(dict): | ||||
|                     'rss_hide_muted_watches': True, | ||||
|                     'schema_version' : 0, | ||||
|                     'shared_diff_access': False, | ||||
|                     'webdriver_delay': None , # Extra delay in seconds before extracting text | ||||
|                     'tags': {}, #@todo use Tag.model initialisers | ||||
|                     'timezone': None, # Default IANA timezone name | ||||
|                     'webdriver_delay': None , # Extra delay in seconds before extracting text | ||||
|                     'ui': { | ||||
|                         'use_page_title_in_list': True, | ||||
|                         'open_diff_in_new_tab': True, | ||||
|                         'socket_io_enabled': True, | ||||
|                         'favicons_enabled': True | ||||
|                     }, | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|   | ||||
| @@ -1,3 +1,5 @@ | ||||
| from blinker import signal | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from . import watch_base | ||||
| @@ -6,11 +8,14 @@ import re | ||||
| from pathlib import Path | ||||
| from loguru import logger | ||||
|  | ||||
| from .. import safe_jinja | ||||
| from ..html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
|  | ||||
| # Allowable protocols, protects against javascript: etc | ||||
| # file:// is further checked by ALLOW_FILE_URI | ||||
| SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):' | ||||
| FAVICON_RESAVE_THRESHOLD_SECONDS=86400 | ||||
|  | ||||
|  | ||||
| minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)) | ||||
| mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} | ||||
| @@ -41,6 +46,7 @@ class model(watch_base): | ||||
|         self.__datastore_path = kw.get('datastore_path') | ||||
|         if kw.get('datastore_path'): | ||||
|             del kw['datastore_path'] | ||||
|              | ||||
|         super(model, self).__init__(*arg, **kw) | ||||
|         if kw.get('default'): | ||||
|             self.update(kw['default']) | ||||
| @@ -60,6 +66,10 @@ class model(watch_base): | ||||
|  | ||||
|         return False | ||||
|  | ||||
|     @property | ||||
|     def has_unviewed(self): | ||||
|         return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2 | ||||
|  | ||||
|     def ensure_data_dir_exists(self): | ||||
|         if not os.path.isdir(self.watch_data_dir): | ||||
|             logger.debug(f"> Creating data dir {self.watch_data_dir}") | ||||
| @@ -95,6 +105,13 @@ class model(watch_base): | ||||
|             return 'DISABLED' | ||||
|         return ready_url | ||||
|  | ||||
|     @property | ||||
|     def domain_only_from_link(self): | ||||
|         from urllib.parse import urlparse | ||||
|         parsed = urlparse(self.link) | ||||
|         domain = parsed.hostname | ||||
|         return domain | ||||
|  | ||||
|     def clear_watch(self): | ||||
|         import pathlib | ||||
|  | ||||
| @@ -120,6 +137,10 @@ class model(watch_base): | ||||
|             'remote_server_reply': None, | ||||
|             'track_ldjson_price_data': None | ||||
|         }) | ||||
|         watch_check_update = signal('watch_check_update') | ||||
|         if watch_check_update: | ||||
|             watch_check_update.send(watch_uuid=self.get('uuid')) | ||||
|  | ||||
|         return | ||||
|  | ||||
|     @property | ||||
| @@ -148,8 +169,8 @@ class model(watch_base): | ||||
|  | ||||
|     @property | ||||
|     def label(self): | ||||
|         # Used for sorting | ||||
|         return self.get('title') if self.get('title') else self.get('url') | ||||
|         # Used for sorting, display, etc | ||||
|         return self.get('title') or self.get('page_title') or self.get('url') | ||||
|  | ||||
|     @property | ||||
|     def last_changed(self): | ||||
| @@ -401,6 +422,154 @@ class model(watch_base): | ||||
|         # False is not an option for AppRise, must be type None | ||||
|         return None | ||||
|  | ||||
|     def favicon_is_expired(self): | ||||
|         favicon_fname = self.get_favicon_filename() | ||||
|         import glob | ||||
|         import time | ||||
|  | ||||
|         if not favicon_fname: | ||||
|             return True | ||||
|         try: | ||||
|             fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None) | ||||
|             logger.trace(f"Favicon file maybe found at {fname}") | ||||
|             if os.path.isfile(fname): | ||||
|                 file_age = int(time.time() - os.path.getmtime(fname)) | ||||
|                 logger.trace(f"Favicon file age is {file_age}s") | ||||
|                 if file_age < FAVICON_RESAVE_THRESHOLD_SECONDS: | ||||
|                     return False | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception checking Favicon age {str(e)}") | ||||
|             return True | ||||
|  | ||||
|         # Also in the case that the file didnt exist | ||||
|         return True | ||||
|  | ||||
|     def bump_favicon(self, url, favicon_base_64: str) -> None: | ||||
|         from urllib.parse import urlparse | ||||
|         import base64 | ||||
|         import binascii | ||||
|         decoded = None | ||||
|  | ||||
|         if url: | ||||
|             try: | ||||
|                 parsed = urlparse(url) | ||||
|                 filename = os.path.basename(parsed.path) | ||||
|                 (base, extension) = filename.lower().strip().rsplit('.', 1) | ||||
|             except ValueError: | ||||
|                 logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'") | ||||
|                 return None | ||||
|         else: | ||||
|             # Assume favicon.ico | ||||
|             base = "favicon" | ||||
|             extension = "ico" | ||||
|  | ||||
|         fname = os.path.join(self.watch_data_dir, f"favicon.{extension}") | ||||
|  | ||||
|         try: | ||||
|             # validate=True makes sure the string only contains valid base64 chars | ||||
|             decoded = base64.b64decode(favicon_base_64, validate=True) | ||||
|         except (binascii.Error, ValueError) as e: | ||||
|             logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}") | ||||
|         else: | ||||
|             if decoded: | ||||
|                 try: | ||||
|                     with open(fname, 'wb') as f: | ||||
|                         f.write(decoded) | ||||
|                     # A signal that could trigger the socket server to update the browser also | ||||
|                     watch_check_update = signal('watch_favicon_bump') | ||||
|                     if watch_check_update: | ||||
|                         watch_check_update.send(watch_uuid=self.get('uuid')) | ||||
|  | ||||
|                 except Exception as e: | ||||
|                     logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}") | ||||
|  | ||||
|         # @todo - Store some checksum and only write when its different | ||||
|         logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}") | ||||
|  | ||||
|     def get_favicon_filename(self) -> str | None: | ||||
|         """ | ||||
|         Find any favicon.* file in the current working directory | ||||
|         and return the contents of the newest one. | ||||
|  | ||||
|         Returns: | ||||
|             bytes: Contents of the newest favicon file, or None if not found. | ||||
|         """ | ||||
|         import glob | ||||
|  | ||||
|         # Search for all favicon.* files | ||||
|         files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*")) | ||||
|  | ||||
|         if not files: | ||||
|             return None | ||||
|  | ||||
|         # Find the newest by modification time | ||||
|         newest_file = max(files, key=os.path.getmtime) | ||||
|         return os.path.basename(newest_file) | ||||
|  | ||||
|     def get_screenshot_as_thumbnail(self, max_age=3200): | ||||
|         """Return path to a square thumbnail of the most recent screenshot. | ||||
|  | ||||
|         Creates a 150x150 pixel thumbnail from the top portion of the screenshot. | ||||
|  | ||||
|         Args: | ||||
|             max_age: Maximum age in seconds before recreating thumbnail | ||||
|  | ||||
|         Returns: | ||||
|             Path to thumbnail or None if no screenshot exists | ||||
|         """ | ||||
|         import os | ||||
|         import time | ||||
|  | ||||
|         thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg") | ||||
|         top_trim = 500  # Pixels from top of screenshot to use | ||||
|  | ||||
|         screenshot_path = self.get_screenshot() | ||||
|         if not screenshot_path: | ||||
|             return None | ||||
|  | ||||
|         # Reuse thumbnail if it's fresh and screenshot hasn't changed | ||||
|         if os.path.isfile(thumbnail_path): | ||||
|             thumbnail_mtime = os.path.getmtime(thumbnail_path) | ||||
|             screenshot_mtime = os.path.getmtime(screenshot_path) | ||||
|  | ||||
|             if screenshot_mtime <= thumbnail_mtime and time.time() - thumbnail_mtime < max_age: | ||||
|                 return thumbnail_path | ||||
|  | ||||
|         try: | ||||
|             from PIL import Image | ||||
|  | ||||
|             with Image.open(screenshot_path) as img: | ||||
|                 # Crop top portion first (full width, top_trim height) | ||||
|                 top_crop_height = min(top_trim, img.height) | ||||
|                 img = img.crop((0, 0, img.width, top_crop_height)) | ||||
|  | ||||
|                 # Create a smaller intermediate image (to reduce memory usage) | ||||
|                 aspect = img.width / img.height | ||||
|                 interim_width = min(top_trim, img.width) | ||||
|                 interim_height = int(interim_width / aspect) if aspect > 0 else top_trim | ||||
|                 img = img.resize((interim_width, interim_height), Image.NEAREST) | ||||
|  | ||||
|                 # Convert to RGB if needed | ||||
|                 if img.mode != 'RGB': | ||||
|                     img = img.convert('RGB') | ||||
|  | ||||
|                 # Crop to square from top center | ||||
|                 square_size = min(img.width, img.height) | ||||
|                 left = (img.width - square_size) // 2 | ||||
|                 img = img.crop((left, 0, left + square_size, square_size)) | ||||
|  | ||||
|                 # Final resize to exact thumbnail size with better filter | ||||
|                 img = img.resize((350, 350), Image.BILINEAR) | ||||
|  | ||||
|                 # Save with optimized settings | ||||
|                 img.save(thumbnail_path, "JPEG", quality=75, optimize=True) | ||||
|  | ||||
|             return thumbnail_path | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error creating thumbnail for {self.get('uuid')}: {str(e)}") | ||||
|             return None | ||||
|  | ||||
|     def __get_file_ctime(self, filename): | ||||
|         fname = os.path.join(self.watch_data_dir, filename) | ||||
|         if os.path.isfile(fname): | ||||
| @@ -494,7 +663,7 @@ class model(watch_base): | ||||
|                     if res: | ||||
|                         if not csv_writer: | ||||
|                             # A file on the disk can be transferred much faster via flask than a string reply | ||||
|                             csv_output_filename = 'report.csv' | ||||
|                             csv_output_filename = f"report-{self.get('uuid')}.csv" | ||||
|                             f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w') | ||||
|                             # @todo some headers in the future | ||||
|                             #fieldnames = ['Epoch seconds', 'Date'] | ||||
| @@ -553,7 +722,10 @@ class model(watch_base): | ||||
|         self.ensure_data_dir_exists() | ||||
|  | ||||
|         with open(target_path, 'wb') as f: | ||||
|             f.write(zlib.compress(json.dumps(data).encode())) | ||||
|             if not isinstance(data, str): | ||||
|                 f.write(zlib.compress(json.dumps(data).encode())) | ||||
|             else: | ||||
|                 f.write(zlib.compress(data.encode())) | ||||
|             f.close() | ||||
|  | ||||
|     # Save as PNG, PNG is larger but better for doing visual diff in the future | ||||
| @@ -575,7 +747,7 @@ class model(watch_base): | ||||
|         import brotli | ||||
|         filepath = os.path.join(self.watch_data_dir, 'last-fetched.br') | ||||
|  | ||||
|         if not os.path.isfile(filepath): | ||||
|         if not os.path.isfile(filepath) or os.path.getsize(filepath) == 0: | ||||
|             # If a previous attempt doesn't exist yet (or the file is empty), just snarf the previous snapshot instead | ||||
|             dates = list(self.history.keys()) | ||||
|             if len(dates): | ||||
| @@ -645,3 +817,44 @@ class model(watch_base): | ||||
|             if step_n: | ||||
|                 available.append(step_n.group(1)) | ||||
|         return available | ||||
|  | ||||
|     def compile_error_texts(self, has_proxies=None): | ||||
|         """Compile error texts for this watch. | ||||
|         Accepts has_proxies parameter to ensure it works even outside app context""" | ||||
|         from flask import url_for | ||||
|         from markupsafe import Markup | ||||
|  | ||||
|         output = []  # Initialize as list since we're using append | ||||
|         last_error = self.get('last_error','') | ||||
|  | ||||
|         try: | ||||
|             url_for('settings.settings_page') | ||||
|         except Exception as e: | ||||
|             has_app_context = False | ||||
|         else: | ||||
|             has_app_context = True | ||||
|  | ||||
|         # has app+request context, we can use url_for() | ||||
|         if has_app_context: | ||||
|             if last_error: | ||||
|                 if '403' in last_error: | ||||
|                     if has_proxies: | ||||
|                         output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try other proxies/location</a> '"))) | ||||
|                     else: | ||||
|                         output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try adding external proxies/locations</a> '"))) | ||||
|                 else: | ||||
|                     output.append(str(Markup(last_error))) | ||||
|  | ||||
|             if self.get('last_notification_error'): | ||||
|                 output.append(str(Markup(f"<div class=\"notification-error\"><a href=\"{url_for('settings.notification_logs')}\">{ self.get('last_notification_error') }</a></div>"))) | ||||
|  | ||||
|         else: | ||||
|             # Lo_Fi version - no app context, cant rely on Jinja2 Markup | ||||
|             if last_error: | ||||
|                 output.append(safe_jinja.render_fully_escaped(last_error)) | ||||
|             if self.get('last_notification_error'): | ||||
|                 output.append(safe_jinja.render_fully_escaped(self.get('last_notification_error'))) | ||||
|  | ||||
|         res = "\n".join(output) | ||||
|         return res | ||||
|  | ||||
|   | ||||
| @@ -2,7 +2,8 @@ import os | ||||
| import uuid | ||||
|  | ||||
| from changedetectionio import strtobool | ||||
| from changedetectionio.notification import default_notification_format_for_watch | ||||
| default_notification_format_for_watch = 'System default' | ||||
| CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL' | ||||
|  | ||||
| class watch_base(dict): | ||||
|  | ||||
| @@ -15,13 +16,14 @@ class watch_base(dict): | ||||
|             'body': None, | ||||
|             'browser_steps': [], | ||||
|             'browser_steps_last_error_step': None, | ||||
|             'conditions' : {}, | ||||
|             'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT, | ||||
|             'check_count': 0, | ||||
|             'check_unique_lines': False,  # On change-detected, compare against all history if its something new | ||||
|             'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine. | ||||
|             'content-type': None, | ||||
|             'date_created': None, | ||||
|             'extract_text': [],  # Extract text by regex after filters | ||||
|             'extract_title_as_title': False, | ||||
|             'fetch_backend': 'system',  # plaintext, playwright etc | ||||
|             'fetch_time': 0.0, | ||||
|             'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), | ||||
| @@ -32,10 +34,12 @@ class watch_base(dict): | ||||
|             'has_ldjson_price_data': None, | ||||
|             'headers': {},  # Extra headers to send | ||||
|             'ignore_text': [],  # List of text to ignore when calculating the comparison checksum | ||||
|             'ignore_status_codes': None, | ||||
|             'in_stock_only': True,  # Only trigger change on going to instock from out-of-stock | ||||
|             'include_filters': [], | ||||
|             'last_checked': 0, | ||||
|             'last_error': False, | ||||
|             'last_notification_error': None, | ||||
|             'last_viewed': 0,  # history key value of the last viewed via the [diff] link | ||||
|             'method': 'GET', | ||||
|             'notification_alert_count': 0, | ||||
| @@ -45,6 +49,7 @@ class watch_base(dict): | ||||
|             'notification_screenshot': False,  # Include the latest screenshot if available and supported by the apprise URL | ||||
|             'notification_title': None, | ||||
|             'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise) | ||||
|             'page_title': None, # <title> from the page | ||||
|             'paused': False, | ||||
|             'previous_md5': False, | ||||
|             'previous_md5_before_filters': False,  # Used for skipping changedetection entirely | ||||
| @@ -118,12 +123,13 @@ class watch_base(dict): | ||||
|                     } | ||||
|                 }, | ||||
|             }, | ||||
|             'title': None, | ||||
|             'title': None, # An arbitrary field that overrides 'page_title' | ||||
|             'track_ldjson_price_data': None, | ||||
|             'trim_text_whitespace': False, | ||||
|             'remove_duplicate_lines': False, | ||||
|             'trigger_text': [],  # List of text or regex to wait for until a change is detected | ||||
|             'url': '', | ||||
|             'use_page_title_in_list': None, # None = use system settings | ||||
|             'uuid': str(uuid.uuid4()), | ||||
|             'webdriver_delay': None, | ||||
|             'webdriver_js_execute_code': None,  # Run before change-detection | ||||
|   | ||||
							
								
								
									
										35
									
								
								changedetectionio/notification/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										35
									
								
								changedetectionio/notification/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,35 @@ | ||||
| from changedetectionio.model import default_notification_format_for_watch | ||||
|  | ||||
| ult_notification_format_for_watch = 'System default' | ||||
| default_notification_format = 'HTML Color' | ||||
| default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n' | ||||
| default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}' | ||||
|  | ||||
| # The values (markdown etc) are from apprise NotifyFormat, | ||||
| # But to avoid importing the whole heavy module just use the same strings here. | ||||
| valid_notification_formats = { | ||||
|     'Text': 'text', | ||||
|     'Markdown': 'markdown', | ||||
|     'HTML': 'html', | ||||
|     'HTML Color': 'htmlcolor', | ||||
|     # Used only for editing a watch (not for global) | ||||
|     default_notification_format_for_watch: default_notification_format_for_watch | ||||
| } | ||||
|  | ||||
|  | ||||
| valid_tokens = { | ||||
|     'base_url': '', | ||||
|     'current_snapshot': '', | ||||
|     'diff': '', | ||||
|     'diff_added': '', | ||||
|     'diff_full': '', | ||||
|     'diff_patch': '', | ||||
|     'diff_removed': '', | ||||
|     'diff_url': '', | ||||
|     'preview_url': '', | ||||
|     'triggered_text': '', | ||||
|     'watch_tag': '', | ||||
|     'watch_title': '', | ||||
|     'watch_url': '', | ||||
|     'watch_uuid': '', | ||||
| } | ||||
| @@ -1,47 +1,15 @@ | ||||
| 
 | ||||
| import time | ||||
| from apprise import NotifyFormat | ||||
| import apprise | ||||
| from loguru import logger | ||||
| 
 | ||||
| from .apprise_plugin.assets import APPRISE_AVATAR_URL | ||||
| from .apprise_plugin.custom_handlers import apprise_http_custom_handler  # noqa: F401 | ||||
| from .safe_jinja import render as jinja_render | ||||
| 
 | ||||
| valid_tokens = { | ||||
|     'base_url': '', | ||||
|     'current_snapshot': '', | ||||
|     'diff': '', | ||||
|     'diff_added': '', | ||||
|     'diff_full': '', | ||||
|     'diff_patch': '', | ||||
|     'diff_removed': '', | ||||
|     'diff_url': '', | ||||
|     'preview_url': '', | ||||
|     'triggered_text': '', | ||||
|     'watch_tag': '', | ||||
|     'watch_title': '', | ||||
|     'watch_url': '', | ||||
|     'watch_uuid': '', | ||||
| } | ||||
| 
 | ||||
| default_notification_format_for_watch = 'System default' | ||||
| default_notification_format = 'HTML Color' | ||||
| default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n' | ||||
| default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}' | ||||
| 
 | ||||
| valid_notification_formats = { | ||||
|     'Text': NotifyFormat.TEXT, | ||||
|     'Markdown': NotifyFormat.MARKDOWN, | ||||
|     'HTML': NotifyFormat.HTML, | ||||
|     'HTML Color': 'htmlcolor', | ||||
|     # Used only for editing a watch (not for global) | ||||
|     default_notification_format_for_watch: default_notification_format_for_watch | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL | ||||
| 
 | ||||
| def process_notification(n_object, datastore): | ||||
|     from changedetectionio.safe_jinja import render as jinja_render | ||||
|     from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats | ||||
|     # be sure its registered | ||||
|     from .apprise_plugin.custom_handlers import apprise_http_custom_handler | ||||
| 
 | ||||
|     now = time.time() | ||||
|     if n_object.get('notification_timestamp'): | ||||
|         logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s") | ||||
| @@ -58,14 +26,13 @@ def process_notification(n_object, datastore): | ||||
|         # Initially text or whatever | ||||
|         n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format]) | ||||
| 
 | ||||
|     logger.trace(f"Complete notification body including Jinja and placeholders calculated in  {time.time() - now:.3f}s") | ||||
|     logger.trace(f"Complete notification body including Jinja and placeholders calculated in  {time.time() - now:.2f}s") | ||||
| 
 | ||||
|     # https://github.com/caronc/apprise/wiki/Development_LogCapture | ||||
|     # Anything higher than or equal to WARNING (which covers things like Connection errors) | ||||
|     # raise it as an exception | ||||
| 
 | ||||
|     sent_objs = [] | ||||
|     from .apprise_plugin.assets import apprise_asset | ||||
| 
 | ||||
|     if 'as_async' in n_object: | ||||
|         apprise_asset.async_mode = n_object.get('as_async') | ||||
| @@ -176,12 +143,13 @@ def process_notification(n_object, datastore): | ||||
| # ( Where we prepare the tokens in the notification to be replaced with actual values ) | ||||
| def create_notification_parameters(n_object, datastore): | ||||
|     from copy import deepcopy | ||||
|     from . import valid_tokens | ||||
| 
 | ||||
|     # in the case we send a test notification from the main settings, there is no UUID. | ||||
|     uuid = n_object['uuid'] if 'uuid' in n_object else '' | ||||
| 
 | ||||
|     if uuid: | ||||
|         watch_title = datastore.data['watching'][uuid].get('title', '') | ||||
|         watch_title = datastore.data['watching'][uuid].label | ||||
|         tag_list = [] | ||||
|         tags = datastore.get_all_tags_for_watch(uuid) | ||||
|         if tags: | ||||
							
								
								
									
										246
									
								
								changedetectionio/notification_service.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										246
									
								
								changedetectionio/notification_service.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,246 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| """ | ||||
| Notification Service Module | ||||
| Extracted from update_worker.py to provide standalone notification functionality | ||||
| for both sync and async workers | ||||
| """ | ||||
|  | ||||
| import time | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
| class NotificationService: | ||||
|     """ | ||||
|     Standalone notification service that handles all notification functionality | ||||
|     previously embedded in the update_worker class | ||||
|     """ | ||||
|      | ||||
|     def __init__(self, datastore, notification_q): | ||||
|         self.datastore = datastore | ||||
|         self.notification_q = notification_q | ||||
|      | ||||
|     def queue_notification_for_watch(self, n_object, watch): | ||||
|         """ | ||||
|         Queue a notification for a watch with full diff rendering and template variables | ||||
|         """ | ||||
|         from changedetectionio import diff | ||||
|         from changedetectionio.notification import default_notification_format_for_watch | ||||
|  | ||||
|         dates = [] | ||||
|         trigger_text = '' | ||||
|  | ||||
|         now = time.time() | ||||
|  | ||||
|         if watch: | ||||
|             watch_history = watch.history | ||||
|             dates = list(watch_history.keys()) | ||||
|             trigger_text = watch.get('trigger_text', []) | ||||
|  | ||||
|         # Load the most recent snapshot; it is used for the current_snapshot token and for matching triggered text | ||||
|         if len(dates): | ||||
|             snapshot_contents = watch.get_history_snapshot(dates[-1]) | ||||
|         else: | ||||
|             snapshot_contents = "No snapshot/history available, the watch should fetch atleast once." | ||||
|  | ||||
|         # If we ended up here with "System default" | ||||
|         if n_object.get('notification_format') == default_notification_format_for_watch: | ||||
|             n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format') | ||||
|  | ||||
|         html_colour_enable = False | ||||
|         # HTML needs linebreak, but MarkDown and Text can use a linefeed | ||||
|         if n_object.get('notification_format') == 'HTML': | ||||
|             line_feed_sep = "<br>" | ||||
|             # Snapshot will be plaintext on the disk, convert to some kind of HTML | ||||
|             snapshot_contents = snapshot_contents.replace('\n', line_feed_sep) | ||||
|         elif n_object.get('notification_format') == 'HTML Color': | ||||
|             line_feed_sep = "<br>" | ||||
|             # Snapshot will be plaintext on the disk, convert to some kind of HTML | ||||
|             snapshot_contents = snapshot_contents.replace('\n', line_feed_sep) | ||||
|             html_colour_enable = True | ||||
|         else: | ||||
|             line_feed_sep = "\n" | ||||
|  | ||||
|         triggered_text = '' | ||||
|         if len(trigger_text): | ||||
|             from . import html_tools | ||||
|             triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text) | ||||
|             if triggered_text: | ||||
|                 triggered_text = line_feed_sep.join(triggered_text) | ||||
|  | ||||
|         # Could be called as a 'test notification' with only 1 snapshot available | ||||
|         prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n" | ||||
|         current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples" | ||||
|  | ||||
|         if len(dates) > 1: | ||||
|             prev_snapshot = watch.get_history_snapshot(dates[-2]) | ||||
|             current_snapshot = watch.get_history_snapshot(dates[-1]) | ||||
|  | ||||
|         n_object.update({ | ||||
|             'current_snapshot': snapshot_contents, | ||||
|             'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable), | ||||
|             'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep), | ||||
|             'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable), | ||||
|             'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True), | ||||
|             'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep), | ||||
|             'notification_timestamp': now, | ||||
|             'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None, | ||||
|             'triggered_text': triggered_text, | ||||
|             'uuid': watch.get('uuid') if watch else None, | ||||
|             'watch_url': watch.get('url') if watch else None, | ||||
|         }) | ||||
|  | ||||
|         if watch: | ||||
|             n_object.update(watch.extra_notification_token_values()) | ||||
|  | ||||
|         logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s") | ||||
|         logger.debug("Queued notification for sending") | ||||
|         self.notification_q.put(n_object) | ||||
|  | ||||
|     def _check_cascading_vars(self, var_name, watch): | ||||
|         """ | ||||
|         Check notification variables in cascading priority: | ||||
|         Individual watch settings > Tag settings > Global settings | ||||
|         """ | ||||
|         from changedetectionio.notification import ( | ||||
|             default_notification_format_for_watch, | ||||
|             default_notification_body, | ||||
|             default_notification_title | ||||
|         ) | ||||
|  | ||||
|         # Would be better if this was some kind of Object where Watch can reference the parent datastore etc | ||||
|         v = watch.get(var_name) | ||||
|         if v and not watch.get('notification_muted'): | ||||
|             if var_name == 'notification_format' and v == default_notification_format_for_watch: | ||||
|                 return self.datastore.data['settings']['application'].get('notification_format') | ||||
|  | ||||
|             return v | ||||
|  | ||||
|         tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid')) | ||||
|         if tags: | ||||
|             for tag_uuid, tag in tags.items(): | ||||
|                 v = tag.get(var_name) | ||||
|                 if v and not tag.get('notification_muted'): | ||||
|                     return v | ||||
|  | ||||
|         if self.datastore.data['settings']['application'].get(var_name): | ||||
|             return self.datastore.data['settings']['application'].get(var_name) | ||||
|  | ||||
|         # Otherwise could be defaults | ||||
|         if var_name == 'notification_format': | ||||
|             return default_notification_format_for_watch | ||||
|         if var_name == 'notification_body': | ||||
|             return default_notification_body | ||||
|         if var_name == 'notification_title': | ||||
|             return default_notification_title | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def send_content_changed_notification(self, watch_uuid): | ||||
|         """ | ||||
|         Send notification when content changes are detected | ||||
|         """ | ||||
|         n_object = {} | ||||
|         watch = self.datastore.data['watching'].get(watch_uuid) | ||||
|         if not watch: | ||||
|             return | ||||
|  | ||||
|         watch_history = watch.history | ||||
|         dates = list(watch_history.keys()) | ||||
|         # Theoretically it's possible that this could be just 1 long, | ||||
|         # - In the case that the timestamp key was not unique | ||||
|         if len(dates) == 1: | ||||
|             raise ValueError( | ||||
|                 "History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?" | ||||
|             ) | ||||
|  | ||||
|         # Should be a better parent getter in the model object | ||||
|  | ||||
|         # Prefer - Individual watch settings > Tag settings >  Global settings (in that order) | ||||
|         n_object['notification_urls'] = self._check_cascading_vars('notification_urls', watch) | ||||
|         n_object['notification_title'] = self._check_cascading_vars('notification_title', watch) | ||||
|         n_object['notification_body'] = self._check_cascading_vars('notification_body', watch) | ||||
|         n_object['notification_format'] = self._check_cascading_vars('notification_format', watch) | ||||
|  | ||||
|         # (Individual watch) Only prepare to notify if the rules above matched | ||||
|         queued = False | ||||
|         if n_object and n_object.get('notification_urls'): | ||||
|             queued = True | ||||
|  | ||||
|             count = watch.get('notification_alert_count', 0) + 1 | ||||
|             self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count}) | ||||
|  | ||||
|             self.queue_notification_for_watch(n_object=n_object, watch=watch) | ||||
|  | ||||
|         return queued | ||||
|  | ||||
|     def send_filter_failure_notification(self, watch_uuid): | ||||
|         """ | ||||
|         Send notification when CSS/XPath filters fail consecutively | ||||
|         """ | ||||
|         threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts') | ||||
|         watch = self.datastore.data['watching'].get(watch_uuid) | ||||
|         if not watch: | ||||
|             return | ||||
|  | ||||
|         n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|                     'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( | ||||
|                         ", ".join(watch['include_filters']), | ||||
|                         threshold), | ||||
|                     'notification_format': 'text'} | ||||
|  | ||||
|         if len(watch['notification_urls']): | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
|  | ||||
|         elif len(self.datastore.data['settings']['application']['notification_urls']): | ||||
|             n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls'] | ||||
|  | ||||
|         # Only prepare to notify if the rules above matched | ||||
|         if 'notification_urls' in n_object: | ||||
|             n_object.update({ | ||||
|                 'watch_url': watch['url'], | ||||
|                 'uuid': watch_uuid, | ||||
|                 'screenshot': None | ||||
|             }) | ||||
|             self.notification_q.put(n_object) | ||||
|             logger.debug(f"Sent filter not found notification for {watch_uuid}") | ||||
|         else: | ||||
|             logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs") | ||||
|  | ||||
|     def send_step_failure_notification(self, watch_uuid, step_n): | ||||
|         """ | ||||
|         Send notification when browser steps fail consecutively | ||||
|         """ | ||||
|         watch = self.datastore.data['watching'].get(watch_uuid, False) | ||||
|         if not watch: | ||||
|             return | ||||
|         threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts') | ||||
|         n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1), | ||||
|                     'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} " | ||||
|                                          "did not appear on the page after {} attempts, did the page change layout? " | ||||
|                                          "Does it need a delay added?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\n" | ||||
|                                          "Thanks - Your omniscient changedetection.io installation :)\n".format(step_n+1, threshold), | ||||
|                     'notification_format': 'text'} | ||||
|  | ||||
|         if len(watch['notification_urls']): | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
|  | ||||
|         elif len(self.datastore.data['settings']['application']['notification_urls']): | ||||
|             n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls'] | ||||
|  | ||||
|         # Only prepare to notify if the rules above matched | ||||
|         if 'notification_urls' in n_object: | ||||
|             n_object.update({ | ||||
|                 'watch_url': watch['url'], | ||||
|                 'uuid': watch_uuid | ||||
|             }) | ||||
|             self.notification_q.put(n_object) | ||||
|             logger.error(f"Sent step not found notification for {watch_uuid}") | ||||
|  | ||||
|  | ||||
| # Convenience functions for creating notification service instances | ||||
| def create_notification_service(datastore, notification_q): | ||||
|     """ | ||||
|     Factory function to create a NotificationService instance | ||||
|     """ | ||||
|     return NotificationService(datastore, notification_q) | ||||
							
								
								
									
										82
									
								
								changedetectionio/pluggy_interface.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								changedetectionio/pluggy_interface.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,82 @@ | ||||
| import pluggy | ||||
| import os | ||||
| import importlib | ||||
| import sys | ||||
|  | ||||
| # Global plugin namespace for changedetection.io | ||||
| PLUGIN_NAMESPACE = "changedetectionio" | ||||
|  | ||||
| hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE) | ||||
| hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE) | ||||
|  | ||||
|  | ||||
| class ChangeDetectionSpec: | ||||
|     """Hook specifications for extending changedetection.io functionality.""" | ||||
|  | ||||
|     @hookspec | ||||
|     def ui_edit_stats_extras(watch): | ||||
|         """Return HTML content to add to the stats tab in the edit view. | ||||
|          | ||||
|         Args: | ||||
|             watch: The watch object being edited | ||||
|              | ||||
|         Returns: | ||||
|             str: HTML content to be inserted in the stats tab | ||||
|         """ | ||||
|         pass | ||||
|  | ||||
|  | ||||
| # Set up Plugin Manager | ||||
| plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE) | ||||
|  | ||||
| # Register hookspecs | ||||
| plugin_manager.add_hookspecs(ChangeDetectionSpec) | ||||
|  | ||||
| # Load plugins from subdirectories | ||||
| def load_plugins_from_directories(): | ||||
|     # Dictionary of directories to scan for plugins | ||||
|     plugin_dirs = { | ||||
|         'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'), | ||||
|         # Add more plugin directories here as needed | ||||
|     } | ||||
|      | ||||
|     # Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory | ||||
|      | ||||
|     for dir_name, dir_path in plugin_dirs.items(): | ||||
|         if not os.path.exists(dir_path): | ||||
|             continue | ||||
|              | ||||
|         # Get all Python files (excluding __init__.py) | ||||
|         for filename in os.listdir(dir_path): | ||||
|             if filename.endswith(".py") and filename != "__init__.py": | ||||
|                 module_name = filename[:-3]  # Remove .py extension | ||||
|                 module_path = f"changedetectionio.{dir_name}.plugins.{module_name}" | ||||
|                  | ||||
|                 try: | ||||
|                     module = importlib.import_module(module_path) | ||||
|                     # Register the plugin with pluggy | ||||
|                     plugin_manager.register(module, module_name) | ||||
|                 except (ImportError, AttributeError) as e: | ||||
|                     print(f"Error loading plugin {module_name}: {e}") | ||||
|  | ||||
| # Load plugins | ||||
| # Runs at import time: importing this module scans the built-in plugin directories | ||||
| load_plugins_from_directories() | ||||
|  | ||||
| # Discover installed plugins from external packages (if any) | ||||
| # The entry-point group name passed here is the plugin namespace string | ||||
| plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) | ||||
|  | ||||
| # Helper function to collect UI stats extras from all plugins | ||||
| def collect_ui_edit_stats_extras(watch): | ||||
|     """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras""" | ||||
|     extras_content = [] | ||||
|      | ||||
|     # Get all plugins that implement the ui_edit_stats_extras hook | ||||
|     results = plugin_manager.hook.ui_edit_stats_extras(watch=watch) | ||||
|      | ||||
|     # If we have results, add them to our content | ||||
|     if results: | ||||
|         for result in results: | ||||
|             if result:  # Skip empty results | ||||
|                 extras_content.append(result) | ||||
|              | ||||
|     return "\n".join(extras_content) if extras_content else "" | ||||
| @@ -27,7 +27,7 @@ class difference_detection_processor(): | ||||
|         # Generic fetcher that should be extended (requests, playwright etc) | ||||
|         self.fetcher = Fetcher() | ||||
|  | ||||
|     def call_browser(self, preferred_proxy_id=None): | ||||
|     async def call_browser(self, preferred_proxy_id=None): | ||||
|  | ||||
|         from requests.structures import CaseInsensitiveDict | ||||
|  | ||||
| @@ -89,7 +89,7 @@ class difference_detection_processor(): | ||||
|                 proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url') | ||||
|                 logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}") | ||||
|             else: | ||||
|                 logger.debug(f"Skipping adding proxy data when custom Browser endpoint is specified. ") | ||||
|                 logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ") | ||||
|  | ||||
|         # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need. | ||||
|         # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc) | ||||
| @@ -146,20 +146,22 @@ class difference_detection_processor(): | ||||
|  | ||||
|         # And here we go! call the right browser with browser-specific settings | ||||
|         empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|  | ||||
|         self.fetcher.run(url=url, | ||||
|                          timeout=timeout, | ||||
|                          request_headers=request_headers, | ||||
|                          request_body=request_body, | ||||
|                          request_method=request_method, | ||||
|                          ignore_status_codes=ignore_status_codes, | ||||
|                          current_include_filters=self.watch.get('include_filters'), | ||||
|                          is_binary=is_binary, | ||||
|                          empty_pages_are_a_change=empty_pages_are_a_change | ||||
|                          ) | ||||
|         # All fetchers are now async | ||||
|         await self.fetcher.run( | ||||
|             current_include_filters=self.watch.get('include_filters'), | ||||
|             empty_pages_are_a_change=empty_pages_are_a_change, | ||||
|             fetch_favicon=self.watch.favicon_is_expired(), | ||||
|             ignore_status_codes=ignore_status_codes, | ||||
|             is_binary=is_binary, | ||||
|             request_body=request_body, | ||||
|             request_headers=request_headers, | ||||
|             request_method=request_method, | ||||
|             timeout=timeout, | ||||
|             url=url, | ||||
|        ) | ||||
|  | ||||
|         #@todo .quit here could go on close object, so we can run JS if change-detected | ||||
|         self.fetcher.quit() | ||||
|         self.fetcher.quit(watch=self.watch) | ||||
|  | ||||
|         # After init, call run_changedetection() which will do the actual change-detection | ||||
|  | ||||
|   | ||||
| @@ -7,7 +7,7 @@ import urllib3 | ||||
| import time | ||||
|  | ||||
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | ||||
| name = 'Re-stock & Price detection for single product pages' | ||||
| name = 'Re-stock & Price detection for pages with a SINGLE product' | ||||
| description = 'Detects if the product goes back to in-stock' | ||||
|  | ||||
| class UnableToExtractRestockData(Exception): | ||||
| @@ -79,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock: | ||||
|     # First phase, dead simple scanning of anything that looks useful | ||||
|     value = Restock() | ||||
|     if data: | ||||
|         logger.debug(f"Using jsonpath to find price/availability/etc") | ||||
|         logger.debug("Using jsonpath to find price/availability/etc") | ||||
|         price_parse = parse('$..(price|Price)') | ||||
|         pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )') | ||||
|         availability_parse = parse('$..(availability|Availability)') | ||||
| @@ -110,7 +110,7 @@ def get_itemprop_availability(html_content) -> Restock: | ||||
|  | ||||
|         # Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:) | ||||
|         if not value.get('price') or value.get('availability'): | ||||
|             logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..") | ||||
|             logger.debug("Alternatively digging through OpenGraph properties for restock/price info..") | ||||
|             jsonpath_expr = parse('$..properties') | ||||
|  | ||||
|             for match in jsonpath_expr.find(data): | ||||
|   | ||||
| @@ -15,7 +15,7 @@ def _task(watch, update_handler): | ||||
|     except FilterNotFoundInResponse as e: | ||||
|         text_after_filter = f"Filter not found in HTML: {str(e)}" | ||||
|     except ReplyWithContentButNoText as e: | ||||
|         text_after_filter = f"Filter found but no text (empty result)" | ||||
|         text_after_filter = "Filter found but no text (empty result)" | ||||
|     except Exception as e: | ||||
|         text_after_filter = f"Error: {str(e)}" | ||||
|  | ||||
|   | ||||
| @@ -251,7 +251,7 @@ class perform_site_check(difference_detection_processor): | ||||
|         update_obj["last_check_status"] = self.fetcher.get_last_status_code() | ||||
|  | ||||
|         # 615 Extract text by regex | ||||
|         extract_text = watch.get('extract_text', []) | ||||
|         extract_text = list(dict.fromkeys(watch.get('extract_text', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text'))) | ||||
|         if len(extract_text) > 0: | ||||
|             regex_matched_output = [] | ||||
|             for s_re in extract_text: | ||||
| @@ -296,6 +296,8 @@ class perform_site_check(difference_detection_processor): | ||||
| ### CALCULATE MD5 | ||||
|         # If there's text to ignore | ||||
|         text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) | ||||
|         text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text') | ||||
|  | ||||
|         text_for_checksuming = stripped_text_from_html | ||||
|         if text_to_ignore: | ||||
|             text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore) | ||||
| @@ -308,8 +310,7 @@ class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|         ############ Blocking rules, after checksum ################# | ||||
|         blocked = False | ||||
|  | ||||
|         trigger_text = watch.get('trigger_text', []) | ||||
|         trigger_text = list(dict.fromkeys(watch.get('trigger_text', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text'))) | ||||
|         if len(trigger_text): | ||||
|             # Assume blocked | ||||
|             blocked = True | ||||
| @@ -323,7 +324,7 @@ class perform_site_check(difference_detection_processor): | ||||
|             if result: | ||||
|                 blocked = False | ||||
|  | ||||
|         text_should_not_be_present = watch.get('text_should_not_be_present', []) | ||||
|         text_should_not_be_present = list(dict.fromkeys(watch.get('text_should_not_be_present', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present'))) | ||||
|         if len(text_should_not_be_present): | ||||
|             # If anything matched, then we should block a change from happening | ||||
|             result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), | ||||
|   | ||||
							
								
								
									
										435
									
								
								changedetectionio/queue_handlers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										435
									
								
								changedetectionio/queue_handlers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,435 @@ | ||||
| from blinker import signal | ||||
| from loguru import logger | ||||
| from typing import Dict, List, Any, Optional | ||||
| import heapq | ||||
| import queue | ||||
| import threading | ||||
|  | ||||
| try: | ||||
|     import janus | ||||
| except ImportError: | ||||
|     logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus") | ||||
|     raise | ||||
|  | ||||
|  | ||||
| class RecheckPriorityQueue: | ||||
|     """ | ||||
|     Ultra-reliable priority queue using janus for async/sync bridging. | ||||
|      | ||||
|     CRITICAL DESIGN NOTE: Both sync_q and async_q are required because: | ||||
|     - sync_q: Used by Flask routes, ticker threads, and other synchronous code | ||||
|     - async_q: Used by async workers (the actual fetchers/processors) and coroutines | ||||
|      | ||||
|     DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts: | ||||
|     - Synchronous code (Flask, threads) cannot use async methods without blocking | ||||
|     - Async code cannot use sync methods without blocking the event loop | ||||
|     - janus provides the only safe bridge between these two worlds | ||||
|      | ||||
|     Attempting to unify to async-only would require: | ||||
|     - Converting all Flask routes to async (major breaking change) | ||||
|     - Using asyncio.run() in sync contexts (causes deadlocks) | ||||
|     - Thread-pool wrapping (adds complexity and overhead) | ||||
|      | ||||
|     Minimal implementation focused on reliability: | ||||
|     - Pure janus for sync/async bridge | ||||
|     - Thread-safe priority ordering   | ||||
|     - Bulletproof error handling with critical logging | ||||
|     """ | ||||
|      | ||||
|     def __init__(self, maxsize: int = 0): | ||||
|         try: | ||||
|             self._janus_queue = janus.Queue(maxsize=maxsize) | ||||
|             # BOTH interfaces required - see class docstring for why | ||||
|             self.sync_q = self._janus_queue.sync_q   # Flask routes, ticker thread | ||||
|             self.async_q = self._janus_queue.async_q # Async workers | ||||
|              | ||||
|             # Priority storage - thread-safe | ||||
|             self._priority_items = [] | ||||
|             self._lock = threading.RLock() | ||||
|              | ||||
|             # Signals for UI updates | ||||
|             self.queue_length_signal = signal('queue_length') | ||||
|              | ||||
|             logger.debug("RecheckPriorityQueue initialized successfully") | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}") | ||||
|             raise | ||||
|      | ||||
|     # SYNC INTERFACE (for ticker thread) | ||||
|     def put(self, item, block: bool = True, timeout: Optional[float] = None): | ||||
|         """Thread-safe sync put with priority ordering""" | ||||
|         try: | ||||
|             # Add to priority storage | ||||
|             with self._lock: | ||||
|                 heapq.heappush(self._priority_items, item) | ||||
|              | ||||
|             # Notify via janus sync queue | ||||
|             self.sync_q.put(True, block=block, timeout=timeout) | ||||
|              | ||||
|             # Emit signals | ||||
|             self._emit_put_signals(item) | ||||
|              | ||||
|             logger.debug(f"Successfully queued item: {self._get_item_uuid(item)}") | ||||
|             return True | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}") | ||||
|             # Remove from priority storage if janus put failed | ||||
|             try: | ||||
|                 with self._lock: | ||||
|                     if item in self._priority_items: | ||||
|                         self._priority_items.remove(item) | ||||
|                         heapq.heapify(self._priority_items) | ||||
|             except Exception as cleanup_e: | ||||
|                 logger.critical(f"CRITICAL: Failed to cleanup after put failure: {str(e)}") | ||||
|             return False | ||||
|      | ||||
|     def get(self, block: bool = True, timeout: Optional[float] = None): | ||||
|         """Thread-safe sync get with priority ordering""" | ||||
|         try: | ||||
|             # Wait for notification | ||||
|             self.sync_q.get(block=block, timeout=timeout) | ||||
|              | ||||
|             # Get highest priority item | ||||
|             with self._lock: | ||||
|                 if not self._priority_items: | ||||
|                     logger.critical(f"CRITICAL: Queue notification received but no priority items available") | ||||
|                     raise Exception("Priority queue inconsistency") | ||||
|                 item = heapq.heappop(self._priority_items) | ||||
|              | ||||
|             # Emit signals | ||||
|             self._emit_get_signals() | ||||
|              | ||||
|             logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}") | ||||
|             return item | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to get item from queue: {str(e)}") | ||||
|             raise | ||||
|      | ||||
|     # ASYNC INTERFACE (for workers) | ||||
|     async def async_put(self, item): | ||||
|         """Pure async put with priority ordering""" | ||||
|         try: | ||||
|             # Add to priority storage | ||||
|             with self._lock: | ||||
|                 heapq.heappush(self._priority_items, item) | ||||
|              | ||||
|             # Notify via janus async queue | ||||
|             await self.async_q.put(True) | ||||
|              | ||||
|             # Emit signals | ||||
|             self._emit_put_signals(item) | ||||
|              | ||||
|             logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}") | ||||
|             return True | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}") | ||||
|             # Remove from priority storage if janus put failed | ||||
|             try: | ||||
|                 with self._lock: | ||||
|                     if item in self._priority_items: | ||||
|                         self._priority_items.remove(item) | ||||
|                         heapq.heapify(self._priority_items) | ||||
|             except Exception as cleanup_e: | ||||
|                 logger.critical(f"CRITICAL: Failed to cleanup after async put failure: {str(e)}") | ||||
|             return False | ||||
|      | ||||
|     async def async_get(self): | ||||
|         """Pure async get with priority ordering""" | ||||
|         try: | ||||
|             # Wait for notification | ||||
|             await self.async_q.get() | ||||
|              | ||||
|             # Get highest priority item | ||||
|             with self._lock: | ||||
|                 if not self._priority_items: | ||||
|                     logger.critical(f"CRITICAL: Async queue notification received but no priority items available") | ||||
|                     raise Exception("Priority queue inconsistency") | ||||
|                 item = heapq.heappop(self._priority_items) | ||||
|              | ||||
|             # Emit signals | ||||
|             self._emit_get_signals() | ||||
|              | ||||
|             logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}") | ||||
|             return item | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}") | ||||
|             raise | ||||
|      | ||||
|     # UTILITY METHODS | ||||
|     def qsize(self) -> int: | ||||
|         """Get current queue size""" | ||||
|         try: | ||||
|             with self._lock: | ||||
|                 return len(self._priority_items) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to get queue size: {str(e)}") | ||||
|             return 0 | ||||
|      | ||||
|     def empty(self) -> bool: | ||||
|         """Check if queue is empty""" | ||||
|         return self.qsize() == 0 | ||||
|      | ||||
|     def close(self): | ||||
|         """Close the janus queue""" | ||||
|         try: | ||||
|             self._janus_queue.close() | ||||
|             logger.debug("RecheckPriorityQueue closed successfully") | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}") | ||||
|      | ||||
|     # COMPATIBILITY METHODS (from original implementation) | ||||
|     @property | ||||
|     def queue(self): | ||||
|         """Provide compatibility with original queue access""" | ||||
|         try: | ||||
|             with self._lock: | ||||
|                 return list(self._priority_items) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to get queue list: {str(e)}") | ||||
|             return [] | ||||
|      | ||||
|     def get_uuid_position(self, target_uuid: str) -> Dict[str, Any]: | ||||
|         """Find position of UUID in queue""" | ||||
|         try: | ||||
|             with self._lock: | ||||
|                 queue_list = list(self._priority_items) | ||||
|                 total_items = len(queue_list) | ||||
|                  | ||||
|                 if total_items == 0: | ||||
|                     return {'position': None, 'total_items': 0, 'priority': None, 'found': False} | ||||
|                  | ||||
|                 # Find target item | ||||
|                 for item in queue_list: | ||||
|                     if (hasattr(item, 'item') and isinstance(item.item, dict) and  | ||||
|                         item.item.get('uuid') == target_uuid): | ||||
|                          | ||||
|                         # Count items with higher priority | ||||
|                         position = sum(1 for other in queue_list if other.priority < item.priority) | ||||
|                         return { | ||||
|                             'position': position, | ||||
|                             'total_items': total_items,  | ||||
|                             'priority': item.priority, | ||||
|                             'found': True | ||||
|                         } | ||||
|                  | ||||
|                 return {'position': None, 'total_items': total_items, 'priority': None, 'found': False} | ||||
|                  | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to get UUID position for {target_uuid}: {str(e)}") | ||||
|             return {'position': None, 'total_items': 0, 'priority': None, 'found': False} | ||||
|      | ||||
|     def get_all_queued_uuids(self, limit: Optional[int] = None, offset: int = 0) -> Dict[str, Any]: | ||||
|         """Get all queued UUIDs with pagination""" | ||||
|         try: | ||||
|             with self._lock: | ||||
|                 queue_list = sorted(self._priority_items)  # Sort by priority | ||||
|                 total_items = len(queue_list) | ||||
|                  | ||||
|                 if total_items == 0: | ||||
|                     return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False} | ||||
|                  | ||||
|                 # Apply pagination | ||||
|                 end_idx = min(offset + limit, total_items) if limit else total_items | ||||
|                 items_to_process = queue_list[offset:end_idx] | ||||
|                  | ||||
|                 result = [] | ||||
|                 for position, item in enumerate(items_to_process, start=offset): | ||||
|                     if (hasattr(item, 'item') and isinstance(item.item, dict) and  | ||||
|                         'uuid' in item.item): | ||||
|                         result.append({ | ||||
|                             'uuid': item.item['uuid'], | ||||
|                             'position': position, | ||||
|                             'priority': item.priority | ||||
|                         }) | ||||
|                  | ||||
|                 return { | ||||
|                     'items': result, | ||||
|                     'total_items': total_items, | ||||
|                     'returned_items': len(result), | ||||
|                     'has_more': (offset + len(result)) < total_items | ||||
|                 } | ||||
|                  | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to get all queued UUIDs: {str(e)}") | ||||
|             return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False} | ||||
|      | ||||
|     def get_queue_summary(self) -> Dict[str, Any]: | ||||
|         """Get queue summary statistics""" | ||||
|         try: | ||||
|             with self._lock: | ||||
|                 queue_list = list(self._priority_items) | ||||
|                 total_items = len(queue_list) | ||||
|                  | ||||
|                 if total_items == 0: | ||||
|                     return { | ||||
|                         'total_items': 0, 'priority_breakdown': {}, | ||||
|                         'immediate_items': 0, 'clone_items': 0, 'scheduled_items': 0 | ||||
|                     } | ||||
|                  | ||||
|                 immediate_items = clone_items = scheduled_items = 0 | ||||
|                 priority_counts = {} | ||||
|                  | ||||
|                 for item in queue_list: | ||||
|                     priority = item.priority | ||||
|                     priority_counts[priority] = priority_counts.get(priority, 0) + 1 | ||||
|                      | ||||
|                     if priority == 1: | ||||
|                         immediate_items += 1 | ||||
|                     elif priority == 5: | ||||
|                         clone_items += 1 | ||||
|                     elif priority > 100: | ||||
|                         scheduled_items += 1 | ||||
|                  | ||||
|                 return { | ||||
|                     'total_items': total_items, | ||||
|                     'priority_breakdown': priority_counts, | ||||
|                     'immediate_items': immediate_items, | ||||
|                     'clone_items': clone_items, | ||||
|                     'scheduled_items': scheduled_items, | ||||
|                     'min_priority': min(priority_counts.keys()) if priority_counts else None, | ||||
|                     'max_priority': max(priority_counts.keys()) if priority_counts else None | ||||
|                 } | ||||
|                  | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to get queue summary: {str(e)}") | ||||
|             return {'total_items': 0, 'priority_breakdown': {}, 'immediate_items': 0,  | ||||
|                    'clone_items': 0, 'scheduled_items': 0} | ||||
|      | ||||
|     # PRIVATE METHODS | ||||
|     def _get_item_uuid(self, item) -> str: | ||||
|         """Safely extract UUID from item for logging""" | ||||
|         try: | ||||
|             if hasattr(item, 'item') and isinstance(item.item, dict): | ||||
|                 return item.item.get('uuid', 'unknown') | ||||
|         except Exception: | ||||
|             pass | ||||
|         return 'unknown' | ||||
|      | ||||
|     def _emit_put_signals(self, item): | ||||
|         """Emit signals when item is added""" | ||||
|         try: | ||||
|             # Watch update signal | ||||
|             if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item: | ||||
|                 watch_check_update = signal('watch_check_update') | ||||
|                 if watch_check_update: | ||||
|                     watch_check_update.send(watch_uuid=item.item['uuid']) | ||||
|              | ||||
|             # Queue length signal | ||||
|             if self.queue_length_signal: | ||||
|                 self.queue_length_signal.send(length=self.qsize()) | ||||
|                  | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}") | ||||
|      | ||||
|     def _emit_get_signals(self): | ||||
|         """Emit signals when item is removed""" | ||||
|         try: | ||||
|             if self.queue_length_signal: | ||||
|                 self.queue_length_signal.send(length=self.qsize()) | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to emit get signals: {str(e)}") | ||||
|  | ||||
|  | ||||
| class NotificationQueue: | ||||
|     """ | ||||
|     Ultra-reliable notification queue using pure janus. | ||||
|      | ||||
|     CRITICAL DESIGN NOTE: Both sync_q and async_q are required because: | ||||
|     - sync_q: Used by Flask routes, ticker threads, and other synchronous code | ||||
|     - async_q: Used by async workers and coroutines | ||||
|      | ||||
|     DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts. | ||||
|     See RecheckPriorityQueue docstring above for detailed explanation. | ||||
|      | ||||
|     Simple wrapper around janus with bulletproof error handling. | ||||
|     """ | ||||
|      | ||||
|     def __init__(self, maxsize: int = 0): | ||||
|         try: | ||||
|             self._janus_queue = janus.Queue(maxsize=maxsize) | ||||
|             # BOTH interfaces required - see class docstring for why | ||||
|             self.sync_q = self._janus_queue.sync_q   # Flask routes, threads | ||||
|             self.async_q = self._janus_queue.async_q # Async workers | ||||
|             self.notification_event_signal = signal('notification_event') | ||||
|             logger.debug("NotificationQueue initialized successfully") | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}") | ||||
|             raise | ||||
|      | ||||
|     def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None): | ||||
|         """Thread-safe sync put with signal emission""" | ||||
|         try: | ||||
|             self.sync_q.put(item, block=block, timeout=timeout) | ||||
|             self._emit_notification_signal(item) | ||||
|             logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}") | ||||
|             return True | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}") | ||||
|             return False | ||||
|      | ||||
|     async def async_put(self, item: Dict[str, Any]): | ||||
|         """Pure async put with signal emission""" | ||||
|         try: | ||||
|             await self.async_q.put(item) | ||||
|             self._emit_notification_signal(item) | ||||
|             logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}") | ||||
|             return True | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}") | ||||
|             return False | ||||
|      | ||||
|     def get(self, block: bool = True, timeout: Optional[float] = None): | ||||
|         """Thread-safe sync get""" | ||||
|         try: | ||||
|             return self.sync_q.get(block=block, timeout=timeout) | ||||
|         except queue.Empty as e: | ||||
|             raise e | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to get notification: {str(e)}") | ||||
|             raise e | ||||
|      | ||||
|     async def async_get(self): | ||||
|         """Pure async get""" | ||||
|         try: | ||||
|             return await self.async_q.get() | ||||
|         except queue.Empty as e: | ||||
|             raise e | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to async get notification: {str(e)}") | ||||
|             raise e | ||||
|      | ||||
|     def qsize(self) -> int: | ||||
|         """Get current queue size""" | ||||
|         try: | ||||
|             return self.sync_q.qsize() | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}") | ||||
|             return 0 | ||||
|      | ||||
|     def empty(self) -> bool: | ||||
|         """Check if queue is empty""" | ||||
|         return self.qsize() == 0 | ||||
|      | ||||
|     def close(self): | ||||
|         """Close the janus queue""" | ||||
|         try: | ||||
|             self._janus_queue.close() | ||||
|             logger.debug("NotificationQueue closed successfully") | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}") | ||||
|      | ||||
|     def _emit_notification_signal(self, item: Dict[str, Any]): | ||||
|         """Emit notification signal""" | ||||
|         try: | ||||
|             if self.notification_event_signal and isinstance(item, dict): | ||||
|                 watch_uuid = item.get('uuid') | ||||
|                 if watch_uuid: | ||||
|                     self.notification_event_signal.send(watch_uuid=watch_uuid) | ||||
|                 else: | ||||
|                     self.notification_event_signal.send() | ||||
|         except Exception as e: | ||||
|             logger.critical(f"CRITICAL: Failed to emit notification signal: {str(e)}") | ||||
							
								
								
									
										124
									
								
								changedetectionio/realtime/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										124
									
								
								changedetectionio/realtime/README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,124 @@ | ||||
| # Real-time Socket.IO Implementation | ||||
|  | ||||
| This directory contains the Socket.IO implementation for changedetection.io's real-time updates. | ||||
|  | ||||
| ## Architecture Overview | ||||
|  | ||||
| The real-time system provides live updates to the web interface for: | ||||
| - Watch status changes (checking, completed, errors) | ||||
| - Queue length updates   | ||||
| - General statistics updates | ||||
|  | ||||
| ## Current Implementation | ||||
|  | ||||
| ### Socket.IO Configuration | ||||
| - **Async Mode**: `threading` (default) or `gevent` (optional via SOCKETIO_MODE env var) | ||||
| - **Server**: Flask-SocketIO with threading support | ||||
| - **Background Tasks**: Python threading with daemon threads | ||||
|  | ||||
| ### Async Worker Integration | ||||
| - **Workers**: Async workers using asyncio for watch processing | ||||
| - **Queue**: AsyncSignalPriorityQueue for job distribution | ||||
| - **Signals**: Blinker signals for real-time updates between workers and Socket.IO | ||||
|  | ||||
| ### Environment Variables | ||||
| - `SOCKETIO_MODE=threading` (default, recommended) | ||||
| - `SOCKETIO_MODE=gevent` (optional, has cross-platform limitations) | ||||
|  | ||||
| ## Architecture Decision: Why Threading Mode? | ||||
|  | ||||
| ### Previous Issues with Eventlet | ||||
| **Eventlet was completely removed** due to fundamental compatibility issues: | ||||
|  | ||||
| 1. **Monkey Patching Conflicts**: `eventlet.monkey_patch()` globally replaced Python's threading/socket modules, causing conflicts with: | ||||
|    - Playwright's synchronous browser automation | ||||
|    - Async worker event loops | ||||
|    - Various Python libraries expecting real threading | ||||
|  | ||||
| 2. **Python 3.12+ Compatibility**: Eventlet had issues with newer Python versions and asyncio integration | ||||
|  | ||||
| 3. **CVE-2023-29483**: Security vulnerability in eventlet's dnspython dependency | ||||
|  | ||||
| ### Current Solution Benefits | ||||
| ✅ **Threading Mode Advantages**: | ||||
| - Full compatibility with async workers and Playwright | ||||
| - No monkey patching - uses standard Python threading | ||||
| - Better Python 3.12+ support | ||||
| - Cross-platform compatibility (Windows, macOS, Linux) | ||||
| - No external async library dependencies | ||||
| - Fast shutdown capabilities | ||||
|  | ||||
| ✅ **Optional Gevent Support**: | ||||
| - Available via `SOCKETIO_MODE=gevent` for high-concurrency scenarios | ||||
| - Cross-platform limitations documented in requirements.txt | ||||
| - Not recommended as default due to Windows socket limits and macOS ARM build issues | ||||
|  | ||||
| ## Socket.IO Mode Configuration | ||||
|  | ||||
| ### Threading Mode (Default) | ||||
| ```python | ||||
| # Enabled automatically | ||||
| async_mode = 'threading' | ||||
| socketio = SocketIO(app, async_mode='threading') | ||||
| ``` | ||||
|  | ||||
| ### Gevent Mode (Optional) | ||||
| ```bash | ||||
| # Set environment variable | ||||
| export SOCKETIO_MODE=gevent | ||||
| ``` | ||||
|  | ||||
| ## Background Tasks | ||||
|  | ||||
| ### Queue Polling | ||||
| - **Threading Mode**: `threading.Thread` with `threading.Event` for shutdown | ||||
| - **Signal Handling**: Blinker signals for watch state changes | ||||
| - **Real-time Updates**: Direct Socket.IO `emit()` calls to connected clients | ||||
|  | ||||
| ### Worker Integration | ||||
| - **Async Workers**: Run in separate asyncio event loop thread | ||||
| - **Communication**: AsyncSignalPriorityQueue bridges async workers and Socket.IO | ||||
| - **Updates**: Real-time updates sent when workers complete tasks | ||||
|  | ||||
| ## Files in This Directory | ||||
|  | ||||
| - `socket_server.py`: Main Socket.IO initialization and event handling | ||||
| - `events.py`: Watch operation event handlers   | ||||
| - `__init__.py`: Module initialization | ||||
|  | ||||
| ## Production Deployment | ||||
|  | ||||
| ### Recommended WSGI Servers | ||||
| For production deployments: | ||||
| - **Gunicorn**: `gunicorn --worker-class gevent changedetection:app` (only when `SOCKETIO_MODE=gevent`; the `eventlet` worker class no longer applies because eventlet was removed) | ||||
| - **uWSGI**: With threading support | ||||
| - **Docker**: Built-in Flask server works well for containerized deployments | ||||
|  | ||||
| ### Performance Considerations | ||||
| - Threading mode: Better memory usage, standard Python threading | ||||
| - Gevent mode: Higher concurrency but platform limitations | ||||
| - Async workers: Separate from Socket.IO, provides scalability | ||||
|  | ||||
| ## Environment Variables | ||||
|  | ||||
| | Variable | Default | Description | | ||||
| |----------|---------|-------------| | ||||
| | `SOCKETIO_MODE` | `threading` | Socket.IO async mode (`threading` or `gevent`) | | ||||
| | `FETCH_WORKERS` | `10` | Number of async workers for watch processing | | ||||
| | `CHANGEDETECTION_HOST` | `0.0.0.0` | Server bind address | | ||||
| | `CHANGEDETECTION_PORT` | `5000` | Server port | | ||||
|  | ||||
| ## Debugging Tips | ||||
|  | ||||
| 1. **Socket.IO Issues**: Check browser dev tools for WebSocket connection errors | ||||
| 2. **Threading Issues**: Monitor with `ps -T` to check thread count   | ||||
| 3. **Worker Issues**: Use `/worker-health` endpoint to check async worker status | ||||
| 4. **Queue Issues**: Use `/queue-status` endpoint to monitor job queue | ||||
| 5. **Performance**: Use `/gc-cleanup` endpoint to trigger memory cleanup | ||||
|  | ||||
| ## Migration Notes | ||||
|  | ||||
| If upgrading from eventlet-based versions: | ||||
| - Remove any `EVENTLET_*` environment variables | ||||
| - No code changes needed - Socket.IO mode is automatically configured | ||||
| - Optional: Set `SOCKETIO_MODE=gevent` if high concurrency is required and platform supports it | ||||
							
								
								
									
										3
									
								
								changedetectionio/realtime/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								changedetectionio/realtime/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,3 @@ | ||||
| """ | ||||
| Socket.IO realtime updates module for changedetection.io | ||||
| """ | ||||
							
								
								
									
										58
									
								
								changedetectionio/realtime/events.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								changedetectionio/realtime/events.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| from flask_socketio import emit | ||||
| from loguru import logger | ||||
| from blinker import signal | ||||
|  | ||||
|  | ||||
| def register_watch_operation_handlers(socketio, datastore): | ||||
|     """Register Socket.IO event handlers for watch operations""" | ||||
|      | ||||
|     @socketio.on('watch_operation') | ||||
|     def handle_watch_operation(data): | ||||
|         """Handle watch operations like pause, mute, recheck via Socket.IO""" | ||||
|         try: | ||||
|             op = data.get('op') | ||||
|             uuid = data.get('uuid') | ||||
|              | ||||
|             logger.debug(f"Socket.IO: Received watch operation '{op}' for UUID {uuid}") | ||||
|              | ||||
|             if not op or not uuid: | ||||
|                 emit('operation_result', {'success': False, 'error': 'Missing operation or UUID'}) | ||||
|                 return | ||||
|              | ||||
|             # Check if watch exists | ||||
|             if not datastore.data['watching'].get(uuid): | ||||
|                 emit('operation_result', {'success': False, 'error': 'Watch not found'}) | ||||
|                 return | ||||
|              | ||||
|             watch = datastore.data['watching'][uuid] | ||||
|              | ||||
|             # Perform the operation | ||||
|             if op == 'pause': | ||||
|                 watch.toggle_pause() | ||||
|                 logger.info(f"Socket.IO: Toggled pause for watch {uuid}") | ||||
|             elif op == 'mute': | ||||
|                 watch.toggle_mute() | ||||
|                 logger.info(f"Socket.IO: Toggled mute for watch {uuid}") | ||||
|             elif op == 'recheck': | ||||
|                 # Import here to avoid circular imports | ||||
|                 from changedetectionio.flask_app import update_q | ||||
|                 from changedetectionio import queuedWatchMetaData | ||||
|                 from changedetectionio import worker_handler | ||||
|                  | ||||
|                 worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid})) | ||||
|                 logger.info(f"Socket.IO: Queued recheck for watch {uuid}") | ||||
|             else: | ||||
|                 emit('operation_result', {'success': False, 'error': f'Unknown operation: {op}'}) | ||||
|                 return | ||||
|              | ||||
|             # Send signal to update UI | ||||
|             watch_check_update = signal('watch_check_update') | ||||
|             if watch_check_update: | ||||
|                 watch_check_update.send(watch_uuid=uuid) | ||||
|              | ||||
|             # Send success response to client | ||||
|             emit('operation_result', {'success': True, 'operation': op, 'uuid': uuid}) | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"Socket.IO error in handle_watch_operation: {str(e)}") | ||||
|             emit('operation_result', {'success': False, 'error': str(e)}) | ||||
							
								
								
									
										407
									
								
								changedetectionio/realtime/socket_server.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										407
									
								
								changedetectionio/realtime/socket_server.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,407 @@ | ||||
| import timeago | ||||
| from flask_socketio import SocketIO | ||||
|  | ||||
| import time | ||||
| import os | ||||
| from loguru import logger | ||||
| from blinker import signal | ||||
|  | ||||
| from changedetectionio import strtobool | ||||
|  | ||||
|  | ||||
| class SignalHandler: | ||||
|     """A standalone class to receive signals""" | ||||
|  | ||||
|     def __init__(self, socketio_instance, datastore): | ||||
|         self.socketio_instance = socketio_instance | ||||
|         self.datastore = datastore | ||||
|  | ||||
|         # Connect to the watch_check_update signal | ||||
|         from changedetectionio.flask_app import watch_check_update as wcc | ||||
|         wcc.connect(self.handle_signal, weak=False) | ||||
|         #        logger.info("SignalHandler: Connected to signal from direct import") | ||||
|  | ||||
|         # Connect to the queue_length signal | ||||
|         queue_length_signal = signal('queue_length') | ||||
|         queue_length_signal.connect(self.handle_queue_length, weak=False) | ||||
|         #       logger.info("SignalHandler: Connected to queue_length signal") | ||||
|  | ||||
|         watch_delete_signal = signal('watch_deleted') | ||||
|         watch_delete_signal.connect(self.handle_deleted_signal, weak=False) | ||||
|  | ||||
|         watch_favicon_bumped_signal = signal('watch_favicon_bump') | ||||
|         watch_favicon_bumped_signal.connect(self.handle_watch_bumped_favicon_signal, weak=False) | ||||
|  | ||||
|         # Connect to the notification_event signal | ||||
|         notification_event_signal = signal('notification_event') | ||||
|         notification_event_signal.connect(self.handle_notification_event, weak=False) | ||||
|         logger.info("SignalHandler: Connected to notification_event signal") | ||||
|  | ||||
|         # Create and start the queue update thread using standard threading | ||||
|         import threading | ||||
|         self.polling_emitter_thread = threading.Thread( | ||||
|             target=self.polling_emit_running_or_queued_watches_threaded, | ||||
|             daemon=True | ||||
|         ) | ||||
|         self.polling_emitter_thread.start() | ||||
|         logger.info("Started polling thread using threading (eventlet-free)") | ||||
|  | ||||
|         # Store the thread reference in socketio for clean shutdown | ||||
|         self.socketio_instance.polling_emitter_thread = self.polling_emitter_thread | ||||
|  | ||||
|     def handle_signal(self, *args, **kwargs): | ||||
|         logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs") | ||||
|         # Safely extract the watch UUID from kwargs | ||||
|         watch_uuid = kwargs.get('watch_uuid') | ||||
|         app_context = kwargs.get('app_context') | ||||
|  | ||||
|         if watch_uuid: | ||||
|             # Get the watch object from the datastore | ||||
|             watch = self.datastore.data['watching'].get(watch_uuid) | ||||
|             if watch: | ||||
|                 if app_context: | ||||
|                     # note | ||||
|                     with app_context.app_context(): | ||||
|                         with app_context.test_request_context(): | ||||
|                             # Forward to handle_watch_update with the watch parameter | ||||
|                             handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore) | ||||
|                 else: | ||||
|                     handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore) | ||||
|  | ||||
|                 logger.trace(f"Signal handler processed watch UUID {watch_uuid}") | ||||
|             else: | ||||
|                 logger.warning(f"Watch UUID {watch_uuid} not found in datastore") | ||||
|  | ||||
|     def handle_watch_bumped_favicon_signal(self, *args, **kwargs): | ||||
|         watch_uuid = kwargs.get('watch_uuid') | ||||
|         if watch_uuid: | ||||
|             # Emit the queue size to all connected clients | ||||
|             self.socketio_instance.emit("watch_bumped_favicon", { | ||||
|                 "uuid": watch_uuid, | ||||
|                 "event_timestamp": time.time() | ||||
|             }) | ||||
|         logger.debug(f"Watch UUID {watch_uuid} got its favicon updated") | ||||
|  | ||||
|     def handle_deleted_signal(self, *args, **kwargs): | ||||
|         watch_uuid = kwargs.get('watch_uuid') | ||||
|         if watch_uuid: | ||||
|             # Emit the queue size to all connected clients | ||||
|             self.socketio_instance.emit("watch_deleted", { | ||||
|                 "uuid": watch_uuid, | ||||
|                 "event_timestamp": time.time() | ||||
|             }) | ||||
|         logger.debug(f"Watch UUID {watch_uuid} was deleted") | ||||
|  | ||||
|     def handle_queue_length(self, *args, **kwargs): | ||||
|         """Handle queue_length signal and emit to all clients""" | ||||
|         try: | ||||
|             queue_length = kwargs.get('length', 0) | ||||
|             logger.debug(f"SignalHandler: Queue length update received: {queue_length}") | ||||
|  | ||||
|             # Emit the queue size to all connected clients | ||||
|             self.socketio_instance.emit("queue_size", { | ||||
|                 "q_length": queue_length, | ||||
|                 "event_timestamp": time.time() | ||||
|             }) | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Socket.IO error in handle_queue_length: {str(e)}") | ||||
|  | ||||
|     def handle_notification_event(self, *args, **kwargs): | ||||
|         """Handle notification_event signal and emit to all clients""" | ||||
|         try: | ||||
|             watch_uuid = kwargs.get('watch_uuid') | ||||
|             logger.debug(f"SignalHandler: Notification event received for watch UUID: {watch_uuid}") | ||||
|  | ||||
|             # Emit the notification event to all connected clients | ||||
|             self.socketio_instance.emit("notification_event", { | ||||
|                 "watch_uuid": watch_uuid, | ||||
|                 "event_timestamp": time.time() | ||||
|             }) | ||||
|  | ||||
|             logger.trace(f"Socket.IO: Emitted notification_event for watch UUID {watch_uuid}") | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Socket.IO error in handle_notification_event: {str(e)}") | ||||
|  | ||||
|     def polling_emit_running_or_queued_watches_threaded(self): | ||||
|         """Threading version of polling for Windows compatibility""" | ||||
|         import time | ||||
|         import threading | ||||
|         logger.info("Queue update thread started (threading mode)") | ||||
|  | ||||
|         # Import here to avoid circular imports | ||||
|         from changedetectionio.flask_app import app | ||||
|         from changedetectionio import worker_handler | ||||
|         watch_check_update = signal('watch_check_update') | ||||
|  | ||||
|         # Track previous state to avoid unnecessary emissions | ||||
|         previous_running_uuids = set() | ||||
|  | ||||
|         # Run until app shutdown - check exit flag more frequently for fast shutdown | ||||
|         exit_event = getattr(app.config, 'exit', threading.Event()) | ||||
|  | ||||
|         while not exit_event.is_set(): | ||||
|             try: | ||||
|                 # Get current running UUIDs from async workers | ||||
|                 running_uuids = set(worker_handler.get_running_uuids()) | ||||
|  | ||||
|                 # Only send updates for UUIDs that changed state | ||||
|                 newly_running = running_uuids - previous_running_uuids | ||||
|                 no_longer_running = previous_running_uuids - running_uuids | ||||
|  | ||||
|                 # Send updates for newly running UUIDs (but exit fast if shutdown requested) | ||||
|                 for uuid in newly_running: | ||||
|                     if exit_event.is_set(): | ||||
|                         break | ||||
|                     logger.trace(f"Threading polling: UUID {uuid} started processing") | ||||
|                     with app.app_context(): | ||||
|                         watch_check_update.send(app_context=app, watch_uuid=uuid) | ||||
|                     time.sleep(0.01)  # Small yield | ||||
|  | ||||
|                 # Send updates for UUIDs that finished processing (but exit fast if shutdown requested) | ||||
|                 if not exit_event.is_set(): | ||||
|                     for uuid in no_longer_running: | ||||
|                         if exit_event.is_set(): | ||||
|                             break | ||||
|                         logger.trace(f"Threading polling: UUID {uuid} finished processing") | ||||
|                         with app.app_context(): | ||||
|                             watch_check_update.send(app_context=app, watch_uuid=uuid) | ||||
|                         time.sleep(0.01)  # Small yield | ||||
|  | ||||
|                 # Update tracking for next iteration | ||||
|                 previous_running_uuids = running_uuids | ||||
|  | ||||
|                 # Sleep between polling cycles, but check exit flag every 0.5 seconds for fast shutdown | ||||
|                 for _ in range(20):  # 20 * 0.5 = 10 seconds total | ||||
|                     if exit_event.is_set(): | ||||
|                         break | ||||
|                     time.sleep(0.5) | ||||
|  | ||||
|             except Exception as e: | ||||
|                 logger.error(f"Error in threading polling: {str(e)}") | ||||
|                 # Even during error recovery, check for exit quickly | ||||
|                 for _ in range(1):  # 1 * 0.5 = 0.5 seconds | ||||
|                     if exit_event.is_set(): | ||||
|                         break | ||||
|                     time.sleep(0.5) | ||||
|  | ||||
|         # Check if we're in pytest environment - if so, be more gentle with logging | ||||
|         import sys | ||||
|         in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ | ||||
|  | ||||
|         if not in_pytest: | ||||
|             logger.info("Queue update thread stopped (threading mode)") | ||||
|  | ||||
|  | ||||
| def handle_watch_update(socketio, **kwargs): | ||||
|     """Handle watch update signal from blinker""" | ||||
|     try: | ||||
|         watch = kwargs.get('watch') | ||||
|         datastore = kwargs.get('datastore') | ||||
|  | ||||
|         # Emit the watch update to all connected clients | ||||
|         from changedetectionio.flask_app import update_q | ||||
|         from changedetectionio.flask_app import _jinja2_filter_datetime | ||||
|         from changedetectionio import worker_handler | ||||
|  | ||||
|         # Get list of watches that are currently running | ||||
|         running_uuids = worker_handler.get_running_uuids() | ||||
|  | ||||
|         # Get list of watches in the queue | ||||
|         queue_list = [] | ||||
|         for q_item in update_q.queue: | ||||
|             if hasattr(q_item, 'item') and 'uuid' in q_item.item: | ||||
|                 queue_list.append(q_item.item['uuid']) | ||||
|  | ||||
|         # Get the error texts from the watch | ||||
|         error_texts = watch.compile_error_texts() | ||||
|         # Create a simplified watch data object to send to clients | ||||
|  | ||||
|         watch_data = { | ||||
|             'checking_now': True if watch.get('uuid') in running_uuids else False, | ||||
|             'error_text': error_texts, | ||||
|             'event_timestamp': time.time(), | ||||
|             'fetch_time': watch.get('fetch_time'), | ||||
|             'has_error': True if error_texts else False, | ||||
|             'has_favicon': True if watch.get_favicon_filename() else False, | ||||
|             'history_n': watch.history_n, | ||||
|             'last_changed_text': timeago.format(int(watch.last_changed), time.time()) if watch.history_n >= 2 and int(watch.last_changed) > 0 else 'Not yet', | ||||
|             'last_checked': watch.get('last_checked'), | ||||
|             'last_checked_text': _jinja2_filter_datetime(watch), | ||||
|             'notification_muted': True if watch.get('notification_muted') else False, | ||||
|             'paused': True if watch.get('paused') else False, | ||||
|             'queued': True if watch.get('uuid') in queue_list else False, | ||||
|             'unviewed': watch.has_unviewed, | ||||
|             'uuid': watch.get('uuid'), | ||||
|         } | ||||
|  | ||||
|         errored_count = 0 | ||||
|         for watch_uuid_iter, watch_iter in datastore.data['watching'].items(): | ||||
|             if watch_iter.get('last_error'): | ||||
|                 errored_count += 1 | ||||
|  | ||||
|         general_stats = { | ||||
|             'count_errors': errored_count, | ||||
|             'has_unviewed': datastore.has_unviewed | ||||
|         } | ||||
|  | ||||
|         # Debug what's being emitted | ||||
|         # logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}") | ||||
|  | ||||
|         # Emit to all clients (no 'broadcast' parameter needed - it's the default behavior) | ||||
|         socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats}) | ||||
|  | ||||
|         # Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues | ||||
|         logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}") | ||||
|  | ||||
|     except Exception as e: | ||||
|         logger.error(f"Socket.IO error in handle_watch_update: {str(e)}") | ||||
|  | ||||
|  | ||||
| def init_socketio(app, datastore): | ||||
|     """Create the SocketIO server for ``app``, register its handlers and return it. | ||||
|  | ||||
|     The async mode comes from the ``SOCKETIO_MODE`` environment variable | ||||
|     (``threading`` by default, ``gevent`` when requested and importable; | ||||
|     anything else falls back to ``threading``). The returned object also gets | ||||
|     a ``datastore`` attribute and a ``shutdown`` callable attached. | ||||
|  | ||||
|     :param app: Flask application the SocketIO server is bound to. | ||||
|     :param datastore: application datastore, stored on the server as ``socketio.datastore``. | ||||
|     :return: the configured ``SocketIO`` instance. | ||||
|     """ | ||||
|     import platform | ||||
|     import sys | ||||
|  | ||||
|     # Platform-specific async_mode selection for better stability | ||||
|     # (platform and Python version are only used for the info log further down) | ||||
|     system = platform.system().lower() | ||||
|     python_version = sys.version_info | ||||
|  | ||||
|     # Check for SocketIO mode configuration via environment variable | ||||
|     # Default is 'threading' for best cross-platform compatibility | ||||
|     socketio_mode = os.getenv('SOCKETIO_MODE', 'threading').lower() | ||||
|  | ||||
|     if socketio_mode == 'gevent': | ||||
|         # Use gevent mode (higher concurrency but platform limitations) | ||||
|         try: | ||||
|             # Imported only to find out whether gevent is installed | ||||
|             import gevent | ||||
|             async_mode = 'gevent' | ||||
|             logger.info(f"SOCKETIO_MODE=gevent: Using {async_mode} mode for Socket.IO") | ||||
|         except ImportError: | ||||
|             async_mode = 'threading' | ||||
|             logger.warning(f"SOCKETIO_MODE=gevent but gevent not available, falling back to {async_mode} mode") | ||||
|     elif socketio_mode == 'threading': | ||||
|         # Use threading mode (default - best compatibility) | ||||
|         async_mode = 'threading' | ||||
|         logger.info(f"SOCKETIO_MODE=threading: Using {async_mode} mode for Socket.IO") | ||||
|     else: | ||||
|         # Invalid mode specified, use default | ||||
|         async_mode = 'threading' | ||||
|         logger.warning(f"Invalid SOCKETIO_MODE='{socketio_mode}', using default {async_mode} mode for Socket.IO") | ||||
|  | ||||
|     # Log platform info for debugging | ||||
|     logger.info(f"Platform: {system}, Python: {python_version.major}.{python_version.minor}, Socket.IO mode: {async_mode}") | ||||
|  | ||||
|     # Restrict SocketIO CORS to same origin by default, can be overridden with env var | ||||
|     cors_origins = os.environ.get('SOCKETIO_CORS_ORIGINS', None) | ||||
|  | ||||
|     # Both SocketIO loggers are switched by the same SOCKETIO_LOGGING env var | ||||
|     socketio = SocketIO(app, | ||||
|                         async_mode=async_mode, | ||||
|                         cors_allowed_origins=cors_origins,  # None means same-origin only | ||||
|                         logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')), | ||||
|                         engineio_logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False'))) | ||||
|  | ||||
|     # Set up event handlers | ||||
|     logger.info("Socket.IO: Registering connect event handler") | ||||
|  | ||||
|     @socketio.on('checkbox-operation') | ||||
|     def event_checkbox_operations(data): | ||||
|         """Forward a bulk operation on several watches to ``_handle_operations``.""" | ||||
|         from changedetectionio.blueprint.ui import _handle_operations | ||||
|         from changedetectionio import queuedWatchMetaData | ||||
|         from changedetectionio import worker_handler | ||||
|         from changedetectionio.flask_app import update_q, watch_check_update | ||||
|         logger.trace(f"Got checkbox operations event: {data}") | ||||
|  | ||||
|         # Read at event time; the attribute is assigned near the end of init_socketio | ||||
|         datastore = socketio.datastore | ||||
|  | ||||
|         _handle_operations( | ||||
|             op=data.get('op'), | ||||
|             uuids=data.get('uuids'), | ||||
|             datastore=datastore, | ||||
|             extra_data=data.get('extra_data'), | ||||
|             worker_handler=worker_handler, | ||||
|             update_q=update_q, | ||||
|             queuedWatchMetaData=queuedWatchMetaData, | ||||
|             watch_check_update=watch_check_update, | ||||
|             emit_flash=False | ||||
|         ) | ||||
|  | ||||
|     @socketio.on('connect') | ||||
|     def handle_connect(): | ||||
|         """Handle client connection""" | ||||
|         #        logger.info("Socket.IO: CONNECT HANDLER CALLED - Starting connection process") | ||||
|         from flask import request | ||||
|         from flask_login import current_user | ||||
|         from changedetectionio.flask_app import update_q | ||||
|  | ||||
|         # Access datastore from socketio | ||||
|         datastore = socketio.datastore | ||||
|         #        logger.info(f"Socket.IO: Current user authenticated: {current_user.is_authenticated if hasattr(current_user, 'is_authenticated') else 'No current_user'}") | ||||
|  | ||||
|         # Check if authentication is required and user is not authenticated | ||||
|         # (a password in the application settings or a SALTED_PASS env var both count as "enabled") | ||||
|         has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False) | ||||
|         #        logger.info(f"Socket.IO: Password enabled: {has_password_enabled}") | ||||
|         if has_password_enabled and not current_user.is_authenticated: | ||||
|             logger.warning("Socket.IO: Rejecting unauthenticated connection") | ||||
|             return False  # Reject the connection | ||||
|  | ||||
|         # Send the current queue size to the newly connected client | ||||
|         # (a failure here is only logged; the connection is still accepted) | ||||
|         try: | ||||
|             queue_size = update_q.qsize() | ||||
|             socketio.emit("queue_size", { | ||||
|                 "q_length": queue_size, | ||||
|                 "event_timestamp": time.time() | ||||
|             }, room=request.sid)  # Send only to this client | ||||
|             logger.debug(f"Socket.IO: Sent initial queue size {queue_size} to new client") | ||||
|         except Exception as e: | ||||
|             logger.error(f"Socket.IO error sending initial queue size: {str(e)}") | ||||
|  | ||||
|         logger.info("Socket.IO: Client connected") | ||||
|  | ||||
|     #    logger.info("Socket.IO: Registering disconnect event handler") | ||||
|     @socketio.on('disconnect') | ||||
|     def handle_disconnect(): | ||||
|         """Handle client disconnection""" | ||||
|         logger.info("Socket.IO: Client disconnected") | ||||
|  | ||||
|     # Create a dedicated signal handler that will receive signals and emit them to clients | ||||
|     # The variable is not used again here: constructing the handler is what connects | ||||
|     # the signals and starts the polling thread | ||||
|     signal_handler = SignalHandler(socketio, datastore) | ||||
|  | ||||
|     # Register watch operation event handlers | ||||
|     from .events import register_watch_operation_handlers | ||||
|     register_watch_operation_handlers(socketio, datastore) | ||||
|  | ||||
|     # Store the datastore reference on the socketio object for later use | ||||
|     # (the 'connect' and 'checkbox-operation' handlers above read it when an event arrives) | ||||
|     socketio.datastore = datastore | ||||
|  | ||||
|     # No stop event needed for threading mode - threads check app.config.exit directly | ||||
|  | ||||
|     # Add a shutdown method to the socketio object | ||||
|     def shutdown(): | ||||
|         """Shutdown the SocketIO server fast and aggressively""" | ||||
|         try: | ||||
|             logger.info("Socket.IO: Fast shutdown initiated...") | ||||
|  | ||||
|             # For threading mode, give the thread a very short time to exit gracefully | ||||
|             # (polling_emitter_thread is attached to socketio by SignalHandler.__init__) | ||||
|             if hasattr(socketio, 'polling_emitter_thread'): | ||||
|                 if socketio.polling_emitter_thread.is_alive(): | ||||
|                     logger.info("Socket.IO: Waiting 1 second for polling thread to stop...") | ||||
|                     socketio.polling_emitter_thread.join(timeout=1.0)  # Only 1 second timeout | ||||
|                     if socketio.polling_emitter_thread.is_alive(): | ||||
|                         logger.info("Socket.IO: Polling thread still running after timeout - continuing with shutdown") | ||||
|                     else: | ||||
|                         logger.info("Socket.IO: Polling thread stopped quickly") | ||||
|                 else: | ||||
|                     logger.info("Socket.IO: Polling thread already stopped") | ||||
|  | ||||
|             logger.info("Socket.IO: Fast shutdown complete") | ||||
|         except Exception as e: | ||||
|             logger.error(f"Socket.IO error during shutdown: {str(e)}") | ||||
|  | ||||
|     # Attach the shutdown method to the socketio object | ||||
|     socketio.shutdown = shutdown | ||||
|  | ||||
|     logger.info("Socket.IO initialized and attached to main Flask app") | ||||
|     logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}") | ||||
|     return socketio | ||||
| @@ -14,7 +14,8 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||||
| find tests/test_*py -type f|while read test_name | ||||
| do | ||||
|   echo "TEST RUNNING $test_name" | ||||
|   pytest $test_name | ||||
|   # REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser | ||||
|   REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name | ||||
| done | ||||
|  | ||||
| echo "RUNNING WITH BASE_URL SET" | ||||
| @@ -22,7 +23,7 @@ echo "RUNNING WITH BASE_URL SET" | ||||
| # Now re-run some tests with BASE_URL enabled | ||||
| # Re #65 - Ability to include a link back to the installation, in the notification. | ||||
| export BASE_URL="https://really-unique-domain.io" | ||||
| pytest tests/test_notification.py | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py | ||||
|  | ||||
|  | ||||
| # Re-run with HIDE_REFERER set - could affect login | ||||
| @@ -32,11 +33,14 @@ pytest tests/test_access_control.py | ||||
| # Re-run a few tests that will trigger brotli based storage | ||||
| export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5 | ||||
| pytest tests/test_access_control.py | ||||
| pytest tests/test_notification.py | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py | ||||
| pytest tests/test_backend.py | ||||
| pytest tests/test_rss.py | ||||
| pytest tests/test_unique_lines.py | ||||
|  | ||||
| # Try high concurrency | ||||
| FETCH_WORKERS=130 pytest  tests/test_history_consistency.py -v -l | ||||
|  | ||||
| # Check file:// will pickup a file when enabled | ||||
| echo "Hello world" > /tmp/test-file.txt | ||||
| ALLOW_FILE_URI=yes pytest tests/test_security.py | ||||
|   | ||||
| @@ -82,3 +82,25 @@ done | ||||
|  | ||||
|  | ||||
| docker kill squid-one squid-two squid-custom | ||||
|  | ||||
| # Test that the UI is returning the correct error message when a proxy is not available | ||||
|  | ||||
| # Requests | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|  | ||||
| # Playwright | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|  | ||||
| # Puppeteer fast | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|  | ||||
| # Selenium | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   | ||||
| @@ -10,9 +10,15 @@ import os | ||||
|  | ||||
| JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10)) | ||||
|  | ||||
|  | ||||
| # This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available. | ||||
| # (Which also limits available functions that could be called) | ||||
| def render(template_str, **args: t.Any) -> str: | ||||
|     jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension']) | ||||
|     output = jinja2_env.from_string(template_str).render(args) | ||||
|     return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE] | ||||
|  | ||||
| def render_fully_escaped(content): | ||||
|     env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True) | ||||
|     template = env.from_string("{{ some_html|e }}") | ||||
|     return template.render(some_html=content) | ||||
|  | ||||
|   | ||||
| Before Width: | Height: | Size: 569 B After Width: | Height: | Size: 569 B | 
| Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB | 
| Before Width: | Height: | Size: 6.2 KiB After Width: | Height: | Size: 6.2 KiB | 
| @@ -211,7 +211,14 @@ $(document).ready(function () { | ||||
|                     $('input[type=text]', first_available).first().val(x['xpath']); | ||||
|                     $('input[placeholder="Value"]', first_available).addClass('ok').click().focus(); | ||||
|                     found_something = true; | ||||
|                 } else { | ||||
|                 } | ||||
|                 else if (x['tagName'] === 'select') { | ||||
|                     $('select', first_available).val('<select> by option text').change(); | ||||
|                     $('input[type=text]', first_available).first().val(x['xpath']); | ||||
|                     $('input[placeholder="Value"]', first_available).addClass('ok').click().focus(); | ||||
|                     found_something = true; | ||||
|                 } | ||||
|                 else { | ||||
|                     // There's no good way (that I know) to find if this | ||||
|                     // see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging | ||||
|                     // https://codepen.io/azaslavsky/pen/DEJVWv | ||||
| @@ -251,6 +258,10 @@ $(document).ready(function () { | ||||
|                 400: function () { | ||||
|                     // More than likely the CSRF token was lost when the server restarted | ||||
|                     alert("There was a problem processing the request, please reload the page."); | ||||
|                 }, | ||||
|                 401: function (err) { | ||||
|                     // This will be a custom error | ||||
|                     alert(err.responseText); | ||||
|                 } | ||||
|             } | ||||
|         }).done(function (data) { | ||||
|   | ||||
| @@ -8,7 +8,7 @@ $(document).ready(function () { | ||||
|         $(".addRuleRow").on("click", function(e) { | ||||
|             e.preventDefault(); | ||||
|              | ||||
|             let currentRow = $(this).closest("tr"); | ||||
|             let currentRow = $(this).closest(".fieldlist-row"); | ||||
|              | ||||
|             // Clone without events | ||||
|             let newRow = currentRow.clone(false); | ||||
| @@ -29,8 +29,8 @@ $(document).ready(function () { | ||||
|             e.preventDefault(); | ||||
|              | ||||
|             // Only remove if there's more than one row | ||||
|             if ($("#rulesTable tbody tr").length > 1) { | ||||
|                 $(this).closest("tr").remove(); | ||||
|             if ($("#rulesTable .fieldlist-row").length > 1) { | ||||
|                 $(this).closest(".fieldlist-row").remove(); | ||||
|                 reindexRules(); | ||||
|             } | ||||
|         }); | ||||
| @@ -39,7 +39,7 @@ $(document).ready(function () { | ||||
|         $(".verifyRuleRow").on("click", function(e) { | ||||
|             e.preventDefault(); | ||||
|              | ||||
|             let row = $(this).closest("tr"); | ||||
|             let row = $(this).closest(".fieldlist-row"); | ||||
|             let field = row.find("select[name$='field']").val(); | ||||
|             let operator = row.find("select[name$='operator']").val(); | ||||
|             let value = row.find("input[name$='value']").val(); | ||||
| @@ -128,7 +128,7 @@ $(document).ready(function () { | ||||
|         $(".addRuleRow, .removeRuleRow, .verifyRuleRow").off("click"); | ||||
|          | ||||
|         // Reindex all form elements | ||||
|         $("#rulesTable tbody tr").each(function(index) { | ||||
|         $("#rulesTable .fieldlist-row").each(function(index) { | ||||
|             $(this).find("select, input").each(function() { | ||||
|                 let oldName = $(this).attr("name"); | ||||
|                 let oldId = $(this).attr("id"); | ||||
|   | ||||
							
								
								
									
										13
									
								
								changedetectionio/static/js/feather-icons.min.js
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								changedetectionio/static/js/feather-icons.min.js
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -159,6 +159,7 @@ | ||||
|         // Return the current request in case it's needed | ||||
|         return requests[namespace]; | ||||
|     }; | ||||
|  | ||||
| })(jQuery); | ||||
|  | ||||
|  | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user