mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 06:37:41 +00:00 
			
		
		
		
	Compare commits
	
		
			176 Commits
		
	
	
		
			0.50.4
			...
			docker-bui
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 46d07dbed7 | ||
|   | afadaf5467 | ||
|   | db11f601a1 | ||
|   | ef04840dd2 | ||
|   | 1628586553 | ||
|   | a23c07ba94 | ||
|   | 431fd168a1 | ||
|   | 7dbd0b75b2 | ||
|   | ae532c82e8 | ||
|   | ab0b85d088 | ||
|   | 66aec365c2 | ||
|   | e09cea60ef | ||
|   | f304ae19db | ||
|   | 2116b2cb93 | ||
|   | 8f580ac96b | ||
|   | a8cadc3d16 | ||
|   | c9290d73e0 | ||
|   | 2db5e906e9 | ||
|   | 0751bd371a | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 3ffa0805e9 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 3335270692 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | a7573b10ec | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | df945ad743 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 4536e95205 | ||
|   | 1479d7bd46 | ||
|   | 9ba2094f75 | ||
|   | 8aa012ba8e | ||
|   | 8bc6b10db1 | ||
|   | 76d799c95b | ||
|   | 7c8bdfcc9f | ||
|   | 01a938d7ce | ||
|   | e44853c439 | ||
|   | 3830bec891 | ||
|   | 88ab663330 | ||
|   | 68335b95c3 | ||
|   | 7bbfa0ef32 | ||
|   | e233d52931 | ||
|   | 181d32e82a | ||
|   | a51614f83d | ||
|   | 07f98d6bd3 | ||
|   | f71550da4d | ||
|   | 8c3d0d7e31 | ||
|   | 46658a85d6 | ||
|   | d699652955 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 9e88db5d9b | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 5d9c102aff | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | cb1c36d97d | ||
|   | cc29ba5ea9 | ||
|   | 6f371b1bc6 | ||
|   | 785dabd071 | ||
|   | 09914d54a0 | ||
|   | 58b5586674 | ||
|   | cb02ccc8b4 | ||
|   | ec692ed727 | ||
|   | 2fb2ea573e | ||
|   | ada2dc6112 | ||
|   | ad9024a4f0 | ||
|   | 047c10e23c | ||
|   | 4f83164544 | ||
|   | 6f926ed595 | ||
|   | 249dc55212 | ||
|   | 46252bc6f3 | ||
|   | 64350a2e78 | ||
|   | 2902c63a3b | ||
|   | 55b8588f1f | ||
|   | 02ecc4ae9a | ||
|   | 3ee50b7832 | ||
|   | 66ddd87ee4 | ||
|   | 233189e4f7 | ||
|   | b237fd7201 | ||
|   | 3c81efe2f4 | ||
|   | 0fcfb94690 | ||
|   | bb6d4c2756 | ||
|   | b59ce190ac | ||
|   | 80be1a30f2 | ||
|   | 93b4f79006 | ||
|   | 3009e46617 | ||
|   | 8f040a1a84 | ||
|   | 4dbab8d77a | ||
|   | cde42c8a49 | ||
|   | 3b9d19df43 | ||
|   | 6ad4acc9fc | ||
|   | 3e59521f48 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 0970c087c8 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 676c550e6e | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 78fa47f6f8 | ||
|   | 4aa5bb6da3 | ||
|   | f7dfc9bbb8 | ||
|   | 584b6e378d | ||
|   | 754febfd33 | ||
|   | 0c9c475f32 | ||
|   | e4baca1127 | ||
|   | bb61a35a54 | ||
|   | 4b9ae5a97c | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | c8caa0662d | ||
|   | f4e8d1963f | ||
|   | 45d5e961dc | ||
|   | 45f2863966 | ||
|   | 01c1ac4c0c | ||
|   | b2f9aec383 | ||
|   | a95aa67aef | ||
|   | cbeefeccbb | ||
|   | 2b72d38235 | ||
|   | 8fe7aec3c6 | ||
|   | 6e1f5a8503 | ||
|   | b74b76c9f9 | ||
|   | a27265450c | ||
|   | cc5455c3dc | ||
|   | 9db7fb83eb | ||
|   | f0061110c9 | ||
|   | a13fedc0d6 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 7576bec66a | ||
|   | 7672190923 | ||
|   | 0ade4307b0 | ||
|   | 8c03b65dc6 | ||
|   | 8a07459e43 | ||
|   | cd8e115118 | ||
|   | 4ff7b20fcf | ||
|   | 8120f00148 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 127abf49f1 | ||
|   | db81c3c5e2 | ||
|   | 9952af7a52 | ||
|   | 790577c1b6 | ||
|   | bab362fb7d | ||
|   | a177d02406 | ||
|   | 8b8f280565 | ||
|   | e752875504 | ||
|   | 0a4562fc09 | ||
|   | c84ac2eab1 | ||
|   | 3ae07ac633 | ||
|   | 8379fdb1f8 | ||
|   | 3f77e075b9 | ||
|   | 685bd01156 | ||
|   | 20bcca578a | ||
|   | f05f143b46 | ||
|   | d7f00679a0 | ||
|   | b7da6f0ca7 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | e4a81ebe08 | ||
|   | a4edc46af0 | ||
|   | 767db3b79b | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 4f6e9dcc56 | ||
|   | aa4e182549 | ||
|   | fe1f7c30e1 | ||
|   | e5ed1ae349 | ||
|   | d1b1dd70f4 | ||
|   | 93b14c9fc8 | ||
|   | c9c5de20d8 | ||
|   | 011fa3540e | ||
|   | c3c3671f8b | ||
|   | 5980bd9bcd | ||
|   | 438871429c | ||
|   | 173ce5bfa2 | ||
|   | 106b1f85fa | ||
|   | a5c7f343d0 | ||
|   | 401886bcda | ||
|   | c66fca9de9 | ||
|   | daee4c5c17 | ||
|   | af5d0b6963 | ||
|   | f92dd81c8f | ||
|   | 55cdcfe3ea | ||
|   | 2f7520a6c5 | ||
|   | 4fdc5d7da2 | ||
|   | 308f30b2e8 | ||
|   | 4fa2042d12 | ||
|   | 2a4e1bad4e | ||
|   | 8a317eead5 | ||
|   | b58094877f | ||
|   | afe252126c | ||
|   | 342e6119f1 | ||
|   | e4ff87e970 | ||
|   | e45a544f15 | ||
|   | 9a5abaa17a | ||
|   | b8ecfff861 | ||
|   | 58e2a41c95 | ||
|   | a7214db9c3 | ||
|   | b9da4af64f | 
| @@ -33,7 +33,6 @@ venv/ | ||||
| # Test and development files | ||||
| test-datastore/ | ||||
| tests/ | ||||
| docs/ | ||||
| *.md | ||||
| !README.md | ||||
|  | ||||
|   | ||||
							
								
								
									
										51
									
								
								.github/actions/extract-memory-report/action.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								.github/actions/extract-memory-report/action.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,51 @@ | ||||
| name: 'Extract Memory Test Report' | ||||
| description: 'Extracts and displays memory test report from a container' | ||||
| inputs: | ||||
|   container-name: | ||||
|     description: 'Name of the container to extract logs from' | ||||
|     required: true | ||||
|   python-version: | ||||
|     description: 'Python version for artifact naming' | ||||
|     required: true | ||||
|   output-dir: | ||||
|     description: 'Directory to store output logs' | ||||
|     required: false | ||||
|     default: 'output-logs' | ||||
|  | ||||
| runs: | ||||
|   using: "composite" | ||||
|   steps: | ||||
|     - name: Create output directory | ||||
|       shell: bash | ||||
|       run: | | ||||
|         mkdir -p ${{ inputs.output-dir }} | ||||
|  | ||||
|     - name: Dump container log | ||||
|       shell: bash | ||||
|       run: | | ||||
|         echo "Disabled for now" | ||||
| #        return | ||||
| #        docker logs ${{ inputs.container-name }} > ${{ inputs.output-dir }}/${{ inputs.container-name }}-stdout-${{ inputs.python-version }}.txt 2>&1 || echo "Could not get stdout" | ||||
| #        docker logs ${{ inputs.container-name }} 2> ${{ inputs.output-dir }}/${{ inputs.container-name }}-stderr-${{ inputs.python-version }}.txt || echo "Could not get stderr" | ||||
|  | ||||
|     - name: Extract and display memory test report | ||||
|       shell: bash | ||||
|       run: | | ||||
|         echo "Disabled for now" | ||||
| #        echo "Extracting test-memory.log from container..." | ||||
| #        docker cp ${{ inputs.container-name }}:/app/changedetectionio/test-memory.log ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log || echo "test-memory.log not found in container" | ||||
| # | ||||
| #        echo "=== Top 10 Highest Peak Memory Tests ===" | ||||
| #        if [ -f ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log ]; then | ||||
| #          grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | \ | ||||
| #            sed 's/.*Peak memory: //' | \ | ||||
| #            paste -d'|' - <(grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log) | \ | ||||
| #            sort -t'|' -k1 -nr | \ | ||||
| #            cut -d'|' -f2 | \ | ||||
| #            head -10 | ||||
| #          echo "" | ||||
| #          echo "=== Full Memory Test Report ===" | ||||
| #          cat ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | ||||
| #        else | ||||
| #          echo "No memory log available" | ||||
| #        fi | ||||
							
								
								
									
										8
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							| @@ -4,11 +4,11 @@ updates: | ||||
|     directory: / | ||||
|     schedule: | ||||
|       interval: "weekly" | ||||
|     "caronc/apprise": | ||||
|       versioning-strategy: "increase" | ||||
|       schedule: | ||||
|         interval: "daily" | ||||
|     groups: | ||||
|       all: | ||||
|         patterns: | ||||
|         - "*" | ||||
|   - package-ecosystem: pip | ||||
|     directory: / | ||||
|     schedule: | ||||
|       interval: "weekly" | ||||
|   | ||||
							
								
								
									
										6
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
								
							| @@ -2,7 +2,7 @@ | ||||
| # Test that we can still build on Alpine (musl modified libc https://musl.libc.org/) | ||||
| # Some packages wont install via pypi because they dont have a wheel available under this architecture. | ||||
|  | ||||
| FROM ghcr.io/linuxserver/baseimage-alpine:3.21 | ||||
| FROM ghcr.io/linuxserver/baseimage-alpine:3.22 | ||||
| ENV PYTHONUNBUFFERED=1 | ||||
|  | ||||
| COPY requirements.txt /requirements.txt | ||||
| @@ -18,17 +18,19 @@ RUN \ | ||||
|     libxslt-dev \ | ||||
|     openssl-dev \ | ||||
|     python3-dev \ | ||||
|     file \ | ||||
|     zip \ | ||||
|     zlib-dev && \ | ||||
|   apk add --update --no-cache \ | ||||
|     libjpeg \ | ||||
|     libxslt \ | ||||
|     file \ | ||||
|     nodejs \ | ||||
|     poppler-utils \ | ||||
|     python3 && \ | ||||
|   echo "**** pip3 install test of changedetection.io ****" && \ | ||||
|   python3 -m venv /lsiopy  && \ | ||||
|   pip install -U pip wheel setuptools && \ | ||||
|   pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \ | ||||
|   pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements.txt && \ | ||||
|   apk del --purge \ | ||||
|     build-dependencies | ||||
|   | ||||
							
								
								
									
										8
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							| @@ -30,11 +30,11 @@ jobs: | ||||
|  | ||||
|     steps: | ||||
|     - name: Checkout repository | ||||
|       uses: actions/checkout@v4 | ||||
|       uses: actions/checkout@v5 | ||||
|  | ||||
|     # Initializes the CodeQL tools for scanning. | ||||
|     - name: Initialize CodeQL | ||||
|       uses: github/codeql-action/init@v3 | ||||
|       uses: github/codeql-action/init@v4 | ||||
|       with: | ||||
|         languages: ${{ matrix.language }} | ||||
|         # If you wish to specify custom queries, you can do so here or in a config file. | ||||
| @@ -45,7 +45,7 @@ jobs: | ||||
|     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java). | ||||
|     # If this step fails, then you should remove it and run the build manually (see below) | ||||
|     - name: Autobuild | ||||
|       uses: github/codeql-action/autobuild@v3 | ||||
|       uses: github/codeql-action/autobuild@v4 | ||||
|  | ||||
|     # ℹ️ Command-line programs to run using the OS shell. | ||||
|     # 📚 https://git.io/JvXDl | ||||
| @@ -59,4 +59,4 @@ jobs: | ||||
|     #   make release | ||||
|  | ||||
|     - name: Perform CodeQL Analysis | ||||
|       uses: github/codeql-action/analyze@v3 | ||||
|       uses: github/codeql-action/analyze@v4 | ||||
|   | ||||
							
								
								
									
										16
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										16
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							| @@ -39,12 +39,20 @@ jobs: | ||||
|     # Or if we are in a tagged release scenario. | ||||
|     if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != '' | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - uses: actions/checkout@v5 | ||||
|       - name: Set up Python 3.11 | ||||
|         uses: actions/setup-python@v5 | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           python-version: 3.11 | ||||
|  | ||||
|       - name: Cache pip packages | ||||
|         uses: actions/cache@v4 | ||||
|         with: | ||||
|           path: ~/.cache/pip | ||||
|           key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} | ||||
|           restore-keys: | | ||||
|             ${{ runner.os }}-pip- | ||||
|  | ||||
|       - name: Install dependencies | ||||
|         run: | | ||||
|           python -m pip install --upgrade pip | ||||
| @@ -95,7 +103,7 @@ jobs: | ||||
|           push: true | ||||
|           tags: | | ||||
|             ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8 | ||||
|           cache-from: type=gha | ||||
|           cache-to: type=gha,mode=max | ||||
|  | ||||
| @@ -133,7 +141,7 @@ jobs: | ||||
|           file: ./Dockerfile | ||||
|           push: true | ||||
|           tags: ${{ steps.meta.outputs.tags }} | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8 | ||||
|           cache-from: type=gha | ||||
|           cache-to: type=gha,mode=max | ||||
| # Looks like this was disabled | ||||
|   | ||||
							
								
								
									
										39
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										39
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							| @@ -7,9 +7,9 @@ jobs: | ||||
|     runs-on: ubuntu-latest | ||||
|  | ||||
|     steps: | ||||
|     - uses: actions/checkout@v4 | ||||
|     - uses: actions/checkout@v5 | ||||
|     - name: Set up Python | ||||
|       uses: actions/setup-python@v5 | ||||
|       uses: actions/setup-python@v6 | ||||
|       with: | ||||
|         python-version: "3.11" | ||||
|     - name: Install pypa/build | ||||
| @@ -21,39 +21,60 @@ jobs: | ||||
|     - name: Build a binary wheel and a source tarball | ||||
|       run: python3 -m build | ||||
|     - name: Store the distribution packages | ||||
|       uses: actions/upload-artifact@v4 | ||||
|       uses: actions/upload-artifact@v5 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|  | ||||
|  | ||||
|   test-pypi-package: | ||||
|     name: Test the built 📦 package works basically. | ||||
|     name: Test the built package works basically. | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: | ||||
|     - build | ||||
|     steps: | ||||
|     - name: Download all the dists | ||||
|       uses: actions/download-artifact@v4 | ||||
|       uses: actions/download-artifact@v6 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|     - name: Set up Python 3.11 | ||||
|       uses: actions/setup-python@v5 | ||||
|       uses: actions/setup-python@v6 | ||||
|       with: | ||||
|         python-version: '3.11' | ||||
|  | ||||
|     - name: Test that the basic pip built package runs without error | ||||
|       run: | | ||||
|         set -ex | ||||
|         ls -alR  | ||||
|          | ||||
|         # Find and install the first .whl file | ||||
|         find dist -type f -name "*.whl" -exec pip3 install {} \; -quit | ||||
|         # Install the first wheel found in dist/ | ||||
|         WHEEL=$(find dist -type f -name "*.whl" -print -quit) | ||||
|         echo Installing $WHEEL | ||||
|         python3 -m pip install --upgrade pip | ||||
|         python3 -m pip install "$WHEEL" | ||||
|         changedetection.io -d /tmp -p 10000 & | ||||
|          | ||||
|         sleep 3 | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null | ||||
|          | ||||
|         # --- API test --- | ||||
|         # This also means that the docs/api-spec.yml was shipped and could be read | ||||
|         test -f /tmp/url-watches.json | ||||
|         API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json) | ||||
|         echo Test API KEY is $API_KEY | ||||
|         curl -X POST "http://127.0.0.1:10000/api/v1/watch" \ | ||||
|           -H "x-api-key: ${API_KEY}" \ | ||||
|           -H "Content-Type: application/json" \ | ||||
|           --show-error --fail \ | ||||
|           --retry 6 --retry-delay 1 --retry-connrefused \ | ||||
|           -d '{ | ||||
|             "url": "https://example.com", | ||||
|             "title": "Example Site Monitor", | ||||
|             "time_between_check": { "hours": 1 } | ||||
|           }' | ||||
|            | ||||
|         killall changedetection.io | ||||
|  | ||||
|  | ||||
| @@ -72,7 +93,7 @@ jobs: | ||||
|  | ||||
|     steps: | ||||
|     - name: Download all the dists | ||||
|       uses: actions/download-artifact@v4 | ||||
|       uses: actions/download-artifact@v6 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|   | ||||
							
								
								
									
										14
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										14
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							| @@ -38,20 +38,26 @@ jobs: | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm/v8 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm64/v8 | ||||
|             dockerfile: ./Dockerfile | ||||
|           # Alpine Dockerfile platforms (musl via alpine check) | ||||
|           - platform: linux/amd64 | ||||
|             dockerfile: ./.github/test/Dockerfile-alpine | ||||
|           - platform: linux/arm64 | ||||
|             dockerfile: ./.github/test/Dockerfile-alpine | ||||
|     steps: | ||||
|         - uses: actions/checkout@v4 | ||||
|         - uses: actions/checkout@v5 | ||||
|         - name: Set up Python 3.11 | ||||
|           uses: actions/setup-python@v5 | ||||
|           uses: actions/setup-python@v6 | ||||
|           with: | ||||
|             python-version: 3.11 | ||||
|  | ||||
|         - name: Cache pip packages | ||||
|           uses: actions/cache@v4 | ||||
|           with: | ||||
|             path: ~/.cache/pip | ||||
|             key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} | ||||
|             restore-keys: | | ||||
|               ${{ runner.os }}-pip- | ||||
|  | ||||
|         # Just test that the build works, some libraries won't compile on ARM/rPi etc | ||||
|         - name: Set up QEMU | ||||
|           uses: docker/setup-qemu-action@v3 | ||||
|   | ||||
							
								
								
									
										13
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										13
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							| @@ -7,7 +7,7 @@ jobs: | ||||
|   lint-code: | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - uses: actions/checkout@v5 | ||||
|       - name: Lint with Ruff | ||||
|         run: | | ||||
|           pip install ruff | ||||
| @@ -15,8 +15,14 @@ jobs: | ||||
|           ruff check . --select E9,F63,F7,F82 | ||||
|           # Complete check with errors treated as warnings | ||||
|           ruff check . --exit-zero | ||||
|       - name: Validate OpenAPI spec | ||||
|         run: | | ||||
|           pip install openapi-spec-validator | ||||
|           python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))" | ||||
|  | ||||
|   test-application-3-10: | ||||
|     # Only run on push to master (including PR merges) | ||||
|     if: github.event_name == 'push' && github.ref == 'refs/heads/master' | ||||
|     needs: lint-code | ||||
|     uses: ./.github/workflows/test-stack-reusable-workflow.yml | ||||
|     with: | ||||
| @@ -24,12 +30,15 @@ jobs: | ||||
|  | ||||
|  | ||||
|   test-application-3-11: | ||||
|     # Always run | ||||
|     needs: lint-code | ||||
|     uses: ./.github/workflows/test-stack-reusable-workflow.yml | ||||
|     with: | ||||
|       python-version: '3.11' | ||||
|  | ||||
|   test-application-3-12: | ||||
|     # Only run on push to master (including PR merges) | ||||
|     if: github.event_name == 'push' && github.ref == 'refs/heads/master' | ||||
|     needs: lint-code | ||||
|     uses: ./.github/workflows/test-stack-reusable-workflow.yml | ||||
|     with: | ||||
| @@ -37,6 +46,8 @@ jobs: | ||||
|       skip-pypuppeteer: true | ||||
|  | ||||
|   test-application-3-13: | ||||
|     # Only run on push to master (including PR merges) | ||||
|     if: github.event_name == 'push' && github.ref == 'refs/heads/master' | ||||
|     needs: lint-code | ||||
|     uses: ./.github/workflows/test-stack-reusable-workflow.yml | ||||
|     with: | ||||
|   | ||||
							
								
								
									
										447
									
								
								.github/workflows/test-stack-reusable-workflow.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										447
									
								
								.github/workflows/test-stack-reusable-workflow.yml
									
									
									
									
										vendored
									
									
								
							| @@ -15,138 +15,294 @@ on: | ||||
|         default: false | ||||
|  | ||||
| jobs: | ||||
|   test-application: | ||||
|   # Build the Docker image once and share it with all test jobs | ||||
|   build: | ||||
|     runs-on: ubuntu-latest | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       # Mainly just for link/flake8 | ||||
|       - name: Set up Python ${{ env.PYTHON_VERSION }} | ||||
|         uses: actions/setup-python@v5 | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           python-version: ${{ env.PYTHON_VERSION }} | ||||
|  | ||||
|       - name: Cache pip packages | ||||
|         uses: actions/cache@v4 | ||||
|         with: | ||||
|           path: ~/.cache/pip | ||||
|           key: ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt') }} | ||||
|           restore-keys: | | ||||
|             ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}- | ||||
|             ${{ runner.os }}-pip- | ||||
|  | ||||
|       - name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }} | ||||
|         run: | | ||||
|           echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----" | ||||
|           # Build a changedetection.io container and start testing inside | ||||
|           docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio . | ||||
|           # Debug info | ||||
|           docker run test-changedetectionio  bash -c 'pip list'          | ||||
|           docker run test-changedetectionio bash -c 'pip list' | ||||
|  | ||||
|       - name: We should be Python ${{ env.PYTHON_VERSION }} ... | ||||
|         run: |          | ||||
|           docker run test-changedetectionio  bash -c 'python3 --version' | ||||
|  | ||||
|       - name: Spin up ancillary testable services | ||||
|         run: | | ||||
|            | ||||
|           docker network create changedet-network | ||||
|            | ||||
|           # Selenium | ||||
|           docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4 | ||||
|            | ||||
|           # SocketPuppetBrowser + Extra for custom browser test | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest                     | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url  -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest | ||||
|           docker run test-changedetectionio bash -c 'python3 --version' | ||||
|  | ||||
|       - name: Spin up ancillary SMTP+Echo message test server | ||||
|       - name: Save Docker image | ||||
|         run: | | ||||
|           # Debug SMTP server/echo message back server | ||||
|           docker run --network changedet-network -d -p 11025:11025 -p 11080:11080  --hostname mailserver test-changedetectionio  bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py' | ||||
|           docker ps | ||||
|           docker save test-changedetectionio -o /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Show docker container state and other debug info | ||||
|       - name: Upload Docker image artifact | ||||
|         uses: actions/upload-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp/test-changedetectionio.tar | ||||
|           retention-days: 1 | ||||
|  | ||||
|   # Unit tests (lightweight, no ancillary services needed) | ||||
|   unit-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           set -x | ||||
|           echo "Running processes in docker..." | ||||
|           docker ps | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Run Unit Tests | ||||
|         run: | | ||||
|           # Unit tests | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff' | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model' | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security' | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver' | ||||
|           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff' | ||||
|           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model' | ||||
|           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security' | ||||
|           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver' | ||||
|  | ||||
|       - name: Test built container with Pytest (generally as requests/plaintext fetching) | ||||
|   # Basic pytest tests with ancillary services | ||||
|   basic-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 25 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           # All tests | ||||
|           echo "run test with pytest" | ||||
|           # The default pytest logger_level is TRACE | ||||
|           # To change logger_level for pytest(test/conftest.py), | ||||
|           # append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG' | ||||
|           docker run --name test-cdio-basic-tests --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio && ./run_basic_tests.sh' | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
| # PLAYWRIGHT/NODE-> CDP | ||||
|       - name: Playwright and SocketPuppetBrowser - Specific tests in built container | ||||
|       - name: Test built container with Pytest | ||||
|         run: | | ||||
|           # Playwright via Sockpuppetbrowser fetch | ||||
|           # tests/visualselector/test_fetch_data.py will do browser steps   | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|           docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network | ||||
|           docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh' | ||||
|  | ||||
|       - name: Extract memory report and logs | ||||
|         if: always() | ||||
|         uses: ./.github/actions/extract-memory-report | ||||
|         with: | ||||
|           container-name: test-cdio-basic-tests | ||||
|           python-version: ${{ env.PYTHON_VERSION }} | ||||
|  | ||||
|       - name: Playwright and SocketPuppetBrowser - Headers and requests | ||||
|         run: |        | ||||
|           # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py; pwd;find .' | ||||
|       - name: Store test artifacts | ||||
|         if: always() | ||||
|         uses: actions/upload-artifact@v5 | ||||
|         with: | ||||
|           name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }} | ||||
|           path: output-logs | ||||
|  | ||||
|       - name: Playwright and SocketPuppetBrowser - Restock detection | ||||
|         run: |                             | ||||
|           # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|   # Playwright tests | ||||
|   playwright-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
| # STRAIGHT TO CDP | ||||
|       - name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container | ||||
|         if: ${{ inputs.skip-pypuppeteer == false }} | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           # Playwright via Sockpuppetbrowser fetch  | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks | ||||
|         if: ${{ inputs.skip-pypuppeteer == false }} | ||||
|       - name: Spin up ancillary services | ||||
|         run: | | ||||
|           # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers | ||||
|           docker run --name "changedet" --hostname changedet --rm  -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest | ||||
|  | ||||
|       - name: Pyppeteer and SocketPuppetBrowser - Restock detection | ||||
|         if: ${{ inputs.skip-pypuppeteer == false }} | ||||
|         run: |                             | ||||
|           # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet"  -e "FAST_PUPPETEER_CHROME_FETCHER=True"  -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|       - name: Playwright - Specific tests in built container | ||||
|         run: | | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|  | ||||
|       - name: Playwright - Headers and requests | ||||
|         run: | | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .' | ||||
|  | ||||
|       - name: Playwright - Restock detection | ||||
|         run: | | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|  | ||||
|   # Pyppeteer tests | ||||
|   pyppeteer-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     if: ${{ inputs.skip-pypuppeteer == false }} | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Spin up ancillary services | ||||
|         run: | | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest | ||||
|  | ||||
|       - name: Pyppeteer - Specific tests in built container | ||||
|         run: | | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|  | ||||
|       - name: Pyppeteer - Headers and requests checks | ||||
|         run: | | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
|       - name: Pyppeteer - Restock detection | ||||
|         run: | | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|  | ||||
|   # Selenium tests | ||||
|   selenium-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Spin up ancillary services | ||||
|         run: | | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4 | ||||
|           sleep 3 | ||||
|  | ||||
|       - name: Specific tests for headers and requests checks with Selenium | ||||
|         run: | | ||||
|  | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
| # SELENIUM | ||||
|       - name: Specific tests in built container for Selenium | ||||
|         run: | | ||||
|           # Selenium fetch | ||||
|           docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' | ||||
|           docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' | ||||
|  | ||||
|       - name: Specific tests in built container for headers and requests checks with Selenium | ||||
|  | ||||
|   # SMTP tests | ||||
|   smtp-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Spin up SMTP test server | ||||
|         run: | | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py' | ||||
|  | ||||
| # OTHER STUFF | ||||
|       - name: Test SMTP notification mime types | ||||
|         run: | | ||||
|           # SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above | ||||
|           # "mailserver" hostname defined above | ||||
|           docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py' | ||||
|  | ||||
|       # @todo Add a test via playwright/puppeteer | ||||
|       # squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py | ||||
|       - name: Test proxy squid style interaction | ||||
|   # Proxy tests | ||||
|   proxy-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Spin up services | ||||
|         run: | | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4 | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest | ||||
|  | ||||
|       - name: Test proxy Squid style interaction | ||||
|         run: | | ||||
|           cd changedetectionio | ||||
|           ./run_proxy_tests.sh | ||||
|           docker ps | ||||
|           cd .. | ||||
|  | ||||
|       - name: Test proxy SOCKS5 style interaction | ||||
| @@ -155,28 +311,65 @@ jobs: | ||||
|           ./run_socks_proxy_tests.sh | ||||
|           cd .. | ||||
|  | ||||
|   # Custom browser URL tests | ||||
|   custom-browser-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Spin up ancillary services | ||||
|         run: | | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest | ||||
|  | ||||
|       - name: Test custom browser URL | ||||
|         run: | | ||||
|           cd changedetectionio | ||||
|           ./run_custom_browser_url_tests.sh | ||||
|           cd .. | ||||
|  | ||||
|       - name: Test changedetection.io container starts+runs basically without error | ||||
|   # Container startup tests | ||||
|   container-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker run --name test-changedetectionio -p 5556:5000  -d test-changedetectionio | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Test container starts+runs basically without error | ||||
|         run: | | ||||
|           docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it | ||||
|           curl --retry-connrefused --retry 6  -s http://localhost:5556 |grep -q checkbox-uuid | ||||
|            | ||||
|           # and IPv6 | ||||
|           curl --retry-connrefused --retry 6  -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|  | ||||
|           # Check whether TRACE log is enabled. | ||||
|           # Also, check whether TRACE came from STDOUT | ||||
|           curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid | ||||
|           curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1 | ||||
|           # Check whether DEBUG is came from STDOUT | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 | ||||
|  | ||||
|           docker kill test-changedetectionio | ||||
|  | ||||
|       - name: Test HTTPS SSL mode | ||||
| @@ -184,78 +377,66 @@ jobs: | ||||
|           openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost" | ||||
|           docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it | ||||
|           # -k because its self-signed | ||||
|           curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid | ||||
|        | ||||
|           docker kill test-changedetectionio-ssl | ||||
|  | ||||
|       - name: Test IPv6 Mode | ||||
|         run: | | ||||
|           # IPv6 - :: bind to all interfaces inside container (like 0.0.0.0), ::1 would be localhost only | ||||
|           docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it on localhost | ||||
|           curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid | ||||
|           docker kill test-changedetectionio-ipv6 | ||||
|  | ||||
|       - name: Test changedetection.io SIGTERM and SIGINT signal shutdown | ||||
|   # Signal tests | ||||
|   signal-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Test SIGTERM and SIGINT signal shutdown | ||||
|         run: | | ||||
|            | ||||
|           echo SIGINT Shutdown request test | ||||
|           docker run --name sig-test -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           echo ">>> Sending SIGINT to sig-test container" | ||||
|           docker kill --signal=SIGINT sig-test | ||||
|           sleep 3 | ||||
|           # invert the check (it should be not 0/not running) | ||||
|           docker ps | ||||
|           # check signal catch(STDERR) log. Because of | ||||
|           # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level) | ||||
|           docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1 | ||||
|           test -z "`docker ps|grep sig-test`" | ||||
|           if [ $? -ne 0 ] | ||||
|           then | ||||
|           if [ $? -ne 0 ]; then | ||||
|             echo "Looks like container was running when it shouldnt be" | ||||
|             docker ps | ||||
|             exit 1 | ||||
|           fi | ||||
|            | ||||
|           # @todo - scan the container log to see the right "graceful shutdown" text exists  | ||||
|           docker rm sig-test | ||||
|            | ||||
|  | ||||
|           echo SIGTERM Shutdown request test | ||||
|           docker run --name sig-test -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           echo ">>> Sending SIGTERM to sig-test container" | ||||
|           docker kill --signal=SIGTERM sig-test | ||||
|           sleep 3 | ||||
|           # invert the check (it should be not 0/not running) | ||||
|           docker ps | ||||
|           # check signal catch(STDERR) log. Because of | ||||
|           # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level) | ||||
|           docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1 | ||||
|           test -z "`docker ps|grep sig-test`" | ||||
|           if [ $? -ne 0 ] | ||||
|           then | ||||
|           if [ $? -ne 0 ]; then | ||||
|             echo "Looks like container was running when it shouldnt be" | ||||
|             docker ps | ||||
|             exit 1 | ||||
|           fi | ||||
|            | ||||
|           # @todo - scan the container log to see the right "graceful shutdown" text exists            | ||||
|           docker rm sig-test | ||||
|  | ||||
|       - name: Dump container log | ||||
|         if: always() | ||||
|         run: | | ||||
|           mkdir output-logs | ||||
|           docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt | ||||
|           docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt | ||||
|  | ||||
|       - name: Store everything including test-datastore | ||||
|         if: always() | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }} | ||||
|           path: . | ||||
|   | ||||
							
								
								
									
										51
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										51
									
								
								Dockerfile
									
									
									
									
									
								
							| @@ -5,7 +5,6 @@ ARG PYTHON_VERSION=3.11 | ||||
| FROM python:${PYTHON_VERSION}-slim-bookworm AS builder | ||||
|  | ||||
| # See `cryptography` pin comment in requirements.txt | ||||
| ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 | ||||
|  | ||||
| RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     g++ \ | ||||
| @@ -16,6 +15,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     libssl-dev \ | ||||
|     libxslt-dev \ | ||||
|     make \ | ||||
|     patch \ | ||||
|     pkg-config \ | ||||
|     zlib1g-dev | ||||
|  | ||||
| RUN mkdir /install | ||||
| @@ -25,23 +26,35 @@ COPY requirements.txt /requirements.txt | ||||
|  | ||||
| # Use cache mounts and multiple wheel sources for faster ARM builds | ||||
| ENV PIP_CACHE_DIR=/tmp/pip-cache | ||||
| RUN --mount=type=cache,target=/tmp/pip-cache \ | ||||
|     pip install \ | ||||
|     --extra-index-url https://www.piwheels.org/simple \ | ||||
|     --extra-index-url https://pypi.anaconda.org/ARM-software/simple \ | ||||
|     --cache-dir=/tmp/pip-cache \ | ||||
|     --target=/dependencies \ | ||||
|     -r /requirements.txt | ||||
| # Help Rust find OpenSSL for cryptography package compilation on ARM | ||||
| ENV PKG_CONFIG_PATH="/usr/lib/pkgconfig:/usr/lib/arm-linux-gnueabihf/pkgconfig:/usr/lib/aarch64-linux-gnu/pkgconfig" | ||||
| ENV PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1 | ||||
| ENV OPENSSL_DIR="/usr" | ||||
| ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf" | ||||
| ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl" | ||||
| # Additional environment variables for cryptography Rust build | ||||
| ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1 | ||||
| RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \ | ||||
|   pip install \ | ||||
|   --prefer-binary \ | ||||
|   --extra-index-url https://www.piwheels.org/simple \ | ||||
|   --extra-index-url https://pypi.anaconda.org/ARM-software/simple \ | ||||
|   --cache-dir=/tmp/pip-cache \ | ||||
|   --target=/dependencies \ | ||||
|   -r /requirements.txt | ||||
|  | ||||
|  | ||||
| # Playwright is an alternative to Selenium | ||||
| # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported) | ||||
| RUN --mount=type=cache,target=/tmp/pip-cache \ | ||||
|     pip install \ | ||||
|     --cache-dir=/tmp/pip-cache \ | ||||
|     --target=/dependencies \ | ||||
|     playwright~=1.48.0 \ | ||||
|     || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." | ||||
| RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \ | ||||
|   pip install \ | ||||
|   --prefer-binary \ | ||||
|   --cache-dir=/tmp/pip-cache \ | ||||
|   --target=/dependencies \ | ||||
|   playwright~=1.48.0 \ | ||||
|   || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." | ||||
|  | ||||
|  | ||||
| # Final image stage | ||||
| FROM python:${PYTHON_VERSION}-slim-bookworm | ||||
| @@ -53,6 +66,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     locales \ | ||||
|     # For pdftohtml | ||||
|     poppler-utils \ | ||||
|     # favicon type detection and other uses | ||||
|     file \ | ||||
|     zlib1g \ | ||||
|     && apt-get clean && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
| @@ -73,6 +88,11 @@ EXPOSE 5000 | ||||
|  | ||||
| # The actual flask app module | ||||
| COPY changedetectionio /app/changedetectionio | ||||
|  | ||||
| # Also for OpenAPI validation wrapper - needs the YML | ||||
| RUN [ ! -d "/app/docs" ] && mkdir /app/docs | ||||
| COPY docs/api-spec.yaml /app/docs/api-spec.yaml | ||||
|  | ||||
| # Starting wrapper | ||||
| COPY changedetection.py /app/changedetection.py | ||||
|  | ||||
| @@ -81,6 +101,9 @@ COPY changedetection.py /app/changedetection.py | ||||
| ARG LOGGER_LEVEL='' | ||||
| ENV LOGGER_LEVEL="$LOGGER_LEVEL" | ||||
|  | ||||
| # Default | ||||
| ENV LC_ALL=en_US.UTF-8 | ||||
|  | ||||
| WORKDIR /app | ||||
| CMD ["python", "./changedetection.py", "-d", "/datastore"] | ||||
|  | ||||
|   | ||||
							
								
								
									
										2
									
								
								LICENSE
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								LICENSE
									
									
									
									
									
								
							| @@ -186,7 +186,7 @@ | ||||
|       same "printed page" as the copyright notice for easier | ||||
|       identification within third-party archives. | ||||
|  | ||||
|    Copyright [yyyy] [name of copyright owner] | ||||
|    Copyright 2025 Web Technologies s.r.o. | ||||
|  | ||||
|    Licensed under the Apache License, Version 2.0 (the "License"); | ||||
|    you may not use this file except in compliance with the License. | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| recursive-include changedetectionio/api * | ||||
| include docs/api-spec.yaml | ||||
| recursive-include changedetectionio/blueprint * | ||||
| recursive-include changedetectionio/content_fetchers * | ||||
| recursive-include changedetectionio/conditions * | ||||
| recursive-include changedetectionio/content_fetchers * | ||||
| recursive-include changedetectionio/jinja2_custom * | ||||
| recursive-include changedetectionio/model * | ||||
| recursive-include changedetectionio/notification * | ||||
| recursive-include changedetectionio/processors * | ||||
| @@ -9,6 +11,7 @@ recursive-include changedetectionio/realtime * | ||||
| recursive-include changedetectionio/static * | ||||
| recursive-include changedetectionio/templates * | ||||
| recursive-include changedetectionio/tests * | ||||
| recursive-include changedetectionio/widgets * | ||||
| prune changedetectionio/static/package-lock.json | ||||
| prune changedetectionio/static/styles/node_modules | ||||
| prune changedetectionio/static/styles/package-lock.json | ||||
|   | ||||
| @@ -1,11 +1,21 @@ | ||||
| ## Web Site Change Detection, Monitoring and Notification. | ||||
| # Monitor website changes | ||||
|  | ||||
| Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more | ||||
| Detect WebPage Changes Automatically — Monitor Web Page Changes in Real Time | ||||
|  | ||||
| Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more. | ||||
|  | ||||
| Detect web page content changes and get instant alerts. | ||||
|  | ||||
|  | ||||
| [Changedetection.io is the best tool to monitor web-pages for changes](https://changedetection.io) Track website content changes and receive notifications via Discord, Email, Slack, Telegram and 90+ more | ||||
|  | ||||
| Ideal for monitoring price changes, content edits, conditional changes and more. | ||||
|  | ||||
| [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes"  title="Self-hosted web page change monitoring, list of websites with changes"  />](https://changedetection.io) | ||||
|  | ||||
|  | ||||
| [**Don't have time? Let us host it for you! try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)  | ||||
| [**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io)  | ||||
|  | ||||
|  | ||||
|  | ||||
| ### Target specific parts of the webpage using the Visual Selector tool. | ||||
|   | ||||
							
								
								
									
										20
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,11 +1,13 @@ | ||||
| ## Web Site Change Detection, Restock monitoring and notifications. | ||||
| # Detect Website Changes Automatically — Monitor Web Page Changes in Real Time | ||||
|  | ||||
| **_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._** | ||||
| Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more. | ||||
|  | ||||
| _Live your data-life pro-actively._  | ||||
| **Detect web page content changes and get instant alerts.**   | ||||
|  | ||||
| Ideal for monitoring price changes, content edits, conditional changes and more. | ||||
|  | ||||
|  | ||||
| [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web site page change monitoring"  title="Self-hosted web site page change monitoring"  />](https://changedetection.io?src=github) | ||||
| [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Web site page change monitoring"  title="Web site page change monitoring"  />](https://changedetection.io?src=github) | ||||
|  | ||||
| [![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md) | ||||
|  | ||||
| @@ -13,6 +15,7 @@ _Live your data-life pro-actively._ | ||||
|  | ||||
| [**Get started with website page change monitoring straight away. Don't have time? Try our $8.99/month subscription, use our proxies and support!**](https://changedetection.io) , _half the price of other website change monitoring services!_ | ||||
|  | ||||
|  | ||||
| - Chrome browser included. | ||||
| - Nothing to install, access via browser login after signup. | ||||
| - Super fast, no registration needed setup. | ||||
| @@ -99,9 +102,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W | ||||
| - Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration) | ||||
| - Send a screenshot with the notification when a change is detected in the web page | ||||
|  | ||||
| We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link. | ||||
|  | ||||
| [Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residental, ISP, Rotating and many other proxy types to suit your project.  | ||||
| We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $150 using our signup link. | ||||
|  | ||||
| Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/ | ||||
|  | ||||
| @@ -279,7 +280,10 @@ Excel import is recommended - that way you can better organise tags/groups of we | ||||
|  | ||||
| ## API Support | ||||
|  | ||||
| Supports managing the website watch list [via our API](https://changedetection.io/docs/api_v1/index.html) | ||||
| Full REST API for programmatic management of watches, tags, notifications and more.  | ||||
|  | ||||
| - **[Interactive API Documentation](https://changedetection.io/docs/api_v1/index.html)** - Complete API reference with live testing | ||||
| - **[OpenAPI Specification](docs/api-spec.yaml)** - Generate SDKs for any programming language | ||||
|  | ||||
| ## Support us | ||||
|  | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| # Read more https://github.com/dgtlmoon/changedetection.io/wiki | ||||
|  | ||||
| __version__ = '0.50.4' | ||||
| __version__ = '0.50.35' | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from json.decoder import JSONDecodeError | ||||
| @@ -35,13 +35,22 @@ def sigshutdown_handler(_signo, _stack_frame): | ||||
|     app.config.exit.set() | ||||
|     datastore.stop_thread = True | ||||
|      | ||||
|     # Shutdown workers immediately | ||||
|     # Shutdown workers and queues immediately | ||||
|     try: | ||||
|         from changedetectionio import worker_handler | ||||
|         worker_handler.shutdown_workers() | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error shutting down workers: {str(e)}") | ||||
|      | ||||
|     # Close janus queues properly | ||||
|     try: | ||||
|         from changedetectionio.flask_app import update_q, notification_q | ||||
|         update_q.close() | ||||
|         notification_q.close() | ||||
|         logger.debug("Janus queues closed successfully") | ||||
|     except Exception as e: | ||||
|         logger.critical(f"CRITICAL: Failed to close janus queues: {e}") | ||||
|      | ||||
|     # Shutdown socketio server fast | ||||
|     from changedetectionio.flask_app import socketio_server | ||||
|     if socketio_server and hasattr(socketio_server, 'shutdown'): | ||||
|   | ||||
| @@ -1,9 +1,22 @@ | ||||
| import os | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request | ||||
| import validators | ||||
| from . import auth | ||||
| from functools import wraps | ||||
| from . import auth, validate_openapi_request | ||||
| from ..validate_url import is_safe_valid_url | ||||
|  | ||||
|  | ||||
| def default_content_type(content_type='text/plain'): | ||||
|     """Decorator to set a default Content-Type header if none is provided.""" | ||||
|     def decorator(f): | ||||
|         @wraps(f) | ||||
|         def wrapper(*args, **kwargs): | ||||
|             if not request.content_type: | ||||
|                 # Set default content type in the request environment | ||||
|                 request.environ['CONTENT_TYPE'] = content_type | ||||
|             return f(*args, **kwargs) | ||||
|         return wrapper | ||||
|     return decorator | ||||
|  | ||||
|  | ||||
| class Import(Resource): | ||||
| @@ -12,17 +25,10 @@ class Import(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @default_content_type('text/plain') #3547 #3542 | ||||
|     @validate_openapi_request('importWatches') | ||||
|     def post(self): | ||||
|         """ | ||||
|         @api {post} /api/v1/import Import a list of watched URLs | ||||
|         @apiDescription Accepts a line-feed separated list of URLs to import, additionally with ?tag_uuids=(tag  id), ?tag=(name), ?proxy={key}, ?dedupe=true (default true) one URL per line. | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/import --data-binary @list-of-sites.txt -H"x-api-key:8a111a21bc2f8f1dd9b9353bbd46049a" | ||||
|         @apiName Import | ||||
|         @apiGroup Watch | ||||
|         @apiSuccess (200) {List} OK List of watch UUIDs added | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
|         """Import a list of watched URLs.""" | ||||
|  | ||||
|         extras = {} | ||||
|  | ||||
| @@ -43,14 +49,13 @@ class Import(Resource): | ||||
|  | ||||
|         urls = request.get_data().decode('utf8').splitlines() | ||||
|         added = [] | ||||
|         allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
|         for url in urls: | ||||
|             url = url.strip() | ||||
|             if not len(url): | ||||
|                 continue | ||||
|  | ||||
|             # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|             if not validators.url(url, simple_host=allow_simplehost): | ||||
|             if not is_safe_valid_url(url): | ||||
|                 return f"Invalid or unsupported URL - {url}", 400 | ||||
|  | ||||
|             if dedupe and self.datastore.url_exists(url): | ||||
|   | ||||
| @@ -1,9 +1,7 @@ | ||||
| from flask_expects_json import expects_json | ||||
| from flask_restful import Resource | ||||
| from . import auth | ||||
| from flask_restful import abort, Resource | ||||
| from flask_restful import Resource, abort | ||||
| from flask import request | ||||
| from . import auth | ||||
| from . import auth, validate_openapi_request | ||||
| from . import schema_create_notification_urls, schema_delete_notification_urls | ||||
|  | ||||
| class Notifications(Resource): | ||||
| @@ -12,19 +10,9 @@ class Notifications(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getNotifications') | ||||
|     def get(self): | ||||
|         """ | ||||
|         @api {get} /api/v1/notifications Return Notification URL List | ||||
|         @apiDescription Return the Notification URL List from the configuration | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             HTTP/1.0 200 | ||||
|             { | ||||
|                 'notification_urls': ["notification-urls-list"] | ||||
|             } | ||||
|         @apiName Get | ||||
|         @apiGroup Notifications | ||||
|         """ | ||||
|         """Return Notification URL List.""" | ||||
|  | ||||
|         notification_urls = self.datastore.data.get('settings', {}).get('application', {}).get('notification_urls', [])         | ||||
|  | ||||
| @@ -33,18 +21,10 @@ class Notifications(Resource): | ||||
|                }, 200 | ||||
|      | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('addNotifications') | ||||
|     @expects_json(schema_create_notification_urls) | ||||
|     def post(self): | ||||
|         """ | ||||
|         @api {post} /api/v1/notifications Create Notification URLs | ||||
|         @apiDescription Add one or more notification URLs from the configuration | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/notifications/batch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}' | ||||
|         @apiName CreateBatch | ||||
|         @apiGroup Notifications | ||||
|         @apiSuccess (201) {Object[]} notification_urls List of added notification URLs | ||||
|         @apiError (400) {String} Invalid input | ||||
|         """ | ||||
|         """Create Notification URLs.""" | ||||
|  | ||||
|         json_data = request.get_json() | ||||
|         notification_urls = json_data.get("notification_urls", []) | ||||
| @@ -69,18 +49,10 @@ class Notifications(Resource): | ||||
|         return {'notification_urls': added_urls}, 201 | ||||
|      | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('replaceNotifications') | ||||
|     @expects_json(schema_create_notification_urls) | ||||
|     def put(self): | ||||
|         """ | ||||
|         @api {put} /api/v1/notifications Replace Notification URLs | ||||
|         @apiDescription Replace all notification URLs with the provided list (can be empty) | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl -X PUT http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}' | ||||
|         @apiName Replace | ||||
|         @apiGroup Notifications | ||||
|         @apiSuccess (200) {Object[]} notification_urls List of current notification URLs | ||||
|         @apiError (400) {String} Invalid input | ||||
|         """ | ||||
|         """Replace Notification URLs.""" | ||||
|         json_data = request.get_json() | ||||
|         notification_urls = json_data.get("notification_urls", []) | ||||
|  | ||||
| @@ -100,19 +72,10 @@ class Notifications(Resource): | ||||
|         return {'notification_urls': clean_urls}, 200 | ||||
|          | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('deleteNotifications') | ||||
|     @expects_json(schema_delete_notification_urls) | ||||
|     def delete(self): | ||||
|         """ | ||||
|         @api {delete} /api/v1/notifications Delete Notification URLs | ||||
|         @apiDescription Deletes one or more notification URLs from the configuration | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/notifications -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}' | ||||
|         @apiParam {String[]} notification_urls The notification URLs to delete. | ||||
|         @apiName Delete | ||||
|         @apiGroup Notifications | ||||
|         @apiSuccess (204) {String} OK Deleted | ||||
|         @apiError (400) {String} No matching notification URLs found. | ||||
|         """ | ||||
|         """Delete Notification URLs.""" | ||||
|  | ||||
|         json_data = request.get_json() | ||||
|         urls_to_delete = json_data.get("notification_urls", []) | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| from flask_restful import Resource, abort | ||||
| from flask import request | ||||
| from . import auth | ||||
| from . import auth, validate_openapi_request | ||||
|  | ||||
| class Search(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
| @@ -8,21 +8,9 @@ class Search(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('searchWatches') | ||||
|     def get(self): | ||||
|         """ | ||||
|         @api {get} /api/v1/search Search for watches | ||||
|         @apiDescription Search watches by URL or title text | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl "http://localhost:5000/api/v1/search?q=https://example.com/page1" -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             curl "http://localhost:5000/api/v1/search?q=https://example.com/page1?tag=Favourites" -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             curl "http://localhost:5000/api/v1/search?q=https://example.com?partial=true" -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|         @apiName Search | ||||
|         @apiGroup Watch Management | ||||
|         @apiQuery {String} q Search query to match against watch URLs and titles | ||||
|         @apiQuery {String} [tag] Optional name of tag to limit results (name not UUID) | ||||
|         @apiQuery {String} [partial] Allow partial matching of URL query | ||||
|         @apiSuccess (200) {Object} JSON Object containing matched watches | ||||
|         """ | ||||
|         """Search for watches by URL or title text.""" | ||||
|         query = request.args.get('q', '').strip() | ||||
|         tag_limit = request.args.get('tag', '').strip() | ||||
|         from changedetectionio.strtobool import strtobool | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| from flask_restful import Resource | ||||
| from . import auth | ||||
| from . import auth, validate_openapi_request | ||||
|  | ||||
|  | ||||
| class SystemInfo(Resource): | ||||
| @@ -9,23 +9,9 @@ class SystemInfo(Resource): | ||||
|         self.update_q = kwargs['update_q'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getSystemInfo') | ||||
|     def get(self): | ||||
|         """ | ||||
|         @api {get} /api/v1/systeminfo Return system info | ||||
|         @apiDescription Return some info about the current system state | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             HTTP/1.0 200 | ||||
|             { | ||||
|                 'queue_size': 10 , | ||||
|                 'overdue_watches': ["watch-uuid-list"], | ||||
|                 'uptime': 38344.55, | ||||
|                 'watch_count': 800, | ||||
|                 'version': "0.40.1" | ||||
|             } | ||||
|         @apiName Get Info | ||||
|         @apiGroup System Information | ||||
|         """ | ||||
|         """Return system info.""" | ||||
|         import time | ||||
|         overdue_watches = [] | ||||
|  | ||||
|   | ||||
| @@ -1,39 +1,46 @@ | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio import worker_handler | ||||
| from flask_expects_json import expects_json | ||||
| from flask_restful import abort, Resource | ||||
|  | ||||
| from flask import request | ||||
| from . import auth | ||||
|  | ||||
| # Import schemas from __init__.py | ||||
| from . import schema_tag, schema_create_tag, schema_update_tag | ||||
| from . import schema_tag, schema_create_tag, schema_update_tag, validate_openapi_request | ||||
|  | ||||
|  | ||||
| class Tag(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
|         # datastore is a black box dependency | ||||
|         self.datastore = kwargs['datastore'] | ||||
|         self.update_q = kwargs['update_q'] | ||||
|  | ||||
|     # Get information about a single tag | ||||
|     # curl http://localhost:5000/api/v1/tag/<string:uuid> | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getTag') | ||||
|     def get(self, uuid): | ||||
|         """ | ||||
|         @api {get} /api/v1/tag/:uuid Single tag - get data or toggle notification muting. | ||||
|         @apiDescription Retrieve tag information and set notification_muted status | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             curl "http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=muted" -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|         @apiName Tag | ||||
|         @apiGroup Tag | ||||
|         @apiParam {uuid} uuid Tag unique ID. | ||||
|         @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state | ||||
|         @apiSuccess (200) {String} OK When muted operation OR full JSON object of the tag | ||||
|         @apiSuccess (200) {JSON} TagJSON JSON Full JSON object of the tag | ||||
|         """ | ||||
|         """Get data for a single tag/group, toggle notification muting, or recheck all.""" | ||||
|         from copy import deepcopy | ||||
|         tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid)) | ||||
|         if not tag: | ||||
|             abort(404, message=f'No tag exists with the UUID of {uuid}') | ||||
|  | ||||
|         if request.args.get('recheck'): | ||||
|             # Recheck all, including muted | ||||
|             # Get most overdue first | ||||
|             i=0 | ||||
|             for k in sorted(self.datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)): | ||||
|                 watch_uuid = k[0] | ||||
|                 watch = k[1] | ||||
|                 if not watch['paused'] and tag['uuid'] not in watch['tags']: | ||||
|                     continue | ||||
|                 worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid})) | ||||
|                 i+=1 | ||||
|  | ||||
|             return f"OK, {i} watches queued", 200 | ||||
|  | ||||
|         if request.args.get('muted', '') == 'muted': | ||||
|             self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True | ||||
|             return "OK", 200 | ||||
| @@ -44,16 +51,9 @@ class Tag(Resource): | ||||
|         return tag | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('deleteTag') | ||||
|     def delete(self, uuid): | ||||
|         """ | ||||
|         @api {delete} /api/v1/tag/:uuid Delete a tag and remove it from all watches | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|         @apiParam {uuid} uuid Tag unique ID. | ||||
|         @apiName DeleteTag | ||||
|         @apiGroup Tag | ||||
|         @apiSuccess (200) {String} OK Was deleted | ||||
|         """ | ||||
|         """Delete a tag/group and remove it from all watches.""" | ||||
|         if not self.datastore.data['settings']['application']['tags'].get(uuid): | ||||
|             abort(400, message='No tag exists with the UUID of {}'.format(uuid)) | ||||
|  | ||||
| @@ -68,21 +68,10 @@ class Tag(Resource): | ||||
|         return 'OK', 204 | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('updateTag') | ||||
|     @expects_json(schema_update_tag) | ||||
|     def put(self, uuid): | ||||
|         """ | ||||
|         @api {put} /api/v1/tag/:uuid Update tag information | ||||
|         @apiExample {curl} Example usage: | ||||
|             Update (PUT) | ||||
|             curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"title": "New Tag Title"}' | ||||
|  | ||||
|         @apiDescription Updates an existing tag using JSON | ||||
|         @apiParam {uuid} uuid Tag unique ID. | ||||
|         @apiName UpdateTag | ||||
|         @apiGroup Tag | ||||
|         @apiSuccess (200) {String} OK Was updated | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
|         """Update tag information.""" | ||||
|         tag = self.datastore.data['settings']['application']['tags'].get(uuid) | ||||
|         if not tag: | ||||
|             abort(404, message='No tag exists with the UUID of {}'.format(uuid)) | ||||
| @@ -94,17 +83,10 @@ class Tag(Resource): | ||||
|  | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('createTag') | ||||
|     # Only cares for {'title': 'xxxx'} | ||||
|     def post(self): | ||||
|         """ | ||||
|         @api {post} /api/v1/watch Create a single tag | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"name": "Work related"}' | ||||
|         @apiName Create | ||||
|         @apiGroup Tag | ||||
|         @apiSuccess (200) {String} OK Was created | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
|         """Create a single tag/group.""" | ||||
|  | ||||
|         json_data = request.get_json() | ||||
|         title = json_data.get("title",'').strip() | ||||
| @@ -122,28 +104,9 @@ class Tags(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('listTags') | ||||
|     def get(self): | ||||
|         """ | ||||
|         @api {get} /api/v1/tags List tags | ||||
|         @apiDescription Return list of available tags | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/tags -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             { | ||||
|                 "cc0cfffa-f449-477b-83ea-0caafd1dc091": { | ||||
|                     "title": "Tech News", | ||||
|                     "notification_muted": false, | ||||
|                     "date_created": 1677103794 | ||||
|                 }, | ||||
|                 "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": { | ||||
|                     "title": "Shopping", | ||||
|                     "notification_muted": true, | ||||
|                     "date_created": 1676662819 | ||||
|                 } | ||||
|             } | ||||
|         @apiName ListTags | ||||
|         @apiGroup Tag Management | ||||
|         @apiSuccess (200) {String} OK JSON dict | ||||
|         """ | ||||
|         """List tags/groups.""" | ||||
|         result = {} | ||||
|         for uuid, tag in self.datastore.data['settings']['application']['tags'].items(): | ||||
|             result[uuid] = { | ||||
|   | ||||
| @@ -1,17 +1,50 @@ | ||||
| import os | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from changedetectionio.validate_url import is_safe_valid_url | ||||
|  | ||||
| from flask_expects_json import expects_json | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio import worker_handler | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request, make_response | ||||
| import validators | ||||
| from flask import request, make_response, send_from_directory | ||||
| from . import auth | ||||
| import copy | ||||
|  | ||||
| # Import schemas from __init__.py | ||||
| from . import schema, schema_create_watch, schema_update_watch | ||||
| from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request | ||||
|  | ||||
|  | ||||
| def validate_time_between_check_required(json_data): | ||||
|     """ | ||||
|     Validate that at least one time interval is specified when not using default settings. | ||||
|     Returns None if valid, or error message string if invalid. | ||||
|     Defaults to using global settings if time_between_check_use_default is not provided. | ||||
|     """ | ||||
|     # Default to using global settings if not specified | ||||
|     use_default = json_data.get('time_between_check_use_default', True) | ||||
|  | ||||
|     # If using default settings, no validation needed | ||||
|     if use_default: | ||||
|         return None | ||||
|  | ||||
|     # If not using defaults, check if time_between_check exists and has at least one non-zero value | ||||
|     time_check = json_data.get('time_between_check') | ||||
|     if not time_check: | ||||
|         # No time_between_check provided and not using defaults - this is an error | ||||
|         return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings." | ||||
|  | ||||
|     # time_between_check exists, check if it has at least one non-zero value | ||||
|     if any([ | ||||
|         (time_check.get('weeks') or 0) > 0, | ||||
|         (time_check.get('days') or 0) > 0, | ||||
|         (time_check.get('hours') or 0) > 0, | ||||
|         (time_check.get('minutes') or 0) > 0, | ||||
|         (time_check.get('seconds') or 0) > 0 | ||||
|     ]): | ||||
|         return None | ||||
|  | ||||
|     # time_between_check exists but all values are 0 or empty - this is an error | ||||
|     return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings." | ||||
|  | ||||
|  | ||||
| class Watch(Resource): | ||||
| @@ -25,23 +58,9 @@ class Watch(Resource): | ||||
|     # @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK" | ||||
|     # ?recheck=true | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getWatch') | ||||
|     def get(self, uuid): | ||||
|         """ | ||||
|         @api {get} /api/v1/watch/:uuid Single watch - get data, recheck, pause, mute. | ||||
|         @apiDescription Retrieve watch information and set muted/paused status | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091  -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             curl "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=unmuted"  -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             curl "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?paused=unpaused"  -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|         @apiName Watch | ||||
|         @apiGroup Watch | ||||
|         @apiParam {uuid} uuid Watch unique ID. | ||||
|         @apiQuery {Boolean} [recheck] Recheck this watch `recheck=1` | ||||
|         @apiQuery {String} [paused] =`paused` or =`unpaused` , Sets the PAUSED state | ||||
|         @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state | ||||
|         @apiSuccess (200) {String} OK When paused/muted/recheck operation OR full JSON object of the watch | ||||
|         @apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch | ||||
|         """ | ||||
|         """Get information about a single watch, recheck, pause, or mute.""" | ||||
|         from copy import deepcopy | ||||
|         watch = deepcopy(self.datastore.data['watching'].get(uuid)) | ||||
|         if not watch: | ||||
| @@ -69,19 +88,14 @@ class Watch(Resource): | ||||
|         # attr .last_changed will check for the last written text snapshot on change | ||||
|         watch['last_changed'] = watch.last_changed | ||||
|         watch['viewed'] = watch.viewed | ||||
|         watch['link'] = watch.link, | ||||
|  | ||||
|         return watch | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('deleteWatch') | ||||
|     def delete(self, uuid): | ||||
|         """ | ||||
|         @api {delete} /api/v1/watch/:uuid Delete a watch and related history | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|         @apiParam {uuid} uuid Watch unique ID. | ||||
|         @apiName Delete | ||||
|         @apiGroup Watch | ||||
|         @apiSuccess (200) {String} OK Was deleted | ||||
|         """ | ||||
|         """Delete a watch and related history.""" | ||||
|         if not self.datastore.data['watching'].get(uuid): | ||||
|             abort(400, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
|  | ||||
| @@ -89,21 +103,10 @@ class Watch(Resource): | ||||
|         return 'OK', 204 | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('updateWatch') | ||||
|     @expects_json(schema_update_watch) | ||||
|     def put(self, uuid): | ||||
|         """ | ||||
|         @api {put} /api/v1/watch/:uuid Update watch information | ||||
|         @apiExample {curl} Example usage: | ||||
|             Update (PUT) | ||||
|             curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "new list"}' | ||||
|  | ||||
|         @apiDescription Updates an existing watch using JSON, accepts the same structure as returned in <a href="#api-Watch-Watch">get single watch information</a> | ||||
|         @apiParam {uuid} uuid Watch unique ID. | ||||
|         @apiName Update a watch | ||||
|         @apiGroup Watch | ||||
|         @apiSuccess (200) {String} OK Was updated | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
|         """Update watch information.""" | ||||
|         watch = self.datastore.data['watching'].get(uuid) | ||||
|         if not watch: | ||||
|             abort(404, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
| @@ -113,6 +116,15 @@ class Watch(Resource): | ||||
|             if not request.json.get('proxy') in plist: | ||||
|                 return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400 | ||||
|  | ||||
|         # Validate time_between_check when not using defaults | ||||
|         validation_error = validate_time_between_check_required(request.json) | ||||
|         if validation_error: | ||||
|             return validation_error, 400 | ||||
|  | ||||
|         # XSS etc protection | ||||
|         if request.json.get('url') and not is_safe_valid_url(request.json.get('url')): | ||||
|             return "Invalid URL", 400 | ||||
|  | ||||
|         watch.update(request.json) | ||||
|  | ||||
|         return "OK", 200 | ||||
| @@ -126,22 +138,9 @@ class WatchHistory(Resource): | ||||
|     # Get a list of available history for a watch by UUID | ||||
|     # curl http://localhost:5000/api/v1/watch/<string:uuid>/history | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getWatchHistory') | ||||
|     def get(self, uuid): | ||||
|         """ | ||||
|         @api {get} /api/v1/watch/<string:uuid>/history Get a list of all historical snapshots available for a watch | ||||
|         @apiDescription Requires `uuid`, returns list | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" | ||||
|             { | ||||
|                 "1676649279": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/cb7e9be8258368262246910e6a2a4c30.txt", | ||||
|                 "1677092785": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/e20db368d6fc633e34f559ff67bb4044.txt", | ||||
|                 "1677103794": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/02efdd37dacdae96554a8cc85dc9c945.txt" | ||||
|             } | ||||
|         @apiName Get list of available stored snapshots for watch | ||||
|         @apiGroup Watch History | ||||
|         @apiSuccess (200) {String} OK | ||||
|         @apiSuccess (404) {String} ERR Not found | ||||
|         """ | ||||
|         """Get a list of all historical snapshots available for a watch.""" | ||||
|         watch = self.datastore.data['watching'].get(uuid) | ||||
|         if not watch: | ||||
|             abort(404, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
| @@ -154,18 +153,9 @@ class WatchSingleHistory(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getWatchSnapshot') | ||||
|     def get(self, uuid, timestamp): | ||||
|         """ | ||||
|         @api {get} /api/v1/watch/<string:uuid>/history/<int:timestamp> Get single snapshot from watch | ||||
|         @apiDescription Requires watch `uuid` and `timestamp`. `timestamp` of "`latest`" for latest available snapshot, or <a href="#api-Watch_History-Get_list_of_available_stored_snapshots_for_watch">use the list returned here</a> | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" | ||||
|         @apiName Get single snapshot content | ||||
|         @apiGroup Watch History | ||||
|         @apiParam {String} [html]       Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp) | ||||
|         @apiSuccess (200) {String} OK | ||||
|         @apiSuccess (404) {String} ERR Not found | ||||
|         """ | ||||
|         """Get single snapshot from watch.""" | ||||
|         watch = self.datastore.data['watching'].get(uuid) | ||||
|         if not watch: | ||||
|             abort(404, message=f"No watch exists with the UUID of {uuid}") | ||||
| @@ -191,6 +181,39 @@ class WatchSingleHistory(Resource): | ||||
|  | ||||
|         return response | ||||
|  | ||||
| class WatchFavicon(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
|         # datastore is a black box dependency | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('getWatchFavicon') | ||||
|     def get(self, uuid): | ||||
|         """Get favicon for a watch.""" | ||||
|         watch = self.datastore.data['watching'].get(uuid) | ||||
|         if not watch: | ||||
|             abort(404, message=f"No watch exists with the UUID of {uuid}") | ||||
|  | ||||
|         favicon_filename = watch.get_favicon_filename() | ||||
|         if favicon_filename: | ||||
|             try: | ||||
|                 import magic | ||||
|                 mime = magic.from_file( | ||||
|                     os.path.join(watch.watch_data_dir, favicon_filename), | ||||
|                     mime=True | ||||
|                 ) | ||||
|             except ImportError: | ||||
|                 # Fallback, no python-magic | ||||
|                 import mimetypes | ||||
|                 mime, encoding = mimetypes.guess_type(favicon_filename) | ||||
|  | ||||
|             response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename)) | ||||
|             response.headers['Content-type'] = mime | ||||
|             response.headers['Cache-Control'] = 'max-age=300, must-revalidate'  # Cache for 5 minutes, then revalidate | ||||
|             return response | ||||
|  | ||||
|         abort(404, message=f'No Favicon available for {uuid}') | ||||
|  | ||||
|  | ||||
| class CreateWatch(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
| @@ -199,25 +222,15 @@ class CreateWatch(Resource): | ||||
|         self.update_q = kwargs['update_q'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('createWatch') | ||||
|     @expects_json(schema_create_watch) | ||||
|     def post(self): | ||||
|         """ | ||||
|         @api {post} /api/v1/watch Create a single watch | ||||
|         @apiDescription Requires atleast `url` set, can accept the same structure as <a href="#api-Watch-Watch">get single watch information</a> to create. | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "nice list"}' | ||||
|         @apiName Create | ||||
|         @apiGroup Watch | ||||
|         @apiSuccess (200) {String} OK Was created | ||||
|         @apiSuccess (500) {String} ERR Some other error | ||||
|         """ | ||||
|         """Create a single watch.""" | ||||
|  | ||||
|         json_data = request.get_json() | ||||
|         url = json_data['url'].strip() | ||||
|  | ||||
|         # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|         allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
|         if not validators.url(url, simple_host=allow_simplehost): | ||||
|         if not is_safe_valid_url(url): | ||||
|             return "Invalid or unsupported URL", 400 | ||||
|  | ||||
|         if json_data.get('proxy'): | ||||
| @@ -225,6 +238,11 @@ class CreateWatch(Resource): | ||||
|             if not json_data.get('proxy') in plist: | ||||
|                 return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400 | ||||
|  | ||||
|         # Validate time_between_check when not using defaults | ||||
|         validation_error = validate_time_between_check_required(json_data) | ||||
|         if validation_error: | ||||
|             return validation_error, 400 | ||||
|  | ||||
|         extras = copy.deepcopy(json_data) | ||||
|  | ||||
|         # Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API) | ||||
| @@ -243,35 +261,9 @@ class CreateWatch(Resource): | ||||
|             return "Invalid or unsupported URL", 400 | ||||
|  | ||||
|     @auth.check_token | ||||
|     @validate_openapi_request('listWatches') | ||||
|     def get(self): | ||||
|         """ | ||||
|         @api {get} /api/v1/watch List watches | ||||
|         @apiDescription Return concise list of available watches and some very basic info | ||||
|         @apiExample {curl} Example usage: | ||||
|             curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" | ||||
|             { | ||||
|                 "6a4b7d5c-fee4-4616-9f43-4ac97046b595": { | ||||
|                     "last_changed": 1677103794, | ||||
|                     "last_checked": 1677103794, | ||||
|                     "last_error": false, | ||||
|                     "title": "", | ||||
|                     "url": "http://www.quotationspage.com/random.php" | ||||
|                 }, | ||||
|                 "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": { | ||||
|                     "last_changed": 0, | ||||
|                     "last_checked": 1676662819, | ||||
|                     "last_error": false, | ||||
|                     "title": "QuickLook", | ||||
|                     "url": "https://github.com/QL-Win/QuickLook/tags" | ||||
|                 } | ||||
|             } | ||||
|  | ||||
|         @apiParam {String} [recheck_all]       Optional Set to =1 to force recheck of all watches | ||||
|         @apiParam {String} [tag]               Optional name of tag to limit results | ||||
|         @apiName ListWatches | ||||
|         @apiGroup Watch Management | ||||
|         @apiSuccess (200) {String} OK JSON dict | ||||
|         """ | ||||
|         """List watches.""" | ||||
|         list = {} | ||||
|  | ||||
|         tag_limit = request.args.get('tag', '').lower() | ||||
| @@ -285,6 +277,8 @@ class CreateWatch(Resource): | ||||
|                 'last_changed': watch.last_changed, | ||||
|                 'last_checked': watch['last_checked'], | ||||
|                 'last_error': watch['last_error'], | ||||
|                 'link': watch.link, | ||||
|                 'page_title': watch['page_title'], | ||||
|                 'title': watch['title'], | ||||
|                 'url': watch['url'], | ||||
|                 'viewed': watch.viewed | ||||
|   | ||||
| @@ -1,4 +1,7 @@ | ||||
| import copy | ||||
| import functools | ||||
| from flask import request, abort | ||||
| from loguru import logger | ||||
| from . import api_schema | ||||
| from ..model import watch_base | ||||
|  | ||||
| @@ -8,6 +11,7 @@ schema = api_schema.build_watch_json_schema(watch_base_config) | ||||
|  | ||||
| schema_create_watch = copy.deepcopy(schema) | ||||
| schema_create_watch['required'] = ['url'] | ||||
| del schema_create_watch['properties']['last_viewed'] | ||||
|  | ||||
| schema_update_watch = copy.deepcopy(schema) | ||||
| schema_update_watch['additionalProperties'] = False | ||||
| @@ -25,9 +29,58 @@ schema_create_notification_urls['required'] = ['notification_urls'] | ||||
| schema_delete_notification_urls = copy.deepcopy(schema_notification_urls) | ||||
| schema_delete_notification_urls['required'] = ['notification_urls'] | ||||
|  | ||||
| @functools.cache | ||||
| def get_openapi_spec(): | ||||
|     """Lazy load OpenAPI spec and dependencies only when validation is needed.""" | ||||
|     import os | ||||
|     import yaml  # Lazy import - only loaded when API validation is actually used | ||||
|     from openapi_core import OpenAPI  # Lazy import - saves ~10.7 MB on startup | ||||
|  | ||||
|     spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml') | ||||
|     if not os.path.exists(spec_path): | ||||
|         # Possibly for pip3 packages | ||||
|         spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml') | ||||
|  | ||||
|     with open(spec_path, 'r') as f: | ||||
|         spec_dict = yaml.safe_load(f) | ||||
|     _openapi_spec = OpenAPI.from_dict(spec_dict) | ||||
|     return _openapi_spec | ||||
|  | ||||
| def validate_openapi_request(operation_id): | ||||
|     """Decorator to validate incoming requests against OpenAPI spec.""" | ||||
|     def decorator(f): | ||||
|         @functools.wraps(f) | ||||
|         def wrapper(*args, **kwargs): | ||||
|             try: | ||||
|                 # Skip OpenAPI validation for GET requests since they don't have request bodies | ||||
|                 if request.method.upper() != 'GET': | ||||
|                     # Lazy import - only loaded when actually validating a request | ||||
|                     from openapi_core.contrib.flask import FlaskOpenAPIRequest | ||||
|  | ||||
|                     spec = get_openapi_spec() | ||||
|                     openapi_request = FlaskOpenAPIRequest(request) | ||||
|                     result = spec.unmarshal_request(openapi_request) | ||||
|                     if result.errors: | ||||
|                         from werkzeug.exceptions import BadRequest | ||||
|                         error_details = [] | ||||
|                         for error in result.errors: | ||||
|                             error_details.append(str(error)) | ||||
|                         raise BadRequest(f"OpenAPI validation failed: {error_details}") | ||||
|             except BadRequest: | ||||
|                 # Re-raise BadRequest exceptions (validation failures) | ||||
|                 raise | ||||
|             except Exception as e: | ||||
|                 # If OpenAPI spec loading fails, log but don't break existing functionality | ||||
|                 logger.critical(f"OpenAPI validation warning for {operation_id}: {e}") | ||||
|                 abort(500) | ||||
|             return f(*args, **kwargs) | ||||
|         return wrapper | ||||
|     return decorator | ||||
|  | ||||
| # Import all API resources | ||||
| from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch | ||||
| from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch, WatchFavicon | ||||
| from .Tags import Tags, Tag | ||||
| from .Import import Import | ||||
| from .SystemInfo import SystemInfo | ||||
| from .Notifications import Notifications | ||||
|  | ||||
|   | ||||
| @@ -78,6 +78,13 @@ def build_watch_json_schema(d): | ||||
|               ]: | ||||
|         schema['properties'][v]['anyOf'].append({'type': 'string', "maxLength": 5000}) | ||||
|  | ||||
|     for v in ['last_viewed']: | ||||
|         schema['properties'][v] = { | ||||
|             "type": "integer", | ||||
|             "description": "Unix timestamp in seconds of the last time the watch was viewed.", | ||||
|             "minimum": 0 | ||||
|         } | ||||
|  | ||||
|     # None or Boolean | ||||
|     schema['properties']['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'}) | ||||
|  | ||||
| @@ -112,6 +119,12 @@ def build_watch_json_schema(d): | ||||
|  | ||||
|     schema['properties']['time_between_check'] = build_time_between_check_json_schema() | ||||
|  | ||||
|     schema['properties']['time_between_check_use_default'] = { | ||||
|         "type": "boolean", | ||||
|         "default": True, | ||||
|         "description": "Whether to use global settings for time between checks - defaults to true if not set" | ||||
|     } | ||||
|  | ||||
|     schema['properties']['browser_steps'] = { | ||||
|         "anyOf": [ | ||||
|             { | ||||
|   | ||||
| @@ -7,6 +7,7 @@ from changedetectionio.flask_app import watch_check_update | ||||
| import asyncio | ||||
| import importlib | ||||
| import os | ||||
| import queue | ||||
| import time | ||||
|  | ||||
| from loguru import logger | ||||
| @@ -37,13 +38,23 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|         watch = None | ||||
|  | ||||
|         try: | ||||
|             # Use asyncio wait_for to make queue.get() cancellable | ||||
|             queued_item_data = await asyncio.wait_for(q.get(), timeout=1.0) | ||||
|             # Use native janus async interface - no threads needed! | ||||
|             queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0) | ||||
|              | ||||
|         except asyncio.TimeoutError: | ||||
|             # No jobs available, continue loop | ||||
|             continue | ||||
|         except Exception as e: | ||||
|             logger.error(f"Worker {worker_id} error getting queue item: {e}") | ||||
|             logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}") | ||||
|              | ||||
|             # Log queue health for debugging | ||||
|             try: | ||||
|                 queue_size = q.qsize() | ||||
|                 is_empty = q.empty() | ||||
|                 logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}") | ||||
|             except Exception as health_e: | ||||
|                 logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}") | ||||
|              | ||||
|             await asyncio.sleep(0.1) | ||||
|             continue | ||||
|          | ||||
| @@ -299,15 +310,6 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                     continue | ||||
|  | ||||
|                 if process_changedetection_results: | ||||
|                     # Extract title if needed | ||||
|                     if datastore.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']: | ||||
|                         if not watch['title'] or not len(watch['title']): | ||||
|                             try: | ||||
|                                 update_obj['title'] = html_tools.extract_element(find='title', html_content=update_handler.fetcher.content) | ||||
|                                 logger.info(f"UUID: {uuid} Extract <title> updated title to '{update_obj['title']}") | ||||
|                             except Exception as e: | ||||
|                                 logger.warning(f"UUID: {uuid} Extract <title> as watch title was enabled, but couldn't find a <title>.") | ||||
|  | ||||
|                     try: | ||||
|                         datastore.update_watch(uuid=uuid, update_obj=update_obj) | ||||
|  | ||||
| @@ -332,6 +334,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                             if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change): | ||||
|                                 watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time)) | ||||
|  | ||||
|                             # Explicitly delete large content variables to free memory IMMEDIATELY after saving | ||||
|                             # These are no longer needed after being saved to history | ||||
|                             del contents | ||||
|  | ||||
|                             # Send notifications on second+ check | ||||
|                             if watch.history_n >= 2: | ||||
|                                 logger.info(f"Change detected in UUID {uuid} - {watch['url']}") | ||||
| @@ -346,6 +352,14 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                 # Always record attempt count | ||||
|                 count = watch.get('check_count', 0) + 1 | ||||
|  | ||||
|                 # Always record page title (used in notifications, and can change even when the content is the same) | ||||
|                 try: | ||||
|                     page_title = html_tools.extract_title(data=update_handler.fetcher.content) | ||||
|                     logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'") | ||||
|                     datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title}) | ||||
|                 except Exception as e: | ||||
|                     logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}") | ||||
|  | ||||
|                 # Record server header | ||||
|                 try: | ||||
|                     server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255] | ||||
| @@ -353,9 +367,21 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                 except Exception as e: | ||||
|                     pass | ||||
|  | ||||
|                 # Store favicon if necessary | ||||
|                 if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'): | ||||
|                     watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'), | ||||
|                                        favicon_base_64=update_handler.fetcher.favicon_blob.get('base64') | ||||
|                                        ) | ||||
|  | ||||
|                 datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3), | ||||
|                                                                'check_count': count}) | ||||
|  | ||||
|                 # NOW clear fetcher content - after all processing is complete | ||||
|                 # This is the last point where we need the fetcher data | ||||
|                 if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher: | ||||
|                     update_handler.fetcher.clear_content() | ||||
|                     logger.debug(f"Cleared fetcher content for UUID {uuid}") | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}") | ||||
|             logger.error(f"Worker {worker_id} traceback:", exc_info=True) | ||||
| @@ -376,7 +402,28 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                         #logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}") | ||||
|                         watch_check_update.send(watch_uuid=watch['uuid']) | ||||
|  | ||||
|                     update_handler = None | ||||
|                     # Explicitly clean up update_handler and all its references | ||||
|                     if update_handler: | ||||
|                         # Clear fetcher content using the proper method | ||||
|                         if hasattr(update_handler, 'fetcher') and update_handler.fetcher: | ||||
|                             update_handler.fetcher.clear_content() | ||||
|  | ||||
|                         # Clear processor references | ||||
|                         if hasattr(update_handler, 'content_processor'): | ||||
|                             update_handler.content_processor = None | ||||
|  | ||||
|                         update_handler = None | ||||
|  | ||||
|                     # Clear local contents variable if it still exists | ||||
|                     if 'contents' in locals(): | ||||
|                         del contents | ||||
|  | ||||
|                     # Note: We don't set watch = None here because: | ||||
|                     # 1. watch is just a local reference to datastore.data['watching'][uuid] | ||||
|                     # 2. Setting it to None doesn't affect the datastore | ||||
|                     # 3. GC can't collect the object anyway (still referenced by datastore) | ||||
|                     # 4. It would just cause confusion | ||||
|  | ||||
|                     logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s") | ||||
|                 except Exception as cleanup_error: | ||||
|                     logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}") | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT | ||||
| from changedetectionio.content_fetchers.base import manage_user_agent | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -33,7 +33,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     def long_task(uuid, preferred_proxy): | ||||
|         import time | ||||
|         from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|         status = {'status': '', 'length': 0, 'text': ''} | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
|  | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from changedetectionio.jinja2_custom import render as jinja_render | ||||
| from changedetectionio.notification.handler import apply_service_tweaks | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from feedgen.feed import FeedGenerator | ||||
| from flask import Blueprint, make_response, request, url_for, redirect | ||||
| @@ -108,18 +109,25 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|  | ||||
|                 fe.link(link=diff_link) | ||||
|  | ||||
|                 # @todo watch should be a getter - watch.get('title') (internally if URL else..) | ||||
|                 # Same logic as watch-overview.html | ||||
|                 if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'): | ||||
|                     watch_label = watch.label | ||||
|                 else: | ||||
|                     watch_label = watch.get('url') | ||||
|  | ||||
|                 watch_title = watch.get('title') if watch.get('title') else watch.get('url') | ||||
|                 fe.title(title=watch_title) | ||||
|                 fe.title(title=watch_label) | ||||
|                 try: | ||||
|  | ||||
|                     html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]), | ||||
|                                                  newest_version_file_contents=watch.get_history_snapshot(dates[-1]), | ||||
|                                                  include_equal=False, | ||||
|                                                  line_feed_sep="<br>", | ||||
|                                                  html_colour=html_colour_enable | ||||
|                                                  line_feed_sep="<br>" | ||||
|                                                  ) | ||||
|  | ||||
|  | ||||
|                     requested_output_format = 'htmlcolor' if html_colour_enable else 'html' | ||||
|                     html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format) | ||||
|  | ||||
|                 except FileNotFoundError as e: | ||||
|                     html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found." | ||||
|  | ||||
| @@ -127,7 +135,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                 # @todo User could decide if <link> goes to the diff page, or to the watch link | ||||
|                 rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n" | ||||
|  | ||||
|                 content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link) | ||||
|                 content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link) | ||||
|  | ||||
|                 # Out of range chars could also break feedgen | ||||
|                 if scan_invalid_chars_in_rss(content): | ||||
|   | ||||
| @@ -119,7 +119,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                                 hide_remove_pass=os.getenv("SALTED_PASS", False), | ||||
|                                 min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)), | ||||
|                                 settings_application=datastore.data['settings']['application'], | ||||
|                                 timezone_default_config=datastore.data['settings']['application'].get('timezone'), | ||||
|                                 timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'), | ||||
|                                 utc_time=utc_time, | ||||
|                                 ) | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| {% extends 'base.html' %} | ||||
|  | ||||
| {% block content %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script> | ||||
|     const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}"; | ||||
| @@ -72,33 +72,23 @@ | ||||
|                         <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page) | ||||
|                         </span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.application.form.pager_size) }} | ||||
|                         <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.application.form.rss_content_format) }} | ||||
|                         <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.extract_title_as_title) }} | ||||
|                         <span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }} | ||||
|                         <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span> | ||||
|                     </div> | ||||
|                 {% if form.requests.proxy %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                         {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                         Choose a default proxy for all watches | ||||
|                         </span> | ||||
|                     <div class="grey-form-border"> | ||||
|                         <div class="pure-control-group"> | ||||
|                             {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }} | ||||
|                         </div> | ||||
|                         <div class="pure-control-group"> | ||||
|                             {{ render_field(form.application.form.rss_content_format) }} | ||||
|                             <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span> | ||||
|                         </div> | ||||
|                         <div class="pure-control-group"> | ||||
|                             {{ render_checkbox_field(form.application.form.rss_reader_mode) }} | ||||
|                             <span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span> | ||||
|                         </div> | ||||
|                     </div> | ||||
|                 {% endif %} | ||||
|                 </fieldset> | ||||
|             </div> | ||||
|  | ||||
| @@ -141,6 +131,10 @@ | ||||
|                     <span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br> | ||||
|                     Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_field(form.requests.form.timeout) }} | ||||
|                     <span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group inline-radio"> | ||||
|                     {{ render_field(form.requests.form.default_ua) }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
| @@ -199,11 +193,17 @@ nav | ||||
|                         </ul> | ||||
|                      </span> | ||||
|                     </fieldset> | ||||
|                     <fieldset class="pure-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.strip_ignored_lines) }} | ||||
|                         <span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br> | ||||
|                         <i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc. | ||||
|                         </span> | ||||
|                     </fieldset> | ||||
|            </div> | ||||
|  | ||||
|             <div class="tab-pane-inner" id="api"> | ||||
|                 <h4>API Access</h4> | ||||
|                 <p>Drive your changedetection.io via API, More about <a href="https://github.com/dgtlmoon/changedetection.io/wiki/API-Reference">API access here</a></p> | ||||
|                 <p>Drive your changedetection.io via API, More about <a href="https://changedetection.io/docs/api_v1/index.html">API access and examples here</a>.</p> | ||||
|  | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.api_access_token_enabled) }} | ||||
| @@ -238,11 +238,9 @@ nav | ||||
|                     <p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p> | ||||
|                     <p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p> | ||||
|                     <p> | ||||
|                        {{ render_field(form.application.form.timezone) }} | ||||
|                        {{ render_field(form.application.form.scheduler_timezone_default) }} | ||||
|                         <datalist id="timezones" style="display: none;"> | ||||
|                             {% for tz_name in available_timezones %} | ||||
|                                 <option value="{{ tz_name }}">{{ tz_name }}</option> | ||||
|                             {% endfor %} | ||||
|                             {%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%} | ||||
|                         </datalist> | ||||
|                     </p> | ||||
|                 </div> | ||||
| @@ -256,6 +254,18 @@ nav | ||||
|                     {{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }} | ||||
|                     <span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }} | ||||
|                     <span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }} | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_field(form.application.form.pager_size) }} | ||||
|                     <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span> | ||||
|                 </div> | ||||
|  | ||||
|             </div> | ||||
|             <div class="tab-pane-inner" id="proxies"> | ||||
|                 <div id="recommended-proxy"> | ||||
| @@ -304,23 +314,33 @@ nav | ||||
|                <p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites. | ||||
|  | ||||
|                 <div class="pure-control-group" id="extra-proxies-setting"> | ||||
|                 {{ render_field(form.requests.form.extra_proxies) }} | ||||
|                 {{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }} | ||||
|                 <span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br> | ||||
|                 <span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span> | ||||
|                 {% if form.requests.proxy %} | ||||
|                 <div> | ||||
|                 <br> | ||||
|                     <div class="inline-radio"> | ||||
|                         {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }} | ||||
|                         <span class="pure-form-message-inline">Choose a default proxy for all watches</span> | ||||
|                     </div> | ||||
|                 </div> | ||||
|                 {% endif %} | ||||
|                 </div> | ||||
|                 <div class="pure-control-group" id="extra-browsers-setting"> | ||||
|                     <p> | ||||
|                     <span class="pure-form-message-inline"><i>Extra Browsers</i> can be attached to further defeat CAPTCHA's on websites that are particularly hard to scrape.</span><br> | ||||
|                     <span class="pure-form-message-inline">Simply paste the connection address into the box, <a href="https://changedetection.io/tutorial/using-bright-datas-scraping-browser-pass-captchas-and-other-protection-when-monitoring">More instructions and examples here</a> </span> | ||||
|                     </p> | ||||
|                     {{ render_field(form.requests.form.extra_browsers) }} | ||||
|                     {{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }} | ||||
|                 </div> | ||||
|              | ||||
|             </div> | ||||
|             <div id="actions"> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_button(form.save_button) }} | ||||
|                     <a href="{{url_for('watchlist.index')}}" class="pure-button button-small button-cancel">Back</a> | ||||
|                     <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a> | ||||
|                     <a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">Back</a> | ||||
|                     <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">Clear Snapshot History</a> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </form> | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| {% extends 'base.html' %} | ||||
| {% block content %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_ternary_field %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script> | ||||
|     const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="group-settings")}}"; | ||||
| @@ -64,7 +64,7 @@ | ||||
|             <div class="tab-pane-inner" id="notifications"> | ||||
|                 <fieldset> | ||||
|                     <div  class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_muted) }} | ||||
|                       {{ render_ternary_field(form.notification_muted, BooleanField=True) }} | ||||
|                     </div> | ||||
|                     {% if 1 %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|   | ||||
| @@ -76,14 +76,14 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat | ||||
|  | ||||
|     elif (op == 'notification-default'): | ||||
|         from changedetectionio.notification import ( | ||||
|             default_notification_format_for_watch | ||||
|             USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|         ) | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.data['watching'][uuid]['notification_title'] = None | ||||
|                 datastore.data['watching'][uuid]['notification_body'] = None | ||||
|                 datastore.data['watching'][uuid]['notification_urls'] = [] | ||||
|                 datastore.data['watching'][uuid]['notification_format'] = default_notification_format_for_watch | ||||
|                 datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches set to use default notification settings") | ||||
|  | ||||
|   | ||||
| @@ -187,7 +187,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|  | ||||
|             tz_name = time_schedule_limit.get('timezone') | ||||
|             if not tz_name: | ||||
|                 tz_name = datastore.data['settings']['application'].get('timezone', 'UTC') | ||||
|                 tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip()) | ||||
|  | ||||
|             if time_schedule_limit and time_schedule_limit.get('enabled'): | ||||
|                 try: | ||||
| @@ -242,6 +242,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                 'available_timezones': sorted(available_timezones()), | ||||
|                 'browser_steps_config': browser_step_ui_config, | ||||
|                 'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), | ||||
|                 'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '', | ||||
|                 'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(), | ||||
|                 'extra_processor_config': form.extra_tab_content(), | ||||
|                 'extra_title': f" - Edit - {watch.label}", | ||||
| @@ -256,7 +257,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'), | ||||
|                 'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch), | ||||
|                 'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid), | ||||
|                 'timezone_default_config': datastore.data['settings']['application'].get('timezone'), | ||||
|                 'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'), | ||||
|                 'using_global_webdriver_wait': not default['webdriver_delay'], | ||||
|                 'uuid': uuid, | ||||
|                 'watch': watch, | ||||
|   | ||||
| @@ -2,6 +2,7 @@ from flask import Blueprint, request, make_response | ||||
| import random | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.notification_service import NotificationContextData | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
|  | ||||
| @@ -19,6 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|         import apprise | ||||
|         from changedetectionio.notification.handler import process_notification | ||||
|         from changedetectionio.notification.apprise_plugin.assets import apprise_asset | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|         from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler | ||||
|  | ||||
| @@ -37,11 +39,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|             return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400) | ||||
|  | ||||
|         watch = datastore.data['watching'].get(watch_uuid) | ||||
|  | ||||
|         notification_urls = None | ||||
|  | ||||
|         if request.form.get('notification_urls'): | ||||
|             notification_urls = request.form['notification_urls'].strip().splitlines() | ||||
|         notification_urls = request.form.get('notification_urls','').strip().splitlines() | ||||
|  | ||||
|         if not notification_urls: | ||||
|             logger.debug("Test notification - Trying by group/tag in the edit form if available") | ||||
| @@ -61,20 +59,26 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|             return 'Error: No Notification URLs set/found' | ||||
|  | ||||
|         for n_url in notification_urls: | ||||
|             # We are ONLY validating the apprise:// part here, convert all tags to something so as not to break apprise URLs | ||||
|             generic_notification_context_data = NotificationContextData() | ||||
|             generic_notification_context_data.set_random_for_validation() | ||||
|             n_url = jinja_render(template_str=n_url, **generic_notification_context_data).strip() | ||||
|             if len(n_url.strip()): | ||||
|                 if not apobj.add(n_url): | ||||
|                     return f'Error:  {n_url} is not a valid AppRise URL.' | ||||
|  | ||||
|         try: | ||||
|             # use the same as when it is triggered, but then override it with the form test values | ||||
|             n_object = { | ||||
|             n_object = NotificationContextData({ | ||||
|                 'watch_url': request.form.get('window_url', "https://changedetection.io"), | ||||
|                 'notification_urls': notification_urls | ||||
|             } | ||||
|             }) | ||||
|  | ||||
|             # Only use if present, if not set in n_object it should use the default system value | ||||
|             if 'notification_format' in request.form and request.form['notification_format'].strip(): | ||||
|                 n_object['notification_format'] = request.form.get('notification_format', '').strip() | ||||
|             else: | ||||
|                 n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format') | ||||
|  | ||||
|             if 'notification_title' in request.form and request.form['notification_title'].strip(): | ||||
|                 n_object['notification_title'] = request.form.get('notification_title', '').strip() | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| {% extends 'base.html' %} | ||||
| {% block content %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script> | ||||
| @@ -72,15 +72,16 @@ | ||||
|                         <div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div> | ||||
|                         <div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.tags) }} | ||||
|                         <span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                         {{ render_field(form.processor) }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.title, class="m-d") }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.tags) }} | ||||
|                         <span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span> | ||||
|                         {{ render_field(form.title, class="m-d", placeholder=watch.label) }} | ||||
|                         <span class="pure-form-message-inline">Automatically uses the page title if found, you can also use your own title/description here</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group time-between-check border-fieldset"> | ||||
| 
 | ||||
| @@ -101,15 +102,16 @@ | ||||
|                         </div> | ||||
| <br> | ||||
|               </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.extract_title_as_title) }} | ||||
|                     </div> | ||||
| 
 | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.filter_failure_notification_send) }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                          Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore. | ||||
|                         </span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_ternary_field(form.use_page_title_in_list) }} | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|             </div> | ||||
| 
 | ||||
| @@ -262,7 +264,7 @@ Math: {{ 1 + 1 }}") }} | ||||
|             <div class="tab-pane-inner" id="notifications"> | ||||
|                 <fieldset> | ||||
|                     <div  class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_muted) }} | ||||
|                       {{ render_ternary_field(form.notification_muted, BooleanField=true) }} | ||||
|                     </div> | ||||
|                     {% if watch_needs_selenium_or_playwright %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
| @@ -469,11 +471,11 @@ Math: {{ 1 + 1 }}") }} | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_button(form.save_button) }} | ||||
|                     <a href="{{url_for('ui.form_delete', uuid=uuid)}}" | ||||
|                        class="pure-button button-small button-error ">Delete</a> | ||||
|                        class="pure-button button-error ">Delete</a> | ||||
|                     {% if watch.history_n %}<a href="{{url_for('ui.clear_watch_history', uuid=uuid)}}" | ||||
|                        class="pure-button button-small button-error ">Clear History</a>{% endif %} | ||||
|                        class="pure-button button-error">Clear History</a>{% endif %} | ||||
|                     <a href="{{url_for('ui.form_clone', uuid=uuid)}}" | ||||
|                        class="pure-button button-small ">Clone & Edit</a> | ||||
|                        class="pure-button">Clone & Edit</a> | ||||
|                 </div> | ||||
|             </div> | ||||
|         </form> | ||||
| @@ -1,8 +1,7 @@ | ||||
| from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort | ||||
| from flask_login import current_user | ||||
| import os | ||||
| import time | ||||
| from copy import deepcopy | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
| @@ -78,9 +77,46 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|  | ||||
|         return output | ||||
|  | ||||
|     @views_blueprint.route("/diff/<string:uuid>", methods=['GET', 'POST']) | ||||
|     @views_blueprint.route("/diff/<string:uuid>", methods=['POST']) | ||||
|     @login_optionally_required | ||||
|     def diff_history_page(uuid): | ||||
|     def diff_history_page_build_report(uuid): | ||||
|         from changedetectionio import forms | ||||
|  | ||||
|         # More for testing, possible to return the first/only | ||||
|         if uuid == 'first': | ||||
|             uuid = list(datastore.data['watching'].keys()).pop() | ||||
|  | ||||
|         try: | ||||
|             watch = datastore.data['watching'][uuid] | ||||
|         except KeyError: | ||||
|             flash("No history found for the specified link, bad link?", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         # For submission of requesting an extract | ||||
|         extract_form = forms.extractDataForm(formdata=request.form, | ||||
|                                              data={'extract_regex': request.form.get('extract_regex', '')} | ||||
|                                              ) | ||||
|         if not extract_form.validate(): | ||||
|             flash("An error occurred, please see below.", "error") | ||||
|             return _render_diff_template(uuid, extract_form) | ||||
|  | ||||
|         else: | ||||
|             extract_regex = request.form.get('extract_regex', '').strip() | ||||
|             output = watch.extract_regex_from_all_history(extract_regex) | ||||
|             if output: | ||||
|                 watch_dir = os.path.join(datastore.datastore_path, uuid) | ||||
|                 response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True)) | ||||
|                 response.headers['Content-type'] = 'text/csv' | ||||
|                 response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' | ||||
|                 response.headers['Pragma'] = 'no-cache' | ||||
|                 response.headers['Expires'] = "0" | ||||
|                 return response | ||||
|  | ||||
|             flash('No matches found while scanning all of the watch history for that RegEx.', 'error') | ||||
|         return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract') | ||||
|  | ||||
|     def _render_diff_template(uuid, extract_form=None): | ||||
|         """Helper function to render the diff template with all required data""" | ||||
|         from changedetectionio import forms | ||||
|  | ||||
|         # More for testing, possible to return the first/only | ||||
| @@ -94,62 +130,36 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             flash("No history found for the specified link, bad link?", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         # For submission of requesting an extract | ||||
|         extract_form = forms.extractDataForm(request.form) | ||||
|         if request.method == 'POST': | ||||
|             if not extract_form.validate(): | ||||
|                 flash("An error occurred, please see below.", "error") | ||||
|  | ||||
|             else: | ||||
|                 extract_regex = request.form.get('extract_regex').strip() | ||||
|                 output = watch.extract_regex_from_all_history(extract_regex) | ||||
|                 if output: | ||||
|                     watch_dir = os.path.join(datastore.datastore_path, uuid) | ||||
|                     response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True)) | ||||
|                     response.headers['Content-type'] = 'text/csv' | ||||
|                     response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate' | ||||
|                     response.headers['Pragma'] = 'no-cache' | ||||
|                     response.headers['Expires'] = 0 | ||||
|                     return response | ||||
|  | ||||
|                 flash('Nothing matches that RegEx', 'error') | ||||
|                 redirect(url_for('ui_views.diff_history_page', uuid=uuid)+'#extract') | ||||
|         # Use provided form or create a new one | ||||
|         if extract_form is None: | ||||
|             extract_form = forms.extractDataForm(formdata=request.form, | ||||
|                                                  data={'extract_regex': request.form.get('extract_regex', '')} | ||||
|                                                  ) | ||||
|  | ||||
|         history = watch.history | ||||
|         dates = list(history.keys()) | ||||
|  | ||||
|         if len(dates) < 2: | ||||
|             flash("Not enough saved change detection snapshots to produce a report.", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|         # If a "from_version" was requested, then find it (or the closest one) | ||||
|         # Also set "from version" to be the closest version to the one that was last viewed. | ||||
|  | ||||
|         # Save the current newest history as the most recently viewed | ||||
|         datastore.set_last_viewed(uuid, time.time()) | ||||
|         best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed | ||||
|         from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2] | ||||
|         from_version = request.args.get('from_version', from_version_timestamp ) | ||||
|  | ||||
|         # Read as binary and force decode as UTF-8 | ||||
|         # Windows may fail decode in python if we just use 'r' mode (chardet decode exception) | ||||
|         from_version = request.args.get('from_version') | ||||
|         from_version_index = -2  # second newest | ||||
|         if from_version and from_version in dates: | ||||
|             from_version_index = dates.index(from_version) | ||||
|         else: | ||||
|             from_version = dates[from_version_index] | ||||
|         # Use the current one if nothing was specified | ||||
|         to_version = request.args.get('to_version', str(dates[-1])) | ||||
|  | ||||
|         try: | ||||
|             from_version_file_contents = watch.get_history_snapshot(dates[from_version_index]) | ||||
|             to_version_file_contents = watch.get_history_snapshot(timestamp=to_version) | ||||
|         except Exception as e: | ||||
|             from_version_file_contents = f"Unable to read to-version at index {dates[from_version_index]}.\n" | ||||
|  | ||||
|         to_version = request.args.get('to_version') | ||||
|         to_version_index = -1 | ||||
|         if to_version and to_version in dates: | ||||
|             to_version_index = dates.index(to_version) | ||||
|         else: | ||||
|             to_version = dates[to_version_index] | ||||
|             logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}") | ||||
|             to_version_file_contents = f"Unable to read to-version at {to_version}.\n" | ||||
|  | ||||
|         try: | ||||
|             to_version_file_contents = watch.get_history_snapshot(dates[to_version_index]) | ||||
|             from_version_file_contents = watch.get_history_snapshot(timestamp=from_version) | ||||
|         except Exception as e: | ||||
|             to_version_file_contents = "Unable to read to-version at index{}.\n".format(dates[to_version_index]) | ||||
|             logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}") | ||||
|             from_version_file_contents = f"Unable to read to-version {from_version}.\n" | ||||
|  | ||||
|         screenshot_url = watch.get_screenshot() | ||||
|  | ||||
| @@ -163,7 +173,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|         if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False): | ||||
|             password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access') | ||||
|  | ||||
|         output = render_template("diff.html", | ||||
|         datastore.set_last_viewed(uuid, time.time()) | ||||
|  | ||||
|         return render_template("diff.html", | ||||
|                                  current_diff_url=watch['url'], | ||||
|                                  from_version=str(from_version), | ||||
|                                  to_version=str(to_version), | ||||
| @@ -186,7 +198,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                                  watch_a=watch | ||||
|                                  ) | ||||
|  | ||||
|         return output | ||||
|     @views_blueprint.route("/diff/<string:uuid>", methods=['GET']) | ||||
|     @login_optionally_required | ||||
|     def diff_history_page(uuid): | ||||
|         return _render_diff_template(uuid) | ||||
|  | ||||
|     @views_blueprint.route("/form/add/quickwatch", methods=['POST']) | ||||
|     @login_optionally_required | ||||
|   | ||||
| @@ -44,12 +44,16 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|         # Sort by last_changed and add the uuid which is usually the key.. | ||||
|         sorted_watches = [] | ||||
|         with_errors = request.args.get('with_errors') == "1" | ||||
|         unread_only = request.args.get('unread') == "1" | ||||
|         errored_count = 0 | ||||
|         search_q = request.args.get('q').strip().lower() if request.args.get('q') else False | ||||
|         for uuid, watch in datastore.data['watching'].items(): | ||||
|             if with_errors and not watch.get('last_error'): | ||||
|                 continue | ||||
|  | ||||
|             if unread_only and (watch.viewed or watch.last_changed == 0) : | ||||
|                 continue | ||||
|  | ||||
|             if active_tag_uuid and not active_tag_uuid in watch['tags']: | ||||
|                     continue | ||||
|             if watch.get('last_error'): | ||||
| @@ -83,7 +87,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             form=form, | ||||
|             guid=datastore.data['app_guid'], | ||||
|             has_proxies=datastore.proxy_list, | ||||
|             has_unviewed=datastore.has_unviewed, | ||||
|             hosted_sticky=os.getenv("SALTED_PASS", False) == False, | ||||
|             now_time_server=round(time.time()), | ||||
|             pagination=pagination, | ||||
| @@ -93,6 +96,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'), | ||||
|             system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'), | ||||
|             tags=sorted_tags, | ||||
|             unread_changes_count=datastore.unread_changes_count, | ||||
|             watches=sorted_watches | ||||
|         ) | ||||
|  | ||||
|   | ||||
| @@ -4,6 +4,7 @@ | ||||
| <script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script> | ||||
| <script>let nowtimeserver={{ now_time_server }};</script> | ||||
| <script>let favicon_baseURL="{{ url_for('static_content', group='favicon', filename="PLACEHOLDER")}}";</script> | ||||
| <script> | ||||
| // Initialize Feather icons after the page loads | ||||
| document.addEventListener('DOMContentLoaded', function() { | ||||
| @@ -80,16 +81,26 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|     {%- if any_has_restock_price_processor -%} | ||||
|         {%- set cols_required = cols_required + 1 -%} | ||||
|     {%- endif -%} | ||||
|  | ||||
|     <div id="watch-table-wrapper"> | ||||
|  | ||||
|         <table class="pure-table pure-table-striped watch-table"> | ||||
|     {%- set ui_settings = datastore.data['settings']['application']['ui'] -%} | ||||
|     {%- set wrapper_classes = [ | ||||
|         'has-unread-changes' if unread_changes_count else '', | ||||
|         'has-error' if errored_count else '', | ||||
|     ] -%} | ||||
|     <div id="watch-table-wrapper" class="{{ wrapper_classes | reject('equalto', '') | join(' ') }}"> | ||||
|         {%- set table_classes = [ | ||||
|             'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled', | ||||
|         ] -%} | ||||
|         <table class="pure-table pure-table-striped watch-table {{ table_classes | reject('equalto', '') | join(' ') }}"> | ||||
|             <thead> | ||||
|             <tr> | ||||
|                 {%- set link_order = "desc" if sort_order  == 'asc' else "asc" -%} | ||||
|                 {%- set arrow_span = "" -%} | ||||
|                 <th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}"  href="{{url_for('watchlist.index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th> | ||||
|                 <th class="empty-cell"></th> | ||||
|                 <th> | ||||
|                     <a class="{{ 'active '+link_order if sort_attribute == 'paused' else 'inactive' }}" href="{{url_for('watchlist.index', sort='paused', order=link_order, tag=active_tag_uuid)}}"><i data-feather="pause" style="vertical-align: bottom; width: 14px; height: 14px;  margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a> | ||||
|                       | ||||
|                     <a class="{{ 'active '+link_order if sort_attribute == 'notification_muted' else 'inactive' }}" href="{{url_for('watchlist.index', sort='notification_muted', order=link_order, tag=active_tag_uuid)}}"><i data-feather="volume-2" style="vertical-align: bottom; width: 14px; height: 14px;  margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a> | ||||
|                 </th> | ||||
|                 <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th> | ||||
|              {%- if any_has_restock_price_processor -%} | ||||
|                 <th>Restock & Price</th> | ||||
| @@ -105,10 +116,13 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|                 <td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td> | ||||
|             </tr> | ||||
|             {%- endif -%} | ||||
|  | ||||
|             {%- for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) -%} | ||||
|                 {%- set checking_now = is_checking_now(watch) -%} | ||||
|                 {%- set history_n = watch.history_n -%} | ||||
|                 {#  Mirror in changedetectionio/static/js/realtime.js for the frontend #} | ||||
|                 {%- set favicon = watch.get_favicon_filename() -%} | ||||
|                 {%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list')  -%} | ||||
|                 {#  Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #} | ||||
|                 {%- set row_classes = [ | ||||
|                     loop.cycle('pure-table-odd', 'pure-table-even'), | ||||
|                     'processor-' ~ watch['processor'], | ||||
| @@ -116,49 +130,69 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|                     'paused' if watch.paused is defined and watch.paused != False else '', | ||||
|                     'unviewed' if watch.has_unviewed else '', | ||||
|                     'has-restock-info' if watch.has_restock_info else 'no-restock-info', | ||||
|                     'has-favicon' if favicon else '', | ||||
|                     'in-stock' if watch.has_restock_info and watch['restock']['in_stock'] else '', | ||||
|                     'not-in-stock' if watch.has_restock_info and not watch['restock']['in_stock'] else '', | ||||
|                     'queued' if watch.uuid in queued_uuids else '', | ||||
|                     'checking-now' if checking_now else '', | ||||
|                     'notification_muted' if watch.notification_muted else '', | ||||
|                     'single-history' if history_n == 1 else '', | ||||
|                     'multiple-history' if history_n >= 2 else '' | ||||
|                     'multiple-history' if history_n >= 2 else '', | ||||
|                     'use-html-title' if system_use_url_watchlist else 'no-html-title', | ||||
|                 ] -%} | ||||
|             <tr id="{{ watch.uuid }}" data-watch-uuid="{{ watch.uuid }}" class="{{ row_classes | reject('equalto', '') | join(' ') }}"> | ||||
|                 <td class="inline checkbox-uuid" ><input name="uuids"  type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td> | ||||
|                 <td class="inline checkbox-uuid" ><div><input name="uuids"  type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td> | ||||
|                 <td class="inline watch-controls"> | ||||
|                     <div> | ||||
|                     <a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a> | ||||
|                     <a class="ajax-op state-on pause-toggle"  data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a> | ||||
|                     <a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a> | ||||
|                     <a class="ajax-op state-on mute-toggle" data-op="mute"  style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a> | ||||
|                     </div> | ||||
|                 </td> | ||||
|                 <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}} | ||||
|                     <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a> | ||||
|                     <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a> | ||||
|  | ||||
|                     {%- if watch.get_fetch_backend == "html_webdriver" | ||||
|                          or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver'  ) | ||||
|                          or "extra_browser_" in watch.get_fetch_backend | ||||
|                     -%} | ||||
|                     <img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" > | ||||
|                     {%- endif -%} | ||||
|                 <td class="title-col inline"> | ||||
|                     <div class="flex-wrapper"> | ||||
|                     {% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %} | ||||
|                         <div>{# A page might have hundreds of these images, set IMG options for lazy loading, don't set SRC if we dont have it so it doesnt fetch the placeholder'  #} | ||||
|                             <img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {%  endif %} /> | ||||
|                         </div> | ||||
|                     {%  endif %} | ||||
|                         <div> | ||||
|                         <span class="watch-title"> | ||||
|                             {% if system_use_url_watchlist or watch.get('use_page_title_in_list') %} | ||||
|                                 {{ watch.label }} | ||||
|                             {% else %} | ||||
|                                 {{ watch.get('title') or watch.link }} | ||||
|                             {% endif %} | ||||
|                            <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a> | ||||
|                         </span> | ||||
|                             <div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div> | ||||
|                             {%- if watch['processor'] == 'text_json_diff'  -%} | ||||
|                                 {%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  -%} | ||||
|                                 <div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> | ||||
|                                 {%- endif -%} | ||||
|                             {%- endif -%} | ||||
|                             {%- if watch['processor'] == 'restock_diff' -%} | ||||
|                                 <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon" > Price</span> | ||||
|                             {%- endif -%} | ||||
|                             {%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%} | ||||
|                               <span class="watch-tag-list">{{ watch_tag.title }}</span> | ||||
|                             {%- endfor -%} | ||||
|                         </div> | ||||
|                     <div class="status-icons"> | ||||
|                             <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a> | ||||
|                             {%- if watch.get_fetch_backend == "html_webdriver" | ||||
|                                  or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver'  ) | ||||
|                                  or "extra_browser_" in watch.get_fetch_backend | ||||
|                             -%} | ||||
|                             <img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" > | ||||
|                             {%- endif -%} | ||||
|                             {%- if watch.is_pdf  -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%} | ||||
|                             {%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%} | ||||
|  | ||||
|                     {%- if watch.is_pdf  -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%} | ||||
|                     {%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%} | ||||
|  | ||||
|                     <div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div> | ||||
|  | ||||
|                     {%- if watch['processor'] == 'text_json_diff'  -%} | ||||
|                         {%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  -%} | ||||
|                         <div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> | ||||
|                         {%- endif -%} | ||||
|                     {%- endif -%} | ||||
|                     {%- if watch['processor'] == 'restock_diff' -%} | ||||
|                         <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon" > Price</span> | ||||
|                     {%- endif -%} | ||||
|                     {%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%} | ||||
|                       <span class="watch-tag-list">{{ watch_tag.title }}</span> | ||||
|                     {%- endfor -%} | ||||
|                     </div> | ||||
|                     </div> | ||||
|                 </td> | ||||
| {%- if any_has_restock_price_processor -%} | ||||
|                 <td class="restock-and-price"> | ||||
| @@ -195,23 +229,25 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|                     Not yet | ||||
|                     {%- endif -%} | ||||
|                 </td> | ||||
|                 <td> | ||||
|                 <td class="buttons"> | ||||
|                     <div> | ||||
|                     {%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%} | ||||
|                     <a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a> | ||||
|                     <a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a> | ||||
|                     <a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a> | ||||
|                     <a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a> | ||||
|                     <a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a> | ||||
|                     </div> | ||||
|                 </td> | ||||
|             </tr> | ||||
|             {%- endfor -%} | ||||
|             </tbody> | ||||
|         </table> | ||||
|         <ul id="post-list-buttons"> | ||||
|             <li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" > | ||||
|             <li id="post-list-with-errors" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a> | ||||
|             </li> | ||||
|             <li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" > | ||||
|             <li id="post-list-mark-views" style="display: none;" > | ||||
|                 <a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a> | ||||
|             </li> | ||||
|         {%-  if active_tag_uuid -%} | ||||
| @@ -219,6 +255,9 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|                 <a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a> | ||||
|             </li> | ||||
|         {%-  endif -%} | ||||
|             <li id="post-list-unread" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a> | ||||
|             </li> | ||||
|             <li> | ||||
|                <a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck | ||||
|                 all {% if active_tag_uuid %}  in '{{active_tag.title}}'{%endif%}</a> | ||||
|   | ||||
| @@ -1,5 +1,3 @@ | ||||
| from flask import Blueprint | ||||
|  | ||||
| from json_logic.builtins import BUILTINS | ||||
|  | ||||
| from .exceptions import EmptyConditionRuleRowNotUsable | ||||
| @@ -16,7 +14,6 @@ operator_choices = [ | ||||
|     ("==", "Equals"), | ||||
|     ("!=", "Not Equals"), | ||||
|     ("in", "Contains"), | ||||
|     ("!in", "Does Not Contain"), | ||||
| ] | ||||
|  | ||||
| # Fields available in the rules | ||||
|   | ||||
| @@ -21,17 +21,21 @@ def register_operators(): | ||||
|     def length_max(_, text, strlen): | ||||
|         return len(text) <= int(strlen) | ||||
|  | ||||
|     # ✅ Custom function for case-insensitive regex matching | ||||
|     # Custom function for case-insensitive regex matching | ||||
|     def contains_regex(_, text, pattern): | ||||
|         """Returns True if `text` contains `pattern` (case-insensitive regex match).""" | ||||
|         return bool(re.search(pattern, str(text), re.IGNORECASE)) | ||||
|  | ||||
|     # ✅ Custom function for NOT matching case-insensitive regex | ||||
|     # Custom function for NOT matching case-insensitive regex | ||||
|     def not_contains_regex(_, text, pattern): | ||||
|         """Returns True if `text` does NOT contain `pattern` (case-insensitive regex match).""" | ||||
|         return not bool(re.search(pattern, str(text), re.IGNORECASE)) | ||||
|  | ||||
|     def not_contains(_, text, pattern): | ||||
|         return not pattern in text | ||||
|  | ||||
|     return { | ||||
|         "!in": not_contains, | ||||
|         "!contains_regex": not_contains_regex, | ||||
|         "contains_regex": contains_regex, | ||||
|         "ends_with": ends_with, | ||||
| @@ -43,6 +47,7 @@ def register_operators(): | ||||
| @hookimpl | ||||
| def register_operator_choices(): | ||||
|     return [ | ||||
|         ("!in", "Does NOT Contain"), | ||||
|         ("starts_with", "Text Starts With"), | ||||
|         ("ends_with", "Text Ends With"), | ||||
|         ("length_min", "Length minimum"), | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS=100000 | ||||
|  | ||||
| # Support both plugin systems | ||||
| conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions") | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
| @@ -72,7 +74,17 @@ def ui_edit_stats_extras(watch): | ||||
|     """Generate the HTML for Levenshtein stats - shared by both plugin systems""" | ||||
|     if len(watch.history.keys()) < 2: | ||||
|         return "<p>Not enough history to calculate Levenshtein metrics</p>" | ||||
|      | ||||
|  | ||||
|  | ||||
|     # Protection against the algorithm getting stuck on huge documents | ||||
|     k = list(watch.history.keys()) | ||||
|     if any( | ||||
|             len(watch.get_history_snapshot(timestamp=k[idx])) > LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS | ||||
|             for idx in (-1, -2) | ||||
|             if len(k) >= abs(idx) | ||||
|     ): | ||||
|         return "<p>Snapshot too large for edit statistics, skipping.</p>" | ||||
|  | ||||
|     try: | ||||
|         lev_data = levenshtein_ratio_recent_history(watch) | ||||
|         if not lev_data or not isinstance(lev_data, dict): | ||||
|   | ||||
| @@ -28,6 +28,7 @@ from changedetectionio.content_fetchers.requests import fetcher as html_requests | ||||
| import importlib.resources | ||||
| XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8') | ||||
| INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8') | ||||
| FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8') | ||||
|  | ||||
|  | ||||
| def available_fetchers(): | ||||
|   | ||||
| @@ -48,6 +48,7 @@ class Fetcher(): | ||||
|     error = None | ||||
|     fetcher_description = "No description" | ||||
|     headers = {} | ||||
|     favicon_blob = None | ||||
|     instock_data = None | ||||
|     instock_data_js = "" | ||||
|     status_code = None | ||||
| @@ -63,21 +64,35 @@ class Fetcher(): | ||||
|     # Time ONTOP of the system defined env minimum time | ||||
|     render_extract_delay = 0 | ||||
|  | ||||
|     def clear_content(self): | ||||
|         """ | ||||
|         Explicitly clear all content from memory to free up heap space. | ||||
|         Call this after content has been saved to disk. | ||||
|         """ | ||||
|         self.content = None | ||||
|         if hasattr(self, 'raw_content'): | ||||
|             self.raw_content = None | ||||
|         self.screenshot = None | ||||
|         self.xpath_data = None | ||||
|         # Keep headers and status_code as they're small | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_error(self): | ||||
|         return self.error | ||||
|  | ||||
|     @abstractmethod | ||||
|     async def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|                   fetch_favicon=True, | ||||
|                   current_include_filters=None, | ||||
|                   empty_pages_are_a_change=False, | ||||
|                   ignore_status_codes=False, | ||||
|                   is_binary=False, | ||||
|                   request_body=None, | ||||
|                   request_headers=None, | ||||
|                   request_method=None, | ||||
|                   timeout=None, | ||||
|                   url=None, | ||||
|                   ): | ||||
|         # Should set self.error, self.status_code and self.content | ||||
|         pass | ||||
|  | ||||
| @@ -125,7 +140,7 @@ class Fetcher(): | ||||
|     async def iterate_browser_steps(self, start_url=None): | ||||
|         from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|         from playwright._impl._errors import TimeoutError, Error | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|         step_n = 0 | ||||
|  | ||||
|         if self.browser_steps is not None and len(self.browser_steps): | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from urllib.parse import urlparse | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \ | ||||
|     SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS | ||||
|     SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS | ||||
| from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent | ||||
| from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable | ||||
|  | ||||
| @@ -143,15 +143,17 @@ class fetcher(Fetcher): | ||||
|             f.write(content) | ||||
|  | ||||
|     async def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|                   fetch_favicon=True, | ||||
|                   current_include_filters=None, | ||||
|                   empty_pages_are_a_change=False, | ||||
|                   ignore_status_codes=False, | ||||
|                   is_binary=False, | ||||
|                   request_body=None, | ||||
|                   request_headers=None, | ||||
|                   request_method=None, | ||||
|                   timeout=None, | ||||
|                   url=None, | ||||
|                   ): | ||||
|  | ||||
|         from playwright.async_api import async_playwright | ||||
|         import playwright._impl._errors | ||||
| @@ -234,6 +236,13 @@ class fetcher(Fetcher): | ||||
|                 await browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             if fetch_favicon: | ||||
|                 try: | ||||
|                     self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS) | ||||
|                     await self.page.request_gc() | ||||
|                 except Exception as e: | ||||
|                     logger.error(f"Error fetching FavIcon info {str(e)}, continuing.") | ||||
|  | ||||
|             if self.status_code != 200 and not ignore_status_codes: | ||||
|                 screenshot = await capture_full_page_async(self.page) | ||||
|                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) | ||||
| @@ -274,6 +283,7 @@ class fetcher(Fetcher): | ||||
|             await self.page.request_gc() | ||||
|             logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s") | ||||
|  | ||||
|  | ||||
|             # Bug 3 in Playwright screenshot handling | ||||
|             # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|             # JPEG is better here because the screenshots can be very very large | ||||
|   | ||||
| @@ -8,7 +8,7 @@ from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \ | ||||
|     SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \ | ||||
|     SCREENSHOT_MAX_TOTAL_HEIGHT | ||||
|     SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS | ||||
| from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent | ||||
| from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, \ | ||||
|     BrowserConnectError | ||||
| @@ -145,15 +145,16 @@ class fetcher(Fetcher): | ||||
|     #         f.write(content) | ||||
|  | ||||
|     async def fetch_page(self, | ||||
|                          url, | ||||
|                          timeout, | ||||
|                          request_headers, | ||||
|                          request_body, | ||||
|                          request_method, | ||||
|                          ignore_status_codes, | ||||
|                          current_include_filters, | ||||
|                          empty_pages_are_a_change, | ||||
|                          fetch_favicon, | ||||
|                          ignore_status_codes, | ||||
|                          is_binary, | ||||
|                          empty_pages_are_a_change | ||||
|                          request_body, | ||||
|                          request_headers, | ||||
|                          request_method, | ||||
|                          timeout, | ||||
|                          url, | ||||
|                          ): | ||||
|         import re | ||||
|         self.delete_browser_steps_screenshots() | ||||
| @@ -179,10 +180,11 @@ class fetcher(Fetcher): | ||||
|         except Exception as e: | ||||
|             raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'") | ||||
|  | ||||
|         # Better is to launch chrome with the URL as arg | ||||
|         # non-headless - newPage() will launch an extra tab/window, .browser should already contain 1 page/tab | ||||
|         # headless - ask a new page | ||||
|         self.page = (pages := await browser.pages) and len(pages) or await browser.newPage() | ||||
|         # more reliable is to just request a new page | ||||
|         self.page = await browser.newPage() | ||||
|          | ||||
|         # Add console handler to capture console.log from favicon fetcher | ||||
|         #self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}")) | ||||
|  | ||||
|         if '--window-size' in self.browser_connection_url: | ||||
|             # Be sure the viewport is always the window-size, this is often not the same thing | ||||
| @@ -292,6 +294,12 @@ class fetcher(Fetcher): | ||||
|             await browser.close() | ||||
|             raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|         if fetch_favicon: | ||||
|             try: | ||||
|                 self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS) | ||||
|             except Exception as e: | ||||
|                 logger.error(f"Error fetching FavIcon info {str(e)}, continuing.") | ||||
|  | ||||
|         if self.status_code != 200 and not ignore_status_codes: | ||||
|             screenshot = await capture_full_page(page=self.page) | ||||
|  | ||||
| @@ -343,8 +351,18 @@ class fetcher(Fetcher): | ||||
|     async def main(self, **kwargs): | ||||
|         await self.fetch_page(**kwargs) | ||||
|  | ||||
|     async def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False, | ||||
|             current_include_filters=None, is_binary=False, empty_pages_are_a_change=False): | ||||
|     async def run(self, | ||||
|                   fetch_favicon=True, | ||||
|                   current_include_filters=None, | ||||
|                   empty_pages_are_a_change=False, | ||||
|                   ignore_status_codes=False, | ||||
|                   is_binary=False, | ||||
|                   request_body=None, | ||||
|                   request_headers=None, | ||||
|                   request_method=None, | ||||
|                   timeout=None, | ||||
|                   url=None, | ||||
|                   ): | ||||
|  | ||||
|         #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints | ||||
|         max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)) | ||||
| @@ -352,16 +370,17 @@ class fetcher(Fetcher): | ||||
|         # Now we run this properly in async context since we're called from async worker | ||||
|         try: | ||||
|             await asyncio.wait_for(self.main( | ||||
|                 url=url, | ||||
|                 timeout=timeout, | ||||
|                 request_headers=request_headers, | ||||
|                 request_body=request_body, | ||||
|                 request_method=request_method, | ||||
|                 ignore_status_codes=ignore_status_codes, | ||||
|                 current_include_filters=current_include_filters, | ||||
|                 empty_pages_are_a_change=empty_pages_are_a_change, | ||||
|                 fetch_favicon=fetch_favicon, | ||||
|                 ignore_status_codes=ignore_status_codes, | ||||
|                 is_binary=is_binary, | ||||
|                 empty_pages_are_a_change=empty_pages_are_a_change | ||||
|             ), timeout=max_time) | ||||
|                 request_body=request_body, | ||||
|                 request_headers=request_headers, | ||||
|                 request_method=request_method, | ||||
|                 timeout=timeout, | ||||
|                 url=url, | ||||
|             ), timeout=max_time | ||||
|             ) | ||||
|         except asyncio.TimeoutError: | ||||
|             raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds.")) | ||||
|  | ||||
|             raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds.")) | ||||
|   | ||||
| @@ -51,6 +51,7 @@ class fetcher(Fetcher): | ||||
|  | ||||
|         session = requests.Session() | ||||
|  | ||||
|  | ||||
|         if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'): | ||||
|             from requests_file import FileAdapter | ||||
|             session.mount('file://', FileAdapter()) | ||||
| @@ -104,15 +105,17 @@ class fetcher(Fetcher): | ||||
|         self.raw_content = r.content | ||||
|  | ||||
|     async def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|                   fetch_favicon=True, | ||||
|                   current_include_filters=None, | ||||
|                   empty_pages_are_a_change=False, | ||||
|                   ignore_status_codes=False, | ||||
|                   is_binary=False, | ||||
|                   request_body=None, | ||||
|                   request_headers=None, | ||||
|                   request_method=None, | ||||
|                   timeout=None, | ||||
|                   url=None, | ||||
|                   ): | ||||
|         """Async wrapper that runs the synchronous requests code in a thread pool""" | ||||
|          | ||||
|         loop = asyncio.get_event_loop() | ||||
|   | ||||
							
								
								
									
										101
									
								
								changedetectionio/content_fetchers/res/favicon-fetcher.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								changedetectionio/content_fetchers/res/favicon-fetcher.js
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,101 @@ | ||||
| (async () => { | ||||
|   // Define the function inside the IIFE for console testing | ||||
|   window.getFaviconAsBlob = async function() { | ||||
|     const links = Array.from(document.querySelectorAll( | ||||
|       'link[rel~="apple-touch-icon"], link[rel~="icon"]' | ||||
|     )); | ||||
|  | ||||
|     const icons = links.map(link => { | ||||
|       const sizesStr = link.getAttribute('sizes'); | ||||
|       let size = 0; | ||||
|       if (sizesStr) { | ||||
|         const [w] = sizesStr.split('x').map(Number); | ||||
|         if (!isNaN(w)) size = w; | ||||
|       } else { | ||||
|         size = 16; | ||||
|       } | ||||
|       return { | ||||
|         size, | ||||
|         rel: link.getAttribute('rel'), | ||||
|         href: link.href, | ||||
|         hasSizes: !!sizesStr | ||||
|       }; | ||||
|     }); | ||||
|  | ||||
|     // If no icons found, add fallback favicon.ico | ||||
|     if (icons.length === 0) { | ||||
|       icons.push({ | ||||
|         size: 16, | ||||
|         rel: 'icon', | ||||
|         href: '/favicon.ico', | ||||
|         hasSizes: false | ||||
|       }); | ||||
|     } | ||||
|  | ||||
|     // sort preference: highest resolution first, then apple-touch-icon, then regular icons | ||||
|     icons.sort((a, b) => { | ||||
|       // First priority: actual size (highest first) | ||||
|       if (a.size !== b.size) { | ||||
|         return b.size - a.size; | ||||
|       } | ||||
|        | ||||
|       // Second priority: apple-touch-icon over regular icon | ||||
|       const isAppleA = /apple-touch-icon/.test(a.rel); | ||||
|       const isAppleB = /apple-touch-icon/.test(b.rel); | ||||
|       if (isAppleA && !isAppleB) return -1; | ||||
|       if (!isAppleA && isAppleB) return 1; | ||||
|        | ||||
|       // Third priority: icons with no size attribute (fallback icons) last | ||||
|       const hasNoSizeA = !a.hasSizes; | ||||
|       const hasNoSizeB = !b.hasSizes; | ||||
|       if (hasNoSizeA && !hasNoSizeB) return 1; | ||||
|       if (!hasNoSizeA && hasNoSizeB) return -1; | ||||
|        | ||||
|       return 0; | ||||
|     }); | ||||
|  | ||||
|     const timeoutMs = 2000; | ||||
|  | ||||
|     for (const icon of icons) { | ||||
|       try { | ||||
|         const controller = new AbortController(); | ||||
|         const timeout = setTimeout(() => controller.abort(), timeoutMs); | ||||
|  | ||||
|         const resp = await fetch(icon.href, { | ||||
|           signal: controller.signal, | ||||
|           redirect: 'follow' | ||||
|         }); | ||||
|  | ||||
|         clearTimeout(timeout); | ||||
|  | ||||
|         if (!resp.ok) { | ||||
|           continue; | ||||
|         } | ||||
|  | ||||
|         const blob = await resp.blob(); | ||||
|  | ||||
|         // Convert blob to base64 | ||||
|         const reader = new FileReader(); | ||||
|         return await new Promise(resolve => { | ||||
|           reader.onloadend = () => { | ||||
|             resolve({ | ||||
|               url: icon.href, | ||||
|               base64: reader.result.split(",")[1] | ||||
|             }); | ||||
|           }; | ||||
|           reader.readAsDataURL(blob); | ||||
|         }); | ||||
|  | ||||
|       } catch (e) { | ||||
|         continue; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     // nothing found | ||||
|     return null; | ||||
|   }; | ||||
|  | ||||
|   // Auto-execute and return result for page.evaluate() | ||||
|   return await window.getFaviconAsBlob(); | ||||
| })(); | ||||
|  | ||||
| @@ -17,6 +17,7 @@ async () => { | ||||
|             'back in stock soon', | ||||
|             'back-order or out of stock', | ||||
|             'backordered', | ||||
|             'backorder', | ||||
|             'benachrichtigt mich', // notify me | ||||
|             'binnenkort leverbaar', // coming soon | ||||
|             'brak na stanie', | ||||
| @@ -39,12 +40,14 @@ async () => { | ||||
|             'mail me when available', | ||||
|             'message if back in stock', | ||||
|             'mevcut değil', | ||||
|             'more on order', | ||||
|             'nachricht bei', | ||||
|             'nicht auf lager', | ||||
|             'nicht lagernd', | ||||
|             'nicht lieferbar', | ||||
|             'nicht verfügbar', | ||||
|             'nicht vorrätig', | ||||
|             'nicht mehr lieferbar', | ||||
|             'nicht zur verfügung', | ||||
|             'nie znaleziono produktów', | ||||
|             'niet beschikbaar', | ||||
|   | ||||
| @@ -4,9 +4,10 @@ import time | ||||
| from loguru import logger | ||||
| from changedetectionio.content_fetchers.base import Fetcher | ||||
|  | ||||
|  | ||||
| class fetcher(Fetcher): | ||||
|     if os.getenv("WEBDRIVER_URL"): | ||||
|         fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL")) | ||||
|         fetcher_description = f"WebDriver Chrome/Javascript via \"{os.getenv('WEBDRIVER_URL', '')}\"" | ||||
|     else: | ||||
|         fetcher_description = "WebDriver Chrome/Javascript" | ||||
|  | ||||
| @@ -25,7 +26,6 @@ class fetcher(Fetcher): | ||||
|             self.browser_connection_is_custom = True | ||||
|             self.browser_connection_url = custom_browser_connection_url | ||||
|  | ||||
|  | ||||
|         ##### PROXY SETUP ##### | ||||
|  | ||||
|         proxy_sources = [ | ||||
| @@ -38,7 +38,7 @@ class fetcher(Fetcher): | ||||
|             os.getenv('webdriver_proxyHttps'), | ||||
|             os.getenv('webdriver_httpsProxy'), | ||||
|             os.getenv('webdriver_sslProxy'), | ||||
|             proxy_override, # last one should override | ||||
|             proxy_override,  # last one should override | ||||
|         ] | ||||
|         # The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server= | ||||
|         for k in filter(None, proxy_sources): | ||||
| @@ -46,20 +46,21 @@ class fetcher(Fetcher): | ||||
|                 continue | ||||
|             self.proxy_url = k.strip() | ||||
|  | ||||
|  | ||||
|     async def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False, | ||||
|             empty_pages_are_a_change=False): | ||||
|                   fetch_favicon=True, | ||||
|                   current_include_filters=None, | ||||
|                   empty_pages_are_a_change=False, | ||||
|                   ignore_status_codes=False, | ||||
|                   is_binary=False, | ||||
|                   request_body=None, | ||||
|                   request_headers=None, | ||||
|                   request_method=None, | ||||
|                   timeout=None, | ||||
|                   url=None, | ||||
|                   ): | ||||
|  | ||||
|         import asyncio | ||||
|          | ||||
|  | ||||
|         # Wrap the entire selenium operation in a thread executor | ||||
|         def _run_sync(): | ||||
|             from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
| @@ -140,4 +141,3 @@ class fetcher(Fetcher): | ||||
|         # Run the selenium operations in a thread pool to avoid blocking the event loop | ||||
|         loop = asyncio.get_event_loop() | ||||
|         await loop.run_in_executor(None, _run_sync) | ||||
|  | ||||
|   | ||||
| @@ -1,8 +1,32 @@ | ||||
| import difflib | ||||
| from typing import List, Iterator, Union | ||||
|  | ||||
| REMOVED_STYLE = "background-color: #fadad7; color: #b30000;" | ||||
| ADDED_STYLE = "background-color: #eaf2c2; color: #406619;" | ||||
| # https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050 | ||||
| #HTML_ADDED_STYLE = "background-color: #d2f7c2; color: #255d00;" | ||||
| #HTML_CHANGED_INTO_STYLE = "background-color: #dafbe1; color: #116329;" | ||||
| #HTML_CHANGED_STYLE = "background-color: #ffd6cc; color: #7a2000;" | ||||
| #HTML_REMOVED_STYLE = "background-color: #ffebe9; color: #82071e;" | ||||
|  | ||||
| # @todo - In the future we can make this configurable | ||||
| HTML_ADDED_STYLE = "background-color: #eaf2c2; color: #406619" | ||||
| HTML_REMOVED_STYLE = "background-color: #fadad7; color: #b30000" | ||||
| HTML_CHANGED_STYLE = HTML_REMOVED_STYLE | ||||
| HTML_CHANGED_INTO_STYLE = HTML_ADDED_STYLE | ||||
|  | ||||
|  | ||||
| # These get set to html or telegram type or discord compatible or whatever in handler.py | ||||
| # Something that cant get escaped to HTML by accident | ||||
| REMOVED_PLACEMARKER_OPEN = '@removed_PLACEMARKER_OPEN' | ||||
| REMOVED_PLACEMARKER_CLOSED = '@removed_PLACEMARKER_CLOSED' | ||||
|  | ||||
| ADDED_PLACEMARKER_OPEN = '@added_PLACEMARKER_OPEN' | ||||
| ADDED_PLACEMARKER_CLOSED = '@added_PLACEMARKER_CLOSED' | ||||
|  | ||||
| CHANGED_PLACEMARKER_OPEN = '@changed_PLACEMARKER_OPEN' | ||||
| CHANGED_PLACEMARKER_CLOSED = '@changed_PLACEMARKER_CLOSED' | ||||
|  | ||||
| CHANGED_INTO_PLACEMARKER_OPEN = '@changed_into_PLACEMARKER_OPEN' | ||||
| CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED' | ||||
|  | ||||
| def same_slicer(lst: List[str], start: int, end: int) -> List[str]: | ||||
|     """Return a slice of the list, or a single element if start == end.""" | ||||
| @@ -15,8 +39,7 @@ def customSequenceMatcher( | ||||
|     include_removed: bool = True, | ||||
|     include_added: bool = True, | ||||
|     include_replaced: bool = True, | ||||
|     include_change_type_prefix: bool = True, | ||||
|     html_colour: bool = False | ||||
|     include_change_type_prefix: bool = True | ||||
| ) -> Iterator[List[str]]: | ||||
|     """ | ||||
|     Compare two sequences and yield differences based on specified parameters. | ||||
| @@ -29,8 +52,6 @@ def customSequenceMatcher( | ||||
|         include_added (bool): Include added parts | ||||
|         include_replaced (bool): Include replaced parts | ||||
|         include_change_type_prefix (bool): Add prefixes to indicate change types | ||||
|         html_colour (bool): Use HTML background colors for differences | ||||
|  | ||||
|     Yields: | ||||
|         List[str]: Differences between sequences | ||||
|     """ | ||||
| @@ -42,22 +63,22 @@ def customSequenceMatcher( | ||||
|         if include_equal and tag == 'equal': | ||||
|             yield before[alo:ahi] | ||||
|         elif include_removed and tag == 'delete': | ||||
|             if html_colour: | ||||
|                 yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] | ||||
|             if include_change_type_prefix: | ||||
|                 yield [f'{REMOVED_PLACEMARKER_OPEN}{line}{REMOVED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)] | ||||
|             else: | ||||
|                 yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi) | ||||
|                 yield same_slicer(before, alo, ahi) | ||||
|         elif include_replaced and tag == 'replace': | ||||
|             if html_colour: | ||||
|                 yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] + \ | ||||
|                       [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)] | ||||
|             if include_change_type_prefix: | ||||
|                 yield [f'{CHANGED_PLACEMARKER_OPEN}{line}{CHANGED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)] + \ | ||||
|                       [f'{CHANGED_INTO_PLACEMARKER_OPEN}{line}{CHANGED_INTO_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)] | ||||
|             else: | ||||
|                 yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \ | ||||
|                       [f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi) | ||||
|                 yield same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi) | ||||
|         elif include_added and tag == 'insert': | ||||
|             if html_colour: | ||||
|                 yield [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)] | ||||
|             if include_change_type_prefix: | ||||
|                 yield [f'{ADDED_PLACEMARKER_OPEN}{line}{ADDED_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)] | ||||
|             else: | ||||
|                 yield [f"(added) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi) | ||||
|                 yield same_slicer(after, blo, bhi) | ||||
|  | ||||
|  | ||||
| def render_diff( | ||||
|     previous_version_file_contents: str, | ||||
| @@ -68,8 +89,7 @@ def render_diff( | ||||
|     include_replaced: bool = True, | ||||
|     line_feed_sep: str = "\n", | ||||
|     include_change_type_prefix: bool = True, | ||||
|     patch_format: bool = False, | ||||
|     html_colour: bool = False | ||||
|     patch_format: bool = False | ||||
| ) -> str: | ||||
|     """ | ||||
|     Render the difference between two file contents. | ||||
| @@ -84,8 +104,6 @@ def render_diff( | ||||
|         line_feed_sep (str): Separator for lines in output | ||||
|         include_change_type_prefix (bool): Add prefixes to indicate change types | ||||
|         patch_format (bool): Use patch format for output | ||||
|         html_colour (bool): Use HTML background colors for differences | ||||
|  | ||||
|     Returns: | ||||
|         str: Rendered difference | ||||
|     """ | ||||
| @@ -103,8 +121,7 @@ def render_diff( | ||||
|         include_removed=include_removed, | ||||
|         include_added=include_added, | ||||
|         include_replaced=include_replaced, | ||||
|         include_change_type_prefix=include_change_type_prefix, | ||||
|         html_colour=html_colour | ||||
|         include_change_type_prefix=include_change_type_prefix | ||||
|     ) | ||||
|  | ||||
|     def flatten(lst: List[Union[str, List[str]]]) -> str: | ||||
|   | ||||
| @@ -12,19 +12,17 @@ from blinker import signal | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from threading import Event | ||||
| from changedetectionio.custom_queue import SignalPriorityQueue, AsyncSignalPriorityQueue, NotificationQueue | ||||
| from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue | ||||
| from changedetectionio import worker_handler | ||||
|  | ||||
| from flask import ( | ||||
|     Flask, | ||||
|     abort, | ||||
|     flash, | ||||
|     make_response, | ||||
|     redirect, | ||||
|     render_template, | ||||
|     request, | ||||
|     send_from_directory, | ||||
|     session, | ||||
|     url_for, | ||||
| ) | ||||
| from flask_compress import Compress as FlaskCompress | ||||
| @@ -40,7 +38,7 @@ from loguru import logger | ||||
|  | ||||
| from changedetectionio import __version__ | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications | ||||
| from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon | ||||
| from changedetectionio.api.Search import Search | ||||
| from .time_handler import is_within_schedule | ||||
|  | ||||
| @@ -50,8 +48,8 @@ datastore = None | ||||
| ticker_thread = None | ||||
| extra_stylesheets = [] | ||||
|  | ||||
| # Use async queue by default, keep sync for backward compatibility   | ||||
| update_q = AsyncSignalPriorityQueue() if worker_handler.USE_ASYNC_WORKERS else SignalPriorityQueue() | ||||
| # Use bulletproof janus-based queues for sync/async reliability   | ||||
| update_q = RecheckPriorityQueue() | ||||
| notification_q = NotificationQueue() | ||||
| MAX_QUEUE_SIZE = 2000 | ||||
|  | ||||
| @@ -135,6 +133,11 @@ def get_socketio_path(): | ||||
|     # Socket.IO will be available at {prefix}/socket.io/ | ||||
|     return prefix | ||||
|  | ||||
| @app.template_global('is_safe_valid_url') | ||||
| def _is_safe_valid_url(test_url): | ||||
|     from .validate_url import is_safe_valid_url | ||||
|     return is_safe_valid_url(test_url) | ||||
|  | ||||
|  | ||||
| @app.template_filter('format_number_locale') | ||||
| def _jinja2_filter_format_number_locale(value: float) -> str: | ||||
| @@ -307,7 +310,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     watch_api.add_resource(WatchSingleHistory, | ||||
|                            '/api/v1/watch/<string:uuid>/history/<string:timestamp>', | ||||
|                            resource_class_kwargs={'datastore': datastore, 'update_q': update_q}) | ||||
|  | ||||
|     watch_api.add_resource(WatchFavicon, | ||||
|                            '/api/v1/watch/<string:uuid>/favicon', | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
|     watch_api.add_resource(WatchHistory, | ||||
|                            '/api/v1/watch/<string:uuid>/history', | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
| @@ -329,7 +334,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
|  | ||||
|     watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>', | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
|                            resource_class_kwargs={'datastore': datastore, 'update_q': update_q}) | ||||
|                             | ||||
|     watch_api.add_resource(Search, '/api/v1/search', | ||||
|                            resource_class_kwargs={'datastore': datastore}) | ||||
| @@ -382,7 +387,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # We would sometimes get login loop errors on sites hosted in sub-paths | ||||
|  | ||||
|             # note for the future: | ||||
|             #            if not is_safe_url(next): | ||||
|             #            if not is_safe_valid_url(next): | ||||
|             #                return flask.abort(400) | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
| @@ -427,6 +432,32 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             except FileNotFoundError: | ||||
|                 abort(404) | ||||
|  | ||||
|         if group == 'favicon': | ||||
|             # Could be sensitive, follow password requirements | ||||
|             if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated: | ||||
|                 abort(403) | ||||
|             # Get the watch object | ||||
|             watch = datastore.data['watching'].get(filename) | ||||
|             if not watch: | ||||
|                 abort(404) | ||||
|  | ||||
|             favicon_filename = watch.get_favicon_filename() | ||||
|             if favicon_filename: | ||||
|                 try: | ||||
|                     import magic | ||||
|                     mime = magic.from_file( | ||||
|                         os.path.join(watch.watch_data_dir, favicon_filename), | ||||
|                         mime=True | ||||
|                     ) | ||||
|                 except ImportError: | ||||
|                     # Fallback, no python-magic | ||||
|                     import mimetypes | ||||
|                     mime, encoding = mimetypes.guess_type(favicon_filename) | ||||
|  | ||||
|                 response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename)) | ||||
|                 response.headers['Content-type'] = mime | ||||
|                 response.headers['Cache-Control'] = 'max-age=300, must-revalidate'  # Cache for 5 minutes, then revalidate | ||||
|                 return response | ||||
|  | ||||
|         if group == 'visual_selector_data': | ||||
|             # Could be sensitive, follow password requirements | ||||
| @@ -769,7 +800,7 @@ def ticker_thread_check_time_launch_checks(): | ||||
|             else: | ||||
|                 time_schedule_limit = watch.get('time_schedule_limit') | ||||
|                 logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)") | ||||
|             tz_name = datastore.data['settings']['application'].get('timezone', 'UTC') | ||||
|             tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip()) | ||||
|  | ||||
|             if time_schedule_limit and time_schedule_limit.get('enabled'): | ||||
|                 try: | ||||
| @@ -818,16 +849,22 @@ def ticker_thread_check_time_launch_checks(): | ||||
|  | ||||
|                     # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it. | ||||
|                     priority = int(time.time()) | ||||
|                     logger.debug( | ||||
|                         f"> Queued watch UUID {uuid} " | ||||
|                         f"last checked at {watch['last_checked']} " | ||||
|                         f"queued at {now:0.2f} priority {priority} " | ||||
|                         f"jitter {watch.jitter_seconds:0.2f}s, " | ||||
|                         f"{now - watch['last_checked']:0.2f}s since last checked") | ||||
|  | ||||
|                     # Into the queue with you | ||||
|                     worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid})) | ||||
|  | ||||
|                     queued_successfully = worker_handler.queue_item_async_safe(update_q, | ||||
|                                                                                queuedWatchMetaData.PrioritizedItem(priority=priority, | ||||
|                                                                                                                    item={'uuid': uuid}) | ||||
|                                                                                ) | ||||
|                     if queued_successfully: | ||||
|                         logger.debug( | ||||
|                             f"> Queued watch UUID {uuid} " | ||||
|                             f"last checked at {watch['last_checked']} " | ||||
|                             f"queued at {now:0.2f} priority {priority} " | ||||
|                             f"jitter {watch.jitter_seconds:0.2f}s, " | ||||
|                             f"{now - watch['last_checked']:0.2f}s since last checked") | ||||
|                     else: | ||||
|                         logger.critical(f"CRITICAL: Failed to queue watch UUID {uuid} in ticker thread!") | ||||
|                          | ||||
|                     # Reset for next time | ||||
|                     watch.jitter_seconds = 0 | ||||
|  | ||||
|   | ||||
| @@ -5,6 +5,7 @@ from wtforms.widgets.core import TimeInput | ||||
|  | ||||
| from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES | ||||
| from changedetectionio.conditions.form import ConditionFormRow | ||||
| from changedetectionio.notification_service import NotificationContextData | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from wtforms import ( | ||||
| @@ -23,11 +24,11 @@ from wtforms import ( | ||||
| ) | ||||
| from flask_wtf.file import FileField, FileAllowed | ||||
| from wtforms.fields import FieldList | ||||
| from wtforms.utils import unset_value | ||||
|  | ||||
| from wtforms.validators import ValidationError | ||||
|  | ||||
| from validators.url import url as url_validator | ||||
|  | ||||
| from changedetectionio.widgets import TernaryNoneBooleanField | ||||
|  | ||||
| # default | ||||
| # each select <option data-enabled="enabled-0-0" | ||||
| @@ -54,6 +55,8 @@ valid_method = { | ||||
|  | ||||
| default_method = 'GET' | ||||
| allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
| REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.' | ||||
| REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.' | ||||
|  | ||||
| class StringListField(StringField): | ||||
|     widget = widgets.TextArea() | ||||
| @@ -210,6 +213,35 @@ class ScheduleLimitForm(Form): | ||||
|         self.sunday.form.enabled.label.text = "Sunday" | ||||
|  | ||||
|  | ||||
| def validate_time_between_check_has_values(form): | ||||
|     """ | ||||
|     Custom validation function for TimeBetweenCheckForm. | ||||
|     Returns True if at least one time interval field has a value > 0. | ||||
|     """ | ||||
|     res = any([ | ||||
|         form.weeks.data and int(form.weeks.data) > 0, | ||||
|         form.days.data and int(form.days.data) > 0, | ||||
|         form.hours.data and int(form.hours.data) > 0, | ||||
|         form.minutes.data and int(form.minutes.data) > 0, | ||||
|         form.seconds.data and int(form.seconds.data) > 0 | ||||
|     ]) | ||||
|  | ||||
|     return res | ||||
|  | ||||
|  | ||||
| class RequiredTimeInterval(object): | ||||
|     """ | ||||
|     WTForms validator that ensures at least one time interval field has a value > 0. | ||||
|     Use this with FormField(TimeBetweenCheckForm, validators=[RequiredTimeInterval()]). | ||||
|     """ | ||||
|     def __init__(self, message=None): | ||||
|         self.message = message or 'At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.' | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         if not validate_time_between_check_has_values(field.form): | ||||
|             raise ValidationError(self.message) | ||||
|  | ||||
|  | ||||
| class TimeBetweenCheckForm(Form): | ||||
|     weeks = IntegerField('Weeks', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
|     days = IntegerField('Days', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
| @@ -218,6 +250,123 @@ class TimeBetweenCheckForm(Form): | ||||
|     seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
|     # @todo add total seconds minimum validatior = minimum_seconds_recheck_time | ||||
|  | ||||
|     def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs): | ||||
|         super().__init__(formdata, obj, prefix, data, meta, **kwargs) | ||||
|         self.require_at_least_one = kwargs.get('require_at_least_one', False) | ||||
|         self.require_at_least_one_message = kwargs.get('require_at_least_one_message', REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT) | ||||
|  | ||||
|     def validate(self, **kwargs): | ||||
|         """Custom validation that can optionally require at least one time interval.""" | ||||
|         # Run normal field validation first | ||||
|         if not super().validate(**kwargs): | ||||
|             return False | ||||
|  | ||||
|         # Apply optional "at least one" validation | ||||
|         if self.require_at_least_one: | ||||
|             if not validate_time_between_check_has_values(self): | ||||
|                 # Add error to the form's general errors (not field-specific) | ||||
|                 if not hasattr(self, '_formdata_errors'): | ||||
|                     self._formdata_errors = [] | ||||
|                 self._formdata_errors.append(self.require_at_least_one_message) | ||||
|                 return False | ||||
|  | ||||
|         return True | ||||
|  | ||||
|  | ||||
| class EnhancedFormField(FormField): | ||||
|     """ | ||||
|     An enhanced FormField that supports conditional validation with top-level error messages. | ||||
|     Adds a 'top_errors' property for validation errors at the FormField level. | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, form_class, label=None, validators=None, separator="-", | ||||
|                  conditional_field=None, conditional_message=None, conditional_test_function=None, **kwargs): | ||||
|         """ | ||||
|         Initialize EnhancedFormField with optional conditional validation. | ||||
|  | ||||
|         :param conditional_field: Name of the field this FormField depends on (e.g. 'time_between_check_use_default') | ||||
|         :param conditional_message: Error message to show when validation fails | ||||
|         :param conditional_test_function: Custom function to test if FormField has valid values. | ||||
|                                         Should take self.form as parameter and return True if valid. | ||||
|         """ | ||||
|         super().__init__(form_class, label, validators, separator, **kwargs) | ||||
|         self.top_errors = [] | ||||
|         self.conditional_field = conditional_field | ||||
|         self.conditional_message = conditional_message or "At least one field must have a value when not using defaults." | ||||
|         self.conditional_test_function = conditional_test_function | ||||
|  | ||||
|     def validate(self, form, extra_validators=()): | ||||
|         """ | ||||
|         Custom validation that supports conditional logic and stores top-level errors. | ||||
|         """ | ||||
|         self.top_errors = [] | ||||
|  | ||||
|         # First run the normal FormField validation | ||||
|         base_valid = super().validate(form, extra_validators) | ||||
|  | ||||
|         # Apply conditional validation if configured | ||||
|         if self.conditional_field and hasattr(form, self.conditional_field): | ||||
|             conditional_field_obj = getattr(form, self.conditional_field) | ||||
|  | ||||
|             # If the conditional field is False/unchecked, check if this FormField has any values | ||||
|             if not conditional_field_obj.data: | ||||
|                 # Use custom test function if provided, otherwise use generic fallback | ||||
|                 if self.conditional_test_function: | ||||
|                     has_any_value = self.conditional_test_function(self.form) | ||||
|                 else: | ||||
|                     # Generic fallback - check if any field has truthy data | ||||
|                     has_any_value = any(field.data for field in self.form if hasattr(field, 'data') and field.data) | ||||
|  | ||||
|                 if not has_any_value: | ||||
|                     self.top_errors.append(self.conditional_message) | ||||
|                     base_valid = False | ||||
|  | ||||
|         return base_valid | ||||
|  | ||||
|  | ||||
| class RequiredFormField(FormField): | ||||
|     """ | ||||
|     A FormField that passes require_at_least_one=True to TimeBetweenCheckForm. | ||||
|     Use this when you want the sub-form to always require at least one value. | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, form_class, label=None, validators=None, separator="-", **kwargs): | ||||
|         super().__init__(form_class, label, validators, separator, **kwargs) | ||||
|  | ||||
|     def process(self, formdata, data=unset_value, extra_filters=None): | ||||
|         if extra_filters: | ||||
|             raise TypeError( | ||||
|                 "FormField cannot take filters, as the encapsulated" | ||||
|                 "data is not mutable." | ||||
|             ) | ||||
|  | ||||
|         if data is unset_value: | ||||
|             try: | ||||
|                 data = self.default() | ||||
|             except TypeError: | ||||
|                 data = self.default | ||||
|             self._obj = data | ||||
|  | ||||
|         self.object_data = data | ||||
|  | ||||
|         prefix = self.name + self.separator | ||||
|         # Pass require_at_least_one=True to the sub-form | ||||
|         if isinstance(data, dict): | ||||
|             self.form = self.form_class(formdata=formdata, prefix=prefix, require_at_least_one=True, **data) | ||||
|         else: | ||||
|             self.form = self.form_class(formdata=formdata, obj=data, prefix=prefix, require_at_least_one=True) | ||||
|  | ||||
|     @property | ||||
|     def errors(self): | ||||
|         """Include sub-form validation errors""" | ||||
|         form_errors = self.form.errors | ||||
|         # Add any general form errors to a special 'form' key | ||||
|         if hasattr(self.form, '_formdata_errors') and self.form._formdata_errors: | ||||
|             form_errors = dict(form_errors)  # Make a copy | ||||
|             form_errors['form'] = self.form._formdata_errors | ||||
|         return form_errors | ||||
|  | ||||
|  | ||||
| # Separated by  key:value | ||||
| class StringDictKeyValue(StringField): | ||||
|     widget = widgets.TextArea() | ||||
| @@ -318,11 +467,16 @@ class ValidateAppRiseServers(object): | ||||
|         import apprise | ||||
|         from .notification.apprise_plugin.assets import apprise_asset | ||||
|         from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler  # noqa: F401 | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|         apobj = apprise.Apprise(asset=apprise_asset) | ||||
|  | ||||
|         for server_url in field.data: | ||||
|             url = server_url.strip() | ||||
|             generic_notification_context_data = NotificationContextData() | ||||
|             # Make sure something is atleast in all those regular token fields | ||||
|             generic_notification_context_data.set_random_for_validation() | ||||
|  | ||||
|             url = jinja_render(template_str=server_url.strip(), **generic_notification_context_data).strip() | ||||
|             if url.startswith("#"): | ||||
|                 continue | ||||
|  | ||||
| @@ -336,9 +490,8 @@ class ValidateJinja2Template(object): | ||||
|     """ | ||||
|     def __call__(self, form, field): | ||||
|         from changedetectionio import notification | ||||
|  | ||||
|         from changedetectionio.jinja2_custom import create_jinja_env | ||||
|         from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError | ||||
|         from jinja2.sandbox import ImmutableSandboxedEnvironment | ||||
|         from jinja2.meta import find_undeclared_variables | ||||
|         import jinja2.exceptions | ||||
|  | ||||
| @@ -346,9 +499,11 @@ class ValidateJinja2Template(object): | ||||
|         joined_data = ' '.join(map(str, field.data)) if isinstance(field.data, list) else f"{field.data}" | ||||
|  | ||||
|         try: | ||||
|             jinja2_env = ImmutableSandboxedEnvironment(loader=BaseLoader) | ||||
|             jinja2_env.globals.update(notification.valid_tokens) | ||||
|             # Extra validation tokens provided on the form_class(... extra_tokens={}) setup | ||||
|             # Use the shared helper to create a properly configured environment | ||||
|             jinja2_env = create_jinja_env(loader=BaseLoader) | ||||
|  | ||||
|             # Add notification tokens for validation | ||||
|             jinja2_env.globals.update(NotificationContextData()) | ||||
|             if hasattr(field, 'extra_notification_tokens'): | ||||
|                 jinja2_env.globals.update(field.extra_notification_tokens) | ||||
|  | ||||
| @@ -360,6 +515,7 @@ class ValidateJinja2Template(object): | ||||
|         except jinja2.exceptions.SecurityError as e: | ||||
|             raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e | ||||
|  | ||||
|         # Check for undeclared variables | ||||
|         ast = jinja2_env.parse(joined_data) | ||||
|         undefined = ", ".join(find_undeclared_variables(ast)) | ||||
|         if undefined: | ||||
| @@ -382,19 +538,23 @@ class validateURL(object): | ||||
|  | ||||
|  | ||||
| def validate_url(test_url): | ||||
|     # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|     try: | ||||
|         url_validator(test_url, simple_host=allow_simplehost) | ||||
|     except validators.ValidationError: | ||||
|         #@todo check for xss | ||||
|         message = f"'{test_url}' is not a valid URL." | ||||
|     from changedetectionio.validate_url import is_safe_valid_url | ||||
|     if not is_safe_valid_url(test_url): | ||||
|         # This should be wtforms.validators. | ||||
|         raise ValidationError(message) | ||||
|         raise ValidationError('Watch protocol is not permitted or invalid URL format') | ||||
|  | ||||
|  | ||||
| class ValidateSinglePythonRegexString(object): | ||||
|     def __init__(self, message=None): | ||||
|         self.message = message | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         try: | ||||
|             re.compile(field.data) | ||||
|         except re.error: | ||||
|             message = field.gettext('RegEx \'%s\' is not a valid regular expression.') | ||||
|             raise ValidationError(message % (field.data)) | ||||
|  | ||||
|     from .model.Watch import is_safe_url | ||||
|     if not is_safe_url(test_url): | ||||
|         # This should be wtforms.validators. | ||||
|         raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format') | ||||
|  | ||||
| class ValidateListRegex(object): | ||||
|     """ | ||||
| @@ -414,6 +574,7 @@ class ValidateListRegex(object): | ||||
|                     message = field.gettext('RegEx \'%s\' is not a valid regular expression.') | ||||
|                     raise ValidationError(message % (line)) | ||||
|  | ||||
|  | ||||
| class ValidateCSSJSONXPATHInput(object): | ||||
|     """ | ||||
|     Filter validation | ||||
| @@ -513,6 +674,51 @@ class ValidateCSSJSONXPATHInput(object): | ||||
|                 except: | ||||
|                     raise ValidationError("A system-error occurred when validating your jq expression") | ||||
|  | ||||
| class ValidateSimpleURL: | ||||
|     """Validate that the value can be parsed by urllib.parse.urlparse() and has a scheme/netloc.""" | ||||
|     def __init__(self, message=None): | ||||
|         self.message = message or "Invalid URL." | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         data = (field.data or "").strip() | ||||
|         if not data: | ||||
|             return  # empty is OK — pair with validators.Optional() | ||||
|         from urllib.parse import urlparse | ||||
|  | ||||
|         parsed = urlparse(data) | ||||
|         if not parsed.scheme or not parsed.netloc: | ||||
|             raise ValidationError(self.message) | ||||
|  | ||||
| class ValidateStartsWithRegex(object): | ||||
|     def __init__(self, regex, *, flags=0, message=None, allow_empty=True, split_lines=True): | ||||
|         # compile with given flags (we’ll pass re.IGNORECASE below) | ||||
|         self.pattern = re.compile(regex, flags) if isinstance(regex, str) else regex | ||||
|         self.message = message | ||||
|         self.allow_empty = allow_empty | ||||
|         self.split_lines = split_lines | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         data = field.data | ||||
|         if not data: | ||||
|             return | ||||
|  | ||||
|         # normalize into list of lines | ||||
|         if isinstance(data, str) and self.split_lines: | ||||
|             lines = data.splitlines() | ||||
|         elif isinstance(data, (list, tuple)): | ||||
|             lines = data | ||||
|         else: | ||||
|             lines = [data] | ||||
|  | ||||
|         for line in lines: | ||||
|             stripped = line.strip() | ||||
|             if not stripped: | ||||
|                 if self.allow_empty: | ||||
|                     continue | ||||
|                 raise ValidationError(self.message or "Empty value not allowed.") | ||||
|             if not self.pattern.match(stripped): | ||||
|                 raise ValidationError(self.message or "Invalid value.") | ||||
|  | ||||
| class quickWatchForm(Form): | ||||
|     from . import processors | ||||
|  | ||||
| @@ -523,7 +729,6 @@ class quickWatchForm(Form): | ||||
|     edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|  | ||||
|  | ||||
| # Common to a single watch and the global settings | ||||
| class commonSettingsForm(Form): | ||||
|     from . import processors | ||||
| @@ -534,16 +739,23 @@ class commonSettingsForm(Form): | ||||
|         self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) | ||||
|         self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {}) | ||||
|  | ||||
|     extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False) | ||||
|     fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()]) | ||||
|     notification_format = SelectField('Notification format', choices=valid_notification_formats.keys()) | ||||
|     notification_format = SelectField('Notification format', choices=list(valid_notification_formats.items())) | ||||
|     notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()]) | ||||
|     notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()]) | ||||
|     processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff") | ||||
|     timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()]) | ||||
|     scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()]) | ||||
|     webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")]) | ||||
|  | ||||
| # Not true anymore but keep the validate_ hook for future use, we convert color tags | ||||
| #    def validate_notification_urls(self, field): | ||||
| #        """Validate that HTML Color format is not used with Telegram""" | ||||
| #        if self.notification_format.data == 'HTML Color' and field.data: | ||||
| #            for url in field.data: | ||||
| #                if url and ('tgram://' in url or 'discord://' in url or 'discord.com/api/webhooks' in url): | ||||
| #                    raise ValidationError('HTML Color format is not supported by Telegram and Discord. Please choose another Notification Format (Plain Text, HTML, or Markdown to HTML).') | ||||
|  | ||||
|  | ||||
| class importForm(Form): | ||||
|     from . import processors | ||||
| @@ -568,11 +780,16 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     url = fields.URLField('URL', validators=[validateURL()]) | ||||
|     tags = StringTagUUID('Group tag', [validators.Optional()], default='') | ||||
|  | ||||
|     time_between_check = FormField(TimeBetweenCheckForm) | ||||
|     time_between_check = EnhancedFormField( | ||||
|         TimeBetweenCheckForm, | ||||
|         conditional_field='time_between_check_use_default', | ||||
|         conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT, | ||||
|         conditional_test_function=validate_time_between_check_has_values | ||||
|     ) | ||||
|  | ||||
|     time_schedule_limit = FormField(ScheduleLimitForm) | ||||
|  | ||||
|     time_between_check_use_default = BooleanField('Use global settings for time between check', default=False) | ||||
|     time_between_check_use_default = BooleanField('Use global settings for time between check and scheduler.', default=False) | ||||
|  | ||||
|     include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='') | ||||
|  | ||||
| @@ -590,6 +807,7 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False) | ||||
|     remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False) | ||||
|     sort_text_alphabetically =  BooleanField('Sort text alphabetically', default=False) | ||||
|     strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None) | ||||
|     trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False) | ||||
|  | ||||
|     filter_text_added = BooleanField('Added lines', default=True) | ||||
| @@ -602,18 +820,18 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()]) | ||||
|     webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()]) | ||||
|  | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"}) | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|     proxy = RadioField('Proxy') | ||||
|     # filter_failure_notification_send @todo make ternary | ||||
|     filter_failure_notification_send = BooleanField( | ||||
|         'Send a notification when the filter can no longer be found on the page', default=False) | ||||
|  | ||||
|     notification_muted = BooleanField('Notifications Muted / Off', default=False) | ||||
|     notification_muted = TernaryNoneBooleanField('Notifications', default=None, yes_text="Muted", no_text="On") | ||||
|     notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False) | ||||
|  | ||||
|     conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL') | ||||
|     conditions = FieldList(FormField(ConditionFormRow), min_entries=1)  # Add rule logic here | ||||
|  | ||||
|     use_page_title_in_list = TernaryNoneBooleanField('Use page <title> in list', default=None) | ||||
|  | ||||
|     def extra_tab_content(self): | ||||
|         return None | ||||
| @@ -625,7 +843,7 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|         if not super().validate(): | ||||
|             return False | ||||
|  | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|         result = True | ||||
|  | ||||
|         # Fail form validation when a body is set for a GET | ||||
| @@ -688,23 +906,36 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     ): | ||||
|         super().__init__(formdata, obj, prefix, data, meta, **kwargs) | ||||
|         if kwargs and kwargs.get('default_system_settings'): | ||||
|             default_tz = kwargs.get('default_system_settings').get('application', {}).get('timezone') | ||||
|             default_tz = kwargs.get('default_system_settings').get('application', {}).get('scheduler_timezone_default') | ||||
|             if default_tz: | ||||
|                 self.time_schedule_limit.form.timezone.render_kw['placeholder'] = default_tz | ||||
|  | ||||
|  | ||||
|  | ||||
| class SingleExtraProxy(Form): | ||||
|  | ||||
|     # maybe better to set some <script>var.. | ||||
|     proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"}) | ||||
|     proxy_url = StringField('Proxy URL', [validators.Optional()], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50}) | ||||
|     # @todo do the validation here instead | ||||
|     proxy_url = StringField('Proxy URL', [ | ||||
|         validators.Optional(), | ||||
|         ValidateStartsWithRegex( | ||||
|             regex=r'^(https?|socks5)://',  # ✅ main pattern | ||||
|             flags=re.IGNORECASE,  # ✅ makes it case-insensitive | ||||
|             message='Proxy URLs must start with http://, https:// or socks5://', | ||||
|         ), | ||||
|         ValidateSimpleURL() | ||||
|     ], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50}) | ||||
|  | ||||
| class SingleExtraBrowser(Form): | ||||
|     browser_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"}) | ||||
|     browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50}) | ||||
|     # @todo do the validation here instead | ||||
|     browser_connection_url = StringField('Browser connection URL', [ | ||||
|         validators.Optional(), | ||||
|         ValidateStartsWithRegex( | ||||
|             regex=r'^(wss?|ws)://', | ||||
|             flags=re.IGNORECASE, | ||||
|             message='Browser URLs must start with wss:// or ws://' | ||||
|         ), | ||||
|         ValidateSimpleURL() | ||||
|     ], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50}) | ||||
|  | ||||
| class DefaultUAInputForm(Form): | ||||
|     html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"}) | ||||
| @@ -713,9 +944,9 @@ class DefaultUAInputForm(Form): | ||||
|  | ||||
| # datastore.data['settings']['requests'].. | ||||
| class globalSettingsRequestForm(Form): | ||||
|     time_between_check = FormField(TimeBetweenCheckForm) | ||||
|     time_between_check = RequiredFormField(TimeBetweenCheckForm) | ||||
|     time_schedule_limit = FormField(ScheduleLimitForm) | ||||
|     proxy = RadioField('Proxy') | ||||
|     proxy = RadioField('Default proxy') | ||||
|     jitter_seconds = IntegerField('Random jitter seconds ± check', | ||||
|                                   render_kw={"style": "width: 5em;"}, | ||||
|                                   validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
| @@ -724,7 +955,12 @@ class globalSettingsRequestForm(Form): | ||||
|                           render_kw={"style": "width: 5em;"}, | ||||
|                           validators=[validators.NumberRange(min=1, max=50, | ||||
|                                                              message="Should be between 1 and 50")]) | ||||
|      | ||||
|  | ||||
|     timeout = IntegerField('Requests timeout in seconds', | ||||
|                            render_kw={"style": "width: 5em;"}, | ||||
|                            validators=[validators.NumberRange(min=1, max=999, | ||||
|                                                               message="Should be between 1 and 999")]) | ||||
|  | ||||
|     extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5) | ||||
|     extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5) | ||||
|  | ||||
| @@ -740,6 +976,8 @@ class globalSettingsRequestForm(Form): | ||||
| class globalSettingsApplicationUIForm(Form): | ||||
|     open_diff_in_new_tab = BooleanField("Open 'History' page in a new tab", default=True, validators=[validators.Optional()]) | ||||
|     socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()]) | ||||
|     favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()]) | ||||
|     use_page_title_in_list = BooleanField('Use page <title> in watch overview list') #BooleanField=True | ||||
|  | ||||
| # datastore.data['settings']['application'].. | ||||
| class globalSettingsApplicationForm(commonSettingsForm): | ||||
| @@ -764,9 +1002,14 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|  | ||||
|     removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) | ||||
|     shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()]) | ||||
|     shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()]) | ||||
|     strip_ignored_lines = BooleanField('Strip ignored lines') | ||||
|     rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True, | ||||
|                                       validators=[validators.Optional()]) | ||||
|  | ||||
|     rss_reader_mode = BooleanField('RSS reader mode ', default=False, | ||||
|                                       validators=[validators.Optional()]) | ||||
|  | ||||
|     filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification', | ||||
|                                                                   render_kw={"style": "width: 5em;"}, | ||||
|                                                                   validators=[validators.NumberRange(min=0, | ||||
| @@ -786,9 +1029,9 @@ class globalSettingsForm(Form): | ||||
|  | ||||
|     requests = FormField(globalSettingsRequestForm) | ||||
|     application = FormField(globalSettingsApplicationForm) | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"}) | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|  | ||||
| class extractDataForm(Form): | ||||
|     extract_regex = StringField('RegEx to extract', validators=[validators.Length(min=1, message="Needs a RegEx")]) | ||||
|     extract_regex = StringField('RegEx to extract', validators=[validators.DataRequired(), ValidateSinglePythonRegexString()]) | ||||
|     extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| from functools import lru_cache | ||||
|  | ||||
| from loguru import logger | ||||
| from lxml import etree | ||||
| from typing import List | ||||
| import html | ||||
| import json | ||||
| import re | ||||
|  | ||||
| @@ -9,6 +11,10 @@ TEXT_FILTER_LIST_LINE_SUFFIX = "<br>" | ||||
| TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ') | ||||
| PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$' | ||||
|  | ||||
| TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S) | ||||
| META_CS  = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I) | ||||
| META_CT  = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I) | ||||
|  | ||||
| # 'price' , 'lowPrice', 'highPrice' are usually under here | ||||
| # All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here | ||||
| LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"] | ||||
| @@ -17,9 +23,9 @@ class JSONNotFound(ValueError): | ||||
|     def __init__(self, msg): | ||||
|         ValueError.__init__(self, msg) | ||||
|  | ||||
|  | ||||
| # Doesn't look like python supports forward slash auto enclosure in re.findall | ||||
| # So convert it to inline flag "(?i)foobar" type configuration | ||||
| @lru_cache(maxsize=100) | ||||
| def perl_style_slash_enclosed_regex_to_options(regex): | ||||
|  | ||||
|     res = re.search(PERL_STYLE_REGEX, regex, re.IGNORECASE) | ||||
| @@ -52,13 +58,17 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting | ||||
|  | ||||
|     return html_block | ||||
|  | ||||
| def subtractive_css_selector(css_selector, html_content): | ||||
| def subtractive_css_selector(css_selector, content): | ||||
|     from bs4 import BeautifulSoup | ||||
|     soup = BeautifulSoup(html_content, "html.parser") | ||||
|     soup = BeautifulSoup(content, "html.parser") | ||||
|  | ||||
|     # So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM | ||||
|     elements_to_remove = soup.select(css_selector) | ||||
|  | ||||
|     if not elements_to_remove: | ||||
|         # Better to return the original that rebuild with BeautifulSoup | ||||
|         return content | ||||
|  | ||||
|     # Then, remove them in a separate loop | ||||
|     for item in elements_to_remove: | ||||
|         item.decompose() | ||||
| @@ -66,6 +76,7 @@ def subtractive_css_selector(css_selector, html_content): | ||||
|     return str(soup) | ||||
|  | ||||
| def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str: | ||||
|     from lxml import etree | ||||
|     # Parse the HTML content using lxml | ||||
|     html_tree = etree.HTML(html_content) | ||||
|  | ||||
| @@ -77,6 +88,10 @@ def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str: | ||||
|         # Collect elements for each selector | ||||
|         elements_to_remove.extend(html_tree.xpath(selector)) | ||||
|  | ||||
|     # If no elements were found, return the original HTML content | ||||
|     if not elements_to_remove: | ||||
|         return html_content | ||||
|  | ||||
|     # Then, remove them in a separate loop | ||||
|     for element in elements_to_remove: | ||||
|         if element.getparent() is not None:  # Ensure the element has a parent before removing | ||||
| @@ -94,7 +109,7 @@ def element_removal(selectors: List[str], html_content): | ||||
|     xpath_selectors = [] | ||||
|  | ||||
|     for selector in selectors: | ||||
|         if selector.startswith(('xpath:', 'xpath1:', '//')): | ||||
|         if selector.strip().startswith(('xpath:', 'xpath1:', '//')): | ||||
|             # Handle XPath selectors separately | ||||
|             xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:') | ||||
|             xpath_selectors.append(xpath_selector) | ||||
| @@ -171,8 +186,21 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False | ||||
|     tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser) | ||||
|     html_block = "" | ||||
|  | ||||
|     r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser) | ||||
|     #@note: //title/text() wont work where <title>CDATA.. | ||||
|     # Build namespace map for XPath queries | ||||
|     namespaces = {'re': 'http://exslt.org/regular-expressions'} | ||||
|  | ||||
|     # Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML) | ||||
|     # XPath spec: unprefixed element names have no namespace, not the default namespace | ||||
|     # Solution: Register the default namespace with empty string prefix in elementpath | ||||
|     # This is primarily for RSS/Atom feeds but works for any XML with default namespace | ||||
|     if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap: | ||||
|         # Register the default namespace with empty string prefix for elementpath | ||||
|         # This allows //title to match elements in the default namespace | ||||
|         namespaces[''] = tree.nsmap[None] | ||||
|  | ||||
|     r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser) | ||||
|     #@note: //title/text() now works with default namespaces (fixed by registering '' prefix) | ||||
|     #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first) | ||||
|  | ||||
|     if type(r) != list: | ||||
|         r = [r] | ||||
| @@ -207,8 +235,19 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals | ||||
|     tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser) | ||||
|     html_block = "" | ||||
|  | ||||
|     r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}) | ||||
|     #@note: //title/text() wont work where <title>CDATA.. | ||||
|     # Build namespace map for XPath queries | ||||
|     namespaces = {'re': 'http://exslt.org/regular-expressions'} | ||||
|  | ||||
|     # NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace | ||||
|     # For documents with default namespace (RSS/Atom feeds), users must use: | ||||
|     #   - local-name(): //*[local-name()='title']/text() | ||||
|     #   - Or use xpath_filter (not xpath1_filter) which supports default namespaces | ||||
|     # XPath spec: unprefixed element names have no namespace, not the default namespace | ||||
|  | ||||
|     r = tree.xpath(xpath_filter.strip(), namespaces=namespaces) | ||||
|     #@note: xpath1 (lxml) does NOT automatically handle default namespaces | ||||
|     #@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support | ||||
|     #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first) | ||||
|  | ||||
|     for element in r: | ||||
|         # When there's more than 1 match, then add the suffix to separate each line | ||||
| @@ -289,70 +328,92 @@ def _get_stripped_text_from_json_match(match): | ||||
|  | ||||
|     return stripped_text_from_html | ||||
|  | ||||
| def extract_json_blob_from_html(content, ensure_is_ldjson_info_type, json_filter): | ||||
|     from bs4 import BeautifulSoup | ||||
|     stripped_text_from_html = '' | ||||
|  | ||||
|     # Foreach <script json></script> blob.. just return the first that matches json_filter | ||||
|     # As a last resort, try to parse the whole <body> | ||||
|     soup = BeautifulSoup(content, 'html.parser') | ||||
|  | ||||
|     if ensure_is_ldjson_info_type: | ||||
|         bs_result = soup.find_all('script', {"type": "application/ld+json"}) | ||||
|     else: | ||||
|         bs_result = soup.find_all('script') | ||||
|     bs_result += soup.find_all('body') | ||||
|  | ||||
|     bs_jsons = [] | ||||
|  | ||||
|     for result in bs_result: | ||||
|         # result.text is how bs4 magically strips JSON from the body | ||||
|         content_start = result.text.lstrip("\ufeff").strip()[:100] if result.text else '' | ||||
|         # Skip empty tags, and things that dont even look like JSON | ||||
|         if not result.text or not (content_start[0] == '{' or content_start[0] == '['): | ||||
|             continue | ||||
|         try: | ||||
|             json_data = json.loads(result.text) | ||||
|             bs_jsons.append(json_data) | ||||
|         except json.JSONDecodeError: | ||||
|             # Skip objects which cannot be parsed | ||||
|             continue | ||||
|  | ||||
|     if not bs_jsons: | ||||
|         raise JSONNotFound("No parsable JSON found in this document") | ||||
|  | ||||
|     for json_data in bs_jsons: | ||||
|         stripped_text_from_html = _parse_json(json_data, json_filter) | ||||
|  | ||||
|         if ensure_is_ldjson_info_type: | ||||
|             # Could sometimes be list, string or something else random | ||||
|             if isinstance(json_data, dict): | ||||
|                 # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search | ||||
|                 # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part) | ||||
|                 # @type could also be a list although non-standard ("@type": ["Product", "SubType"],) | ||||
|                 # LD_JSON auto-extract also requires some content PLUS the ldjson to be present | ||||
|                 # 1833 - could be either str or dict, should not be anything else | ||||
|  | ||||
|                 t = json_data.get('@type') | ||||
|                 if t and stripped_text_from_html: | ||||
|  | ||||
|                     if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower(): | ||||
|                         break | ||||
|                     # The non-standard part, some have a list | ||||
|                     elif isinstance(t, list): | ||||
|                         if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]: | ||||
|                             break | ||||
|  | ||||
|         elif stripped_text_from_html: | ||||
|             break | ||||
|  | ||||
|     return stripped_text_from_html | ||||
|  | ||||
| # content - json | ||||
| # json_filter - ie json:$..price | ||||
| # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector) | ||||
| def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None): | ||||
|     from bs4 import BeautifulSoup | ||||
|  | ||||
|     stripped_text_from_html = False | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w | ||||
|     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags | ||||
|     try: | ||||
|         # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work | ||||
|         stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter) | ||||
|     except json.JSONDecodeError as e: | ||||
|         logger.warning(str(e)) | ||||
|  | ||||
|         # Foreach <script json></script> blob.. just return the first that matches json_filter | ||||
|         # As a last resort, try to parse the whole <body> | ||||
|         soup = BeautifulSoup(content, 'html.parser') | ||||
|     # Looks like clean JSON, dont bother extracting from HTML | ||||
|  | ||||
|         if ensure_is_ldjson_info_type: | ||||
|             bs_result = soup.find_all('script', {"type": "application/ld+json"}) | ||||
|         else: | ||||
|             bs_result = soup.find_all('script') | ||||
|         bs_result += soup.find_all('body') | ||||
|     content_start = content.lstrip("\ufeff").strip()[:100] | ||||
|  | ||||
|         bs_jsons = [] | ||||
|         for result in bs_result: | ||||
|             # Skip empty tags, and things that dont even look like JSON | ||||
|             if not result.text or '{' not in result.text: | ||||
|                 continue | ||||
|             try: | ||||
|                 json_data = json.loads(result.text) | ||||
|                 bs_jsons.append(json_data) | ||||
|             except json.JSONDecodeError: | ||||
|                 # Skip objects which cannot be parsed | ||||
|                 continue | ||||
|  | ||||
|         if not bs_jsons: | ||||
|             raise JSONNotFound("No parsable JSON found in this document") | ||||
|          | ||||
|         for json_data in bs_jsons: | ||||
|             stripped_text_from_html = _parse_json(json_data, json_filter) | ||||
|  | ||||
|             if ensure_is_ldjson_info_type: | ||||
|                 # Could sometimes be list, string or something else random | ||||
|                 if isinstance(json_data, dict): | ||||
|                     # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search | ||||
|                     # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part) | ||||
|                     # @type could also be a list although non-standard ("@type": ["Product", "SubType"],) | ||||
|                     # LD_JSON auto-extract also requires some content PLUS the ldjson to be present | ||||
|                     # 1833 - could be either str or dict, should not be anything else | ||||
|  | ||||
|                     t = json_data.get('@type') | ||||
|                     if t and stripped_text_from_html: | ||||
|  | ||||
|                         if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower(): | ||||
|                             break | ||||
|                         # The non-standard part, some have a list | ||||
|                         elif isinstance(t, list): | ||||
|                             if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]: | ||||
|                                 break | ||||
|  | ||||
|             elif stripped_text_from_html: | ||||
|                 break | ||||
|     if content_start[0] == '{' or content_start[0] == '[': | ||||
|         try: | ||||
|             # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work | ||||
|             stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter) | ||||
|         except json.JSONDecodeError as e: | ||||
|             logger.warning(f"Error processing JSON {content[:20]}...{str(e)})") | ||||
|     else: | ||||
|         # Probably something else, go fish inside for it | ||||
|         try: | ||||
|             stripped_text_from_html = extract_json_blob_from_html(content=content, | ||||
|                                                                   ensure_is_ldjson_info_type=ensure_is_ldjson_info_type, | ||||
|                                                                   json_filter=json_filter                                                                  ) | ||||
|         except json.JSONDecodeError as e: | ||||
|             logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})") | ||||
|  | ||||
|     if not stripped_text_from_html: | ||||
|         # Re 265 - Just return an empty string when filter not found | ||||
| @@ -372,6 +433,9 @@ def strip_ignore_text(content, wordlist, mode="content"): | ||||
|     ignored_lines = [] | ||||
|  | ||||
|     for k in wordlist: | ||||
|         # Skip empty strings to avoid matching everything | ||||
|         if not k or not k.strip(): | ||||
|             continue | ||||
|         # Is it a regex? | ||||
|         res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE) | ||||
|         if res: | ||||
| @@ -510,3 +574,43 @@ def get_triggered_text(content, trigger_text): | ||||
|         i += 1 | ||||
|  | ||||
|     return triggered_text | ||||
|  | ||||
|  | ||||
| def extract_title(data: bytes | str, sniff_bytes: int = 2048, scan_chars: int = 8192) -> str | None: | ||||
|     try: | ||||
|         # Only decode/process the prefix we need for title extraction | ||||
|         match data: | ||||
|             case bytes() if data.startswith((b"\xff\xfe", b"\xfe\xff")): | ||||
|                 prefix = data[:scan_chars * 2].decode("utf-16", errors="replace") | ||||
|             case bytes() if data.startswith((b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff")): | ||||
|                 prefix = data[:scan_chars * 4].decode("utf-32", errors="replace") | ||||
|             case bytes(): | ||||
|                 try: | ||||
|                     prefix = data[:scan_chars].decode("utf-8") | ||||
|                 except UnicodeDecodeError: | ||||
|                     try: | ||||
|                         head = data[:sniff_bytes].decode("ascii", errors="ignore") | ||||
|                         if m := (META_CS.search(head) or META_CT.search(head)): | ||||
|                             enc = m.group(1).lower() | ||||
|                         else: | ||||
|                             enc = "cp1252" | ||||
|                         prefix = data[:scan_chars * 2].decode(enc, errors="replace") | ||||
|                     except Exception as e: | ||||
|                         logger.error(f"Title extraction encoding detection failed: {e}") | ||||
|                         return None | ||||
|             case str(): | ||||
|                 prefix = data[:scan_chars] if len(data) > scan_chars else data | ||||
|             case _: | ||||
|                 logger.error(f"Title extraction received unsupported data type: {type(data)}") | ||||
|                 return None | ||||
|  | ||||
|         # Search only in the prefix | ||||
|         if m := TITLE_RE.search(prefix): | ||||
|             title = html.unescape(" ".join(m.group(1).split())).strip() | ||||
|             # Some safe limit | ||||
|             return title[:2000] | ||||
|         return None | ||||
|          | ||||
|     except Exception as e: | ||||
|         logger.error(f"Title extraction failed: {e}") | ||||
|         return None | ||||
							
								
								
									
										22
									
								
								changedetectionio/jinja2_custom/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								changedetectionio/jinja2_custom/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| """ | ||||
| Jinja2 custom extensions and safe rendering utilities. | ||||
| """ | ||||
| from .extensions.TimeExtension import TimeExtension | ||||
| from .safe_jinja import ( | ||||
|     render, | ||||
|     render_fully_escaped, | ||||
|     create_jinja_env, | ||||
|     JINJA2_MAX_RETURN_PAYLOAD_SIZE, | ||||
|     DEFAULT_JINJA2_EXTENSIONS, | ||||
| ) | ||||
| from .plugins.regex import regex_replace | ||||
|  | ||||
| __all__ = [ | ||||
|     'TimeExtension', | ||||
|     'render', | ||||
|     'render_fully_escaped', | ||||
|     'create_jinja_env', | ||||
|     'JINJA2_MAX_RETURN_PAYLOAD_SIZE', | ||||
|     'DEFAULT_JINJA2_EXTENSIONS', | ||||
|     'regex_replace', | ||||
| ] | ||||
							
								
								
									
										221
									
								
								changedetectionio/jinja2_custom/extensions/TimeExtension.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										221
									
								
								changedetectionio/jinja2_custom/extensions/TimeExtension.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,221 @@ | ||||
| """ | ||||
| Jinja2 TimeExtension - Custom date/time handling for templates. | ||||
|  | ||||
| This extension provides the {% now %} tag for Jinja2 templates, offering timezone-aware | ||||
| date/time formatting with support for time offsets. | ||||
|  | ||||
| Why This Extension Exists: | ||||
|     The Arrow library has a now() function (arrow.now()), but Jinja2 templates cannot | ||||
|     directly call Python functions - they need extensions or filters to expose functionality. | ||||
|  | ||||
|     This TimeExtension serves as a Jinja2-to-Arrow bridge that: | ||||
|  | ||||
|     1. Makes Arrow accessible in templates - Jinja2 requires registering functions/tags | ||||
|        through extensions. You cannot use arrow.now() directly in a template. | ||||
|  | ||||
|     2. Provides template-friendly syntax - Instead of complex Python code, you get clean tags: | ||||
|        {% now 'UTC' %} | ||||
|        {% now 'UTC' + 'hours=2' %} | ||||
|        {% now 'Europe/London', '%Y-%m-%d' %} | ||||
|  | ||||
|     3. Adds convenience features on top of Arrow: | ||||
|        - Default timezone from environment variable (TZ) or config | ||||
|        - Default datetime format configuration | ||||
|        - Offset syntax parsing: 'hours=2,minutes=30' → shift(hours=2, minutes=30) | ||||
|        - Empty string timezone support to use configured defaults | ||||
|  | ||||
|     4. Maintains security - Works within Jinja2's sandboxed environment so users | ||||
|        cannot access arbitrary Python code or objects. | ||||
|  | ||||
|     Essentially, this is a Jinja2 wrapper around arrow.now() and arrow.shift() that | ||||
|     provides user-friendly template syntax while maintaining security. | ||||
|  | ||||
| Basic Usage: | ||||
|     {% now 'UTC' %} | ||||
|     # Output: Wed, 09 Dec 2015 23:33:01 | ||||
|  | ||||
| Custom Format: | ||||
|     {% now 'UTC', '%Y-%m-%d %H:%M:%S' %} | ||||
|     # Output: 2015-12-09 23:33:01 | ||||
|  | ||||
| Timezone Support: | ||||
|     {% now 'America/New_York' %} | ||||
|     {% now 'Europe/London' %} | ||||
|     {% now '' %}  # Uses default timezone from environment.default_timezone | ||||
|  | ||||
| Time Offsets (Addition): | ||||
|     {% now 'UTC' + 'hours=2' %} | ||||
|     {% now 'UTC' + 'hours=2,minutes=30' %} | ||||
|     {% now 'UTC' + 'days=1,hours=2,minutes=15,seconds=10' %} | ||||
|  | ||||
| Time Offsets (Subtraction): | ||||
|     {% now 'UTC' - 'minutes=11' %} | ||||
|     {% now 'UTC' - 'days=2,minutes=33,seconds=1' %} | ||||
|  | ||||
| Time Offsets with Custom Format: | ||||
|     {% now 'UTC' + 'hours=2', '%Y-%m-%d %H:%M:%S' %} | ||||
|     # Output: 2015-12-10 01:33:01 | ||||
|  | ||||
| Weekday Support (for finding next/previous weekday): | ||||
|     {% now 'UTC' + 'weekday=0' %}  # Next Monday (0=Monday, 6=Sunday) | ||||
|     {% now 'UTC' + 'weekday=4' %}  # Next Friday | ||||
|  | ||||
| Configuration: | ||||
|     - Default timezone: Set via TZ environment variable or override environment.default_timezone | ||||
|     - Default format: '%a, %d %b %Y %H:%M:%S' (can be overridden via environment.datetime_format) | ||||
|  | ||||
| Environment Customization: | ||||
|     from changedetectionio.jinja2_custom import create_jinja_env | ||||
|  | ||||
|     jinja2_env = create_jinja_env() | ||||
|     jinja2_env.default_timezone = 'America/New_York'  # Override default timezone | ||||
|     jinja2_env.datetime_format = '%Y-%m-%d %H:%M'      # Override default format | ||||
|  | ||||
| Supported Offset Parameters: | ||||
|     - years, months, weeks, days | ||||
|     - hours, minutes, seconds, microseconds | ||||
|     - weekday (0=Monday through 6=Sunday, must be integer) | ||||
|  | ||||
| Note: | ||||
|     This extension uses the Arrow library for timezone-aware datetime handling. | ||||
|     All timezone names should be valid IANA timezone identifiers (e.g., 'America/New_York'). | ||||
| """ | ||||
| import arrow | ||||
|  | ||||
| from jinja2 import nodes | ||||
| from jinja2.ext import Extension | ||||
| import os | ||||
|  | ||||
| class TimeExtension(Extension): | ||||
|     """ | ||||
|     Jinja2 Extension providing the {% now %} tag for timezone-aware date/time rendering. | ||||
|  | ||||
|     This extension adds two attributes to the Jinja2 environment: | ||||
|     - datetime_format: Default strftime format string (default: '%a, %d %b %Y %H:%M:%S') | ||||
|     - default_timezone: Default timezone for rendering (default: TZ env var or 'UTC') | ||||
|  | ||||
|     Both can be overridden after environment creation by setting the attributes directly. | ||||
|     """ | ||||
|  | ||||
|     tags = {'now'} | ||||
|  | ||||
|     def __init__(self, environment): | ||||
|         """Jinja2 Extension constructor.""" | ||||
|         super().__init__(environment) | ||||
|  | ||||
|         environment.extend( | ||||
|             datetime_format='%a, %d %b %Y %H:%M:%S', | ||||
|             default_timezone=os.getenv('TZ', 'UTC').strip() | ||||
|         ) | ||||
|  | ||||
|     def _datetime(self, timezone, operator, offset, datetime_format): | ||||
|         """ | ||||
|         Get current datetime with time offset applied. | ||||
|  | ||||
|         Args: | ||||
|             timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default | ||||
|             operator: '+' for addition or '-' for subtraction | ||||
|             offset: Comma-separated offset parameters (e.g., 'hours=2,minutes=30') | ||||
|             datetime_format: strftime format string or None to use environment default | ||||
|  | ||||
|         Returns: | ||||
|             Formatted datetime string with offset applied | ||||
|  | ||||
|         Example: | ||||
|             _datetime('UTC', '+', 'hours=2,minutes=30', '%Y-%m-%d %H:%M:%S') | ||||
|             # Returns current time + 2.5 hours | ||||
|         """ | ||||
|         # Use default timezone if none specified | ||||
|         if not timezone or timezone == '': | ||||
|             timezone = self.environment.default_timezone | ||||
|  | ||||
|         d = arrow.now(timezone) | ||||
|  | ||||
|         # parse shift params from offset and include operator | ||||
|         shift_params = {} | ||||
|         for param in offset.split(','): | ||||
|             interval, value = param.split('=') | ||||
|             shift_params[interval.strip()] = float(operator + value.strip()) | ||||
|  | ||||
|         # Fix weekday parameter can not be float | ||||
|         if 'weekday' in shift_params: | ||||
|             shift_params['weekday'] = int(shift_params['weekday']) | ||||
|  | ||||
|         d = d.shift(**shift_params) | ||||
|  | ||||
|         if datetime_format is None: | ||||
|             datetime_format = self.environment.datetime_format | ||||
|         return d.strftime(datetime_format) | ||||
|  | ||||
|     def _now(self, timezone, datetime_format): | ||||
|         """ | ||||
|         Get current datetime without any offset. | ||||
|  | ||||
|         Args: | ||||
|             timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default | ||||
|             datetime_format: strftime format string or None to use environment default | ||||
|  | ||||
|         Returns: | ||||
|             Formatted datetime string for current time | ||||
|  | ||||
|         Example: | ||||
|             _now('America/New_York', '%Y-%m-%d %H:%M:%S') | ||||
|             # Returns current time in New York timezone | ||||
|         """ | ||||
|         # Use default timezone if none specified | ||||
|         if not timezone or timezone == '': | ||||
|             timezone = self.environment.default_timezone | ||||
|  | ||||
|         if datetime_format is None: | ||||
|             datetime_format = self.environment.datetime_format | ||||
|         return arrow.now(timezone).strftime(datetime_format) | ||||
|  | ||||
|     def parse(self, parser): | ||||
|         """ | ||||
|         Parse the {% now %} tag and generate appropriate AST nodes. | ||||
|  | ||||
|         This method is called by Jinja2 when it encounters a {% now %} tag. | ||||
|         It parses the tag syntax and determines whether to call _now() or _datetime() | ||||
|         based on whether offset operations (+ or -) are present. | ||||
|  | ||||
|         Supported syntax: | ||||
|             {% now 'timezone' %}                              -> calls _now() | ||||
|             {% now 'timezone', 'format' %}                    -> calls _now() | ||||
|             {% now 'timezone' + 'offset' %}                   -> calls _datetime() | ||||
|             {% now 'timezone' + 'offset', 'format' %}         -> calls _datetime() | ||||
|             {% now 'timezone' - 'offset', 'format' %}         -> calls _datetime() | ||||
|  | ||||
|         Args: | ||||
|             parser: Jinja2 parser instance | ||||
|  | ||||
|         Returns: | ||||
|             nodes.Output: AST output node containing the formatted datetime string | ||||
|         """ | ||||
|         lineno = next(parser.stream).lineno | ||||
|  | ||||
|         node = parser.parse_expression() | ||||
|  | ||||
|         if parser.stream.skip_if('comma'): | ||||
|             datetime_format = parser.parse_expression() | ||||
|         else: | ||||
|             datetime_format = nodes.Const(None) | ||||
|  | ||||
|         if isinstance(node, nodes.Add): | ||||
|             call_method = self.call_method( | ||||
|                 '_datetime', | ||||
|                 [node.left, nodes.Const('+'), node.right, datetime_format], | ||||
|                 lineno=lineno, | ||||
|             ) | ||||
|         elif isinstance(node, nodes.Sub): | ||||
|             call_method = self.call_method( | ||||
|                 '_datetime', | ||||
|                 [node.left, nodes.Const('-'), node.right, datetime_format], | ||||
|                 lineno=lineno, | ||||
|             ) | ||||
|         else: | ||||
|             call_method = self.call_method( | ||||
|                 '_now', | ||||
|                 [node, datetime_format], | ||||
|                 lineno=lineno, | ||||
|             ) | ||||
|         return nodes.Output([call_method], lineno=lineno) | ||||
							
								
								
									
										6
									
								
								changedetectionio/jinja2_custom/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								changedetectionio/jinja2_custom/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
| """ | ||||
| Jinja2 custom filter plugins for changedetection.io | ||||
| """ | ||||
| from .regex import regex_replace | ||||
|  | ||||
| __all__ = ['regex_replace'] | ||||
							
								
								
									
										98
									
								
								changedetectionio/jinja2_custom/plugins/regex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								changedetectionio/jinja2_custom/plugins/regex.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| """ | ||||
| Regex filter plugin for Jinja2 templates. | ||||
|  | ||||
| Provides regex_replace filter for pattern-based string replacements in templates. | ||||
| """ | ||||
| import re | ||||
| import signal | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
def regex_replace(value: str, pattern: str, replacement: str = '', count: int = 0) -> str:
    """
    Replace occurrences of a regex pattern in a string.

    Security: Protected against ReDoS (Regular Expression Denial of Service) attacks:
    - Limits input value size to prevent excessive processing
    - Uses timeout mechanism to prevent runaway regex operations (main thread only)
    - Validates pattern complexity to prevent catastrophic backtracking

    Args:
        value: The input string to perform replacements on
        pattern: The regex pattern to search for
        replacement: The replacement string (default: '')
        count: Maximum number of replacements (0 = replace all, default: 0)

    Returns:
        String with replacements applied, or original value on error

    Example:
        {{ "hello world" | regex_replace("world", "universe") }}
        {{ diff | regex_replace("<td>([^<]+)</td><td>([^<]+)</td>", "Label1: \\1\\nLabel2: \\2") }}

    Security limits:
        - Maximum input size: 10MB
        - Maximum pattern length: 500 characters
        - Operation timeout: 10 seconds (only enforceable in the main thread)
        - Dangerous nested quantifier patterns are rejected
    """
    # Security limits
    MAX_INPUT_SIZE = 1024 * 1024 * 10  # 10MB max input size (measured in characters)
    MAX_PATTERN_LENGTH = 500  # Maximum regex pattern length
    REGEX_TIMEOUT_SECONDS = 10  # Maximum time for regex operation

    # Validate input sizes
    value_str = str(value)
    if len(value_str) > MAX_INPUT_SIZE:
        logger.warning(f"regex_replace: Input too large ({len(value_str)} bytes), truncating")
        value_str = value_str[:MAX_INPUT_SIZE]

    if len(pattern) > MAX_PATTERN_LENGTH:
        logger.warning(f"regex_replace: Pattern too long ({len(pattern)} chars), rejecting")
        return value_str

    # Check for potentially dangerous patterns (basic checks)
    # Nested quantifiers like (a+)+ can cause catastrophic backtracking
    dangerous_patterns = [
        r'\([^)]*\+[^)]*\)\+',  # (x+)+
        r'\([^)]*\*[^)]*\)\+',  # (x*)+
        r'\([^)]*\+[^)]*\)\*',  # (x+)*
        r'\([^)]*\*[^)]*\)\*',  # (x*)*
    ]

    for dangerous in dangerous_patterns:
        if re.search(dangerous, pattern):
            logger.warning(f"regex_replace: Potentially dangerous pattern detected: {pattern}")
            return value_str

    def timeout_handler(signum, frame):
        raise TimeoutError("Regex operation timed out")

    try:
        # Set up a timeout for the regex operation (Unix-like systems only) to
        # mitigate ReDoS. NOTE: signal.signal() raises ValueError when called
        # outside the main thread (e.g. threaded web servers). Previously that
        # ValueError was swallowed by the broad except below and the filter
        # silently returned the input UNCHANGED; now we simply proceed without
        # the alarm so the substitution still happens.
        old_handler = None
        alarm_set = False
        if hasattr(signal, 'SIGALRM'):
            try:
                old_handler = signal.signal(signal.SIGALRM, timeout_handler)
                signal.alarm(REGEX_TIMEOUT_SECONDS)
                alarm_set = True
            except ValueError:
                logger.debug("regex_replace: Not in main thread, regex timeout protection disabled")

        try:
            result = re.sub(pattern, replacement, value_str, count=count)
        finally:
            # Cancel the alarm and restore the previous handler
            if alarm_set:
                signal.alarm(0)
                if old_handler is not None:
                    signal.signal(signal.SIGALRM, old_handler)

        return result

    except TimeoutError:
        logger.error(f"regex_replace: Regex operation timed out - possible ReDoS attack. Pattern: {pattern}")
        return value_str
    except re.error as e:
        logger.warning(f"regex_replace: Invalid regex pattern: {e}")
        return value_str
    except Exception as e:
        logger.error(f"regex_replace: Unexpected error: {e}")
        return value_str
							
								
								
									
										58
									
								
								changedetectionio/jinja2_custom/safe_jinja.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								changedetectionio/jinja2_custom/safe_jinja.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| """ | ||||
| Safe Jinja2 render with max payload sizes | ||||
|  | ||||
| See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations | ||||
| """ | ||||
|  | ||||
| import jinja2.sandbox | ||||
| import typing as t | ||||
| import os | ||||
| from .extensions.TimeExtension import TimeExtension | ||||
| from .plugins import regex_replace | ||||
|  | ||||
| JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10)) | ||||
|  | ||||
| # Default extensions - can be overridden in create_jinja_env() | ||||
| DEFAULT_JINJA2_EXTENSIONS = [TimeExtension] | ||||
|  | ||||
def create_jinja_env(extensions=None, **kwargs) -> jinja2.sandbox.ImmutableSandboxedEnvironment:
    """
    Build a sandboxed Jinja2 environment preloaded with our custom
    extensions, the default timezone and the custom template filters.

    Args:
        extensions: Extension classes to enable; when None, falls back to
            DEFAULT_JINJA2_EXTENSIONS.
        **kwargs: Forwarded verbatim to ImmutableSandboxedEnvironment.

    Returns:
        The configured sandboxed Jinja2 environment.
    """
    env = jinja2.sandbox.ImmutableSandboxedEnvironment(
        extensions=DEFAULT_JINJA2_EXTENSIONS if extensions is None else extensions,
        **kwargs
    )

    # Default timezone comes from the TZ environment variable (UTC fallback)
    env.default_timezone = os.getenv('TZ', 'UTC').strip()

    # Expose custom filters to templates
    env.filters['regex_replace'] = regex_replace

    return env
|  | ||||
|  | ||||
# Used for notifications etc, so it's OK for templates to emit custom HTML
# such as <a href>, but the sandbox limits what data (and therefore which
# functions) a template can reach.
def render(template_str, **args: t.Any) -> str:
    """Render a template string in the sandbox, capping the output length."""
    rendered = create_jinja_env().from_string(template_str).render(args)
    return rendered[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|  | ||||
def render_fully_escaped(content):
    """Return ``content`` with every HTML special character escaped."""
    sandbox = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
    return sandbox.from_string("{{ some_html|e }}").render(some_html=content)
|  | ||||
| @@ -1,4 +1,5 @@ | ||||
| from os import getenv | ||||
| from copy import deepcopy | ||||
|  | ||||
| from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES | ||||
|  | ||||
| @@ -39,12 +40,12 @@ class model(dict): | ||||
|                     'api_access_token_enabled': True, | ||||
|                     'base_url' : None, | ||||
|                     'empty_pages_are_a_change': False, | ||||
|                     'extract_title_as_title': False, | ||||
|                     'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), | ||||
|                     'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT, | ||||
|                     'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum | ||||
|                     'global_subtractive_selectors': [], | ||||
|                     'ignore_whitespace': True, | ||||
|                     'ignore_status_codes': False, #@todo implement, as ternary. | ||||
|                     'notification_body': default_notification_body, | ||||
|                     'notification_format': default_notification_format, | ||||
|                     'notification_title': default_notification_title, | ||||
| @@ -55,14 +56,18 @@ class model(dict): | ||||
|                     'rss_access_token': None, | ||||
|                     'rss_content_format': RSS_FORMAT_TYPES[0][0], | ||||
|                     'rss_hide_muted_watches': True, | ||||
|                     'rss_reader_mode': False, | ||||
|                     'scheduler_timezone_default': None,  # Default IANA timezone name | ||||
|                     'schema_version' : 0, | ||||
|                     'shared_diff_access': False, | ||||
|                     'webdriver_delay': None , # Extra delay in seconds before extracting text | ||||
|                     'strip_ignored_lines': False, | ||||
|                     'tags': {}, #@todo use Tag.model initialisers | ||||
|                     'timezone': None, # Default IANA timezone name | ||||
|                     'webdriver_delay': None , # Extra delay in seconds before extracting text | ||||
|                     'ui': { | ||||
|                         'use_page_title_in_list': True, | ||||
|                         'open_diff_in_new_tab': True, | ||||
|                         'socket_io_enabled': True | ||||
|                         'socket_io_enabled': True, | ||||
|                         'favicons_enabled': True | ||||
|                     }, | ||||
|                 } | ||||
|             } | ||||
| @@ -70,7 +75,8 @@ class model(dict): | ||||
|  | ||||
|     def __init__(self, *arg, **kw): | ||||
|         super(model, self).__init__(*arg, **kw) | ||||
|         self.update(self.base_config) | ||||
|         # CRITICAL: deepcopy to avoid sharing mutable objects between instances | ||||
|         self.update(deepcopy(self.base_config)) | ||||
|  | ||||
|  | ||||
| def parse_headers_from_text_file(filepath): | ||||
|   | ||||
| @@ -1,40 +1,24 @@ | ||||
| from blinker import signal | ||||
| from changedetectionio.validate_url import is_safe_valid_url | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from changedetectionio.jinja2_custom import render as jinja_render | ||||
| from . import watch_base | ||||
| import os | ||||
| import re | ||||
| from pathlib import Path | ||||
| from loguru import logger | ||||
|  | ||||
| from .. import safe_jinja | ||||
| from .. import jinja2_custom as safe_jinja | ||||
| from ..diff import ADDED_PLACEMARKER_OPEN | ||||
| from ..html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
|  | ||||
| # Allowable protocols, protects against javascript: etc | ||||
| # file:// is further checked by ALLOW_FILE_URI | ||||
| SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):' | ||||
| FAVICON_RESAVE_THRESHOLD_SECONDS=86400 | ||||
|  | ||||
|  | ||||
| minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)) | ||||
| mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} | ||||
|  | ||||
|  | ||||
| def is_safe_url(test_url): | ||||
|     # See https://github.com/dgtlmoon/changedetection.io/issues/1358 | ||||
|  | ||||
|     # Remove 'source:' prefix so we dont get 'source:javascript:' etc | ||||
|     # 'source:' is a valid way to tell us to return the source | ||||
|  | ||||
|     r = re.compile(re.escape('source:'), re.IGNORECASE) | ||||
|     test_url = r.sub('', test_url) | ||||
|  | ||||
|     pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE) | ||||
|     if not pattern.match(test_url.strip()): | ||||
|         return False | ||||
|  | ||||
|     return True | ||||
|  | ||||
|  | ||||
| class model(watch_base): | ||||
|     __newest_history_key = None | ||||
|     __history_n = 0 | ||||
| @@ -77,7 +61,7 @@ class model(watch_base): | ||||
|     def link(self): | ||||
|  | ||||
|         url = self.get('url', '') | ||||
|         if not is_safe_url(url): | ||||
|         if not is_safe_valid_url(url): | ||||
|             return 'DISABLED' | ||||
|  | ||||
|         ready_url = url | ||||
| @@ -87,9 +71,8 @@ class model(watch_base): | ||||
|                 ready_url = jinja_render(template_str=url) | ||||
|             except Exception as e: | ||||
|                 logger.critical(f"Invalid URL template for: '{url}' - {str(e)}") | ||||
|                 from flask import ( | ||||
|                     flash, Markup, url_for | ||||
|                 ) | ||||
|                 from flask import flash, url_for | ||||
|                 from markupsafe import Markup | ||||
|                 message = Markup('<a href="{}#general">The URL {} is invalid and cannot be used, click to edit</a>'.format( | ||||
|                     url_for('ui.ui_edit.edit_page', uuid=self.get('uuid')), self.get('url', ''))) | ||||
|                 flash(message, 'error') | ||||
| @@ -99,10 +82,17 @@ class model(watch_base): | ||||
|             ready_url=ready_url.replace('source:', '') | ||||
|  | ||||
|         # Also double check it after any Jinja2 formatting just incase | ||||
|         if not is_safe_url(ready_url): | ||||
|         if not is_safe_valid_url(ready_url): | ||||
|             return 'DISABLED' | ||||
|         return ready_url | ||||
|  | ||||
|     @property | ||||
|     def domain_only_from_link(self): | ||||
|         from urllib.parse import urlparse | ||||
|         parsed = urlparse(self.link) | ||||
|         domain = parsed.hostname | ||||
|         return domain | ||||
|  | ||||
|     def clear_watch(self): | ||||
|         import pathlib | ||||
|  | ||||
| @@ -160,8 +150,8 @@ class model(watch_base): | ||||
|  | ||||
|     @property | ||||
|     def label(self): | ||||
|         # Used for sorting | ||||
|         return self.get('title') if self.get('title') else self.get('url') | ||||
|         # Used for sorting, display, etc | ||||
|         return self.get('title') or self.get('page_title') or self.link | ||||
|  | ||||
|     @property | ||||
|     def last_changed(self): | ||||
| @@ -413,6 +403,154 @@ class model(watch_base): | ||||
|         # False is not an option for AppRise, must be type None | ||||
|         return None | ||||
|  | ||||
|     def favicon_is_expired(self): | ||||
|         favicon_fname = self.get_favicon_filename() | ||||
|         import glob | ||||
|         import time | ||||
|  | ||||
|         if not favicon_fname: | ||||
|             return True | ||||
|         try: | ||||
|             fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None) | ||||
|             logger.trace(f"Favicon file maybe found at {fname}") | ||||
|             if os.path.isfile(fname): | ||||
|                 file_age = int(time.time() - os.path.getmtime(fname)) | ||||
|                 logger.trace(f"Favicon file age is {file_age}s") | ||||
|                 if file_age < FAVICON_RESAVE_THRESHOLD_SECONDS: | ||||
|                     return False | ||||
|         except Exception as e: | ||||
|             logger.critical(f"Exception checking Favicon age {str(e)}") | ||||
|             return True | ||||
|  | ||||
|         # Also in the case that the file didnt exist | ||||
|         return True | ||||
|  | ||||
|     def bump_favicon(self, url, favicon_base_64: str) -> None: | ||||
|         from urllib.parse import urlparse | ||||
|         import base64 | ||||
|         import binascii | ||||
|         decoded = None | ||||
|  | ||||
|         if url: | ||||
|             try: | ||||
|                 parsed = urlparse(url) | ||||
|                 filename = os.path.basename(parsed.path) | ||||
|                 (base, extension) = filename.lower().strip().rsplit('.', 1) | ||||
|             except ValueError: | ||||
|                 logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'") | ||||
|                 return None | ||||
|         else: | ||||
|             # Assume favicon.ico | ||||
|             base = "favicon" | ||||
|             extension = "ico" | ||||
|  | ||||
|         fname = os.path.join(self.watch_data_dir, f"favicon.{extension}") | ||||
|  | ||||
|         try: | ||||
|             # validate=True makes sure the string only contains valid base64 chars | ||||
|             decoded = base64.b64decode(favicon_base_64, validate=True) | ||||
|         except (binascii.Error, ValueError) as e: | ||||
|             logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}") | ||||
|         else: | ||||
|             if decoded: | ||||
|                 try: | ||||
|                     with open(fname, 'wb') as f: | ||||
|                         f.write(decoded) | ||||
|                     # A signal that could trigger the socket server to update the browser also | ||||
|                     watch_check_update = signal('watch_favicon_bump') | ||||
|                     if watch_check_update: | ||||
|                         watch_check_update.send(watch_uuid=self.get('uuid')) | ||||
|  | ||||
|                 except Exception as e: | ||||
|                     logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}") | ||||
|  | ||||
|         # @todo - Store some checksum and only write when its different | ||||
|         logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}") | ||||
|  | ||||
    def get_favicon_filename(self) -> str | None:
        """
        Find any favicon.* file in this watch's data directory and return
        the basename of the newest one (by modification time).

        Returns:
            str | None: Basename such as "favicon.ico", or None when no
            favicon file has been saved for this watch.
        """
        import glob

        # Search for all favicon.* files
        files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))

        if not files:
            return None

        # Find the newest by modification time
        newest_file = max(files, key=os.path.getmtime)
        return os.path.basename(newest_file)
|  | ||||
    def get_screenshot_as_thumbnail(self, max_age=3200):
        """Return path to a square thumbnail of the most recent screenshot.

        Creates a 350x350 pixel JPEG from the top portion of the screenshot
        (the docstring previously said 150x150, but the final resize below
        is to 350x350). The thumbnail is cached on disk and reused while it
        is newer than the screenshot and younger than ``max_age``.

        Args:
            max_age: Maximum age in seconds before recreating thumbnail

        Returns:
            Path to thumbnail or None if no screenshot exists (or on error)
        """
        import os
        import time

        thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
        top_trim = 500  # Pixels from top of screenshot to use

        screenshot_path = self.get_screenshot()
        if not screenshot_path:
            return None

        # Reuse thumbnail if it's fresh and screenshot hasn't changed
        if os.path.isfile(thumbnail_path):
            thumbnail_mtime = os.path.getmtime(thumbnail_path)
            screenshot_mtime = os.path.getmtime(screenshot_path)

            if screenshot_mtime <= thumbnail_mtime and time.time() - thumbnail_mtime < max_age:
                return thumbnail_path

        try:
            from PIL import Image

            with Image.open(screenshot_path) as img:
                # Crop top portion first (full width, top_trim height)
                top_crop_height = min(top_trim, img.height)
                img = img.crop((0, 0, img.width, top_crop_height))

                # Create a smaller intermediate image (to reduce memory usage)
                # NEAREST is used here because quality is recovered by the
                # final BILINEAR resize below
                aspect = img.width / img.height
                interim_width = min(top_trim, img.width)
                interim_height = int(interim_width / aspect) if aspect > 0 else top_trim
                img = img.resize((interim_width, interim_height), Image.NEAREST)

                # Convert to RGB if needed (JPEG cannot store alpha/palette modes)
                if img.mode != 'RGB':
                    img = img.convert('RGB')

                # Crop to square from top center
                square_size = min(img.width, img.height)
                left = (img.width - square_size) // 2
                img = img.crop((left, 0, left + square_size, square_size))

                # Final resize to exact thumbnail size with better filter
                img = img.resize((350, 350), Image.BILINEAR)

                # Save with optimized settings
                img.save(thumbnail_path, "JPEG", quality=75, optimize=True)

            return thumbnail_path

        except Exception as e:
            logger.error(f"Error creating thumbnail for {self.get('uuid')}: {str(e)}")
            return None
|  | ||||
|     def __get_file_ctime(self, filename): | ||||
|         fname = os.path.join(self.watch_data_dir, filename) | ||||
|         if os.path.isfile(fname): | ||||
| @@ -506,7 +644,7 @@ class model(watch_base): | ||||
|                     if res: | ||||
|                         if not csv_writer: | ||||
|                             # A file on the disk can be transferred much faster via flask than a string reply | ||||
|                             csv_output_filename = 'report.csv' | ||||
|                             csv_output_filename = f"report-{self.get('uuid')}.csv" | ||||
|                             f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w') | ||||
|                             # @todo some headers in the future | ||||
|                             #fieldnames = ['Epoch seconds', 'Date'] | ||||
|   | ||||
| @@ -2,7 +2,8 @@ import os | ||||
| import uuid | ||||
|  | ||||
| from changedetectionio import strtobool | ||||
| default_notification_format_for_watch = 'System default' | ||||
| USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default' | ||||
| CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL' | ||||
|  | ||||
| class watch_base(dict): | ||||
|  | ||||
| @@ -15,13 +16,14 @@ class watch_base(dict): | ||||
|             'body': None, | ||||
|             'browser_steps': [], | ||||
|             'browser_steps_last_error_step': None, | ||||
|             'conditions' : {}, | ||||
|             'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT, | ||||
|             'check_count': 0, | ||||
|             'check_unique_lines': False,  # On change-detected, compare against all history if its something new | ||||
|             'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine. | ||||
|             'content-type': None, | ||||
|             'date_created': None, | ||||
|             'extract_text': [],  # Extract text by regex after filters | ||||
|             'extract_title_as_title': False, | ||||
|             'fetch_backend': 'system',  # plaintext, playwright etc | ||||
|             'fetch_time': 0.0, | ||||
|             'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), | ||||
| @@ -32,6 +34,7 @@ class watch_base(dict): | ||||
|             'has_ldjson_price_data': None, | ||||
|             'headers': {},  # Extra headers to send | ||||
|             'ignore_text': [],  # List of text to ignore when calculating the comparison checksum | ||||
|             'ignore_status_codes': None, | ||||
|             'in_stock_only': True,  # Only trigger change on going to instock from out-of-stock | ||||
|             'include_filters': [], | ||||
|             'last_checked': 0, | ||||
| @@ -41,11 +44,12 @@ class watch_base(dict): | ||||
|             'method': 'GET', | ||||
|             'notification_alert_count': 0, | ||||
|             'notification_body': None, | ||||
|             'notification_format': default_notification_format_for_watch, | ||||
|             'notification_format': USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, | ||||
|             'notification_muted': False, | ||||
|             'notification_screenshot': False,  # Include the latest screenshot if available and supported by the apprise URL | ||||
|             'notification_title': None, | ||||
|             'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise) | ||||
|             'page_title': None, # <title> from the page | ||||
|             'paused': False, | ||||
|             'previous_md5': False, | ||||
|             'previous_md5_before_filters': False,  # Used for skipping changedetection entirely | ||||
| @@ -54,6 +58,7 @@ class watch_base(dict): | ||||
|             'proxy': None,  # Preferred proxy connection | ||||
|             'remote_server_reply': None,  # From 'server' reply header | ||||
|             'sort_text_alphabetically': False, | ||||
|             'strip_ignored_lines': None, | ||||
|             'subtractive_selectors': [], | ||||
|             'tag': '',  # Old system of text name for a tag, to be removed | ||||
|             'tags': [],  # list of UUIDs to App.Tags | ||||
| @@ -119,12 +124,13 @@ class watch_base(dict): | ||||
|                     } | ||||
|                 }, | ||||
|             }, | ||||
|             'title': None, | ||||
|             'title': None, # An arbitrary field that overrides 'page_title' | ||||
|             'track_ldjson_price_data': None, | ||||
|             'trim_text_whitespace': False, | ||||
|             'remove_duplicate_lines': False, | ||||
|             'trigger_text': [],  # List of text or regex to wait for until a change is detected | ||||
|             'url': '', | ||||
|             'use_page_title_in_list': None, # None = use system settings | ||||
|             'uuid': str(uuid.uuid4()), | ||||
|             'webdriver_delay': None, | ||||
|             'webdriver_js_execute_code': None,  # Run before change-detection | ||||
|   | ||||
| @@ -1,35 +1,16 @@ | ||||
| from changedetectionio.model import default_notification_format_for_watch | ||||
| from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|  | ||||
| ult_notification_format_for_watch = 'System default' | ||||
| default_notification_format = 'HTML Color' | ||||
| default_notification_format = 'htmlcolor' | ||||
| default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n' | ||||
| default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}' | ||||
|  | ||||
| # The values (markdown etc) are from apprise NotifyFormat, | ||||
| # But to avoid importing the whole heavy module just use the same strings here. | ||||
| valid_notification_formats = { | ||||
|     'Text': 'text', | ||||
|     'Markdown': 'markdown', | ||||
|     'HTML': 'html', | ||||
|     'HTML Color': 'htmlcolor', | ||||
|     'text': 'Plain Text', | ||||
|     'html': 'HTML', | ||||
|     'htmlcolor': 'HTML Color', | ||||
|     'markdown': 'Markdown to HTML', | ||||
|     # Used only for editing a watch (not for global) | ||||
|     default_notification_format_for_watch: default_notification_format_for_watch | ||||
| } | ||||
|  | ||||
|  | ||||
| valid_tokens = { | ||||
|     'base_url': '', | ||||
|     'current_snapshot': '', | ||||
|     'diff': '', | ||||
|     'diff_added': '', | ||||
|     'diff_full': '', | ||||
|     'diff_patch': '', | ||||
|     'diff_removed': '', | ||||
|     'diff_url': '', | ||||
|     'preview_url': '', | ||||
|     'triggered_text': '', | ||||
|     'watch_tag': '', | ||||
|     'watch_title': '', | ||||
|     'watch_url': '', | ||||
|     'watch_uuid': '', | ||||
|     USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
| } | ||||
|   | ||||
| @@ -1,10 +1,61 @@ | ||||
| """ | ||||
| Custom Apprise HTTP Handlers with format= Parameter Support | ||||
|  | ||||
| IMPORTANT: This module works around a limitation in Apprise's @notify decorator. | ||||
|  | ||||
| THE PROBLEM: | ||||
| ------------- | ||||
| When using Apprise's @notify decorator to create custom notification handlers, the | ||||
| decorator creates a CustomNotifyPlugin that uses parse_url(..., simple=True) to parse | ||||
| URLs. This simple parsing mode does NOT extract the format= query parameter from the URL | ||||
| and set it as a top-level parameter that NotifyBase.__init__ can use to set notify_format. | ||||
|  | ||||
| As a result: | ||||
| 1. URL: post://example.com/webhook?format=html | ||||
| 2. Apprise parses this and sees format=html in qsd (query string dictionary) | ||||
| 3. But it does NOT extract it and pass it to NotifyBase.__init__ | ||||
| 4. NotifyBase defaults to notify_format=TEXT | ||||
| 5. When you call apobj.notify(body="<html>...", body_format="html"): | ||||
|    - Apprise sees: input format = html, output format (notify_format) = text | ||||
|    - Apprise calls convert_between("html", "text", body) | ||||
|    - This strips all HTML tags, leaving only plain text | ||||
| 6. Your custom handler receives stripped plain text instead of HTML | ||||
|  | ||||
| THE SOLUTION: | ||||
| ------------- | ||||
| Instead of using the @notify decorator directly, we: | ||||
| 1. Manually register custom plugins using plugins.N_MGR.add() | ||||
| 2. Create a CustomHTTPHandler class that extends CustomNotifyPlugin | ||||
| 3. Override __init__ to extract format= from qsd and set it as kwargs['format'] | ||||
| 4. Call NotifyBase.__init__ which properly sets notify_format from kwargs['format'] | ||||
| 5. Set up _default_args like CustomNotifyPlugin does for compatibility | ||||
|  | ||||
| This ensures that when format=html is in the URL: | ||||
| - notify_format is set to HTML | ||||
| - Apprise sees: input format = html, output format = html | ||||
| - No conversion happens (convert_between returns content unchanged) | ||||
| - Your custom handler receives the original HTML intact | ||||
|  | ||||
| TESTING: | ||||
| -------- | ||||
| To verify this works: | ||||
| >>> apobj = apprise.Apprise() | ||||
| >>> apobj.add('post://localhost:5005/test?format=html') | ||||
| >>> for server in apobj: | ||||
| ...     print(server.notify_format)  # Should print: html (not text) | ||||
| >>> apobj.notify(body='<span>Test</span>', body_format='html') | ||||
| # Your handler should receive '<span>Test</span>' not 'Test' | ||||
| """ | ||||
|  | ||||
| import json | ||||
| import re | ||||
| from urllib.parse import unquote_plus | ||||
|  | ||||
| import requests | ||||
| from apprise.decorators import notify | ||||
| from apprise.utils.parse import parse_url as apprise_parse_url | ||||
| from apprise import plugins | ||||
| from apprise.decorators.base import CustomNotifyPlugin | ||||
| from apprise.utils.parse import parse_url as apprise_parse_url, url_assembly | ||||
| from apprise.utils.logic import dict_full_update | ||||
| from loguru import logger | ||||
| from requests.structures import CaseInsensitiveDict | ||||
|  | ||||
| @@ -12,13 +63,66 @@ SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"} | ||||
|  | ||||
|  | ||||
| def notify_supported_methods(func): | ||||
|     """Register custom HTTP method handlers that properly support format= parameter.""" | ||||
|     for method in SUPPORTED_HTTP_METHODS: | ||||
|         func = notify(on=method)(func) | ||||
|         # Add support for https, for each supported http method | ||||
|         func = notify(on=f"{method}s")(func) | ||||
|         _register_http_handler(method, func) | ||||
|         _register_http_handler(f"{method}s", func) | ||||
|     return func | ||||
|  | ||||
|  | ||||
| def _register_http_handler(schema, send_func): | ||||
|     """Register a custom HTTP handler that extracts format= from URL query parameters.""" | ||||
|  | ||||
|     # Parse base URL | ||||
|     base_url = f"{schema}://" | ||||
|     base_args = apprise_parse_url(base_url, default_schema=schema, verify_host=False, simple=True) | ||||
|  | ||||
|     class CustomHTTPHandler(CustomNotifyPlugin): | ||||
|         secure_protocol = schema | ||||
|         service_name = f"Custom HTTP - {schema.upper()}" | ||||
|         _base_args = base_args | ||||
|  | ||||
|         def __init__(self, **kwargs): | ||||
|             # Extract format from qsd and set it as a top-level kwarg | ||||
|             # This allows NotifyBase.__init__ to properly set notify_format | ||||
|             if 'qsd' in kwargs and 'format' in kwargs['qsd']: | ||||
|                 kwargs['format'] = kwargs['qsd']['format'] | ||||
|  | ||||
|             # Call NotifyBase.__init__ (skip CustomNotifyPlugin.__init__) | ||||
|             super(CustomNotifyPlugin, self).__init__(**kwargs) | ||||
|  | ||||
|             # Set up _default_args like CustomNotifyPlugin does | ||||
|             self._default_args = {} | ||||
|             kwargs.pop("secure", None) | ||||
|             dict_full_update(self._default_args, self._base_args) | ||||
|             dict_full_update(self._default_args, kwargs) | ||||
|             self._default_args["url"] = url_assembly(**self._default_args) | ||||
|  | ||||
|         __send = staticmethod(send_func) | ||||
|  | ||||
|         def send(self, body, title="", notify_type="info", *args, **kwargs): | ||||
|             """Call the custom send function.""" | ||||
|             try: | ||||
|                 result = self.__send( | ||||
|                     body, title, notify_type, | ||||
|                     *args, | ||||
|                     meta=self._default_args, | ||||
|                     **kwargs | ||||
|                 ) | ||||
|                 return True if result is None else bool(result) | ||||
|             except Exception as e: | ||||
|                 self.logger.warning(f"Exception in custom HTTP handler: {e}") | ||||
|                 return False | ||||
|  | ||||
|     # Register the plugin | ||||
|     plugins.N_MGR.add( | ||||
|         plugin=CustomHTTPHandler, | ||||
|         schemas=schema, | ||||
|         send_func=send_func, | ||||
|         url=base_url, | ||||
|     ) | ||||
|  | ||||
|  | ||||
| def _get_auth(parsed_url: dict) -> str | tuple[str, str]: | ||||
|     user: str | None = parsed_url.get("user") | ||||
|     password: str | None = parsed_url.get("password") | ||||
| @@ -70,9 +174,12 @@ def apprise_http_custom_handler( | ||||
|     title: str, | ||||
|     notify_type: str, | ||||
|     meta: dict, | ||||
|     body_format: str = None, | ||||
|     *args, | ||||
|     **kwargs, | ||||
| ) -> bool: | ||||
|  | ||||
|  | ||||
|     url: str = meta.get("url") | ||||
|     schema: str = meta.get("schema") | ||||
|     method: str = re.sub(r"s$", "", schema).upper() | ||||
| @@ -88,25 +195,16 @@ def apprise_http_custom_handler( | ||||
|  | ||||
|     url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url")) | ||||
|  | ||||
|     try: | ||||
|         response = requests.request( | ||||
|             method=method, | ||||
|             url=url, | ||||
|             auth=auth, | ||||
|             headers=headers, | ||||
|             params=params, | ||||
|             data=body.encode("utf-8") if isinstance(body, str) else body, | ||||
|         ) | ||||
|     response = requests.request( | ||||
|         method=method, | ||||
|         url=url, | ||||
|         auth=auth, | ||||
|         headers=headers, | ||||
|         params=params, | ||||
|         data=body.encode("utf-8") if isinstance(body, str) else body, | ||||
|     ) | ||||
|  | ||||
|         response.raise_for_status() | ||||
|     response.raise_for_status() | ||||
|  | ||||
|         logger.info(f"Successfully sent custom notification to {url}") | ||||
|         return True | ||||
|  | ||||
|     except requests.RequestException as e: | ||||
|         logger.error(f"Remote host error while sending custom notification to {url}: {e}") | ||||
|         return False | ||||
|  | ||||
|     except Exception as e: | ||||
|         logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}") | ||||
|         return False | ||||
|     logger.info(f"Successfully sent custom notification to {url}") | ||||
|     return True | ||||
|   | ||||
							
								
								
									
										286
									
								
								changedetectionio/notification/apprise_plugin/discord.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								changedetectionio/notification/apprise_plugin/discord.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,286 @@ | ||||
| """ | ||||
| Custom Discord plugin for changedetection.io | ||||
| Extends Apprise's Discord plugin to support custom colored embeds for removed/added content | ||||
| """ | ||||
| from apprise.plugins.discord import NotifyDiscord | ||||
| from apprise.decorators import notify | ||||
| from apprise.common import NotifyFormat | ||||
| from loguru import logger | ||||
|  | ||||
| # Import placeholders from changedetection's diff module | ||||
| from ...diff import ( | ||||
|     REMOVED_PLACEMARKER_OPEN, | ||||
|     REMOVED_PLACEMARKER_CLOSED, | ||||
|     ADDED_PLACEMARKER_OPEN, | ||||
|     ADDED_PLACEMARKER_CLOSED, | ||||
|     CHANGED_PLACEMARKER_OPEN, | ||||
|     CHANGED_PLACEMARKER_CLOSED, | ||||
|     CHANGED_INTO_PLACEMARKER_OPEN, | ||||
|     CHANGED_INTO_PLACEMARKER_CLOSED, | ||||
| ) | ||||
|  | ||||
| # Discord embed sidebar colors for different change types | ||||
| DISCORD_COLOR_UNCHANGED = 8421504   # Gray (#808080) | ||||
| DISCORD_COLOR_REMOVED = 16711680    # Red (#FF0000) | ||||
| DISCORD_COLOR_ADDED = 65280         # Green (#00FF00) | ||||
| DISCORD_COLOR_CHANGED = 16753920    # Orange (#FFA500) | ||||
| DISCORD_COLOR_CHANGED_INTO = 3447003  # Blue (#5865F2 - Discord blue) | ||||
| DISCORD_COLOR_WARNING = 16776960    # Yellow (#FFFF00) | ||||
|  | ||||
|  | ||||
| class NotifyDiscordCustom(NotifyDiscord): | ||||
|     """ | ||||
|     Custom Discord notification handler that supports multiple colored embeds | ||||
|     for showing removed (red) and added (green) content separately. | ||||
|     """ | ||||
|  | ||||
|     def send(self, body, title="", notify_type=None, attach=None, **kwargs): | ||||
|         """ | ||||
|         Override send method to create custom embeds with red/green colors | ||||
|         for removed/added content when placeholders are present. | ||||
|         """ | ||||
|  | ||||
|         # Check if body contains our diff placeholders | ||||
|         has_removed = REMOVED_PLACEMARKER_OPEN in body | ||||
|         has_added = ADDED_PLACEMARKER_OPEN in body | ||||
|         has_changed = CHANGED_PLACEMARKER_OPEN in body | ||||
|         has_changed_into = CHANGED_INTO_PLACEMARKER_OPEN in body | ||||
|  | ||||
|         # If we have diff placeholders and we're in markdown/html format, create custom embeds | ||||
|         if (has_removed or has_added or has_changed or has_changed_into) and self.notify_format in (NotifyFormat.MARKDOWN, NotifyFormat.HTML): | ||||
|             return self._send_with_colored_embeds(body, title, notify_type, attach, **kwargs) | ||||
|  | ||||
|         # Otherwise, use the parent class's default behavior | ||||
|         return super().send(body, title, notify_type, attach, **kwargs) | ||||
|  | ||||
|     def _send_with_colored_embeds(self, body, title, notify_type, attach, **kwargs): | ||||
|         """ | ||||
|         Send Discord message with embeds in the original diff order. | ||||
|         Preserves the sequence: unchanged -> removed -> added -> unchanged, etc. | ||||
|         """ | ||||
|         from datetime import datetime, timezone | ||||
|  | ||||
|         payload = { | ||||
|             "tts": self.tts, | ||||
|             "wait": self.tts is False, | ||||
|         } | ||||
|  | ||||
|         if self.flags: | ||||
|             payload["flags"] = self.flags | ||||
|  | ||||
|         # Acquire image_url | ||||
|         image_url = self.image_url(notify_type) | ||||
|  | ||||
|         if self.avatar and (image_url or self.avatar_url): | ||||
|             payload["avatar_url"] = self.avatar_url if self.avatar_url else image_url | ||||
|  | ||||
|         if self.user: | ||||
|             payload["username"] = self.user | ||||
|  | ||||
|         # Associate our thread_id with our message | ||||
|         params = {"thread_id": self.thread_id} if self.thread_id else None | ||||
|  | ||||
|         # Build embeds array preserving order | ||||
|         embeds = [] | ||||
|  | ||||
|         # Add title as plain bold text in message content (not an embed) | ||||
|         if title: | ||||
|             payload["content"] = f"**{title}**" | ||||
|  | ||||
|         # Parse the body into ordered chunks | ||||
|         chunks = self._parse_body_into_chunks(body) | ||||
|  | ||||
|         # Discord limits: | ||||
|         # - Max 10 embeds per message | ||||
|         # - Max 6000 characters total across all embeds | ||||
|         # - Max 4096 characters per embed description | ||||
|         max_embeds = 10 | ||||
|         max_total_chars = 6000 | ||||
|         max_embed_description = 4096 | ||||
|  | ||||
|         # All 10 embed slots are available for content | ||||
|         max_content_embeds = max_embeds | ||||
|  | ||||
|         # Start character count | ||||
|         total_chars = 0 | ||||
|  | ||||
|         # Create embeds from chunks in order (no titles, just color coding) | ||||
|         for chunk_type, content in chunks: | ||||
|             if not content.strip(): | ||||
|                 continue | ||||
|  | ||||
|             # Truncate individual embed description if needed | ||||
|             if len(content) > max_embed_description: | ||||
|                 content = content[:max_embed_description - 3] + "..." | ||||
|  | ||||
|             # Check if we're approaching the embed count limit | ||||
|             # We need room for the warning embed, so stop at max_content_embeds - 1 | ||||
|             current_content_embeds = len(embeds) | ||||
|             if current_content_embeds >= max_content_embeds - 1: | ||||
|                 # Add a truncation notice (this will be the 10th embed) | ||||
|                 embeds.append({ | ||||
|                     "description": "⚠️ Content truncated (Discord 10 embed limit reached) - Tip: Select 'Plain Text' or 'HTML' format for longer diffs", | ||||
|                     "color": DISCORD_COLOR_WARNING, | ||||
|                 }) | ||||
|                 break | ||||
|  | ||||
|             # Check if adding this embed would exceed total character limit | ||||
|             if total_chars + len(content) > max_total_chars: | ||||
|                 # Add a truncation notice | ||||
|                 remaining_chars = max_total_chars - total_chars | ||||
|                 if remaining_chars > 100: | ||||
|                     # Add partial content if we have room | ||||
|                     truncated_content = content[:remaining_chars - 100] + "..." | ||||
|                     embeds.append({ | ||||
|                         "description": truncated_content, | ||||
|                         "color": (DISCORD_COLOR_UNCHANGED if chunk_type == "unchanged" | ||||
|                                  else DISCORD_COLOR_REMOVED if chunk_type == "removed" | ||||
|                                  else DISCORD_COLOR_ADDED), | ||||
|                     }) | ||||
|                 embeds.append({ | ||||
|                     "description": "⚠️ Content truncated (Discord 6000 char limit reached)\nTip: Select 'Plain Text' or 'HTML' format for longer diffs", | ||||
|                     "color": DISCORD_COLOR_WARNING, | ||||
|                 }) | ||||
|                 break | ||||
|  | ||||
|             if chunk_type == "unchanged": | ||||
|                 embeds.append({ | ||||
|                     "description": content, | ||||
|                     "color": DISCORD_COLOR_UNCHANGED, | ||||
|                 }) | ||||
|             elif chunk_type == "removed": | ||||
|                 embeds.append({ | ||||
|                     "description": content, | ||||
|                     "color": DISCORD_COLOR_REMOVED, | ||||
|                 }) | ||||
|             elif chunk_type == "added": | ||||
|                 embeds.append({ | ||||
|                     "description": content, | ||||
|                     "color": DISCORD_COLOR_ADDED, | ||||
|                 }) | ||||
|             elif chunk_type == "changed": | ||||
|                 # Changed (old value) - use orange to distinguish from pure removal | ||||
|                 embeds.append({ | ||||
|                     "description": content, | ||||
|                     "color": DISCORD_COLOR_CHANGED, | ||||
|                 }) | ||||
|             elif chunk_type == "changed_into": | ||||
|                 # Changed into (new value) - use blue to distinguish from pure addition | ||||
|                 embeds.append({ | ||||
|                     "description": content, | ||||
|                     "color": DISCORD_COLOR_CHANGED_INTO, | ||||
|                 }) | ||||
|  | ||||
|             total_chars += len(content) | ||||
|  | ||||
|         if embeds: | ||||
|             payload["embeds"] = embeds | ||||
|  | ||||
|         # Send the payload using parent's _send method | ||||
|         if not self._send(payload, params=params): | ||||
|             return False | ||||
|  | ||||
|         # Handle attachments if present | ||||
|         if attach and self.attachment_support: | ||||
|             payload.update({ | ||||
|                 "tts": False, | ||||
|                 "wait": True, | ||||
|             }) | ||||
|             payload.pop("embeds", None) | ||||
|             payload.pop("content", None) | ||||
|             payload.pop("allow_mentions", None) | ||||
|  | ||||
|             for attachment in attach: | ||||
|                 self.logger.info(f"Posting Discord Attachment {attachment.name}") | ||||
|                 if not self._send(payload, params=params, attach=attachment): | ||||
|                     return False | ||||
|  | ||||
|         return True | ||||
|  | ||||
|     def _parse_body_into_chunks(self, body): | ||||
|         """ | ||||
|         Parse the body into ordered chunks of (type, content) tuples. | ||||
|         Types: "unchanged", "removed", "added", "changed", "changed_into" | ||||
|         Preserves the original order of the diff. | ||||
|         """ | ||||
|         chunks = [] | ||||
|         position = 0 | ||||
|  | ||||
|         while position < len(body): | ||||
|             # Find the next marker | ||||
|             next_removed = body.find(REMOVED_PLACEMARKER_OPEN, position) | ||||
|             next_added = body.find(ADDED_PLACEMARKER_OPEN, position) | ||||
|             next_changed = body.find(CHANGED_PLACEMARKER_OPEN, position) | ||||
|             next_changed_into = body.find(CHANGED_INTO_PLACEMARKER_OPEN, position) | ||||
|  | ||||
|             # Determine which marker comes first | ||||
|             if next_removed == -1 and next_added == -1 and next_changed == -1 and next_changed_into == -1: | ||||
|                 # No more markers, rest is unchanged | ||||
|                 if position < len(body): | ||||
|                     chunks.append(("unchanged", body[position:])) | ||||
|                 break | ||||
|  | ||||
|             # Find the earliest marker | ||||
|             next_marker_pos = None | ||||
|             next_marker_type = None | ||||
|  | ||||
|             # Compare all marker positions to find the earliest | ||||
|             markers = [] | ||||
|             if next_removed != -1: | ||||
|                 markers.append((next_removed, "removed")) | ||||
|             if next_added != -1: | ||||
|                 markers.append((next_added, "added")) | ||||
|             if next_changed != -1: | ||||
|                 markers.append((next_changed, "changed")) | ||||
|             if next_changed_into != -1: | ||||
|                 markers.append((next_changed_into, "changed_into")) | ||||
|  | ||||
|             if markers: | ||||
|                 next_marker_pos, next_marker_type = min(markers, key=lambda x: x[0]) | ||||
|  | ||||
|             # Add unchanged content before the marker | ||||
|             if next_marker_pos > position: | ||||
|                 chunks.append(("unchanged", body[position:next_marker_pos])) | ||||
|  | ||||
|             # Find the closing marker | ||||
|             if next_marker_type == "removed": | ||||
|                 open_marker = REMOVED_PLACEMARKER_OPEN | ||||
|                 close_marker = REMOVED_PLACEMARKER_CLOSED | ||||
|             elif next_marker_type == "added": | ||||
|                 open_marker = ADDED_PLACEMARKER_OPEN | ||||
|                 close_marker = ADDED_PLACEMARKER_CLOSED | ||||
|             elif next_marker_type == "changed": | ||||
|                 open_marker = CHANGED_PLACEMARKER_OPEN | ||||
|                 close_marker = CHANGED_PLACEMARKER_CLOSED | ||||
|             else:  # changed_into | ||||
|                 open_marker = CHANGED_INTO_PLACEMARKER_OPEN | ||||
|                 close_marker = CHANGED_INTO_PLACEMARKER_CLOSED | ||||
|  | ||||
|             close_pos = body.find(close_marker, next_marker_pos) | ||||
|  | ||||
|             if close_pos == -1: | ||||
|                 # No closing marker, take rest as this type | ||||
|                 content = body[next_marker_pos + len(open_marker):] | ||||
|                 chunks.append((next_marker_type, content)) | ||||
|                 break | ||||
|             else: | ||||
|                 # Extract content between markers | ||||
|                 content = body[next_marker_pos + len(open_marker):close_pos] | ||||
|                 chunks.append((next_marker_type, content)) | ||||
|                 position = close_pos + len(close_marker) | ||||
|  | ||||
|         return chunks | ||||
|  | ||||
|  | ||||
| # Register the custom Discord handler with Apprise | ||||
| # This will override the built-in discord:// handler | ||||
| @notify(on="discord") | ||||
| def discord_custom_wrapper(body, title, notify_type, meta, body_format=None, *args, **kwargs): | ||||
|     """ | ||||
|     Wrapper function to make the custom Discord handler work with Apprise's decorator system. | ||||
|     Note: This decorator approach may not work for overriding built-in plugins. | ||||
|     The class-based approach above is the proper way to extend NotifyDiscord. | ||||
|     """ | ||||
|     logger.info("Custom Discord handler called") | ||||
|     # This is here for potential future use with decorator-based registration | ||||
|     return True | ||||
							
								
								
									
										42
									
								
								changedetectionio/notification/email_helpers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								changedetectionio/notification/email_helpers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| def as_monospaced_html_email(content: str, title: str) -> str: | ||||
|     """ | ||||
|     Wraps `content` in a minimal, email-safe HTML template | ||||
|     that forces monospace rendering across Gmail, Hotmail, Apple Mail, etc. | ||||
|  | ||||
|     Args: | ||||
|         content: The body text (plain text or HTML-like). | ||||
|         title: The title plaintext | ||||
|     Returns: | ||||
|         A complete HTML document string suitable for sending as an email body. | ||||
|     """ | ||||
|  | ||||
|     # All line feed types should be removed and then this function should only be fed <br>'s | ||||
|     # Then it works with our <pre> styling without double linefeeds | ||||
|     content = content.translate(str.maketrans('', '', '\r\n')) | ||||
|  | ||||
|     if title: | ||||
|         import html | ||||
|         title = html.escape(title) | ||||
|     else: | ||||
|         title = '' | ||||
|     # 2. Full email-safe HTML | ||||
|     html_email = f"""<!DOCTYPE html> | ||||
| <html lang="en"> | ||||
| <head> | ||||
|   <meta charset="UTF-8"> | ||||
|   <meta name="x-apple-disable-message-reformatting"> | ||||
|   <meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||||
|   <!--[if mso]> | ||||
|     <style> | ||||
|       body, div, pre, td {{ font-family: "Courier New", Courier, monospace !important; }} | ||||
|     </style> | ||||
|   <![endif]--> | ||||
|   <title>{title}</title> | ||||
| </head> | ||||
| <body style="-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;"> | ||||
|   <pre role="article" aria-roledescription="email" lang="en" | ||||
|        style="font-family: monospace, 'Courier New', Courier; font-size: 0.9rem; | ||||
|               white-space: pre-wrap; word-break: break-word;">{content}</pre> | ||||
| </body> | ||||
| </html>""" | ||||
|     return html_email | ||||
| @@ -1,30 +1,274 @@ | ||||
|  | ||||
| import time | ||||
| import apprise | ||||
| from apprise import NotifyFormat | ||||
| from loguru import logger | ||||
| from urllib.parse import urlparse | ||||
| from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL | ||||
| from .apprise_plugin.custom_handlers import SUPPORTED_HTTP_METHODS | ||||
| from .email_helpers import as_monospaced_html_email | ||||
| from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, ADDED_PLACEMARKER_OPEN, HTML_ADDED_STYLE, \ | ||||
|     ADDED_PLACEMARKER_CLOSED, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, CHANGED_PLACEMARKER_OPEN, \ | ||||
|     CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE | ||||
| from ..notification_service import NotificationContextData, CUSTOM_LINEBREAK_PLACEHOLDER | ||||
|  | ||||
| def process_notification(n_object, datastore): | ||||
|     from changedetectionio.safe_jinja import render as jinja_render | ||||
|     from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats | ||||
|  | ||||
|  | ||||
| def markup_text_links_to_html(body): | ||||
|     """ | ||||
|     Convert plaintext to HTML with clickable links. | ||||
|     Uses Jinja2's escape and Markup for XSS safety. | ||||
|     """ | ||||
|     from linkify_it import LinkifyIt | ||||
|     from markupsafe import Markup, escape | ||||
|  | ||||
|     linkify = LinkifyIt() | ||||
|  | ||||
|     # Match URLs in the ORIGINAL text (before escaping) | ||||
|     matches = linkify.match(body) | ||||
|  | ||||
|     if not matches: | ||||
|         # No URLs, just escape everything | ||||
|         return Markup(escape(body)) | ||||
|  | ||||
|     result = [] | ||||
|     last_index = 0 | ||||
|  | ||||
|     # Process each URL match | ||||
|     for match in matches: | ||||
|         # Add escaped text before the URL | ||||
|         if match.index > last_index: | ||||
|             text_part = body[last_index:match.index] | ||||
|             result.append(escape(text_part)) | ||||
|  | ||||
|         # Add the link with escaped URL (both in href and display) | ||||
|         url = match.url | ||||
|         result.append(Markup(f'<a href="{escape(url)}">{escape(url)}</a>')) | ||||
|  | ||||
|         last_index = match.last_index | ||||
|  | ||||
|     # Add remaining escaped text | ||||
|     if last_index < len(body): | ||||
|         result.append(escape(body[last_index:])) | ||||
|  | ||||
|     # Join all parts | ||||
|     return str(Markup(''.join(str(part) for part in result))) | ||||
|  | ||||
| def notification_format_align_with_apprise(n_format : str): | ||||
|     """ | ||||
|     Correctly align changedetection's formats with apprise's formats | ||||
|     Probably these are the same - but good to be sure. | ||||
|     These set the expected OUTPUT format type | ||||
|     :param n_format: | ||||
|     :return: | ||||
|     """ | ||||
|  | ||||
|     if n_format.startswith('html'): | ||||
|         # Apprise only knows 'html' not 'htmlcolor' etc, which shouldnt matter here | ||||
|         n_format = NotifyFormat.HTML.value | ||||
|     elif n_format.startswith('markdown'): | ||||
|         # probably the same but just to be safe | ||||
|         n_format = NotifyFormat.MARKDOWN.value | ||||
|     elif n_format.startswith('text'): | ||||
|         # probably the same but just to be safe | ||||
|         n_format = NotifyFormat.TEXT.value | ||||
|     else: | ||||
|         n_format = NotifyFormat.TEXT.value | ||||
|  | ||||
|     return n_format | ||||
|  | ||||
| def apply_discord_markdown_to_body(n_body): | ||||
|     """ | ||||
|     Discord does not support <del> but it supports non-standard ~~strikethrough~~ | ||||
|     :param n_body: | ||||
|     :return: | ||||
|     """ | ||||
|     import re | ||||
|     # Define the mapping between your placeholders and markdown markers | ||||
|     replacements = [ | ||||
|         (REMOVED_PLACEMARKER_OPEN, '~~', REMOVED_PLACEMARKER_CLOSED, '~~'), | ||||
|         (ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'), | ||||
|         (CHANGED_PLACEMARKER_OPEN, '~~', CHANGED_PLACEMARKER_CLOSED, '~~'), | ||||
|         (CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'), | ||||
|     ] | ||||
|     # So that the markdown gets added without any whitespace following it which would break it | ||||
|     for open_tag, open_md, close_tag, close_md in replacements: | ||||
|         # Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag | ||||
|         pattern = re.compile( | ||||
|             re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag), | ||||
|             flags=re.DOTALL | ||||
|         ) | ||||
|         n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body) | ||||
|     return n_body | ||||
|  | ||||
| def apply_standard_markdown_to_body(n_body): | ||||
|     """ | ||||
|     Apprise does not support ~~strikethrough~~ but it will convert <del> to HTML strikethrough. | ||||
|     :param n_body: | ||||
|     :return: | ||||
|     """ | ||||
|     import re | ||||
|     # Define the mapping between your placeholders and markdown markers | ||||
|     replacements = [ | ||||
|         (REMOVED_PLACEMARKER_OPEN, '<del>', REMOVED_PLACEMARKER_CLOSED, '</del>'), | ||||
|         (ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'), | ||||
|         (CHANGED_PLACEMARKER_OPEN, '<del>', CHANGED_PLACEMARKER_CLOSED, '</del>'), | ||||
|         (CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'), | ||||
|     ] | ||||
|  | ||||
|     # So that the markdown gets added without any whitespace following it which would break it | ||||
|     for open_tag, open_md, close_tag, close_md in replacements: | ||||
|         # Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag | ||||
|         pattern = re.compile( | ||||
|             re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag), | ||||
|             flags=re.DOTALL | ||||
|         ) | ||||
|         n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body) | ||||
|     return n_body | ||||
|  | ||||
|  | ||||
| def apply_service_tweaks(url, n_body, n_title, requested_output_format): | ||||
|  | ||||
|     # Re 323 - Limit discord length to their 2000 char limit total or it wont send. | ||||
|     # Because different notifications may require different pre-processing, run each sequentially :( | ||||
|     # 2000 bytes minus - | ||||
|     #     200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers | ||||
|     #     Length of URL - Incase they specify a longer custom avatar_url | ||||
|  | ||||
|     if not n_body or not n_body.strip(): | ||||
|         return url, n_body, n_title | ||||
|  | ||||
|     # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload | ||||
|     parsed = urlparse(url) | ||||
|     k = '?' if not parsed.query else '&' | ||||
|     if url and not 'avatar_url' in url \ | ||||
|             and not url.startswith('mail') \ | ||||
|             and not url.startswith('post') \ | ||||
|             and not url.startswith('get') \ | ||||
|             and not url.startswith('delete') \ | ||||
|             and not url.startswith('put'): | ||||
|         url += k + f"avatar_url={APPRISE_AVATAR_URL}" | ||||
|  | ||||
|     if url.startswith('tgram://'): | ||||
|         # Telegram only supports a limit subset of HTML, remove the '<br>' we place in. | ||||
|         # re https://github.com/dgtlmoon/changedetection.io/issues/555 | ||||
|         # @todo re-use an existing library we have already imported to strip all non-allowed tags | ||||
|         n_body = n_body.replace('<br>', '\n') | ||||
|         n_body = n_body.replace('</br>', '\n') | ||||
|         n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n') | ||||
|  | ||||
|         # Use strikethrough for removed content, bold for added content | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '<s>') | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '</s>') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '<b>') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '</b>') | ||||
|         # Handle changed/replaced lines (old → new) | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, '<s>') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, '</s>') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>') | ||||
|  | ||||
|         # real limit is 4096, but minus some for extra metadata | ||||
|         payload_max_size = 3600 | ||||
|         body_limit = max(0, payload_max_size - len(n_title)) | ||||
|         n_title = n_title[0:payload_max_size] | ||||
|         n_body = n_body[0:body_limit] | ||||
|  | ||||
|     elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') | ||||
|           or url.startswith('https://discord.com/api'))\ | ||||
|             and 'html' in requested_output_format: | ||||
|         # Discord doesn't support HTML, replace <br> with newlines | ||||
|         n_body = n_body.strip().replace('<br>', '\n') | ||||
|         n_body = n_body.replace('</br>', '\n') | ||||
|         n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n') | ||||
|  | ||||
|         # Don't replace placeholders or truncate here - let the custom Discord plugin handle it | ||||
|         # The plugin will use embeds (6000 char limit across all embeds) if placeholders are present, | ||||
|         # or plain content (2000 char limit) otherwise | ||||
|  | ||||
|         # Only do placeholder replacement if NOT using htmlcolor (which triggers embeds in custom plugin) | ||||
|         if requested_output_format == 'html': | ||||
|             # No diff placeholders, use Discord markdown for any other formatting | ||||
|             # Use Discord markdown: strikethrough for removed, bold for added | ||||
|             n_body = apply_discord_markdown_to_body(n_body=n_body) | ||||
|  | ||||
|             # Apply 2000 char limit for plain content | ||||
|             payload_max_size = 1700 | ||||
|             body_limit = max(0, payload_max_size - len(n_title)) | ||||
|             n_title = n_title[0:payload_max_size] | ||||
|             n_body = n_body[0:body_limit] | ||||
|         # else: our custom Discord plugin will convert any placeholders left over into embeds with color bars | ||||
|  | ||||
|     # Is not discord/tgram and they want htmlcolor | ||||
|     elif requested_output_format == 'htmlcolor': | ||||
|         # https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050 | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">') | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, f'</span>') | ||||
|         # Handle changed/replaced lines (old → new) | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>') | ||||
|         n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n') | ||||
|     elif requested_output_format == 'html': | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ') | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'') | ||||
|         n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n') | ||||
|     elif requested_output_format == 'markdown': | ||||
|         # Markdown to HTML - Apprise will convert this to HTML | ||||
|         n_body = apply_standard_markdown_to_body(n_body=n_body) | ||||
|  | ||||
|     else: #plaintext etc default | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ') | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'') | ||||
|  | ||||
|     return url, n_body, n_title | ||||
|  | ||||
|  | ||||
| def process_notification(n_object: NotificationContextData, datastore): | ||||
|     from changedetectionio.jinja2_custom import render as jinja_render | ||||
|     from . import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, default_notification_format, valid_notification_formats | ||||
|     # be sure its registered | ||||
|     from .apprise_plugin.custom_handlers import apprise_http_custom_handler | ||||
|     # Register custom Discord plugin | ||||
|     from .apprise_plugin.discord import NotifyDiscordCustom | ||||
|  | ||||
|     if not isinstance(n_object, NotificationContextData): | ||||
|         raise TypeError(f"Expected NotificationContextData, got {type(n_object)}") | ||||
|  | ||||
|     now = time.time() | ||||
|     if n_object.get('notification_timestamp'): | ||||
|         logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s") | ||||
|  | ||||
|     # Insert variables into the notification content | ||||
|     notification_parameters = create_notification_parameters(n_object, datastore) | ||||
|  | ||||
|     n_format = valid_notification_formats.get( | ||||
|         n_object.get('notification_format', default_notification_format), | ||||
|         valid_notification_formats[default_notification_format], | ||||
|     ) | ||||
|     requested_output_format = n_object.get('notification_format', default_notification_format) | ||||
|     logger.debug(f"Requested notification output format: '{requested_output_format}'") | ||||
|  | ||||
|     # If we arrived with 'System default' then look it up | ||||
|     if n_format == default_notification_format_for_watch and datastore.data['settings']['application'].get('notification_format') != default_notification_format_for_watch: | ||||
|     if requested_output_format == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: | ||||
|         # Initially text or whatever | ||||
|         n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format]) | ||||
|         requested_output_format = datastore.data['settings']['application'].get('notification_format', default_notification_format) | ||||
|  | ||||
|     requested_output_format_original = requested_output_format | ||||
|  | ||||
|     # Now clean it up so it fits perfectly with apprise | ||||
|     requested_output_format = notification_format_align_with_apprise(n_format=requested_output_format) | ||||
|  | ||||
|     logger.trace(f"Complete notification body including Jinja and placeholders calculated in  {time.time() - now:.2f}s") | ||||
|  | ||||
| @@ -39,16 +283,23 @@ def process_notification(n_object, datastore): | ||||
|  | ||||
|     apobj = apprise.Apprise(debug=True, asset=apprise_asset) | ||||
|  | ||||
|     # Override Apprise's built-in Discord plugin with our custom one | ||||
|     # This allows us to use colored embeds for diff content | ||||
|     # First remove the built-in discord plugin, then add our custom one | ||||
|     apprise.plugins.N_MGR.remove('discord') | ||||
|     apprise.plugins.N_MGR.add(NotifyDiscordCustom, schemas='discord') | ||||
|  | ||||
|     if not n_object.get('notification_urls'): | ||||
|         return None | ||||
|  | ||||
|     with apprise.LogCapture(level=apprise.logging.DEBUG) as logs: | ||||
|     with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs): | ||||
|         for url in n_object['notification_urls']: | ||||
|  | ||||
|             # Get the notification body from datastore | ||||
|             n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters) | ||||
|             if n_object.get('notification_format', '').startswith('HTML'): | ||||
|                 n_body = n_body.replace("\n", '<br>') | ||||
|  | ||||
|             if n_object.get('markup_text_links_to_html_links'): | ||||
|                 n_body = markup_text_links_to_html(body=n_body) | ||||
|  | ||||
|             n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters) | ||||
|  | ||||
| @@ -64,74 +315,88 @@ def process_notification(n_object, datastore): | ||||
|             logger.info(f">> Process Notification: AppRise notifying {url}") | ||||
|             url = jinja_render(template_str=url, **notification_parameters) | ||||
|  | ||||
|             # Re 323 - Limit discord length to their 2000 char limit total or it wont send. | ||||
|             # Because different notifications may require different pre-processing, run each sequentially :( | ||||
|             # 2000 bytes minus - | ||||
|             #     200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers | ||||
|             #     Length of URL - Incase they specify a longer custom avatar_url | ||||
|             # If it's a plaintext document, and they want HTML type email/alerts, so it needs to be escaped | ||||
|             watch_mime_type = n_object.get('watch_mime_type') | ||||
|             if watch_mime_type and 'text/' in watch_mime_type.lower() and not 'html' in watch_mime_type.lower(): | ||||
|                 if 'html' in requested_output_format: | ||||
|                     from markupsafe import escape | ||||
|                     n_body = str(escape(n_body)) | ||||
|  | ||||
|             # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload | ||||
|             k = '?' if not '?' in url else '&' | ||||
|             if not 'avatar_url' in url \ | ||||
|                     and not url.startswith('mail') \ | ||||
|                     and not url.startswith('post') \ | ||||
|                     and not url.startswith('get') \ | ||||
|                     and not url.startswith('delete') \ | ||||
|                     and not url.startswith('put'): | ||||
|                 url += k + f"avatar_url={APPRISE_AVATAR_URL}" | ||||
|             if 'html' in requested_output_format: | ||||
|                 # Since the n_body is always some kind of text from the 'diff' engine, attempt to preserve whitespaces that get sent to the HTML output | ||||
|                 # But only where its more than 1 consecutive whitespace, otherwise "and this" becomes "and this" etc which is too much. | ||||
|                 n_body = n_body.replace('  ', '  ') | ||||
|  | ||||
|             if url.startswith('tgram://'): | ||||
|                 # Telegram only supports a limit subset of HTML, remove the '<br>' we place in. | ||||
|                 # re https://github.com/dgtlmoon/changedetection.io/issues/555 | ||||
|                 # @todo re-use an existing library we have already imported to strip all non-allowed tags | ||||
|                 n_body = n_body.replace('<br>', '\n') | ||||
|                 n_body = n_body.replace('</br>', '\n') | ||||
|                 # real limit is 4096, but minus some for extra metadata | ||||
|                 payload_max_size = 3600 | ||||
|                 body_limit = max(0, payload_max_size - len(n_title)) | ||||
|                 n_title = n_title[0:payload_max_size] | ||||
|                 n_body = n_body[0:body_limit] | ||||
|             (url, n_body, n_title) = apply_service_tweaks(url=url, n_body=n_body, n_title=n_title, requested_output_format=requested_output_format_original) | ||||
|  | ||||
|             elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith( | ||||
|                     'https://discord.com/api'): | ||||
|                 # real limit is 2000, but minus some for extra metadata | ||||
|                 payload_max_size = 1700 | ||||
|                 body_limit = max(0, payload_max_size - len(n_title)) | ||||
|                 n_title = n_title[0:payload_max_size] | ||||
|                 n_body = n_body[0:body_limit] | ||||
|             apprise_input_format = "NO-THANKS-WE-WILL-MANAGE-ALL-OF-THIS" | ||||
|  | ||||
|             elif url.startswith('mailto'): | ||||
|                 # Apprise will default to HTML, so we need to override it | ||||
|                 # So that whats' generated in n_body is in line with what is going to be sent. | ||||
|                 # https://github.com/caronc/apprise/issues/633#issuecomment-1191449321 | ||||
|                 if not 'format=' in url and (n_format == 'Text' or n_format == 'Markdown'): | ||||
|                     prefix = '?' if not '?' in url else '&' | ||||
|                     # Apprise format is lowercase text https://github.com/caronc/apprise/issues/633 | ||||
|                     n_format = n_format.lower() | ||||
|                     url = f"{url}{prefix}format={n_format}" | ||||
|                 # If n_format == HTML, then apprise email should default to text/html and we should be sending HTML only | ||||
|             if not 'format=' in url: | ||||
|                 parsed_url = urlparse(url) | ||||
|                 prefix_add_to_url = '?' if not parsed_url.query else '&' | ||||
|  | ||||
|             apobj.add(url) | ||||
|                 # THIS IS THE TRICK HOW TO DISABLE APPRISE DOING WEIRD AUTO-CONVERSION WITH BREAKING BR TAGS ETC | ||||
|                 if 'html' in requested_output_format: | ||||
|                     url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}" | ||||
|                     apprise_input_format = NotifyFormat.HTML.value | ||||
|                 elif 'text' in requested_output_format: | ||||
|                     url = f"{url}{prefix_add_to_url}format={NotifyFormat.TEXT.value}" | ||||
|                     apprise_input_format = NotifyFormat.TEXT.value | ||||
|  | ||||
|                 elif requested_output_format == NotifyFormat.MARKDOWN.value: | ||||
|                     # Convert markdown to HTML ourselves since not all plugins do this | ||||
|                     from apprise.conversion import markdown_to_html | ||||
|                     # Make sure there are paragraph breaks around horizontal rules | ||||
|                     n_body = n_body.replace('---', '\n\n---\n\n') | ||||
|                     n_body = markdown_to_html(n_body) | ||||
|                     url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}" | ||||
|                     requested_output_format = NotifyFormat.HTML.value | ||||
|                     apprise_input_format = NotifyFormat.HTML.value  # Changed from MARKDOWN to HTML | ||||
|  | ||||
|                 # Could have arrived at any stage, so we dont end up running .escape on it | ||||
|                 if 'html' in requested_output_format: | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n') | ||||
|                 else: | ||||
|                     # texty types | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n') | ||||
|  | ||||
|             else: | ||||
|                 # ?format was IN the apprise URL, they are kind of on their own here, we will try our best | ||||
|                 if 'format=html' in url: | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n') | ||||
|                     # This will also prevent apprise from doing conversion | ||||
|                     apprise_input_format = NotifyFormat.HTML.value | ||||
|                     requested_output_format = NotifyFormat.HTML.value | ||||
|                 elif 'format=text' in url: | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n') | ||||
|                     apprise_input_format = NotifyFormat.TEXT.value | ||||
|                     requested_output_format = NotifyFormat.TEXT.value | ||||
|  | ||||
|             sent_objs.append({'title': n_title, | ||||
|                               'body': n_body, | ||||
|                               'url': url, | ||||
|                               'body_format': n_format}) | ||||
|                               'url': url}) | ||||
|             apobj.add(url) | ||||
|  | ||||
|             # Since the output is always based on the plaintext of the 'diff' engine, wrap it nicely. | ||||
|             # It should always be similar to the 'history' part of the UI. | ||||
|             if url.startswith('mail') and 'html' in requested_output_format: | ||||
|                 if not '<pre' in n_body and not '<body' in n_body: # No custom HTML-ish body was setup already | ||||
|                     n_body = as_monospaced_html_email(content=n_body, title=n_title) | ||||
|  | ||||
|         # Blast off the notifications tht are set in .add() | ||||
|         apobj.notify( | ||||
|             title=n_title, | ||||
|             body=n_body, | ||||
|             body_format=n_format, | ||||
|             # `body_format` Tell apprise what format the INPUT is in, specify a wrong/bad type and it will force skip conversion in apprise | ||||
|             # &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between) | ||||
|             body_format=apprise_input_format, | ||||
|             # False is not an option for AppRise, must be type None | ||||
|             attach=n_object.get('screenshot', None) | ||||
|         ) | ||||
|  | ||||
|  | ||||
|         # Returns empty string if nothing found, multi-line string otherwise | ||||
|         log_value = logs.getvalue() | ||||
|  | ||||
|         if log_value and 'WARNING' in log_value or 'ERROR' in log_value: | ||||
|         if log_value and ('WARNING' in log_value or 'ERROR' in log_value): | ||||
|             logger.critical(log_value) | ||||
|             raise Exception(log_value) | ||||
|  | ||||
| @@ -141,17 +406,15 @@ def process_notification(n_object, datastore): | ||||
|  | ||||
| # Notification title + body content parameters get created here. | ||||
| # ( Where we prepare the tokens in the notification to be replaced with actual values ) | ||||
| def create_notification_parameters(n_object, datastore): | ||||
|     from copy import deepcopy | ||||
|     from . import valid_tokens | ||||
| def create_notification_parameters(n_object: NotificationContextData, datastore): | ||||
|     if not isinstance(n_object, NotificationContextData): | ||||
|         raise TypeError(f"Expected NotificationContextData, got {type(n_object)}") | ||||
|  | ||||
|     # in the case we send a test notification from the main settings, there is no UUID. | ||||
|     uuid = n_object['uuid'] if 'uuid' in n_object else '' | ||||
|  | ||||
|     if uuid: | ||||
|         watch_title = datastore.data['watching'][uuid].get('title', '') | ||||
|     watch = datastore.data['watching'].get(n_object['uuid']) | ||||
|     if watch: | ||||
|         watch_title = datastore.data['watching'][n_object['uuid']].label | ||||
|         tag_list = [] | ||||
|         tags = datastore.get_all_tags_for_watch(uuid) | ||||
|         tags = datastore.get_all_tags_for_watch(n_object['uuid']) | ||||
|         if tags: | ||||
|             for tag_uuid, tag in tags.items(): | ||||
|                 tag_list.append(tag.get('title')) | ||||
| @@ -166,14 +429,10 @@ def create_notification_parameters(n_object, datastore): | ||||
|  | ||||
|     watch_url = n_object['watch_url'] | ||||
|  | ||||
|     diff_url = "{}/diff/{}".format(base_url, uuid) | ||||
|     preview_url = "{}/preview/{}".format(base_url, uuid) | ||||
|     diff_url = "{}/diff/{}".format(base_url, n_object['uuid']) | ||||
|     preview_url = "{}/preview/{}".format(base_url, n_object['uuid']) | ||||
|  | ||||
|     # Not sure deepcopy is needed here, but why not | ||||
|     tokens = deepcopy(valid_tokens) | ||||
|  | ||||
|     # Valid_tokens also used as a field validator | ||||
|     tokens.update( | ||||
|     n_object.update( | ||||
|         { | ||||
|             'base_url': base_url, | ||||
|             'diff_url': diff_url, | ||||
| @@ -181,13 +440,10 @@ def create_notification_parameters(n_object, datastore): | ||||
|             'watch_tag': watch_tag if watch_tag is not None else '', | ||||
|             'watch_title': watch_title if watch_title is not None else '', | ||||
|             'watch_url': watch_url, | ||||
|             'watch_uuid': uuid, | ||||
|             'watch_uuid': n_object['uuid'], | ||||
|         }) | ||||
|  | ||||
|     # n_object will contain diff, diff_added etc etc | ||||
|     tokens.update(n_object) | ||||
|     if watch: | ||||
|         n_object.update(datastore.data['watching'].get(n_object['uuid']).extra_notification_token_values()) | ||||
|  | ||||
|     if uuid: | ||||
|         tokens.update(datastore.data['watching'].get(uuid).extra_notification_token_values()) | ||||
|  | ||||
|     return tokens | ||||
|     return n_object | ||||
|   | ||||
| @@ -6,9 +6,70 @@ Extracted from update_worker.py to provide standalone notification functionality | ||||
| for both sync and async workers | ||||
| """ | ||||
|  | ||||
| import time | ||||
| from loguru import logger | ||||
| import time | ||||
|  | ||||
| from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
| from changedetectionio.notification import default_notification_format, valid_notification_formats | ||||
|  | ||||
| # This gets modified on notification time (handler.py) depending on the required notification output | ||||
| CUSTOM_LINEBREAK_PLACEHOLDER='@BR@' | ||||
|  | ||||
|  | ||||
| # What is passed around as notification context, also used as the complete list of valid {{ tokens }} | ||||
| class NotificationContextData(dict): | ||||
|     def __init__(self, initial_data=None, **kwargs): | ||||
|         super().__init__({ | ||||
|             'base_url': None, | ||||
|             'current_snapshot': None, | ||||
|             'diff': None, | ||||
|             'diff_added': None, | ||||
|             'diff_full': None, | ||||
|             'diff_patch': None, | ||||
|             'diff_removed': None, | ||||
|             'diff_url': None, | ||||
|             'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen | ||||
|             'notification_timestamp': time.time(), | ||||
|             'preview_url': None, | ||||
|             'screenshot': None, | ||||
|             'triggered_text': None, | ||||
|             'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX',  # Converted to 'watch_uuid' in create_notification_parameters | ||||
|             'watch_mime_type': None, | ||||
|             'watch_tag': None, | ||||
|             'watch_title': None, | ||||
|             'watch_url': 'https://WATCH-PLACE-HOLDER/', | ||||
|         }) | ||||
|  | ||||
|         # Apply any initial data passed in | ||||
|         self.update({'watch_uuid': self.get('uuid')}) | ||||
|         if initial_data: | ||||
|             self.update(initial_data) | ||||
|  | ||||
|         # Apply any keyword arguments | ||||
|         if kwargs: | ||||
|             self.update(kwargs) | ||||
|  | ||||
|         n_format = self.get('notification_format') | ||||
|         if n_format and not valid_notification_formats.get(n_format): | ||||
|             raise ValueError(f'Invalid notification format: "{n_format}"') | ||||
|  | ||||
|     def set_random_for_validation(self): | ||||
|         import random, string | ||||
|         """Randomly fills all dict keys with random strings (for validation/testing).  | ||||
|         So we can test the output in the notification body | ||||
|         """ | ||||
|         for key in self.keys(): | ||||
|             if key in ['uuid', 'time', 'watch_uuid']: | ||||
|                 continue | ||||
|             rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12)) | ||||
|             self[key] = rand_str | ||||
|  | ||||
|     def __setitem__(self, key, value): | ||||
|         if key == 'notification_format' and isinstance(value, str) and not value.startswith('RANDOM-PLACEHOLDER-'): | ||||
|             if not valid_notification_formats.get(value): | ||||
|                 raise ValueError(f'Invalid notification format: "{value}"') | ||||
|  | ||||
|         super().__setitem__(key, value) | ||||
|  | ||||
| class NotificationService: | ||||
|     """ | ||||
| @@ -20,12 +81,15 @@ class NotificationService: | ||||
|         self.datastore = datastore | ||||
|         self.notification_q = notification_q | ||||
|      | ||||
|     def queue_notification_for_watch(self, n_object, watch): | ||||
|     def queue_notification_for_watch(self, n_object: NotificationContextData, watch): | ||||
|         """ | ||||
|         Queue a notification for a watch with full diff rendering and template variables | ||||
|         """ | ||||
|         from changedetectionio import diff | ||||
|         from changedetectionio.notification import default_notification_format_for_watch | ||||
|         from changedetectionio.notification import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|  | ||||
|         if not isinstance(n_object, NotificationContextData): | ||||
|             raise TypeError(f"Expected NotificationContextData, got {type(n_object)}") | ||||
|  | ||||
|         dates = [] | ||||
|         trigger_text = '' | ||||
| @@ -44,29 +108,16 @@ class NotificationService: | ||||
|             snapshot_contents = "No snapshot/history available, the watch should fetch atleast once." | ||||
|  | ||||
|         # If we ended up here with "System default" | ||||
|         if n_object.get('notification_format') == default_notification_format_for_watch: | ||||
|         if n_object.get('notification_format') == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: | ||||
|             n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format') | ||||
|  | ||||
|         html_colour_enable = False | ||||
|         # HTML needs linebreak, but MarkDown and Text can use a linefeed | ||||
|         if n_object.get('notification_format') == 'HTML': | ||||
|             line_feed_sep = "<br>" | ||||
|             # Snapshot will be plaintext on the disk, convert to some kind of HTML | ||||
|             snapshot_contents = snapshot_contents.replace('\n', line_feed_sep) | ||||
|         elif n_object.get('notification_format') == 'HTML Color': | ||||
|             line_feed_sep = "<br>" | ||||
|             # Snapshot will be plaintext on the disk, convert to some kind of HTML | ||||
|             snapshot_contents = snapshot_contents.replace('\n', line_feed_sep) | ||||
|             html_colour_enable = True | ||||
|         else: | ||||
|             line_feed_sep = "\n" | ||||
|  | ||||
|         triggered_text = '' | ||||
|         if len(trigger_text): | ||||
|             from . import html_tools | ||||
|             triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text) | ||||
|             if triggered_text: | ||||
|                 triggered_text = line_feed_sep.join(triggered_text) | ||||
|                 triggered_text = CUSTOM_LINEBREAK_PLACEHOLDER.join(triggered_text) | ||||
|  | ||||
|         # Could be called as a 'test notification' with only 1 snapshot available | ||||
|         prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n" | ||||
| @@ -78,16 +129,17 @@ class NotificationService: | ||||
|  | ||||
|         n_object.update({ | ||||
|             'current_snapshot': snapshot_contents, | ||||
|             'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable), | ||||
|             'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep), | ||||
|             'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable), | ||||
|             'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True), | ||||
|             'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep), | ||||
|             'notification_timestamp': now, | ||||
|             'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER, patch_format=True), | ||||
|             'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None, | ||||
|             'triggered_text': triggered_text, | ||||
|             'uuid': watch.get('uuid') if watch else None, | ||||
|             'watch_url': watch.get('url') if watch else None, | ||||
|             'watch_uuid': watch.get('uuid') if watch else None, | ||||
|             'watch_mime_type': watch.get('content-type') | ||||
|         }) | ||||
|  | ||||
|         if watch: | ||||
| @@ -103,7 +155,7 @@ class NotificationService: | ||||
|         Individual watch settings > Tag settings > Global settings | ||||
|         """ | ||||
|         from changedetectionio.notification import ( | ||||
|             default_notification_format_for_watch, | ||||
|             USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, | ||||
|             default_notification_body, | ||||
|             default_notification_title | ||||
|         ) | ||||
| @@ -111,7 +163,7 @@ class NotificationService: | ||||
|         # Would be better if this was some kind of Object where Watch can reference the parent datastore etc | ||||
|         v = watch.get(var_name) | ||||
|         if v and not watch.get('notification_muted'): | ||||
|             if var_name == 'notification_format' and v == default_notification_format_for_watch: | ||||
|             if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: | ||||
|                 return self.datastore.data['settings']['application'].get('notification_format') | ||||
|  | ||||
|             return v | ||||
| @@ -128,7 +180,7 @@ class NotificationService: | ||||
|  | ||||
|         # Otherwise could be defaults | ||||
|         if var_name == 'notification_format': | ||||
|             return default_notification_format_for_watch | ||||
|             return USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|         if var_name == 'notification_body': | ||||
|             return default_notification_body | ||||
|         if var_name == 'notification_title': | ||||
| @@ -140,7 +192,7 @@ class NotificationService: | ||||
|         """ | ||||
|         Send notification when content changes are detected | ||||
|         """ | ||||
|         n_object = {} | ||||
|         n_object = NotificationContextData() | ||||
|         watch = self.datastore.data['watching'].get(watch_uuid) | ||||
|         if not watch: | ||||
|             return | ||||
| @@ -183,11 +235,25 @@ class NotificationService: | ||||
|         if not watch: | ||||
|             return | ||||
|  | ||||
|         n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|                     'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( | ||||
|                         ", ".join(watch['include_filters']), | ||||
|                         threshold), | ||||
|                     'notification_format': 'text'} | ||||
|         filter_list = ", ".join(watch['include_filters']) | ||||
|         # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed | ||||
|         body = f"""Hello, | ||||
|  | ||||
| Your configured CSS/xPath filters of '{filter_list}' for {{{{watch_url}}}} did not appear on the page after {threshold} attempts. | ||||
|  | ||||
| It's possible the page changed layout and the filter needs updating ( Try the 'Visual Selector' tab ) | ||||
|  | ||||
| Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}} | ||||
|  | ||||
| Thanks - Your omniscient changedetection.io installation. | ||||
| """ | ||||
|  | ||||
|         n_object = NotificationContextData({ | ||||
|             'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|             'notification_body': body, | ||||
|             'notification_format': self._check_cascading_vars('notification_format', watch), | ||||
|         }) | ||||
|         n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html') | ||||
|  | ||||
|         if len(watch['notification_urls']): | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
| @@ -215,12 +281,28 @@ class NotificationService: | ||||
|         if not watch: | ||||
|             return | ||||
|         threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts') | ||||
|         n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1), | ||||
|                     'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} " | ||||
|                                          "did not appear on the page after {} attempts, did the page change layout? " | ||||
|                                          "Does it need a delay added?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\n" | ||||
|                                          "Thanks - Your omniscient changedetection.io installation :)\n".format(step_n+1, threshold), | ||||
|                     'notification_format': 'text'} | ||||
|  | ||||
|         step = step_n + 1 | ||||
|         # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed | ||||
|  | ||||
|         # {{{{ }}}} because this will be Jinja2 {{ }} tokens | ||||
|         body = f"""Hello, | ||||
|          | ||||
| Your configured browser step at position {step} for the web page watch {{{{watch_url}}}} did not appear on the page after {threshold} attempts, did the page change layout? | ||||
|  | ||||
| The element may have moved and needs editing, or does it need a delay added? | ||||
|  | ||||
| Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}} | ||||
|  | ||||
| Thanks - Your omniscient changedetection.io installation. | ||||
| """ | ||||
|  | ||||
|         n_object = NotificationContextData({ | ||||
|             'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run", | ||||
|             'notification_body': body, | ||||
|             'notification_format': self._check_cascading_vars('notification_format', watch), | ||||
|         }) | ||||
|         n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html') | ||||
|  | ||||
|         if len(watch['notification_urls']): | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
|   | ||||
| @@ -91,6 +91,8 @@ class difference_detection_processor(): | ||||
|             else: | ||||
|                 logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ") | ||||
|  | ||||
|         logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}") | ||||
|  | ||||
|         # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need. | ||||
|         # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc) | ||||
|         self.fetcher = fetcher_obj(proxy_override=proxy_url, | ||||
| @@ -102,7 +104,7 @@ class difference_detection_processor(): | ||||
|             self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid')) | ||||
|  | ||||
|         # Tweak the base config with the per-watch ones | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|         request_headers = CaseInsensitiveDict() | ||||
|  | ||||
|         ua = self.datastore.data['settings']['requests'].get('default_ua') | ||||
| @@ -146,18 +148,19 @@ class difference_detection_processor(): | ||||
|  | ||||
|         # And here we go! call the right browser with browser-specific settings | ||||
|         empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|  | ||||
|         # All fetchers are now async | ||||
|         await self.fetcher.run(url=url, | ||||
|                                timeout=timeout, | ||||
|                                request_headers=request_headers, | ||||
|                                request_body=request_body, | ||||
|                                request_method=request_method, | ||||
|                                ignore_status_codes=ignore_status_codes, | ||||
|                                current_include_filters=self.watch.get('include_filters'), | ||||
|                                is_binary=is_binary, | ||||
|                                empty_pages_are_a_change=empty_pages_are_a_change | ||||
|                                ) | ||||
|         await self.fetcher.run( | ||||
|             current_include_filters=self.watch.get('include_filters'), | ||||
|             empty_pages_are_a_change=empty_pages_are_a_change, | ||||
|             fetch_favicon=self.watch.favicon_is_expired(), | ||||
|             ignore_status_codes=ignore_status_codes, | ||||
|             is_binary=is_binary, | ||||
|             request_body=request_body, | ||||
|             request_headers=request_headers, | ||||
|             request_method=request_method, | ||||
|             timeout=timeout, | ||||
|             url=url, | ||||
|        ) | ||||
|  | ||||
|         #@todo .quit here could go on close object, so we can run JS if change-detected | ||||
|         self.fetcher.quit(watch=self.watch) | ||||
|   | ||||
							
								
								
									
										133
									
								
								changedetectionio/processors/magic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								changedetectionio/processors/magic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
| """ | ||||
| Content Type Detection and Stream Classification | ||||
|  | ||||
| This module provides intelligent content-type detection for changedetection.io. | ||||
| It addresses the common problem where HTTP Content-Type headers are missing, incorrect, | ||||
| or too generic, which would otherwise cause the wrong processor to be used. | ||||
|  | ||||
| The guess_stream_type class combines: | ||||
| 1. HTTP Content-Type headers (when available and reliable) | ||||
| 2. Python-magic library for MIME detection (analyzing actual file content) | ||||
| 3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.) | ||||
|  | ||||
| This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF, | ||||
| plain text, CSV, YAML, and XML formats - even when servers provide misleading headers. | ||||
|  | ||||
| Used by: processors/text_json_diff/processor.py and other content processors | ||||
| """ | ||||
|  | ||||
| # When to apply the 'cdata to real HTML' hack | ||||
| RSS_XML_CONTENT_TYPES = [ | ||||
|     "application/rss+xml", | ||||
|     "application/rdf+xml", | ||||
|     "application/atom+xml", | ||||
|     "text/rss+xml",  # rare, non-standard | ||||
|     "application/x-rss+xml",  # legacy (older feed software) | ||||
|     "application/x-atom+xml",  # legacy (older Atom) | ||||
| ] | ||||
|  | ||||
| # JSON Content-types | ||||
| JSON_CONTENT_TYPES = [ | ||||
|     "application/activity+json", | ||||
|     "application/feed+json", | ||||
|     "application/json", | ||||
|     "application/ld+json", | ||||
|     "application/vnd.api+json", | ||||
| ] | ||||
|  | ||||
|  | ||||
| # Generic XML Content-types (non-RSS/Atom) | ||||
| XML_CONTENT_TYPES = [ | ||||
|     "text/xml", | ||||
|     "application/xml", | ||||
| ] | ||||
|  | ||||
| HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div'] | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| class guess_stream_type(): | ||||
|     is_pdf = False | ||||
|     is_json = False | ||||
|     is_html = False | ||||
|     is_plaintext = False | ||||
|     is_rss = False | ||||
|     is_csv = False | ||||
|     is_xml = False  # Generic XML, not RSS/Atom | ||||
|     is_yaml = False | ||||
|  | ||||
|     def __init__(self, http_content_header, content): | ||||
|         import re | ||||
|         magic_content_header = http_content_header | ||||
|         test_content = content[:200].lower().strip() | ||||
|  | ||||
|         # Remove whitespace between < and tag name for robust detection (handles '< html', '<\nhtml', etc.) | ||||
|         test_content_normalized = re.sub(r'<\s+', '<', test_content) | ||||
|  | ||||
|         # Use puremagic for lightweight MIME detection (saves ~14MB vs python-magic) | ||||
|         magic_result = None | ||||
|         try: | ||||
|             import puremagic | ||||
|  | ||||
|             # puremagic needs bytes, so encode if we have a string | ||||
|             content_bytes = content[:200].encode('utf-8') if isinstance(content, str) else content[:200] | ||||
|  | ||||
|             # puremagic returns a list of PureMagic objects with confidence scores | ||||
|             detections = puremagic.magic_string(content_bytes) | ||||
|             if detections: | ||||
|                 # Get the highest confidence detection | ||||
|                 mime = detections[0].mime_type | ||||
|                 logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'") | ||||
|                 if mime and "/" in mime: | ||||
|                     magic_result = mime | ||||
|                     # Ignore generic/fallback mime types | ||||
|                     if mime in ['application/octet-stream', 'application/x-empty', 'binary']: | ||||
|                         logger.debug(f"Ignoring generic mime type '{mime}' from puremagic library") | ||||
|                     # Trust puremagic for non-text types immediately | ||||
|                     elif mime not in ['text/html', 'text/plain']: | ||||
|                         magic_content_header = mime | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.warning(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection") | ||||
|  | ||||
|         # Content-based detection (most reliable for text formats) | ||||
|         # Check for HTML patterns first - if found, override magic's text/plain | ||||
|         has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS) | ||||
|  | ||||
|         # Always trust headers first | ||||
|         if 'text/plain' in http_content_header: | ||||
|             self.is_plaintext = True | ||||
|         if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES): | ||||
|             self.is_rss = True | ||||
|         elif any(s in http_content_header for s in JSON_CONTENT_TYPES): | ||||
|             self.is_json = True | ||||
|         elif 'pdf' in magic_content_header: | ||||
|             self.is_pdf = True | ||||
|         elif has_html_patterns or http_content_header == 'text/html': | ||||
|             self.is_html = True | ||||
|         elif any(s in magic_content_header for s in JSON_CONTENT_TYPES): | ||||
|             self.is_json = True | ||||
|         # magic will call a rss document 'xml' | ||||
|         # Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss | ||||
|         # This also triggers the automatic CDATA text parser so the RSS goes back a nice content list | ||||
|         elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized: | ||||
|             self.is_rss = True | ||||
|         elif any(s in http_content_header for s in XML_CONTENT_TYPES): | ||||
|             # Only mark as generic XML if not already detected as RSS | ||||
|             if not self.is_rss: | ||||
|                 self.is_xml = True | ||||
|         elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES): | ||||
|             # Generic XML that's not RSS/Atom (RSS/Atom checked above) | ||||
|             self.is_xml = True | ||||
|         elif '%pdf-1' in test_content: | ||||
|             self.is_pdf = True | ||||
|         elif http_content_header.startswith('text/'): | ||||
|             self.is_plaintext = True | ||||
|         # Only trust magic for 'text' if no other patterns matched | ||||
|         elif 'text' in magic_content_header: | ||||
|             self.is_plaintext = True | ||||
|         # If magic says text/plain and we found no HTML patterns, trust it | ||||
|         elif magic_result == 'text/plain': | ||||
|             self.is_plaintext = True | ||||
|             logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)") | ||||
|  | ||||
| @@ -32,7 +32,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data): | ||||
|     '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])''' | ||||
|     from changedetectionio import forms, html_tools | ||||
|     from changedetectionio.model.Watch import model as watch_model | ||||
|     from concurrent.futures import ProcessPoolExecutor | ||||
|     from concurrent.futures import ThreadPoolExecutor | ||||
|     from copy import deepcopy | ||||
|     from flask import request | ||||
|     import brotli | ||||
| @@ -76,13 +76,16 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data): | ||||
|             update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type') | ||||
|  | ||||
|             # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk | ||||
|             # Do this as a parallel process because it could take some time | ||||
|             with ProcessPoolExecutor(max_workers=2) as executor: | ||||
|                 future1 = executor.submit(_task, tmp_watch, update_handler) | ||||
|                 future2 = executor.submit(_task, blank_watch_no_filters, update_handler) | ||||
|             # Do this as parallel threads (not processes) to avoid pickle issues with Lock objects | ||||
|             try: | ||||
|                 with ThreadPoolExecutor(max_workers=2) as executor: | ||||
|                     future1 = executor.submit(_task, tmp_watch, update_handler) | ||||
|                     future2 = executor.submit(_task, blank_watch_no_filters, update_handler) | ||||
|  | ||||
|                 text_after_filter = future1.result() | ||||
|                 text_before_filter = future2.result() | ||||
|                     text_after_filter = future1.result() | ||||
|                     text_before_filter = future2.result() | ||||
|             except Exception as e: | ||||
|                 x=1 | ||||
|  | ||||
|     try: | ||||
|         trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter, | ||||
|   | ||||
| @@ -7,18 +7,24 @@ import re | ||||
| import urllib3 | ||||
|  | ||||
| from changedetectionio.conditions import execute_ruleset_against_all_plugins | ||||
| from changedetectionio.diff import ADDED_PLACEMARKER_OPEN | ||||
| from changedetectionio.processors import difference_detection_processor | ||||
| from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE | ||||
| from changedetectionio import html_tools, content_fetchers | ||||
| from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.processors.magic import guess_stream_type | ||||
|  | ||||
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | ||||
|  | ||||
| name = 'Webpage Text/HTML, JSON and PDF changes' | ||||
| description = 'Detects all text changes where possible' | ||||
|  | ||||
| json_filter_prefixes = ['json:', 'jq:', 'jqraw:'] | ||||
| JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:'] | ||||
|  | ||||
| # Assume it's this type if the server says nothing on content-type | ||||
| DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER = 'text/html' | ||||
|  | ||||
| class FilterNotFoundInResponse(ValueError): | ||||
|     def __init__(self, msg, screenshot=None, xpath_data=None): | ||||
| @@ -32,356 +38,560 @@ class PDFToHTMLToolNotFound(ValueError): | ||||
|         ValueError.__init__(self, msg) | ||||
|  | ||||
|  | ||||
| class FilterConfig: | ||||
|     """Consolidates all filter and rule configurations from watch, tags, and global settings.""" | ||||
|  | ||||
|     def __init__(self, watch, datastore): | ||||
|         self.watch = watch | ||||
|         self.datastore = datastore | ||||
|         self.watch_uuid = watch.get('uuid') | ||||
|         # Cache computed properties to avoid repeated list operations | ||||
|         self._include_filters_cache = None | ||||
|         self._subtractive_selectors_cache = None | ||||
|  | ||||
|     def _get_merged_rules(self, attr, include_global=False): | ||||
|         """Merge rules from watch, tags, and optionally global settings.""" | ||||
|         watch_rules = self.watch.get(attr, []) | ||||
|         tag_rules = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr=attr) | ||||
|         rules = list(dict.fromkeys(watch_rules + tag_rules)) | ||||
|  | ||||
|         if include_global: | ||||
|             global_rules = self.datastore.data['settings']['application'].get(f'global_{attr}', []) | ||||
|             rules = list(dict.fromkeys(rules + global_rules)) | ||||
|  | ||||
|         return rules | ||||
|  | ||||
|     @property | ||||
|     def include_filters(self): | ||||
|         if self._include_filters_cache is None: | ||||
|             filters = self._get_merged_rules('include_filters') | ||||
|             # Inject LD+JSON price tracker rule if enabled | ||||
|             if self.watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT: | ||||
|                 filters += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS | ||||
|             self._include_filters_cache = filters | ||||
|         return self._include_filters_cache | ||||
|  | ||||
|     @property | ||||
|     def subtractive_selectors(self): | ||||
|         if self._subtractive_selectors_cache is None: | ||||
|             watch_selectors = self.watch.get("subtractive_selectors", []) | ||||
|             tag_selectors = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr='subtractive_selectors') | ||||
|             global_selectors = self.datastore.data["settings"]["application"].get("global_subtractive_selectors", []) | ||||
|             self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors] | ||||
|         return self._subtractive_selectors_cache | ||||
|  | ||||
|     @property | ||||
|     def extract_text(self): | ||||
|         return self._get_merged_rules('extract_text') | ||||
|  | ||||
|     @property | ||||
|     def ignore_text(self): | ||||
|         return self._get_merged_rules('ignore_text', include_global=True) | ||||
|  | ||||
|     @property | ||||
|     def trigger_text(self): | ||||
|         return self._get_merged_rules('trigger_text') | ||||
|  | ||||
|     @property | ||||
|     def text_should_not_be_present(self): | ||||
|         return self._get_merged_rules('text_should_not_be_present') | ||||
|  | ||||
|     @property | ||||
|     def has_include_filters(self): | ||||
|         return bool(self.include_filters) and bool(self.include_filters[0].strip()) | ||||
|  | ||||
|     @property | ||||
|     def has_include_json_filters(self): | ||||
|         return any(f.strip().startswith(prefix) for f in self.include_filters for prefix in JSON_FILTER_PREFIXES) | ||||
|  | ||||
|     @property | ||||
|     def has_subtractive_selectors(self): | ||||
|         return bool(self.subtractive_selectors) and bool(self.subtractive_selectors[0].strip()) | ||||
|  | ||||
|  | ||||
| class ContentTransformer: | ||||
|     """Handles text transformations like trimming, sorting, and deduplication.""" | ||||
|  | ||||
|     @staticmethod | ||||
|     def trim_whitespace(text): | ||||
|         """Remove leading/trailing whitespace from each line.""" | ||||
|         # Use generator expression to avoid building intermediate list | ||||
|         return '\n'.join(line.strip() for line in text.replace("\n\n", "\n").splitlines()) | ||||
|  | ||||
|     @staticmethod | ||||
|     def remove_duplicate_lines(text): | ||||
|         """Remove duplicate lines while preserving order.""" | ||||
|         return '\n'.join(dict.fromkeys(line for line in text.replace("\n\n", "\n").splitlines())) | ||||
|  | ||||
|     @staticmethod | ||||
|     def sort_alphabetically(text): | ||||
|         """Sort lines alphabetically (case-insensitive).""" | ||||
|         # Remove double line feeds before sorting | ||||
|         text = text.replace("\n\n", "\n") | ||||
|         return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower())) | ||||
|  | ||||
|     @staticmethod | ||||
|     def extract_by_regex(text, regex_patterns): | ||||
|         """Extract text matching regex patterns.""" | ||||
|         # Use list of strings instead of concatenating lists repeatedly (avoids O(n²) behavior) | ||||
|         regex_matched_output = [] | ||||
|  | ||||
|         for s_re in regex_patterns: | ||||
|             # Check if it's perl-style regex /.../ | ||||
|             if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE): | ||||
|                 regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re) | ||||
|                 result = re.findall(regex, text) | ||||
|  | ||||
|                 for match in result: | ||||
|                     if type(match) is tuple: | ||||
|                         regex_matched_output.extend(match) | ||||
|                         regex_matched_output.append('\n') | ||||
|                     else: | ||||
|                         regex_matched_output.append(match) | ||||
|                         regex_matched_output.append('\n') | ||||
|             else: | ||||
|                 # Plain text search (case-insensitive) | ||||
|                 r = re.compile(re.escape(s_re), re.IGNORECASE) | ||||
|                 res = r.findall(text) | ||||
|                 if res: | ||||
|                     for match in res: | ||||
|                         regex_matched_output.append(match) | ||||
|                         regex_matched_output.append('\n') | ||||
|  | ||||
|         return ''.join(regex_matched_output) if regex_matched_output else '' | ||||
|  | ||||
|  | ||||
| class RuleEngine: | ||||
|     """Evaluates blocking rules (triggers, conditions, text_should_not_be_present).""" | ||||
|  | ||||
|     @staticmethod | ||||
|     def evaluate_trigger_text(content, trigger_patterns): | ||||
|         """ | ||||
|         Check if trigger text is present. If trigger_text is configured, | ||||
|         content is blocked UNLESS the trigger is found. | ||||
|         Returns True if blocked, False if allowed. | ||||
|         """ | ||||
|         if not trigger_patterns: | ||||
|             return False | ||||
|  | ||||
|         # Assume blocked if trigger_text is configured | ||||
|         result = html_tools.strip_ignore_text( | ||||
|             content=str(content), | ||||
|             wordlist=trigger_patterns, | ||||
|             mode="line numbers" | ||||
|         ) | ||||
|         # Unblock if trigger was found | ||||
|         return not bool(result) | ||||
|  | ||||
|     @staticmethod | ||||
|     def evaluate_text_should_not_be_present(content, patterns): | ||||
|         """ | ||||
|         Check if forbidden text is present. If found, block the change. | ||||
|         Returns True if blocked, False if allowed. | ||||
|         """ | ||||
|         if not patterns: | ||||
|             return False | ||||
|  | ||||
|         result = html_tools.strip_ignore_text( | ||||
|             content=str(content), | ||||
|             wordlist=patterns, | ||||
|             mode="line numbers" | ||||
|         ) | ||||
|         # Block if forbidden text was found | ||||
|         return bool(result) | ||||
|  | ||||
|     @staticmethod | ||||
|     def evaluate_conditions(watch, datastore, content): | ||||
|         """ | ||||
|         Evaluate custom conditions ruleset. | ||||
|         Returns True if blocked, False if allowed. | ||||
|         """ | ||||
|         if not watch.get('conditions') or not watch.get('conditions_match_logic'): | ||||
|             return False | ||||
|  | ||||
|         conditions_result = execute_ruleset_against_all_plugins( | ||||
|             current_watch_uuid=watch.get('uuid'), | ||||
|             application_datastruct=datastore.data, | ||||
|             ephemeral_data={'text': content} | ||||
|         ) | ||||
|  | ||||
|         # Block if conditions not met | ||||
|         return not conditions_result.get('result') | ||||
|  | ||||
|  | ||||
| class ContentProcessor: | ||||
|     """Handles content preprocessing, filtering, and extraction.""" | ||||
|  | ||||
|     def __init__(self, fetcher, watch, filter_config, datastore): | ||||
|         self.fetcher = fetcher | ||||
|         self.watch = watch | ||||
|         self.filter_config = filter_config | ||||
|         self.datastore = datastore | ||||
|  | ||||
|     def preprocess_rss(self, content): | ||||
|         """ | ||||
|         Convert CDATA/comments in RSS to usable text. | ||||
|  | ||||
|         Supports two RSS processing modes: | ||||
|         - 'default': Inline CDATA replacement (original behavior) | ||||
|         - 'formatted': Format RSS items with title, link, guid, pubDate, and description (CDATA unmarked) | ||||
|         """ | ||||
|         from changedetectionio import rss_tools | ||||
|         rss_mode = self.datastore.data["settings"]["application"].get("rss_reader_mode") | ||||
|         if rss_mode: | ||||
|             # Format RSS items nicely with CDATA content unmarked and converted to text | ||||
|             return rss_tools.format_rss_items(content) | ||||
|         else: | ||||
|             # Default: Original inline CDATA replacement | ||||
|             return cdata_in_document_to_text(html_content=content) | ||||
|  | ||||
|     def preprocess_pdf(self, raw_content): | ||||
|         """Convert PDF to HTML using external tool.""" | ||||
|         from shutil import which | ||||
|         tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml") | ||||
|         if not which(tool): | ||||
|             raise PDFToHTMLToolNotFound( | ||||
|                 f"Command-line `{tool}` tool was not found in system PATH, was it installed?" | ||||
|             ) | ||||
|  | ||||
|         import subprocess | ||||
|         proc = subprocess.Popen( | ||||
|             [tool, '-stdout', '-', '-s', 'out.pdf', '-i'], | ||||
|             stdout=subprocess.PIPE, | ||||
|             stdin=subprocess.PIPE | ||||
|         ) | ||||
|         proc.stdin.write(raw_content) | ||||
|         proc.stdin.close() | ||||
|         html_content = proc.stdout.read().decode('utf-8') | ||||
|         proc.wait(timeout=60) | ||||
|  | ||||
|         # Add metadata for change detection | ||||
|         metadata = ( | ||||
|             f"<p>Added by changedetection.io: Document checksum - " | ||||
|             f"{hashlib.md5(raw_content).hexdigest().upper()} " | ||||
|             f"Original file size - {len(raw_content)} bytes</p>" | ||||
|         ) | ||||
|         return html_content.replace('</body>', metadata + '</body>') | ||||
|  | ||||
|     def preprocess_json(self, raw_content): | ||||
|         """Format and sort JSON content.""" | ||||
|         # Then we re-format it, else it does have filters (later on) which will reformat it anyway | ||||
|         content = html_tools.extract_json_as_string(content=raw_content, json_filter="json:$") | ||||
|  | ||||
|         # Sort JSON to avoid false alerts from reordering | ||||
|         try: | ||||
|             content = json.dumps(json.loads(content), sort_keys=True, indent=4) | ||||
|         except Exception: | ||||
|             # Might be malformed JSON, continue anyway | ||||
|             pass | ||||
|  | ||||
|         return content | ||||
|  | ||||
|     def apply_include_filters(self, content, stream_content_type): | ||||
|         """Apply CSS, XPath, or JSON filters to extract specific content.""" | ||||
|         filtered_content = "" | ||||
|  | ||||
|         for filter_rule in self.filter_config.include_filters: | ||||
|             # XPath filters | ||||
|             if filter_rule[0] == '/' or filter_rule.startswith('xpath:'): | ||||
|                 filtered_content += html_tools.xpath_filter( | ||||
|                     xpath_filter=filter_rule.replace('xpath:', ''), | ||||
|                     html_content=content, | ||||
|                     append_pretty_line_formatting=not self.watch.is_source_type_url, | ||||
|                     is_rss=stream_content_type.is_rss | ||||
|                 ) | ||||
|  | ||||
|             # XPath1 filters (first match only) | ||||
|             elif filter_rule.startswith('xpath1:'): | ||||
|                 filtered_content += html_tools.xpath1_filter( | ||||
|                     xpath_filter=filter_rule.replace('xpath1:', ''), | ||||
|                     html_content=content, | ||||
|                     append_pretty_line_formatting=not self.watch.is_source_type_url, | ||||
|                     is_rss=stream_content_type.is_rss | ||||
|                 ) | ||||
|  | ||||
|             # JSON filters | ||||
|             elif any(filter_rule.startswith(prefix) for prefix in JSON_FILTER_PREFIXES): | ||||
|                 filtered_content += html_tools.extract_json_as_string( | ||||
|                     content=content, | ||||
|                     json_filter=filter_rule | ||||
|                 ) | ||||
|  | ||||
|             # CSS selectors, default fallback | ||||
|             else: | ||||
|                 filtered_content += html_tools.include_filters( | ||||
|                     include_filters=filter_rule, | ||||
|                     html_content=content, | ||||
|                     append_pretty_line_formatting=not self.watch.is_source_type_url | ||||
|                 ) | ||||
|  | ||||
|         # Raise error if filter returned nothing | ||||
|         if not filtered_content.strip(): | ||||
|             raise FilterNotFoundInResponse( | ||||
|                 msg=self.filter_config.include_filters, | ||||
|                 screenshot=self.fetcher.screenshot, | ||||
|                 xpath_data=self.fetcher.xpath_data | ||||
|             ) | ||||
|  | ||||
|         return filtered_content | ||||
|  | ||||
|     def apply_subtractive_selectors(self, content): | ||||
|         """Remove elements matching subtractive selectors.""" | ||||
|         return html_tools.element_removal(self.filter_config.subtractive_selectors, content) | ||||
|  | ||||
|     def extract_text_from_html(self, html_content, stream_content_type): | ||||
|         """Convert HTML to plain text.""" | ||||
|         do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False) | ||||
|         return html_tools.html_to_text( | ||||
|             html_content=html_content, | ||||
|             render_anchor_tag_content=do_anchor, | ||||
|             is_rss=stream_content_type.is_rss | ||||
|         ) | ||||
|  | ||||
|  | ||||
class ChecksumCalculator:
    """Helper for hashing text content, optionally normalising whitespace first."""

    @staticmethod
    def calculate(text, ignore_whitespace=False):
        """Return the hex MD5 digest of ``text``.

        When ``ignore_whitespace`` is true, whitespace characters are removed
        (via TRANSLATE_WHITESPACE_TABLE) before hashing, so layout-only
        changes produce the same checksum.
        """
        normalized = text.translate(TRANSLATE_WHITESPACE_TABLE) if ignore_whitespace else text
        return hashlib.md5(normalized.encode('utf-8')).hexdigest()
|  | ||||
|  | ||||
| # Some common stuff here that can be moved to a base class | ||||
| # (set_proxy_from_list) | ||||
| class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|     def run_changedetection(self, watch): | ||||
|         changed_detected = False | ||||
|         html_content = "" | ||||
|         screenshot = False  # as bytes | ||||
|         stripped_text_from_html = "" | ||||
|  | ||||
|         if not watch: | ||||
|             raise Exception("Watch no longer exists.") | ||||
|  | ||||
|         # Initialize components | ||||
|         filter_config = FilterConfig(watch, self.datastore) | ||||
|         content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore) | ||||
|         transformer = ContentTransformer() | ||||
|         rule_engine = RuleEngine() | ||||
|  | ||||
|         # Get content type and stream info | ||||
|         ctype_header = self.fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower() | ||||
|         stream_content_type = guess_stream_type(http_content_header=ctype_header, content=self.fetcher.content) | ||||
|  | ||||
|         # Unset any existing notification error | ||||
|         update_obj = {'last_notification_error': False, 'last_error': False} | ||||
|  | ||||
|         url = watch.link | ||||
|  | ||||
|         self.screenshot = self.fetcher.screenshot | ||||
|         self.xpath_data = self.fetcher.xpath_data | ||||
|  | ||||
|         # Track the content type | ||||
|         update_obj['content_type'] = self.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|  | ||||
|         # Watches added automatically in the queue manager will skip if its the same checksum as the previous run | ||||
|         # Saves a lot of CPU | ||||
|         # Track the content type and checksum before filters | ||||
|         update_obj['content_type'] = ctype_header | ||||
|         update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         # Fetching complete, now filters | ||||
|         # === CONTENT PREPROCESSING === | ||||
|         # Avoid creating unnecessary intermediate string copies by reassigning only when needed | ||||
|         content = self.fetcher.content | ||||
|  | ||||
|         # @note: I feel like the following should be in a more obvious chain system | ||||
|         #  - Check filter text | ||||
|         #  - Is the checksum different? | ||||
|         #  - Do we convert to JSON? | ||||
|         # https://stackoverflow.com/questions/41817578/basic-method-chaining ? | ||||
|         # return content().textfilter().jsonextract().checksumcompare() ? | ||||
|         # RSS preprocessing | ||||
|         if stream_content_type.is_rss: | ||||
|             content = content_processor.preprocess_rss(content) | ||||
|             if self.datastore.data["settings"]["application"].get("rss_reader_mode"): | ||||
|                 # Now just becomes regular HTML that can have xpath/CSS applied (first of the set etc) | ||||
|                 stream_content_type.is_rss = False | ||||
|                 stream_content_type.is_html = True | ||||
|                 self.fetcher.content = content | ||||
|  | ||||
|         is_json = 'application/json' in self.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|         is_html = not is_json | ||||
|         is_rss = False | ||||
|         # PDF preprocessing | ||||
|         if watch.is_pdf or stream_content_type.is_pdf: | ||||
|             content = content_processor.preprocess_pdf(raw_content=self.fetcher.raw_content) | ||||
|             stream_content_type.is_html = True | ||||
|  | ||||
|         ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|         # Go into RSS preprocess for converting CDATA/comment to usable text | ||||
|         if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']): | ||||
|             if '<rss' in self.fetcher.content[:100].lower(): | ||||
|                 self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content) | ||||
|                 is_rss = True | ||||
|         # JSON - Always reformat it nicely for consistency. | ||||
|  | ||||
|         # source: support, basically treat it as plaintext | ||||
|         if stream_content_type.is_json: | ||||
|             if not filter_config.has_include_json_filters: | ||||
|                 content = content_processor.preprocess_json(raw_content=content) | ||||
|         #else, otherwise it gets sorted/formatted in the filter stage anyway | ||||
|  | ||||
|         # HTML obfuscation workarounds | ||||
|         if stream_content_type.is_html: | ||||
|             content = html_tools.workarounds_for_obfuscations(content) | ||||
|  | ||||
|         # Check for LD+JSON price data (for HTML content) | ||||
|         if stream_content_type.is_html: | ||||
|             update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(content) | ||||
|  | ||||
|         # === FILTER APPLICATION === | ||||
|         # Start with content reference, avoid copy until modification | ||||
|         html_content = content | ||||
|  | ||||
|         # Apply include filters (CSS, XPath, JSON) | ||||
|         # Except for plaintext (incase they tried to confuse the system, it will HTML escape | ||||
|         #if not stream_content_type.is_plaintext: | ||||
|         if filter_config.has_include_filters: | ||||
|             html_content = content_processor.apply_include_filters(content, stream_content_type) | ||||
|  | ||||
|         # Apply subtractive selectors | ||||
|         if filter_config.has_subtractive_selectors: | ||||
|             html_content = content_processor.apply_subtractive_selectors(html_content) | ||||
|  | ||||
|         # === TEXT EXTRACTION === | ||||
|         if watch.is_source_type_url: | ||||
|             is_html = False | ||||
|             is_json = False | ||||
|  | ||||
|         inline_pdf = self.fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in self.fetcher.content[:10] | ||||
|         if watch.is_pdf or 'application/pdf' in self.fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf: | ||||
|             from shutil import which | ||||
|             tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml") | ||||
|             if not which(tool): | ||||
|                 raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool)) | ||||
|  | ||||
|             import subprocess | ||||
|             proc = subprocess.Popen( | ||||
|                 [tool, '-stdout', '-', '-s', 'out.pdf', '-i'], | ||||
|                 stdout=subprocess.PIPE, | ||||
|                 stdin=subprocess.PIPE) | ||||
|             proc.stdin.write(self.fetcher.raw_content) | ||||
|             proc.stdin.close() | ||||
|             self.fetcher.content = proc.stdout.read().decode('utf-8') | ||||
|             proc.wait(timeout=60) | ||||
|  | ||||
|             # Add a little metadata so we know if the file changes (like if an image changes, but the text is the same | ||||
|             # @todo may cause problems with non-UTF8? | ||||
|             metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format( | ||||
|                 hashlib.md5(self.fetcher.raw_content).hexdigest().upper(), | ||||
|                 len(self.fetcher.content)) | ||||
|  | ||||
|             self.fetcher.content = self.fetcher.content.replace('</body>', metadata + '</body>') | ||||
|  | ||||
|         # Better would be if Watch.model could access the global data also | ||||
|         # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__ | ||||
|         # https://realpython.com/inherit-python-dict/ instead of doing it procedurely | ||||
|         include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters') | ||||
|  | ||||
|         # 1845 - remove duplicated filters in both group and watch include filter | ||||
|         include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags)) | ||||
|  | ||||
|         subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'), | ||||
|                                  *watch.get("subtractive_selectors", []), | ||||
|                                  *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", []) | ||||
|                                  ] | ||||
|  | ||||
|         # Inject a virtual LD+JSON price tracker rule | ||||
|         if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT: | ||||
|             include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS | ||||
|  | ||||
|         has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip()) | ||||
|         has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip()) | ||||
|  | ||||
|         if is_json and not has_filter_rule: | ||||
|             include_filters_rule.append("json:$") | ||||
|             has_filter_rule = True | ||||
|  | ||||
|         if is_json: | ||||
|             # Sort the JSON so we dont get false alerts when the content is just re-ordered | ||||
|             try: | ||||
|                 self.fetcher.content = json.dumps(json.loads(self.fetcher.content), sort_keys=True) | ||||
|             except Exception as e: | ||||
|                 # Might have just been a snippet, or otherwise bad JSON, continue | ||||
|                 pass | ||||
|  | ||||
|         if has_filter_rule: | ||||
|             for filter in include_filters_rule: | ||||
|                 if any(prefix in filter for prefix in json_filter_prefixes): | ||||
|                     stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter) | ||||
|                     is_html = False | ||||
|  | ||||
|         if is_html or watch.is_source_type_url: | ||||
|  | ||||
|             # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text | ||||
|             self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content) | ||||
|             html_content = self.fetcher.content | ||||
|  | ||||
|             # If not JSON,  and if it's not text/plain.. | ||||
|             if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower(): | ||||
|                 # Don't run get_text or xpath/css filters on plaintext | ||||
|                 stripped_text_from_html = html_content | ||||
|             # For source URLs, keep raw content | ||||
|             stripped_text = html_content | ||||
|         elif stream_content_type.is_plaintext: | ||||
|             # For plaintext, keep as-is without HTML-to-text conversion | ||||
|             stripped_text = html_content | ||||
|         else: | ||||
|             # Extract text from HTML/RSS content (not generic XML) | ||||
|             if stream_content_type.is_html or stream_content_type.is_rss: | ||||
|                 stripped_text = content_processor.extract_text_from_html(html_content, stream_content_type) | ||||
|             else: | ||||
|                 # Does it have some ld+json price data? used for easier monitoring | ||||
|                 update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(self.fetcher.content) | ||||
|  | ||||
|                 # Then we assume HTML | ||||
|                 if has_filter_rule: | ||||
|                     html_content = "" | ||||
|  | ||||
|                     for filter_rule in include_filters_rule: | ||||
|                         # For HTML/XML we offer xpath as an option, just start a regular xPath "/.." | ||||
|                         if filter_rule[0] == '/' or filter_rule.startswith('xpath:'): | ||||
|                             html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''), | ||||
|                                                                     html_content=self.fetcher.content, | ||||
|                                                                     append_pretty_line_formatting=not watch.is_source_type_url, | ||||
|                                                                     is_rss=is_rss) | ||||
|  | ||||
|                         elif filter_rule.startswith('xpath1:'): | ||||
|                             html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''), | ||||
|                                                                      html_content=self.fetcher.content, | ||||
|                                                                      append_pretty_line_formatting=not watch.is_source_type_url, | ||||
|                                                                      is_rss=is_rss) | ||||
|                         else: | ||||
|                             html_content += html_tools.include_filters(include_filters=filter_rule, | ||||
|                                                                        html_content=self.fetcher.content, | ||||
|                                                                        append_pretty_line_formatting=not watch.is_source_type_url) | ||||
|  | ||||
|                     if not html_content.strip(): | ||||
|                         raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data) | ||||
|  | ||||
|                 if has_subtractive_selectors: | ||||
|                     html_content = html_tools.element_removal(subtractive_selectors, html_content) | ||||
|  | ||||
|                 if watch.is_source_type_url: | ||||
|                     stripped_text_from_html = html_content | ||||
|                 else: | ||||
|                     # extract text | ||||
|                     do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False) | ||||
|                     stripped_text_from_html = html_tools.html_to_text(html_content=html_content, | ||||
|                                                                       render_anchor_tag_content=do_anchor, | ||||
|                                                                       is_rss=is_rss)  # 1874 activate the <title workaround hack | ||||
|                 stripped_text = html_content | ||||
|  | ||||
|         # === TEXT TRANSFORMATIONS === | ||||
|         if watch.get('trim_text_whitespace'): | ||||
|             stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()) | ||||
|             stripped_text = transformer.trim_whitespace(stripped_text) | ||||
|  | ||||
|         # Re #340 - return the content before the 'ignore text' was applied | ||||
|         # Also used to calculate/show what was removed | ||||
|         text_content_before_ignored_filter = stripped_text_from_html | ||||
|  | ||||
|         # @todo whitespace coming from missing rtrim()? | ||||
|         # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about. | ||||
|         # Rewrite's the processing text based on only what diff result they want to see | ||||
|         # Save text before ignore filters (for diff calculation) | ||||
|         text_content_before_ignored_filter = stripped_text | ||||
|  | ||||
|         # === DIFF FILTERING === | ||||
|         # If user wants specific diff types (added/removed/replaced only) | ||||
|         if watch.has_special_diff_filter_options_set() and len(watch.history.keys()): | ||||
|             # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences | ||||
|             from changedetectionio import diff | ||||
|             # needs to not include (added) etc or it may get used twice | ||||
|             # Replace the processed text with the preferred result | ||||
|             rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(), | ||||
|                                              newest_version_file_contents=stripped_text_from_html, | ||||
|                                              include_equal=False,  # not the same lines | ||||
|                                              include_added=watch.get('filter_text_added', True), | ||||
|                                              include_removed=watch.get('filter_text_removed', True), | ||||
|                                              include_replaced=watch.get('filter_text_replaced', True), | ||||
|                                              line_feed_sep="\n", | ||||
|                                              include_change_type_prefix=False) | ||||
|             stripped_text = self._apply_diff_filtering(watch, stripped_text, text_content_before_ignored_filter) | ||||
|             if stripped_text is None: | ||||
|                 # No differences found, but content exists | ||||
|                 c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True) | ||||
|                 return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8') | ||||
|  | ||||
|             watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8')) | ||||
|  | ||||
|             if not rendered_diff and stripped_text_from_html: | ||||
|                 # We had some content, but no differences were found | ||||
|                 # Store our new file as the MD5 so it will trigger in the future | ||||
|                 c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest() | ||||
|                 return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8') | ||||
|             else: | ||||
|                 stripped_text_from_html = rendered_diff | ||||
|  | ||||
|         # Treat pages with no renderable text content as a change? No by default | ||||
|         # === EMPTY PAGE CHECK === | ||||
|         empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|         if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: | ||||
|             raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url, | ||||
|                                                             status_code=self.fetcher.get_last_status_code(), | ||||
|                                                             screenshot=self.fetcher.screenshot, | ||||
|                                                             has_filters=has_filter_rule, | ||||
|                                                             html_content=html_content, | ||||
|                                                             xpath_data=self.fetcher.xpath_data | ||||
|                                                             ) | ||||
|  | ||||
|         # We rely on the actual text in the html output.. many sites have random script vars etc, | ||||
|         # in the future we'll implement other mechanisms. | ||||
|         if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0: | ||||
|             raise content_fetchers.exceptions.ReplyWithContentButNoText( | ||||
|                 url=url, | ||||
|                 status_code=self.fetcher.get_last_status_code(), | ||||
|                 screenshot=self.fetcher.screenshot, | ||||
|                 has_filters=filter_config.has_include_filters, | ||||
|                 html_content=html_content, | ||||
|                 xpath_data=self.fetcher.xpath_data | ||||
|             ) | ||||
|  | ||||
|         update_obj["last_check_status"] = self.fetcher.get_last_status_code() | ||||
|  | ||||
|         # 615 Extract text by regex | ||||
|         extract_text = watch.get('extract_text', []) | ||||
|         extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text') | ||||
|         if len(extract_text) > 0: | ||||
|             regex_matched_output = [] | ||||
|             for s_re in extract_text: | ||||
|                 # incase they specified something in '/.../x' | ||||
|                 if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE): | ||||
|                     regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re) | ||||
|                     result = re.findall(regex, stripped_text_from_html) | ||||
|  | ||||
|                     for l in result: | ||||
|                         if type(l) is tuple: | ||||
|                             # @todo - some formatter option default (between groups) | ||||
|                             regex_matched_output += list(l) + ['\n'] | ||||
|                         else: | ||||
|                             # @todo - some formatter option default (between each ungrouped result) | ||||
|                             regex_matched_output += [l] + ['\n'] | ||||
|                 else: | ||||
|                     # Doesnt look like regex, just hunt for plaintext and return that which matches | ||||
|                     # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes | ||||
|                     r = re.compile(re.escape(s_re), re.IGNORECASE) | ||||
|                     res = r.findall(stripped_text_from_html) | ||||
|                     if res: | ||||
|                         for match in res: | ||||
|                             regex_matched_output += [match] + ['\n'] | ||||
|  | ||||
|             ########################################################## | ||||
|             stripped_text_from_html = '' | ||||
|  | ||||
|             if regex_matched_output: | ||||
|                 # @todo some formatter for presentation? | ||||
|                 stripped_text_from_html = ''.join(regex_matched_output) | ||||
|         # === REGEX EXTRACTION === | ||||
|         if filter_config.extract_text: | ||||
|             extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text) | ||||
|             stripped_text = extracted | ||||
|  | ||||
|         # === MORE TEXT TRANSFORMATIONS === | ||||
|         if watch.get('remove_duplicate_lines'): | ||||
|             stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())) | ||||
|  | ||||
|             stripped_text = transformer.remove_duplicate_lines(stripped_text) | ||||
|  | ||||
|         if watch.get('sort_text_alphabetically'): | ||||
|             # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap | ||||
|             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here. | ||||
|             stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n") | ||||
|             stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower())) | ||||
|             stripped_text = transformer.sort_alphabetically(stripped_text) | ||||
|  | ||||
| ### CALCULATE MD5 | ||||
|         # If there's text to ignore | ||||
|         text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) | ||||
|         text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text') | ||||
|         # === CHECKSUM CALCULATION === | ||||
|         text_for_checksuming = stripped_text | ||||
|  | ||||
|         text_for_checksuming = stripped_text_from_html | ||||
|         if text_to_ignore: | ||||
|             text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore) | ||||
|         # Apply ignore_text for checksum calculation | ||||
|         if filter_config.ignore_text: | ||||
|             text_for_checksuming = html_tools.strip_ignore_text(stripped_text, filter_config.ignore_text) | ||||
|  | ||||
|         # Re #133 - if we should strip whitespaces from triggering the change detected comparison | ||||
|         if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False): | ||||
|             fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest() | ||||
|         else: | ||||
|             fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest() | ||||
|             # Optionally remove ignored lines from output | ||||
|             strip_ignored_lines = watch.get('strip_ignored_lines') | ||||
|             if strip_ignored_lines is None: | ||||
|                 strip_ignored_lines = self.datastore.data['settings']['application'].get('strip_ignored_lines') | ||||
|             if strip_ignored_lines: | ||||
|                 stripped_text = text_for_checksuming | ||||
|  | ||||
|         ############ Blocking rules, after checksum ################# | ||||
|         # Calculate checksum | ||||
|         ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace', False) | ||||
|         fetched_md5 = ChecksumCalculator.calculate(text_for_checksuming, ignore_whitespace=ignore_whitespace) | ||||
|  | ||||
|         # === BLOCKING RULES EVALUATION === | ||||
|         blocked = False | ||||
|         trigger_text = watch.get('trigger_text', []) | ||||
|         trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text') | ||||
|         if len(trigger_text): | ||||
|             # Assume blocked | ||||
|  | ||||
|         # Check trigger_text | ||||
|         if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text): | ||||
|             blocked = True | ||||
|             # Filter and trigger works the same, so reuse it | ||||
|             # It should return the line numbers that match | ||||
|             # Unblock flow if the trigger was found (some text remained after stripped what didnt match) | ||||
|             result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), | ||||
|                                                   wordlist=trigger_text, | ||||
|                                                   mode="line numbers") | ||||
|             # Unblock if the trigger was found | ||||
|             if result: | ||||
|                 blocked = False | ||||
|  | ||||
|         text_should_not_be_present = watch.get('text_should_not_be_present', []) | ||||
|         text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present') | ||||
|         if len(text_should_not_be_present): | ||||
|             # If anything matched, then we should block a change from happening | ||||
|             result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), | ||||
|                                                   wordlist=text_should_not_be_present, | ||||
|                                                   mode="line numbers") | ||||
|             if result: | ||||
|                 blocked = True | ||||
|         # Check text_should_not_be_present | ||||
|         if rule_engine.evaluate_text_should_not_be_present(stripped_text, filter_config.text_should_not_be_present): | ||||
|             blocked = True | ||||
|  | ||||
|         # And check if 'conditions' will let this pass through | ||||
|         if watch.get('conditions') and watch.get('conditions_match_logic'): | ||||
|             conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'), | ||||
|                                                                     application_datastruct=self.datastore.data, | ||||
|                                                                     ephemeral_data={ | ||||
|                                                                         'text': stripped_text_from_html | ||||
|                                                                     } | ||||
|                                                                     ) | ||||
|         # Check custom conditions | ||||
|         if rule_engine.evaluate_conditions(watch, self.datastore, stripped_text): | ||||
|             blocked = True | ||||
|  | ||||
|             if not conditions_result.get('result'): | ||||
|                 # Conditions say "Condition not met" so we block it. | ||||
|                 blocked = True | ||||
|  | ||||
|         # Looks like something changed, but did it match all the rules? | ||||
|         # === CHANGE DETECTION === | ||||
|         if blocked: | ||||
|             changed_detected = False | ||||
|         else: | ||||
|             # The main thing that all this at the moment comes down to :) | ||||
|             # Compare checksums | ||||
|             if watch.get('previous_md5') != fetched_md5: | ||||
|                 changed_detected = True | ||||
|  | ||||
|             # Always record the new checksum | ||||
|             update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|             # On the first run of a site, watch['previous_md5'] will be None, set it the current one. | ||||
|             # On first run, initialize previous_md5 | ||||
|             if not watch.get('previous_md5'): | ||||
|                 watch['previous_md5'] = fetched_md5 | ||||
|  | ||||
|         logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") | ||||
|  | ||||
|         if changed_detected: | ||||
|             if watch.get('check_unique_lines', False): | ||||
|                 ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace') | ||||
|         # === UNIQUE LINES CHECK === | ||||
|         if changed_detected and watch.get('check_unique_lines', False): | ||||
|             has_unique_lines = watch.lines_contain_something_unique_compared_to_history( | ||||
|                 lines=stripped_text.splitlines(), | ||||
|                 ignore_whitespace=ignore_whitespace | ||||
|             ) | ||||
|  | ||||
|                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history( | ||||
|                     lines=stripped_text_from_html.splitlines(), | ||||
|                     ignore_whitespace=ignore_whitespace | ||||
|                 ) | ||||
|             if not has_unique_lines: | ||||
|                 logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False") | ||||
|                 changed_detected = False | ||||
|             else: | ||||
|                 logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content") | ||||
|  | ||||
|                 # One or more lines? unsure? | ||||
|                 if not has_unique_lines: | ||||
|                     logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False") | ||||
|                     changed_detected = False | ||||
|                 else: | ||||
|                     logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content") | ||||
|         # Note: Explicit cleanup is only needed here because text_json_diff handles | ||||
|         # large strings (100KB-300KB for RSS/HTML). The other processors work with | ||||
|         # small strings and don't need this. | ||||
|         # | ||||
|         # Python would clean these up automatically, but explicit `del` frees memory | ||||
|         # immediately rather than waiting for function return, reducing peak memory usage. | ||||
|         del content | ||||
|         if 'html_content' in locals() and html_content is not stripped_text: | ||||
|             del html_content | ||||
|         if 'text_content_before_ignored_filter' in locals() and text_content_before_ignored_filter is not stripped_text: | ||||
|             del text_content_before_ignored_filter | ||||
|         if 'text_for_checksuming' in locals() and text_for_checksuming is not stripped_text: | ||||
|             del text_for_checksuming | ||||
|  | ||||
|         return changed_detected, update_obj, stripped_text | ||||
|  | ||||
|         # stripped_text_from_html - Everything after filters and NO 'ignored' content | ||||
|         return changed_detected, update_obj, stripped_text_from_html | ||||
|     def _apply_diff_filtering(self, watch, stripped_text, text_before_filter): | ||||
|         """Apply user's diff filtering preferences (show only added/removed/replaced lines).""" | ||||
|         from changedetectionio import diff | ||||
|  | ||||
|         rendered_diff = diff.render_diff( | ||||
|             previous_version_file_contents=watch.get_last_fetched_text_before_filters(), | ||||
|             newest_version_file_contents=stripped_text, | ||||
|             include_equal=False, | ||||
|             include_added=watch.get('filter_text_added', True), | ||||
|             include_removed=watch.get('filter_text_removed', True), | ||||
|             include_replaced=watch.get('filter_text_replaced', True), | ||||
|             line_feed_sep="\n", | ||||
|             include_change_type_prefix=False | ||||
|         ) | ||||
|  | ||||
|         watch.save_last_text_fetched_before_filters(text_before_filter.encode('utf-8')) | ||||
|  | ||||
|         if not rendered_diff and stripped_text: | ||||
|             # No differences found | ||||
|             return None | ||||
|  | ||||
|         return rendered_diff | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| [pytest] | ||||
| addopts = --no-start-live-server --live-server-port=5005 | ||||
| addopts = --no-start-live-server --live-server-port=0 | ||||
| #testpaths = tests pytest_invenio | ||||
| #live_server_scope = function | ||||
|  | ||||
|   | ||||
							
								
								
									
										435
									
								
								changedetectionio/queue_handlers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										435
									
								
								changedetectionio/queue_handlers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,435 @@ | ||||
| from blinker import signal | ||||
| from loguru import logger | ||||
| from typing import Dict, List, Any, Optional | ||||
| import heapq | ||||
| import queue | ||||
| import threading | ||||
|  | ||||
| try: | ||||
|     import janus | ||||
| except ImportError: | ||||
|     logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus") | ||||
|     raise | ||||
|  | ||||
|  | ||||
class RecheckPriorityQueue:
    """
    Ultra-reliable priority queue using janus for async/sync bridging.

    CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
    - sync_q: Used by Flask routes, ticker threads, and other synchronous code
    - async_q: Used by async workers (the actual fetchers/processors) and coroutines

    DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
    - Synchronous code (Flask, threads) cannot use async methods without blocking
    - Async code cannot use sync methods without blocking the event loop
    - janus provides the only safe bridge between these two worlds

    Attempting to unify to async-only would require:
    - Converting all Flask routes to async (major breaking change)
    - Using asyncio.run() in sync contexts (causes deadlocks)
    - Thread-pool wrapping (adds complexity and overhead)

    Implementation note: the janus queue only carries notification tokens;
    the real items live in the heap ``_priority_items`` guarded by ``_lock``,
    which is what provides priority ordering on top of janus' FIFO.
    """

    def __init__(self, maxsize: int = 0):
        try:
            self._janus_queue = janus.Queue(maxsize=maxsize)
            # BOTH interfaces required - see class docstring for why
            self.sync_q = self._janus_queue.sync_q   # Flask routes, ticker thread
            self.async_q = self._janus_queue.async_q # Async workers

            # Priority storage - thread-safe via _lock
            self._priority_items = []
            self._lock = threading.RLock()

            # Signals for UI updates
            self.queue_length_signal = signal('queue_length')

            logger.debug("RecheckPriorityQueue initialized successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
            raise

    # SYNC INTERFACE (for ticker thread)
    def put(self, item, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync put with priority ordering.

        Returns True on success, False on failure; on failure the item is
        rolled back out of priority storage so heap and notification counts
        stay consistent.
        """
        try:
            # Add to priority storage
            with self._lock:
                heapq.heappush(self._priority_items, item)

            # Notify via janus sync queue (token only - see class docstring)
            self.sync_q.put(True, block=block, timeout=timeout)

            # Emit signals
            self._emit_put_signals(item)

            logger.debug(f"Successfully queued item: {self._get_item_uuid(item)}")
            return True

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}")
            # Remove from priority storage if janus put failed
            self._rollback_priority_item(item)
            return False

    def get(self, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync get with priority ordering.

        Raises queue.Empty when non-blocking/timed-out with nothing queued
        (normal condition, not logged as critical).
        """
        try:
            # Wait for notification token
            self.sync_q.get(block=block, timeout=timeout)

            # Get highest priority item
            with self._lock:
                if not self._priority_items:
                    logger.critical("CRITICAL: Queue notification received but no priority items available")
                    raise Exception("Priority queue inconsistency")
                item = heapq.heappop(self._priority_items)

            # Emit signals
            self._emit_get_signals()

            logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
            return item

        except queue.Empty:
            # Expected when polling with block=False or a timeout - re-raise
            # without the critical log (matches NotificationQueue.get).
            raise
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get item from queue: {str(e)}")
            raise

    # ASYNC INTERFACE (for workers)
    async def async_put(self, item):
        """Pure async put with priority ordering; mirrors put()."""
        try:
            # Add to priority storage
            with self._lock:
                heapq.heappush(self._priority_items, item)

            # Notify via janus async queue (token only)
            await self.async_q.put(True)

            # Emit signals
            self._emit_put_signals(item)

            logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
            return True

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
            # Remove from priority storage if janus put failed
            self._rollback_priority_item(item)
            return False

    async def async_get(self):
        """Pure async get with priority ordering; mirrors get()."""
        try:
            # Wait for notification token
            await self.async_q.get()

            # Get highest priority item
            with self._lock:
                if not self._priority_items:
                    logger.critical("CRITICAL: Async queue notification received but no priority items available")
                    raise Exception("Priority queue inconsistency")
                item = heapq.heappop(self._priority_items)

            # Emit signals
            self._emit_get_signals()

            logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
            return item

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}")
            raise

    # UTILITY METHODS
    def qsize(self) -> int:
        """Get current queue size"""
        try:
            with self._lock:
                return len(self._priority_items)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue size: {str(e)}")
            return 0

    def empty(self) -> bool:
        """Check if queue is empty"""
        return self.qsize() == 0

    def close(self):
        """Close the janus queue"""
        try:
            self._janus_queue.close()
            logger.debug("RecheckPriorityQueue closed successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")

    # COMPATIBILITY METHODS (from original implementation)
    @property
    def queue(self):
        """Provide compatibility with original queue access (snapshot copy)."""
        try:
            with self._lock:
                return list(self._priority_items)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue list: {str(e)}")
            return []

    def get_uuid_position(self, target_uuid: str) -> Dict[str, Any]:
        """Find position of UUID in queue.

        Position is the count of queued items with strictly higher priority
        (lower numeric value) than the target item.
        """
        try:
            with self._lock:
                queue_list = list(self._priority_items)
                total_items = len(queue_list)

                if total_items == 0:
                    return {'position': None, 'total_items': 0, 'priority': None, 'found': False}

                # Find target item
                for item in queue_list:
                    if (hasattr(item, 'item') and isinstance(item.item, dict) and
                        item.item.get('uuid') == target_uuid):

                        # Count items with higher priority (lower number = higher)
                        position = sum(1 for other in queue_list if other.priority < item.priority)
                        return {
                            'position': position,
                            'total_items': total_items,
                            'priority': item.priority,
                            'found': True
                        }

                return {'position': None, 'total_items': total_items, 'priority': None, 'found': False}

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get UUID position for {target_uuid}: {str(e)}")
            return {'position': None, 'total_items': 0, 'priority': None, 'found': False}

    def get_all_queued_uuids(self, limit: Optional[int] = None, offset: int = 0) -> Dict[str, Any]:
        """Get all queued UUIDs with pagination (limit=None/0 returns all from offset)."""
        try:
            with self._lock:
                queue_list = sorted(self._priority_items)  # Sort by priority
                total_items = len(queue_list)

                if total_items == 0:
                    return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}

                # Apply pagination
                end_idx = min(offset + limit, total_items) if limit else total_items
                items_to_process = queue_list[offset:end_idx]

                result = []
                for position, item in enumerate(items_to_process, start=offset):
                    if (hasattr(item, 'item') and isinstance(item.item, dict) and
                        'uuid' in item.item):
                        result.append({
                            'uuid': item.item['uuid'],
                            'position': position,
                            'priority': item.priority
                        })

                return {
                    'items': result,
                    'total_items': total_items,
                    'returned_items': len(result),
                    'has_more': (offset + len(result)) < total_items
                }

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get all queued UUIDs: {str(e)}")
            return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}

    def get_queue_summary(self) -> Dict[str, Any]:
        """Get queue summary statistics.

        Priority buckets: 1 = immediate recheck, 5 = clone, >100 = scheduled
        (presumably timestamp-based - matches how callers assign priorities).
        """
        try:
            with self._lock:
                queue_list = list(self._priority_items)
                total_items = len(queue_list)

                if total_items == 0:
                    return {
                        'total_items': 0, 'priority_breakdown': {},
                        'immediate_items': 0, 'clone_items': 0, 'scheduled_items': 0
                    }

                immediate_items = clone_items = scheduled_items = 0
                priority_counts = {}

                for item in queue_list:
                    priority = item.priority
                    priority_counts[priority] = priority_counts.get(priority, 0) + 1

                    if priority == 1:
                        immediate_items += 1
                    elif priority == 5:
                        clone_items += 1
                    elif priority > 100:
                        scheduled_items += 1

                return {
                    'total_items': total_items,
                    'priority_breakdown': priority_counts,
                    'immediate_items': immediate_items,
                    'clone_items': clone_items,
                    'scheduled_items': scheduled_items,
                    'min_priority': min(priority_counts.keys()) if priority_counts else None,
                    'max_priority': max(priority_counts.keys()) if priority_counts else None
                }

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue summary: {str(e)}")
            return {'total_items': 0, 'priority_breakdown': {}, 'immediate_items': 0,
                   'clone_items': 0, 'scheduled_items': 0}

    # PRIVATE METHODS
    def _rollback_priority_item(self, item):
        """Best-effort removal of an item from priority storage after a failed put.

        Shared by put() and async_put() so the rollback logic stays identical.
        """
        try:
            with self._lock:
                if item in self._priority_items:
                    self._priority_items.remove(item)
                    heapq.heapify(self._priority_items)
        except Exception as cleanup_e:
            # Log the *cleanup* failure (the original bug logged the outer
            # exception here, hiding what actually went wrong during rollback).
            logger.critical(f"CRITICAL: Failed to cleanup after put failure: {str(cleanup_e)}")

    def _get_item_uuid(self, item) -> str:
        """Safely extract UUID from item for logging"""
        try:
            if hasattr(item, 'item') and isinstance(item.item, dict):
                return item.item.get('uuid', 'unknown')
        except Exception:
            pass
        return 'unknown'

    def _emit_put_signals(self, item):
        """Emit signals when item is added"""
        try:
            # Watch update signal
            if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
                watch_check_update = signal('watch_check_update')
                if watch_check_update:
                    watch_check_update.send(watch_uuid=item.item['uuid'])

            # Queue length signal
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}")

    def _emit_get_signals(self):
        """Emit signals when item is removed"""
        try:
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit get signals: {str(e)}")
|  | ||||
class NotificationQueue:
    """
    Ultra-reliable notification queue using pure janus.

    CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
    - sync_q: Used by Flask routes, ticker threads, and other synchronous code
    - async_q: Used by async workers and coroutines

    DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts.
    See RecheckPriorityQueue docstring above for detailed explanation.

    Simple wrapper around janus with bulletproof error handling; emits the
    'notification_event' blinker signal on every successful put.
    """

    def __init__(self, maxsize: int = 0):
        try:
            self._janus_queue = janus.Queue(maxsize=maxsize)
            # BOTH interfaces required - see class docstring for why
            self.sync_q = self._janus_queue.sync_q   # Flask routes, threads
            self.async_q = self._janus_queue.async_q # Async workers
            self.notification_event_signal = signal('notification_event')
            logger.debug("NotificationQueue initialized successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(e)}")
            raise

    def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync put with signal emission; returns True/False."""
        try:
            self.sync_q.put(item, block=block, timeout=timeout)
            self._emit_notification_signal(item)
            logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
            return True
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(e)}")
            return False

    async def async_put(self, item: Dict[str, Any]):
        """Pure async put with signal emission; returns True/False."""
        try:
            await self.async_q.put(item)
            self._emit_notification_signal(item)
            logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
            return True
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(e)}")
            return False

    def get(self, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync get.

        Raises queue.Empty when non-blocking/timed-out with nothing queued
        (normal condition, not logged as critical).
        """
        try:
            return self.sync_q.get(block=block, timeout=timeout)
        except queue.Empty:
            # Bare raise preserves the original traceback unchanged.
            raise
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get notification: {str(e)}")
            raise

    async def async_get(self):
        """Pure async get (awaits until an item is available)."""
        try:
            return await self.async_q.get()
        except queue.Empty:
            # NOTE(review): a blocking async get shouldn't raise Empty; branch
            # kept for safety/compatibility with the original behavior.
            raise
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async get notification: {str(e)}")
            raise

    def qsize(self) -> int:
        """Get current queue size"""
        try:
            return self.sync_q.qsize()
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get notification queue size: {str(e)}")
            return 0

    def empty(self) -> bool:
        """Check if queue is empty"""
        return self.qsize() == 0

    def close(self):
        """Close the janus queue"""
        try:
            self._janus_queue.close()
            logger.debug("NotificationQueue closed successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(e)}")

    def _emit_notification_signal(self, item: Dict[str, Any]):
        """Emit notification signal, with the watch UUID when one is present."""
        try:
            if self.notification_event_signal and isinstance(item, dict):
                watch_uuid = item.get('uuid')
                if watch_uuid:
                    self.notification_event_signal.send(watch_uuid=watch_uuid)
                else:
                    self.notification_event_signal.send()
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit notification signal: {str(e)}")
| @@ -29,6 +29,9 @@ class SignalHandler: | ||||
|         watch_delete_signal = signal('watch_deleted') | ||||
|         watch_delete_signal.connect(self.handle_deleted_signal, weak=False) | ||||
|  | ||||
|         watch_favicon_bumped_signal = signal('watch_favicon_bump') | ||||
|         watch_favicon_bumped_signal.connect(self.handle_watch_bumped_favicon_signal, weak=False) | ||||
|  | ||||
|         # Connect to the notification_event signal | ||||
|         notification_event_signal = signal('notification_event') | ||||
|         notification_event_signal.connect(self.handle_notification_event, weak=False) | ||||
| @@ -37,7 +40,7 @@ class SignalHandler: | ||||
|         # Create and start the queue update thread using standard threading | ||||
|         import threading | ||||
|         self.polling_emitter_thread = threading.Thread( | ||||
|             target=self.polling_emit_running_or_queued_watches_threaded,  | ||||
|             target=self.polling_emit_running_or_queued_watches_threaded, | ||||
|             daemon=True | ||||
|         ) | ||||
|         self.polling_emitter_thread.start() | ||||
| @@ -69,6 +72,16 @@ class SignalHandler: | ||||
|             else: | ||||
|                 logger.warning(f"Watch UUID {watch_uuid} not found in datastore") | ||||
|  | ||||
|     def handle_watch_bumped_favicon_signal(self, *args, **kwargs): | ||||
|         watch_uuid = kwargs.get('watch_uuid') | ||||
|         if watch_uuid: | ||||
|             # Emit the queue size to all connected clients | ||||
|             self.socketio_instance.emit("watch_bumped_favicon", { | ||||
|                 "uuid": watch_uuid, | ||||
|                 "event_timestamp": time.time() | ||||
|             }) | ||||
|         logger.debug(f"Watch UUID {watch_uuid} got its favicon updated") | ||||
|  | ||||
|     def handle_deleted_signal(self, *args, **kwargs): | ||||
|         watch_uuid = kwargs.get('watch_uuid') | ||||
|         if watch_uuid: | ||||
| @@ -105,39 +118,38 @@ class SignalHandler: | ||||
|                 "watch_uuid": watch_uuid, | ||||
|                 "event_timestamp": time.time() | ||||
|             }) | ||||
|              | ||||
|  | ||||
|             logger.trace(f"Socket.IO: Emitted notification_event for watch UUID {watch_uuid}") | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Socket.IO error in handle_notification_event: {str(e)}") | ||||
|  | ||||
|  | ||||
|     def polling_emit_running_or_queued_watches_threaded(self): | ||||
|         """Threading version of polling for Windows compatibility""" | ||||
|         import time | ||||
|         import threading | ||||
|         logger.info("Queue update thread started (threading mode)") | ||||
|          | ||||
|  | ||||
|         # Import here to avoid circular imports | ||||
|         from changedetectionio.flask_app import app | ||||
|         from changedetectionio import worker_handler | ||||
|         watch_check_update = signal('watch_check_update') | ||||
|          | ||||
|  | ||||
|         # Track previous state to avoid unnecessary emissions | ||||
|         previous_running_uuids = set() | ||||
|          | ||||
|  | ||||
|         # Run until app shutdown - check exit flag more frequently for fast shutdown | ||||
|         exit_event = getattr(app.config, 'exit', threading.Event()) | ||||
|          | ||||
|  | ||||
|         while not exit_event.is_set(): | ||||
|             try: | ||||
|                 # Get current running UUIDs from async workers | ||||
|                 running_uuids = set(worker_handler.get_running_uuids()) | ||||
|                  | ||||
|  | ||||
|                 # Only send updates for UUIDs that changed state | ||||
|                 newly_running = running_uuids - previous_running_uuids | ||||
|                 no_longer_running = previous_running_uuids - running_uuids | ||||
|                  | ||||
|  | ||||
|                 # Send updates for newly running UUIDs (but exit fast if shutdown requested) | ||||
|                 for uuid in newly_running: | ||||
|                     if exit_event.is_set(): | ||||
| @@ -146,7 +158,7 @@ class SignalHandler: | ||||
|                     with app.app_context(): | ||||
|                         watch_check_update.send(app_context=app, watch_uuid=uuid) | ||||
|                     time.sleep(0.01)  # Small yield | ||||
|                  | ||||
|  | ||||
|                 # Send updates for UUIDs that finished processing (but exit fast if shutdown requested) | ||||
|                 if not exit_event.is_set(): | ||||
|                     for uuid in no_longer_running: | ||||
| @@ -156,16 +168,16 @@ class SignalHandler: | ||||
|                         with app.app_context(): | ||||
|                             watch_check_update.send(app_context=app, watch_uuid=uuid) | ||||
|                         time.sleep(0.01)  # Small yield | ||||
|                  | ||||
|  | ||||
|                 # Update tracking for next iteration | ||||
|                 previous_running_uuids = running_uuids | ||||
|                  | ||||
|  | ||||
|                 # Sleep between polling cycles, but check exit flag every 0.5 seconds for fast shutdown | ||||
|                 for _ in range(20):  # 20 * 0.5 = 10 seconds total | ||||
|                     if exit_event.is_set(): | ||||
|                         break | ||||
|                     time.sleep(0.5) | ||||
|                  | ||||
|  | ||||
|             except Exception as e: | ||||
|                 logger.error(f"Error in threading polling: {str(e)}") | ||||
|                 # Even during error recovery, check for exit quickly | ||||
| @@ -173,11 +185,11 @@ class SignalHandler: | ||||
|                     if exit_event.is_set(): | ||||
|                         break | ||||
|                     time.sleep(0.5) | ||||
|          | ||||
|  | ||||
|         # Check if we're in pytest environment - if so, be more gentle with logging | ||||
|         import sys | ||||
|         in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ | ||||
|          | ||||
|  | ||||
|         if not in_pytest: | ||||
|             logger.info("Queue update thread stopped (threading mode)") | ||||
|  | ||||
| @@ -208,20 +220,20 @@ def handle_watch_update(socketio, **kwargs): | ||||
|  | ||||
|         watch_data = { | ||||
|             'checking_now': True if watch.get('uuid') in running_uuids else False, | ||||
|             'error_text': error_texts, | ||||
|             'event_timestamp': time.time(), | ||||
|             'fetch_time': watch.get('fetch_time'), | ||||
|             'has_error': True if error_texts else False, | ||||
|             'last_changed': watch.get('last_changed'), | ||||
|             'last_checked': watch.get('last_checked'), | ||||
|             'error_text': error_texts, | ||||
|             'has_favicon': True if watch.get_favicon_filename() else False, | ||||
|             'history_n': watch.history_n, | ||||
|             'last_checked_text': _jinja2_filter_datetime(watch), | ||||
|             'last_changed_text': timeago.format(int(watch.last_changed), time.time()) if watch.history_n >= 2 and int(watch.last_changed) > 0 else 'Not yet', | ||||
|             'queued': True if watch.get('uuid') in queue_list else False, | ||||
|             'paused': True if watch.get('paused') else False, | ||||
|             'last_checked': watch.get('last_checked'), | ||||
|             'last_checked_text': _jinja2_filter_datetime(watch), | ||||
|             'notification_muted': True if watch.get('notification_muted') else False, | ||||
|             'paused': True if watch.get('paused') else False, | ||||
|             'queued': True if watch.get('uuid') in queue_list else False, | ||||
|             'unviewed': watch.has_unviewed, | ||||
|             'uuid': watch.get('uuid'), | ||||
|             'event_timestamp': time.time() | ||||
|         } | ||||
|  | ||||
|         errored_count = 0 | ||||
| @@ -231,14 +243,15 @@ def handle_watch_update(socketio, **kwargs): | ||||
|  | ||||
|         general_stats = { | ||||
|             'count_errors': errored_count, | ||||
|             'has_unviewed': datastore.has_unviewed | ||||
|             'unread_changes_count': datastore.unread_changes_count | ||||
|         } | ||||
|  | ||||
|         # Debug what's being emitted | ||||
|         # logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}") | ||||
|  | ||||
|         # Emit to all clients (no 'broadcast' parameter needed - it's the default behavior) | ||||
|         socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats}) | ||||
|         socketio.emit("watch_update", {'watch': watch_data}) | ||||
|         socketio.emit("general_stats_update", general_stats) | ||||
|  | ||||
|         # Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues | ||||
|         logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}") | ||||
| @@ -251,15 +264,15 @@ def init_socketio(app, datastore): | ||||
|     """Initialize SocketIO with the main Flask app""" | ||||
|     import platform | ||||
|     import sys | ||||
|      | ||||
|  | ||||
|     # Platform-specific async_mode selection for better stability | ||||
|     system = platform.system().lower() | ||||
|     python_version = sys.version_info | ||||
|      | ||||
|  | ||||
|     # Check for SocketIO mode configuration via environment variable | ||||
|     # Default is 'threading' for best cross-platform compatibility | ||||
|     socketio_mode = os.getenv('SOCKETIO_MODE', 'threading').lower() | ||||
|      | ||||
|  | ||||
|     if socketio_mode == 'gevent': | ||||
|         # Use gevent mode (higher concurrency but platform limitations) | ||||
|         try: | ||||
| @@ -277,7 +290,7 @@ def init_socketio(app, datastore): | ||||
|         # Invalid mode specified, use default | ||||
|         async_mode = 'threading' | ||||
|         logger.warning(f"Invalid SOCKETIO_MODE='{socketio_mode}', using default {async_mode} mode for Socket.IO") | ||||
|      | ||||
|  | ||||
|     # Log platform info for debugging | ||||
|     logger.info(f"Platform: {system}, Python: {python_version.major}.{python_version.minor}, Socket.IO mode: {async_mode}") | ||||
|  | ||||
| @@ -315,7 +328,6 @@ def init_socketio(app, datastore): | ||||
|             emit_flash=False | ||||
|         ) | ||||
|  | ||||
|  | ||||
|     @socketio.on('connect') | ||||
|     def handle_connect(): | ||||
|         """Handle client connection""" | ||||
| @@ -393,4 +405,4 @@ def init_socketio(app, datastore): | ||||
|  | ||||
|     logger.info("Socket.IO initialized and attached to main Flask app") | ||||
|     logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}") | ||||
|     return socketio | ||||
|     return socketio | ||||
							
								
								
									
										130
									
								
								changedetectionio/rss_tools.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										130
									
								
								changedetectionio/rss_tools.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,130 @@ | ||||
| """ | ||||
| RSS/Atom feed processing tools for changedetection.io | ||||
| """ | ||||
|  | ||||
| from loguru import logger | ||||
| import re | ||||
|  | ||||
|  | ||||
| def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str: | ||||
|     """ | ||||
|     Process CDATA sections in HTML/XML content - inline replacement. | ||||
|  | ||||
|     Args: | ||||
|         html_content: The HTML/XML content to process | ||||
|         render_anchor_tag_content: Whether to render anchor tag content | ||||
|  | ||||
|     Returns: | ||||
|         Processed HTML/XML content with CDATA sections replaced inline | ||||
|     """ | ||||
|     from xml.sax.saxutils import escape as xml_escape | ||||
|     from .html_tools import html_to_text | ||||
|  | ||||
|     pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>' | ||||
|  | ||||
|     def repl(m): | ||||
|         text = m.group(1) | ||||
|         return xml_escape(html_to_text(html_content=text, render_anchor_tag_content=render_anchor_tag_content)).strip() | ||||
|  | ||||
|     return re.sub(pattern, repl, html_content) | ||||
|  | ||||
|  | ||||
| def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str: | ||||
|     """ | ||||
|     Format RSS/Atom feed items in a readable text format using feedparser. | ||||
|  | ||||
|     Converts RSS <item> or Atom <entry> elements to formatted text with: | ||||
|     - <title> → <h1>Title</h1> | ||||
|     - <link> → Link: [url] | ||||
|     - <guid> → Guid: [id] | ||||
|     - <pubDate> → PubDate: [date] | ||||
|     - <description> or <content> → Raw HTML content (CDATA and entities automatically handled) | ||||
|  | ||||
|     Args: | ||||
|         rss_content: The RSS/Atom feed content | ||||
|         render_anchor_tag_content: Whether to render anchor tag content in descriptions (unused, kept for compatibility) | ||||
|  | ||||
|     Returns: | ||||
|         Formatted HTML content ready for html_to_text conversion | ||||
|     """ | ||||
|     try: | ||||
|         import feedparser | ||||
|         from xml.sax.saxutils import escape as xml_escape | ||||
|  | ||||
|         # Parse the feed - feedparser handles all RSS/Atom variants, CDATA, entity unescaping, etc. | ||||
|         feed = feedparser.parse(rss_content) | ||||
|  | ||||
|         formatted_items = [] | ||||
|  | ||||
|         # Determine feed type for appropriate labels when fields are missing | ||||
|         # feedparser sets feed.version to things like 'rss20', 'atom10', etc. | ||||
|         is_atom = feed.version and 'atom' in feed.version | ||||
|  | ||||
|         for entry in feed.entries: | ||||
|             item_parts = [] | ||||
|  | ||||
|             # Title - feedparser handles CDATA and entity unescaping automatically | ||||
|             if hasattr(entry, 'title') and entry.title: | ||||
|                 item_parts.append(f'<h1>{xml_escape(entry.title)}</h1>') | ||||
|  | ||||
|             # Link | ||||
|             if hasattr(entry, 'link') and entry.link: | ||||
|                 item_parts.append(f'Link: {xml_escape(entry.link)}<br>') | ||||
|  | ||||
|             # GUID/ID | ||||
|             if hasattr(entry, 'id') and entry.id: | ||||
|                 item_parts.append(f'Guid: {xml_escape(entry.id)}<br>') | ||||
|  | ||||
|             # Date - feedparser normalizes all date field names to 'published' | ||||
|             if hasattr(entry, 'published') and entry.published: | ||||
|                 item_parts.append(f'PubDate: {xml_escape(entry.published)}<br>') | ||||
|  | ||||
|             # Description/Content - feedparser handles CDATA and entity unescaping automatically | ||||
|             # Only add "Summary:" label for Atom <summary> tags | ||||
|             content = None | ||||
|             add_label = False | ||||
|  | ||||
|             if hasattr(entry, 'content') and entry.content: | ||||
|                 # Atom <content> - no label, just content | ||||
|                 content = entry.content[0].value if entry.content[0].value else None | ||||
|             elif hasattr(entry, 'summary'): | ||||
|                 # Could be RSS <description> or Atom <summary> | ||||
|                 # feedparser maps both to entry.summary | ||||
|                 content = entry.summary if entry.summary else None | ||||
|                 # Only add "Summary:" label for Atom feeds (which use <summary> tag) | ||||
|                 if is_atom: | ||||
|                     add_label = True | ||||
|  | ||||
|             # Add content with or without label | ||||
|             if content: | ||||
|                 if add_label: | ||||
|                     item_parts.append(f'Summary:<br>{content}') | ||||
|                 else: | ||||
|                     item_parts.append(content) | ||||
|             else: | ||||
|                 # No content - just show <none> | ||||
|                 item_parts.append('<none>') | ||||
|  | ||||
|             # Join all parts of this item | ||||
|             if item_parts: | ||||
|                 formatted_items.append('\n'.join(item_parts)) | ||||
|  | ||||
|         # Wrap each item in a div with classes (first, last, item-N) | ||||
|         items_html = [] | ||||
|         total_items = len(formatted_items) | ||||
|         for idx, item in enumerate(formatted_items): | ||||
|             classes = ['rss-item'] | ||||
|             if idx == 0: | ||||
|                 classes.append('first') | ||||
|             if idx == total_items - 1: | ||||
|                 classes.append('last') | ||||
|             classes.append(f'item-{idx + 1}') | ||||
|  | ||||
|             class_str = ' '.join(classes) | ||||
|             items_html.append(f'<div class="{class_str}">{item}</div>') | ||||
|         return '<html><body>\n'+"\n<br><br>".join(items_html)+'\n</body></html>' | ||||
|  | ||||
|     except Exception as e: | ||||
|         logger.warning(f"Error formatting RSS items: {str(e)}") | ||||
|         # Fall back to original content | ||||
|         return rss_content | ||||
| @@ -11,32 +11,29 @@ set -e | ||||
|  | ||||
| SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||||
|  | ||||
| find tests/test_*py -type f|while read test_name | ||||
| do | ||||
|   echo "TEST RUNNING $test_name" | ||||
|   # REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser | ||||
|   REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name | ||||
| done | ||||
| # REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -n 30 --dist load  tests/test_*.py | ||||
|  | ||||
| #time pytest -n auto --dist loadfile -vv --tb=long tests/test_*.py | ||||
| echo "RUNNING WITH BASE_URL SET" | ||||
|  | ||||
| # Now re-run some tests with BASE_URL enabled | ||||
| # Re #65 - Ability to include a link back to the installation, in the notification. | ||||
| export BASE_URL="https://really-unique-domain.io" | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv --maxfail=1 tests/test_notification.py | ||||
|  | ||||
|  | ||||
| # Re-run with HIDE_REFERER set - could affect login | ||||
| export HIDE_REFERER=True | ||||
| pytest tests/test_access_control.py | ||||
| pytest -vv -s --maxfail=1 tests/test_access_control.py | ||||
|  | ||||
| # Re-run a few tests that will trigger brotli based storage | ||||
| export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5 | ||||
| pytest tests/test_access_control.py | ||||
| pytest -vv -s --maxfail=1 tests/test_access_control.py | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py | ||||
| pytest tests/test_backend.py | ||||
| pytest tests/test_rss.py | ||||
| pytest tests/test_unique_lines.py | ||||
| pytest -vv -s --maxfail=1 tests/test_backend.py | ||||
| pytest -vv -s --maxfail=1 tests/test_rss.py | ||||
| pytest -vv -s --maxfail=1 tests/test_unique_lines.py | ||||
|  | ||||
| # Try high concurrency | ||||
| FETCH_WORKERS=130 pytest  tests/test_history_consistency.py -v -l | ||||
|   | ||||
| @@ -6,6 +6,8 @@ | ||||
|  | ||||
| # enable debug | ||||
| set -x | ||||
| docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network | ||||
| docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4 | ||||
|  | ||||
| # A extra browser is configured, but we never chose to use it, so it should NOT show in the logs | ||||
| docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url' | ||||
|   | ||||
| @@ -19,12 +19,13 @@ docker run --network changedet-network -d \ | ||||
|   -v `pwd`/tests/proxy_list/squid-passwords.txt:/etc/squid3/passwords \ | ||||
|   ubuntu/squid:4.13-21.10_edge | ||||
|  | ||||
|  | ||||
| sleep 5 | ||||
| ## 2nd test actually choose the preferred proxy from proxies.json | ||||
| # This will force a request via "proxy-two" | ||||
| docker run --network changedet-network \ | ||||
|   -v `pwd`/tests/proxy_list/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   -v `pwd`/tests/proxy_list/proxies.json-example:/tmp/proxies.json \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_multiple_proxy.py' | ||||
|   bash -c 'cd changedetectionio && pytest -s tests/proxy_list/test_multiple_proxy.py --datastore-path /tmp' | ||||
|  | ||||
| set +e | ||||
| echo "- Looking for chosen.changedetection.io request in squid-one - it should NOT be here" | ||||
| @@ -48,8 +49,10 @@ fi | ||||
| # Test the UI configurable proxies | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py' | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py --datastore-path /tmp' | ||||
|  | ||||
| # Give squid proxies a moment to flush their logs | ||||
| sleep 2 | ||||
|  | ||||
| # Should see a request for one.changedetection.io in there | ||||
| echo "- Looking for .changedetection.io request in squid-custom" | ||||
| @@ -63,7 +66,10 @@ fi | ||||
| # Test "no-proxy" option | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py' | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py --datastore-path /tmp' | ||||
|  | ||||
| # Give squid proxies a moment to flush their logs | ||||
| sleep 2 | ||||
|  | ||||
| # We need to handle grep returning 1 | ||||
| set +e | ||||
| @@ -80,6 +86,8 @@ for c in $(echo "squid-one squid-two squid-custom"); do | ||||
|   fi | ||||
| done | ||||
|  | ||||
| echo "docker ps output" | ||||
| docker ps | ||||
|  | ||||
| docker kill squid-one squid-two squid-custom | ||||
|  | ||||
| @@ -88,19 +96,19 @@ docker kill squid-one squid-two squid-custom | ||||
| # Requests | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp' | ||||
|  | ||||
| # Playwright | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp' | ||||
|  | ||||
| # Puppeteer fast | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp' | ||||
|  | ||||
| # Selenium | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp' | ||||
|   | ||||
| @@ -5,22 +5,23 @@ set -e | ||||
| # enable debug | ||||
| set -x | ||||
|  | ||||
| docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network | ||||
|  | ||||
| # SOCKS5 related - start simple Socks5 proxy server | ||||
| # SOCKSTEST=xyz should show in the logs of this service to confirm it fetched | ||||
| docker run --network changedet-network -d --hostname socks5proxy --rm  --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy | ||||
| docker run --network changedet-network -d --hostname socks5proxy-noauth --rm  -p 1081:1080 --name socks5proxy-noauth  serjs/go-socks5-proxy | ||||
| docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth -e REQUIRE_AUTH=false serjs/go-socks5-proxy | ||||
|  | ||||
| echo "---------------------------------- SOCKS5 -------------------" | ||||
| # SOCKS5 related - test from proxies.json | ||||
| docker run --network changedet-network \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example:/tmp/proxies.json \ | ||||
|   --rm \ | ||||
|   -e "FLASK_SERVER_NAME=cdio" \ | ||||
|   --hostname cdio \ | ||||
|   -e "SOCKSTEST=proxiesjson" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py  --datastore-path /tmp' | ||||
|  | ||||
| # SOCKS5 related - by manually entering in UI | ||||
| docker run --network changedet-network \ | ||||
| @@ -29,18 +30,18 @@ docker run --network changedet-network \ | ||||
|   --hostname cdio \ | ||||
|   -e "SOCKSTEST=manual" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy.py' | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy.py --datastore-path /tmp' | ||||
|  | ||||
| # SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY | ||||
| docker run --network changedet-network \ | ||||
|   -e "SOCKSTEST=manual-playwright" \ | ||||
|   --hostname cdio \ | ||||
|   -e "FLASK_SERVER_NAME=cdio" \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/tmp/proxies.json \ | ||||
|   -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \ | ||||
|   --rm \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py --datastore-path /tmp' | ||||
|  | ||||
| echo "socks5 server logs" | ||||
| docker logs socks5proxy | ||||
|   | ||||
| @@ -1,24 +0,0 @@ | ||||
| """ | ||||
| Safe Jinja2 render with max payload sizes | ||||
|  | ||||
| See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations | ||||
| """ | ||||
|  | ||||
| import jinja2.sandbox | ||||
| import typing as t | ||||
| import os | ||||
|  | ||||
| JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10)) | ||||
|  | ||||
| # This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available. | ||||
| # (Which also limits available functions that could be called) | ||||
| def render(template_str, **args: t.Any) -> str: | ||||
|     jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension']) | ||||
|     output = jinja2_env.from_string(template_str).render(args) | ||||
|     return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE] | ||||
|  | ||||
| def render_fully_escaped(content): | ||||
|     env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True) | ||||
|     template = env.from_string("{{ some_html|e }}") | ||||
|     return template.render(some_html=content) | ||||
|  | ||||
| @@ -29,7 +29,7 @@ $(document).ready(function () { | ||||
|         $(this).text(new Date($(this).data("utc")).toLocaleString()); | ||||
|     }) | ||||
|  | ||||
|     const timezoneInput = $('#application-timezone'); | ||||
|     const timezoneInput = $('#application-scheduler_timezone_default'); | ||||
|     if(timezoneInput.length) { | ||||
|         const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone; | ||||
|         if (!timezoneInput.val().trim()) { | ||||
|   | ||||
| @@ -14,10 +14,10 @@ $(document).ready(function () { | ||||
|         e.preventDefault(); | ||||
|  | ||||
|         data = { | ||||
|             notification_body: $('#notification_body').val(), | ||||
|             notification_format: $('#notification_format').val(), | ||||
|             notification_title: $('#notification_title').val(), | ||||
|             notification_urls: $('.notification-urls').val(), | ||||
|             notification_urls: $('textarea.notification-urls').val(), | ||||
|             notification_title: $('input.notification-title').val(), | ||||
|             notification_body: $('textarea.notification-body').val(), | ||||
|             notification_format: $('select.notification-format').val(), | ||||
|             tags: $('#tags').val(), | ||||
|             window_url: window.location.href, | ||||
|         } | ||||
|   | ||||
| @@ -159,6 +159,7 @@ | ||||
|         // Return the current request in case it's needed | ||||
|         return requests[namespace]; | ||||
|     }; | ||||
|  | ||||
| })(jQuery); | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -2,6 +2,13 @@ | ||||
|  | ||||
| $(document).ready(function () { | ||||
|  | ||||
|     function reapplyTableStripes() { | ||||
|         $('.watch-table tbody tr').each(function(index) { | ||||
|             $(this).removeClass('pure-table-odd pure-table-even'); | ||||
|             $(this).addClass(index % 2 === 0 ? 'pure-table-odd' : 'pure-table-even'); | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     function bindSocketHandlerButtonsEvents(socket) { | ||||
|         $('.ajax-op').on('click.socketHandlerNamespace', function (e) { | ||||
|             e.preventDefault(); | ||||
| @@ -101,52 +108,57 @@ $(document).ready(function () { | ||||
|             socket.on('watch_deleted', function (data) { | ||||
|                 $('tr[data-watch-uuid="' + data.uuid + '"] td').fadeOut(500, function () { | ||||
|                     $(this).closest('tr').remove(); | ||||
|                     reapplyTableStripes(); | ||||
|                 }); | ||||
|             }); | ||||
|  | ||||
|             // Listen for periodically emitted watch data | ||||
|             console.log('Adding watch_update event listener'); | ||||
|             // So that the favicon is only updated when the server has written the scraped favicon to disk. | ||||
|             socket.on('watch_bumped_favicon', function (watch) { | ||||
|                 const $watchRow = $(`tr[data-watch-uuid="${watch.uuid}"]`); | ||||
|                 if ($watchRow.length) { | ||||
|                     $watchRow.addClass('has-favicon'); | ||||
|                     // Because the event could be emitted from a process that is outside the app context, url_for() might not work. | ||||
|                     // Lets use url_for at template generation time to give us a PLACEHOLDER instead | ||||
|                     let favicon_url = favicon_baseURL.replace('/PLACEHOLDER', `/${watch.uuid}?cache=${watch.event_timestamp}`); | ||||
|                     console.log(`Setting favicon for UUID - ${watch.uuid} - ${favicon_url}`); | ||||
|                     $('img.favicon', $watchRow).attr('src', favicon_url); | ||||
|                 } | ||||
|             }) | ||||
|  | ||||
|             socket.on('general_stats_update', function (general_stats) { | ||||
|                 // Tabs at bottom of list | ||||
|                 $('#watch-table-wrapper').toggleClass("has-unread-changes", general_stats.unread_changes_count !==0) | ||||
|                 $('#watch-table-wrapper').toggleClass("has-error", general_stats.count_errors !== 0) | ||||
|                 $('#post-list-with-errors a').text(`With errors (${ new Intl.NumberFormat(navigator.language).format(general_stats.count_errors) })`); | ||||
|                 $('#unread-tab-counter').text(new Intl.NumberFormat(navigator.language).format(general_stats.unread_changes_count)); | ||||
|             }); | ||||
|  | ||||
|             socket.on('watch_update', function (data) { | ||||
|                 const watch = data.watch; | ||||
|                 const general_stats = data.general_stats; | ||||
|  | ||||
|                 // Log the entire watch object for debugging | ||||
|                 console.log('!!! WATCH UPDATE EVENT RECEIVED !!!'); | ||||
|                 console.log(`${watch.event_timestamp} - Watch update ${watch.uuid} - Checking now - ${watch.checking_now} - UUID in URL ${window.location.href.includes(watch.uuid)}`); | ||||
|                 console.log('Watch data:', watch); | ||||
|                 console.log('General stats:', general_stats); | ||||
|                  | ||||
|                 // Updating watch table rows | ||||
|                 const $watchRow = $('tr[data-watch-uuid="' + watch.uuid + '"]'); | ||||
|                 console.log('Found watch row elements:', $watchRow.length); | ||||
|                  | ||||
|  | ||||
|                 if ($watchRow.length) { | ||||
|                     $($watchRow).toggleClass('checking-now', watch.checking_now); | ||||
|                     $($watchRow).toggleClass('queued', watch.queued); | ||||
|                     $($watchRow).toggleClass('unviewed', watch.unviewed); | ||||
|                     $($watchRow).toggleClass('has-error', watch.has_error); | ||||
|                     $($watchRow).toggleClass('has-favicon', watch.has_favicon); | ||||
|                     $($watchRow).toggleClass('notification_muted', watch.notification_muted); | ||||
|                     $($watchRow).toggleClass('paused', watch.paused); | ||||
|                     $($watchRow).toggleClass('single-history', watch.history_n === 1); | ||||
|                     $($watchRow).toggleClass('multiple-history', watch.history_n >= 2); | ||||
|  | ||||
|                     $('td.title-col .error-text', $watchRow).html(watch.error_text) | ||||
|  | ||||
|                     $('td.last-changed', $watchRow).text(watch.last_changed_text) | ||||
|  | ||||
|                     $('td.last-checked .innertext', $watchRow).text(watch.last_checked_text) | ||||
|                     $('td.last-checked', $watchRow).data('timestamp', watch.last_checked).data('fetchduration', watch.fetch_time); | ||||
|                     $('td.last-checked', $watchRow).data('eta_complete', watch.last_checked + watch.fetch_time); | ||||
|                      | ||||
|  | ||||
|                     console.log('Updated UI for watch:', watch.uuid); | ||||
|                 } | ||||
|  | ||||
|                 // Tabs at bottom of list | ||||
|                 $('#post-list-mark-views').toggleClass("has-unviewed", general_stats.has_unviewed); | ||||
|                 $('#post-list-with-errors').toggleClass("has-error", general_stats.count_errors !== 0) | ||||
|                 $('#post-list-with-errors a').text(`With errors (${ general_stats.count_errors })`); | ||||
|  | ||||
|                 $('body').toggleClass('checking-now', watch.checking_now && window.location.href.includes(watch.uuid)); | ||||
|             }); | ||||
|  | ||||
|   | ||||
| @@ -51,6 +51,7 @@ $(document).ready(function () { | ||||
|         $('#notification_body').val(''); | ||||
|         $('#notification_format').val('System default'); | ||||
|         $('#notification_urls').val(''); | ||||
|         $('#notification_muted_none').prop('checked', true); // in the case of a ternary field | ||||
|         e.preventDefault(); | ||||
|     }); | ||||
|     $("#notification-token-toggle").click(function (e) { | ||||
|   | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -3,15 +3,16 @@ | ||||
|   "version": "0.0.3", | ||||
|   "description": "", | ||||
|   "main": "index.js", | ||||
|   "scripts": { | ||||
|     "watch": "node-sass -w scss -o .", | ||||
|     "build": "node-sass scss -o ." | ||||
|   "engines": { | ||||
|     "node": ">=18.0.0" | ||||
|   }, | ||||
|   "author": "", | ||||
|   "license": "ISC", | ||||
|   "scripts": { | ||||
|     "watch": "sass --watch scss:. --style=compressed --no-source-map", | ||||
|     "build": "sass scss:. --style=compressed --no-source-map" | ||||
|   }, | ||||
|   "author": "Leigh Morresi / Web Technologies s.r.o.", | ||||
|   "license": "Apache", | ||||
|   "dependencies": { | ||||
|     "node-sass": "^7.0.0", | ||||
|     "tar": "^6.1.9", | ||||
|     "trim-newlines": "^3.0.1" | ||||
|     "sass": "^1.77.8" | ||||
|   } | ||||
| } | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| @import "parts/_variables.scss"; | ||||
| @use "parts/variables"; | ||||
|  | ||||
| #diff-ui { | ||||
|  | ||||
|   | ||||
| @@ -64,17 +64,17 @@ body.proxy-check-active { | ||||
| #recommended-proxy { | ||||
|   display: grid; | ||||
|   gap: 2rem; | ||||
|     @media  (min-width: 991px) { | ||||
|       grid-template-columns: repeat(2, 1fr); | ||||
|     } | ||||
|   padding-bottom: 1em; | ||||
|    | ||||
|   @media  (min-width: 991px) { | ||||
|     grid-template-columns: repeat(2, 1fr); | ||||
|   } | ||||
|  | ||||
|   > div { | ||||
|     border: 1px #aaa solid; | ||||
|     border-radius: 4px; | ||||
|     padding: 1em; | ||||
|   } | ||||
|  | ||||
|   padding-bottom: 1em; | ||||
| } | ||||
|  | ||||
| #extra-proxies-setting { | ||||
|   | ||||
| @@ -0,0 +1,92 @@ | ||||
| .watch-table { | ||||
|   &.favicon-not-enabled { | ||||
|     tr { | ||||
|       .favicon { | ||||
|         display: none; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   tr { | ||||
|     /* make the icons and the text inline-ish */ | ||||
|     td.inline.title-col { | ||||
|       .flex-wrapper { | ||||
|         display: flex; | ||||
|         align-items: center; | ||||
|         gap: 4px; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  | ||||
|   td, | ||||
|   th { | ||||
|     vertical-align: middle; | ||||
|   } | ||||
|  | ||||
|   tr.has-favicon { | ||||
|     &.unviewed { | ||||
|       img.favicon { | ||||
|         opacity: 1.0 !important; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   .status-icons { | ||||
|     white-space: nowrap; | ||||
|     display: flex; | ||||
|     align-items: center; /* Vertical centering */ | ||||
|     gap: 4px; /* Space between image and text */ | ||||
|     > * { | ||||
|       vertical-align: middle; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| .title-col { | ||||
|   /* Optional, for spacing */ | ||||
|   padding: 10px; | ||||
| } | ||||
|  | ||||
| .title-wrapper { | ||||
|   display: flex; | ||||
|   align-items: center; /* Vertical centering */ | ||||
|   gap: 10px; /* Space between image and text */ | ||||
| } | ||||
|  | ||||
| /* Make sure .title-col-inner doesn't collapse or misalign */ | ||||
| .title-col-inner { | ||||
|   display: inline-block; | ||||
|   vertical-align: middle; | ||||
| } | ||||
|  | ||||
| /* favicon styling */ | ||||
| .watch-table { | ||||
|   img.favicon { | ||||
|     vertical-align: middle; | ||||
|     max-width: 25px; | ||||
|     max-height: 25px; | ||||
|     height: 25px; | ||||
|     padding-right: 4px; | ||||
|   } | ||||
|  | ||||
|     // Reserved for future use | ||||
|   /*  &.thumbnail-type-screenshot { | ||||
|       tr.has-favicon { | ||||
|         td.inline.title-col { | ||||
|           img.thumbnail { | ||||
|             background-color: #fff; !* fallback bg for SVGs without bg *! | ||||
|             border-radius: 4px; !* subtle rounded corners *! | ||||
|             border: 1px solid #ddd; !* light border for contrast *! | ||||
|             box-shadow: 0 2px 6px rgba(0, 0, 0, 0.15); !* soft shadow *! | ||||
|             filter: contrast(1.05) saturate(1.1) drop-shadow(0 0 0.5px rgba(0, 0, 0, 0.2)); | ||||
|             object-fit: cover; !* crop/fill if needed *! | ||||
|             opacity: 0.8; | ||||
|             max-width: 30px; | ||||
|             max-height: 30px; | ||||
|             height: 30px; | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     }*/ | ||||
| } | ||||
| @@ -1,4 +1,4 @@ | ||||
| @import "minitabs"; | ||||
| @use "minitabs"; | ||||
|  | ||||
| body.preview-text-enabled { | ||||
|  | ||||
|   | ||||
| @@ -17,15 +17,6 @@ body.checking-now { | ||||
|   position: fixed; | ||||
| } | ||||
|  | ||||
| #post-list-buttons { | ||||
|   #post-list-with-errors.has-error { | ||||
|     display: inline-block !important; | ||||
|   } | ||||
|   #post-list-mark-views.has-unviewed { | ||||
|     display: inline-block !important; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -0,0 +1,178 @@ | ||||
| $grid-col-checkbox: 20px; | ||||
| $grid-col-watch: 100px; | ||||
| $grid-gap: 0.5rem; | ||||
|  | ||||
|  | ||||
| @media (max-width: 767px) { | ||||
|  | ||||
|   /* | ||||
|   Max width before this PARTICULAR table gets nasty | ||||
|   This query will take effect for any screen smaller than 760px | ||||
|   and also iPads specifically. | ||||
|   */ | ||||
|   .watch-table { | ||||
|     /* make headings work on mobile */ | ||||
|     thead { | ||||
|       display: block; | ||||
|  | ||||
|       tr { | ||||
|         th { | ||||
|           display: inline-block; | ||||
|           // Hide the "Last" text for smaller screens | ||||
|           @media (max-width: 768px) { | ||||
|             .hide-on-mobile { | ||||
|               display: none; | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|  | ||||
|       .empty-cell { | ||||
|         display: none; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|  | ||||
|     .last-checked { | ||||
|       margin-left: calc($grid-col-checkbox + $grid-gap); | ||||
|  | ||||
|       > span { | ||||
|         vertical-align: middle; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     .last-changed { | ||||
|       margin-left: calc($grid-col-checkbox + $grid-gap); | ||||
|     } | ||||
|  | ||||
|     .last-checked::before { | ||||
|       color: var(--color-text); | ||||
|       content: "Last Checked "; | ||||
|     } | ||||
|  | ||||
|     .last-changed::before { | ||||
|       color: var(--color-text); | ||||
|       content: "Last Changed "; | ||||
|     } | ||||
|  | ||||
|     /* Force table to not be like tables anymore */ | ||||
|     td.inline { | ||||
|       display: inline-block; | ||||
|     } | ||||
|  | ||||
|     .pure-table td, | ||||
|     .pure-table th { | ||||
|       border: none; | ||||
|     } | ||||
|  | ||||
|     td { | ||||
|       /* Behave  like a "row" */ | ||||
|       border: none; | ||||
|       border-bottom: 1px solid var(--color-border-watch-table-cell); | ||||
|       vertical-align: middle; | ||||
|  | ||||
|       &:before { | ||||
|         /* Top/left values mimic padding */ | ||||
|         top: 6px; | ||||
|         left: 6px; | ||||
|         width: 45%; | ||||
|         padding-right: 10px; | ||||
|         white-space: nowrap; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     &.pure-table-striped { | ||||
|       tr { | ||||
|         background-color: var(--color-table-background); | ||||
|       } | ||||
|  | ||||
|       tr:nth-child(2n-1) { | ||||
|         background-color: var(--color-table-stripe); | ||||
|       } | ||||
|  | ||||
|       tr:nth-child(2n-1) td { | ||||
|         background-color: inherit; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| @media (max-width: 767px) { | ||||
|   .watch-table { | ||||
|     tbody { | ||||
|       tr { | ||||
|         padding-bottom: 10px; | ||||
|         padding-top: 10px; | ||||
|         display: grid; | ||||
|         grid-template-columns: $grid-col-checkbox 1fr $grid-col-watch; | ||||
|         grid-template-rows: auto auto auto auto; | ||||
|         gap: $grid-gap; | ||||
|  | ||||
|         .counter-i { | ||||
|           display: none; | ||||
|         } | ||||
|  | ||||
|         td.checkbox-uuid { | ||||
|           display: grid; | ||||
|           place-items: center; | ||||
|         } | ||||
|  | ||||
|         td.inline { | ||||
|           /* display: block !important;;*/ | ||||
|         } | ||||
|  | ||||
|         > td { | ||||
|           border-bottom: none; | ||||
|         } | ||||
|  | ||||
|         > td.title-col { | ||||
|           grid-column: 1 / -1; | ||||
|           grid-row: 1; | ||||
|           .watch-title { | ||||
|             font-size: 0.92rem; | ||||
|           } | ||||
|           .link-spread { | ||||
|             display: none; | ||||
|           } | ||||
|         } | ||||
|  | ||||
|         > td.last-checked { | ||||
|           grid-column: 1 / -1; | ||||
|           grid-row: 2; | ||||
|         } | ||||
|  | ||||
|         > td.last-changed { | ||||
|           grid-column: 1 / -1; | ||||
|           grid-row: 3; | ||||
|         } | ||||
|  | ||||
|         > td.checkbox-uuid { | ||||
|           grid-column: 1; | ||||
|           grid-row: 4; | ||||
|         } | ||||
|  | ||||
|         > td.buttons { | ||||
|           grid-column: 2; | ||||
|           grid-row: 4; | ||||
|           display: flex; | ||||
|           align-items: center; | ||||
|           justify-content: flex-start; | ||||
|         } | ||||
|  | ||||
|         > td.watch-controls { | ||||
|           grid-column: 3; | ||||
|           grid-row: 4; | ||||
|           display: grid; | ||||
|           place-items: center; | ||||
|  | ||||
|           a img { | ||||
|             padding: 10px; | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|   .pure-table td { | ||||
|     padding: 3px !important; | ||||
|   } | ||||
| } | ||||
| @@ -7,6 +7,7 @@ | ||||
|     &.unviewed { | ||||
|       font-weight: bold; | ||||
|     } | ||||
|  | ||||
|     color: var(--color-watch-table-row-text); | ||||
|   } | ||||
|  | ||||
| @@ -48,17 +49,17 @@ | ||||
|   /* Row with 'checking-now' */ | ||||
|   tr.checking-now { | ||||
|     td:first-child { | ||||
|         position: relative; | ||||
|       position: relative; | ||||
|     } | ||||
|  | ||||
|     td:first-child::before { | ||||
|         content: ""; | ||||
|         position: absolute; | ||||
|         top: 0; | ||||
|         bottom: 0; | ||||
|         left: 0; | ||||
|         width: 3px; | ||||
|         background-color: #293eff; | ||||
|       content: ""; | ||||
|       position: absolute; | ||||
|       top: 0; | ||||
|       bottom: 0; | ||||
|       left: 0; | ||||
|       width: 3px; | ||||
|       background-color: #293eff; | ||||
|     } | ||||
|  | ||||
|     td.last-checked { | ||||
| @@ -109,6 +110,7 @@ | ||||
|  | ||||
|   tr.has-error { | ||||
|     color: var(--color-watch-table-error); | ||||
|  | ||||
|     .error-text { | ||||
|       display: block !important; | ||||
|     } | ||||
| @@ -119,12 +121,50 @@ | ||||
|       display: inline-block !important; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   tr.multiple-history { | ||||
|     a.history-link { | ||||
|       display: inline-block !important; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  | ||||
| } | ||||
|  | ||||
| #watch-table-wrapper { | ||||
|   /* general styling */ | ||||
|   #post-list-buttons { | ||||
|     text-align: right; | ||||
|     padding: 0px; | ||||
|     margin: 0px; | ||||
|  | ||||
|     li { | ||||
|       display: inline-block; | ||||
|     } | ||||
|  | ||||
|     a { | ||||
|       border-top-left-radius: initial; | ||||
|       border-top-right-radius: initial; | ||||
|       border-bottom-left-radius: 5px; | ||||
|       border-bottom-right-radius: 5px; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   /* post list dynamically on/off stuff */ | ||||
|  | ||||
|   &.has-error { | ||||
|     #post-list-buttons { | ||||
|       #post-list-with-errors { | ||||
|         display: inline-block !important; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   &.has-unread-changes { | ||||
|     #post-list-buttons { | ||||
|       #post-list-unread, #post-list-mark-views, #post-list-unread { | ||||
|         display: inline-block !important; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|   | ||||
							
								
								
									
										114
									
								
								changedetectionio/static/styles/scss/parts/_widgets.scss
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										114
									
								
								changedetectionio/static/styles/scss/parts/_widgets.scss
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,114 @@ | ||||
|  | ||||
| // Ternary radio button group component | ||||
| .ternary-radio-group { | ||||
|   display: flex; | ||||
|   gap: 0; | ||||
|   border: 1px solid var(--color-grey-750); | ||||
|   border-radius: 4px; | ||||
|   overflow: hidden; | ||||
|   width: fit-content; | ||||
|   background: var(--color-background); | ||||
|  | ||||
|   .ternary-radio-option { | ||||
|     position: relative; | ||||
|     cursor: pointer; | ||||
|     margin: 0; | ||||
|     display: flex; | ||||
|     align-items: center; | ||||
|  | ||||
|     input[type="radio"] { | ||||
|       position: absolute; | ||||
|       opacity: 0; | ||||
|       width: 0; | ||||
|       height: 0; | ||||
|     } | ||||
|  | ||||
|     .ternary-radio-label { | ||||
|       padding: 8px 16px; | ||||
|       background: var(--color-grey-900); | ||||
|       border: none; | ||||
|       border-right: 1px solid var(--color-grey-750); | ||||
|       font-size: 13px; | ||||
|       font-weight: 500; | ||||
|       color: var(--color-text); | ||||
|       transition: all 0.2s ease; | ||||
|       cursor: pointer; | ||||
|       display: block; | ||||
|       text-align: center; | ||||
|     } | ||||
|  | ||||
|     &:last-child .ternary-radio-label { | ||||
|       border-right: none; | ||||
|     } | ||||
|  | ||||
|     input:checked + .ternary-radio-label { | ||||
|       background: var(--color-link); | ||||
|       color: var(--color-text-button); | ||||
|       font-weight: 600; | ||||
|  | ||||
|       &.ternary-default { | ||||
|         background: var(--color-grey-600); | ||||
|         color: var(--color-text-button); | ||||
|       } | ||||
|  | ||||
|       &:hover { | ||||
|         background: #1a7bc4; | ||||
|  | ||||
|         &.ternary-default { | ||||
|           background: var(--color-grey-500); | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     &:hover .ternary-radio-label { | ||||
|       background: var(--color-grey-800); | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   @media (max-width: 480px) { | ||||
|     width: 100%; | ||||
|  | ||||
|     .ternary-radio-label { | ||||
|       flex: 1; | ||||
|       min-width: auto; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| // Standard radio button styling | ||||
| input[type="radio"].pure-radio:checked + label, | ||||
| input[type="radio"].pure-radio:checked { | ||||
|   background: var(--color-link); | ||||
|   color: var(--color-text-button); | ||||
| } | ||||
|  | ||||
| html[data-darkmode="true"] { | ||||
|   .ternary-radio-group { | ||||
|     .ternary-radio-option { | ||||
|       .ternary-radio-label { | ||||
|         background: var(--color-grey-350); | ||||
|       } | ||||
|  | ||||
|       &:hover .ternary-radio-label { | ||||
|         background: var(--color-grey-400); | ||||
|       } | ||||
|  | ||||
|       input:checked + .ternary-radio-label { | ||||
|         background: var(--color-link); | ||||
|         color: var(--color-text-button); | ||||
|  | ||||
|         &.ternary-default { | ||||
|           background: var(--color-grey-600); | ||||
|         } | ||||
|  | ||||
|         &:hover { | ||||
|           background: #1a7bc4; | ||||
|  | ||||
|           &.ternary-default { | ||||
|             background: var(--color-grey-500); | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
| @@ -2,21 +2,25 @@ | ||||
|  * -- BASE STYLES -- | ||||
|  */ | ||||
|  | ||||
| @import "parts/_arrows"; | ||||
| @import "parts/_browser-steps"; | ||||
| @import "parts/_extra_proxies"; | ||||
| @import "parts/_extra_browsers"; | ||||
| @import "parts/_pagination"; | ||||
| @import "parts/_spinners"; | ||||
| @import "parts/_variables"; | ||||
| @import "parts/_darkmode"; | ||||
| @import "parts/_menu"; | ||||
| @import "parts/_love"; | ||||
| @import "parts/preview_text_filter"; | ||||
| @import "parts/_watch_table"; | ||||
| @import "parts/_edit"; | ||||
| @import "parts/_conditions_table"; | ||||
| @import "parts/_socket"; | ||||
| @use "parts/variables"; | ||||
| @use "parts/arrows"; | ||||
| @use "parts/browser-steps"; | ||||
| @use "parts/extra_proxies"; | ||||
| @use "parts/extra_browsers"; | ||||
| @use "parts/pagination"; | ||||
| @use "parts/spinners"; | ||||
| @use "parts/darkmode"; | ||||
| @use "parts/menu"; | ||||
| @use "parts/love"; | ||||
| @use "parts/preview_text_filter"; | ||||
| @use "parts/watch_table"; | ||||
| @use "parts/watch_table-mobile"; | ||||
| @use "parts/edit"; | ||||
| @use "parts/conditions_table"; | ||||
| @use "parts/lister_extra"; | ||||
| @use "parts/socket"; | ||||
| @use "parts/visualselector"; | ||||
| @use "parts/widgets"; | ||||
|  | ||||
| body { | ||||
|   color: var(--color-text); | ||||
| @@ -184,33 +188,21 @@ code { | ||||
|   @extend .inline-tag; | ||||
| } | ||||
|  | ||||
| @media (min-width: 768px) { | ||||
|   .box { | ||||
|     margin: 0 1em !important; | ||||
|   } | ||||
| } | ||||
|  | ||||
| .box { | ||||
|   max-width: 100%; | ||||
|   margin: 0 1em; | ||||
|   margin: 0 0.3em; | ||||
|   flex-direction: column; | ||||
|   display: flex; | ||||
|   justify-content: center; | ||||
| } | ||||
|  | ||||
|  | ||||
| #post-list-buttons { | ||||
|   text-align: right; | ||||
|   padding: 0px; | ||||
|   margin: 0px; | ||||
|  | ||||
|   li { | ||||
|     display: inline-block; | ||||
|   } | ||||
|  | ||||
|   a { | ||||
|     border-top-left-radius: initial; | ||||
|     border-top-right-radius: initial; | ||||
|     border-bottom-left-radius: 5px; | ||||
|     border-bottom-right-radius: 5px; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| body:after { | ||||
|   content: ""; | ||||
|   background: linear-gradient(130deg, var(--color-background-gradient-first), var(--color-background-gradient-second) 41.07%, var(--color-background-gradient-third) 84.05%); | ||||
| @@ -352,7 +344,7 @@ label { | ||||
|  }   | ||||
| } | ||||
|  | ||||
| #notification-customisation { | ||||
| .grey-form-border { | ||||
|   border: 1px solid var(--color-border-notification); | ||||
|   padding: 0.5rem; | ||||
|   border-radius: 5px; | ||||
| @@ -694,114 +686,6 @@ footer { | ||||
|     width: 100%; | ||||
|   } | ||||
|  | ||||
|   /* | ||||
|   Max width before this PARTICULAR table gets nasty | ||||
|   This query will take effect for any screen smaller than 760px | ||||
|   and also iPads specifically. | ||||
|   */ | ||||
|   .watch-table { | ||||
|     /* make headings work on mobile */ | ||||
|     thead { | ||||
|       display: block; | ||||
|       tr { | ||||
|         th { | ||||
|           display: inline-block; | ||||
|           // Hide the "Last" text for smaller screens | ||||
|           @media (max-width: 768px) { | ||||
|             .hide-on-mobile { | ||||
|               display: none;  | ||||
|             } | ||||
|           } | ||||
|         } | ||||
|       } | ||||
|       .empty-cell { | ||||
|         display: none; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     /* Force table to not be like tables anymore */ | ||||
|     tbody { | ||||
|       td, | ||||
|       tr { | ||||
|         display: block; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     tbody { | ||||
|       tr { | ||||
|         display: flex; | ||||
|         flex-wrap: wrap; | ||||
|  | ||||
|         // The third child of each row will take up the remaining space | ||||
|         // This is useful for the URL column, which should expand to fill the remaining space | ||||
|         :nth-child(3) { | ||||
|           flex-grow: 1; | ||||
|         } | ||||
|         // The last three children (from the end) of each row will take up the full width | ||||
|         // This is useful for the "Last Checked", "Last Changed", and the action buttons columns, which should each take up the full width | ||||
|         :nth-last-child(-n+3) { | ||||
|           flex-basis: 100%; | ||||
|         } | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     .last-checked { | ||||
|       >span { | ||||
|         vertical-align: middle; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     .last-checked::before { | ||||
|       color: var(--color-last-checked); | ||||
|       content: "Last Checked "; | ||||
|     } | ||||
|  | ||||
|     .last-changed::before { | ||||
|       color: var(--color-last-checked); | ||||
|       content: "Last Changed "; | ||||
|     } | ||||
|  | ||||
|     /* Force table to not be like tables anymore */ | ||||
|     td.inline { | ||||
|       display: inline-block; | ||||
|     } | ||||
|  | ||||
|     .pure-table td, | ||||
|     .pure-table th { | ||||
|       border: none; | ||||
|     } | ||||
|  | ||||
|     td { | ||||
|       /* Behave  like a "row" */ | ||||
|       border: none; | ||||
|       border-bottom: 1px solid var(--color-border-watch-table-cell); | ||||
|       vertical-align: middle; | ||||
|  | ||||
|       &:before { | ||||
|         /* Top/left values mimic padding */ | ||||
|         top: 6px; | ||||
|         left: 6px; | ||||
|         width: 45%; | ||||
|         padding-right: 10px; | ||||
|         white-space: nowrap; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|     &.pure-table-striped { | ||||
|       tr { | ||||
|         background-color: var(--color-table-background); | ||||
|       } | ||||
|  | ||||
|       tr:nth-child(2n-1) { | ||||
|         background-color: var(--color-table-stripe); | ||||
|       } | ||||
|  | ||||
|       tr:nth-child(2n-1) td { | ||||
|         background-color: inherit; | ||||
|       } | ||||
|     } | ||||
|  | ||||
|   } | ||||
| } | ||||
|  | ||||
| .pure-table { | ||||
| @@ -1056,8 +940,6 @@ ul { | ||||
|   } | ||||
| } | ||||
|  | ||||
| @import "parts/_visualselector"; | ||||
|  | ||||
| #webdriver_delay { | ||||
|     width: 5em; | ||||
| } | ||||
| @@ -1175,17 +1057,23 @@ ul { | ||||
|  | ||||
|  | ||||
| #quick-watch-processor-type { | ||||
|   color: #fff; | ||||
|   ul { | ||||
|     padding: 0.3rem; | ||||
|   ul#processor { | ||||
|     color: #fff; | ||||
|     padding-left: 0px; | ||||
|     li { | ||||
|       list-style: none; | ||||
|       font-size: 0.9rem; | ||||
|       > * { | ||||
|         display: inline-block; | ||||
|       } | ||||
|       display: grid; | ||||
|       grid-template-columns: auto 1fr; | ||||
|       align-items: center; | ||||
|       gap: 0.5rem; | ||||
|       margin-bottom: 0.5rem; | ||||
|     } | ||||
|   } | ||||
|   label, input { | ||||
|     padding: 0; | ||||
|     margin: 0; | ||||
|   } | ||||
| } | ||||
|  | ||||
| .restock-label { | ||||
| @@ -1224,11 +1112,12 @@ ul { | ||||
| } | ||||
|  | ||||
| #realtime-conn-error { | ||||
|   position: absolute; | ||||
|   position: fixed; | ||||
|   bottom: 0; | ||||
|   left: 30px; | ||||
|   left: 0; | ||||
|   background: var(--color-warning); | ||||
|   padding: 10px; | ||||
|   font-size: 0.8rem; | ||||
|   color: #fff; | ||||
|   opacity: 0.8; | ||||
| } | ||||
|   | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -1,11 +1,13 @@ | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from changedetectionio.validate_url import is_safe_valid_url | ||||
|  | ||||
| from flask import ( | ||||
|     flash | ||||
| ) | ||||
|  | ||||
| from .html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
| from . model import App, Watch | ||||
| from .model import App, Watch, USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
| from copy import deepcopy, copy | ||||
| from os import path, unlink | ||||
| from threading import Lock | ||||
| @@ -40,17 +42,24 @@ class ChangeDetectionStore: | ||||
|     needs_write_urgent = False | ||||
|  | ||||
|     __version_check = True | ||||
|     save_data_thread = None | ||||
|  | ||||
|     def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"): | ||||
|         # Should only be active for docker | ||||
|         # logging.basicConfig(filename='/dev/stdout', level=logging.INFO) | ||||
|         self.__data = App.model() | ||||
|         self.datastore_path = datastore_path | ||||
|         self.json_store_path = os.path.join(self.datastore_path, "url-watches.json") | ||||
|         logger.info(f"Datastore path is '{self.json_store_path}'") | ||||
|  | ||||
|         self.needs_write = False | ||||
|         self.start_time = time.time() | ||||
|         self.stop_thread = False | ||||
|         self.reload_state(datastore_path=datastore_path, include_default_watches=include_default_watches, version_tag=version_tag) | ||||
|  | ||||
|  | ||||
|     def reload_state(self, datastore_path, include_default_watches, version_tag): | ||||
|         logger.info(f"Datastore path is '{datastore_path}'") | ||||
|  | ||||
|         self.__data = App.model() | ||||
|         self.datastore_path = datastore_path | ||||
|         self.json_store_path = os.path.join(self.datastore_path, "url-watches.json") | ||||
|         # Base definition for all watchers | ||||
|         # deepcopy part of #569 - not sure why its needed exactly | ||||
|         self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={})) | ||||
| @@ -143,7 +152,10 @@ class ChangeDetectionStore: | ||||
|         self.needs_write = True | ||||
|  | ||||
|         # Finally start the thread that will manage periodic data saves to JSON | ||||
|         save_data_thread = threading.Thread(target=self.save_datastore).start() | ||||
|         # Only start if thread is not already running (reload_state might be called multiple times) | ||||
|         if not self.save_data_thread or not self.save_data_thread.is_alive(): | ||||
|             self.save_data_thread = threading.Thread(target=self.save_datastore) | ||||
|             self.save_data_thread.start() | ||||
|  | ||||
|     def rehydrate_entity(self, uuid, entity, processor_override=None): | ||||
|         """Set the dict back to the dict Watch object""" | ||||
| @@ -202,14 +214,13 @@ class ChangeDetectionStore: | ||||
|         return seconds | ||||
|  | ||||
|     @property | ||||
|     def has_unviewed(self): | ||||
|         if not self.__data.get('watching'): | ||||
|             return None | ||||
|  | ||||
|     def unread_changes_count(self): | ||||
|         unread_changes_count = 0 | ||||
|         for uuid, watch in self.__data['watching'].items(): | ||||
|             if watch.history_n >= 2 and watch.viewed == False: | ||||
|                 return True | ||||
|         return False | ||||
|                 unread_changes_count += 1 | ||||
|  | ||||
|         return unread_changes_count | ||||
|  | ||||
|     @property | ||||
|     def data(self): | ||||
| @@ -229,26 +240,37 @@ class ChangeDetectionStore: | ||||
|         d['settings']['application']['active_base_url'] = active_base_url.strip('" ') | ||||
|         return d | ||||
|  | ||||
|     from pathlib import Path | ||||
|  | ||||
|     def delete_path(self, path: Path): | ||||
|         import shutil | ||||
|         """Delete a file or directory tree, including the path itself.""" | ||||
|         if not path.exists(): | ||||
|             return | ||||
|         if path.is_file() or path.is_symlink(): | ||||
|             path.unlink(missing_ok=True)  # deletes a file or symlink | ||||
|         else: | ||||
|             shutil.rmtree(path, ignore_errors=True)  # deletes dir *and* its contents | ||||
|  | ||||
|     # Delete a single watch by UUID | ||||
|     def delete(self, uuid): | ||||
|         import pathlib | ||||
|         import shutil | ||||
|  | ||||
|         with self.lock: | ||||
|             if uuid == 'all': | ||||
|                 self.__data['watching'] = {} | ||||
|                 time.sleep(1) # Mainly used for testing to allow all items to flush before running next test | ||||
|  | ||||
|                 # GitHub #30 also delete history records | ||||
|                 for uuid in self.data['watching']: | ||||
|                     path = pathlib.Path(os.path.join(self.datastore_path, uuid)) | ||||
|                     path = pathlib.Path( | ||||
|                         os.path.join(self.datastore_path, uuid)) | ||||
|                     if os.path.exists(path): | ||||
|                         shutil.rmtree(path) | ||||
|                         self.delete(uuid) | ||||
|  | ||||
|             else: | ||||
|                 path = pathlib.Path(os.path.join(self.datastore_path, uuid)) | ||||
|                 if os.path.exists(path): | ||||
|                     shutil.rmtree(path) | ||||
|                     self.delete_path(path) | ||||
|  | ||||
|                 del self.data['watching'][uuid] | ||||
|  | ||||
|         self.needs_write_urgent = True | ||||
| @@ -262,11 +284,6 @@ class ChangeDetectionStore: | ||||
|         extras = deepcopy(self.data['watching'][uuid]) | ||||
|         new_uuid = self.add_watch(url=url, extras=extras) | ||||
|         watch = self.data['watching'][new_uuid] | ||||
|  | ||||
|         if self.data['settings']['application'].get('extract_title_as_title') or watch['extract_title_as_title']: | ||||
|             # Because it will be recalculated on the next fetch | ||||
|             self.data['watching'][new_uuid]['title'] = None | ||||
|  | ||||
|         return new_uuid | ||||
|  | ||||
|     def url_exists(self, url): | ||||
| @@ -308,7 +325,6 @@ class ChangeDetectionStore: | ||||
|                     'browser_steps', | ||||
|                     'css_filter', | ||||
|                     'extract_text', | ||||
|                     'extract_title_as_title', | ||||
|                     'headers', | ||||
|                     'ignore_text', | ||||
|                     'include_filters', | ||||
| @@ -323,6 +339,7 @@ class ChangeDetectionStore: | ||||
|                     'title', | ||||
|                     'trigger_text', | ||||
|                     'url', | ||||
|                     'use_page_title_in_list', | ||||
|                     'webdriver_js_execute_code', | ||||
|                 ]: | ||||
|                     if res.get(k): | ||||
| @@ -336,9 +353,10 @@ class ChangeDetectionStore: | ||||
|                 logger.error(f"Error fetching metadata for shared watch link {url} {str(e)}") | ||||
|                 flash("Error fetching metadata for {}".format(url), 'error') | ||||
|                 return False | ||||
|         from .model.Watch import is_safe_url | ||||
|         if not is_safe_url(url): | ||||
|             flash('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX', 'error') | ||||
|  | ||||
|         if not is_safe_valid_url(url): | ||||
|             flash('Watch protocol is not permitted or invalid URL format', 'error') | ||||
|  | ||||
|             return None | ||||
|  | ||||
|         if tag and type(tag) == str: | ||||
| @@ -404,7 +422,6 @@ class ChangeDetectionStore: | ||||
|             self.sync_to_json() | ||||
|             return | ||||
|         else: | ||||
|  | ||||
|             try: | ||||
|                 # Re #286  - First write to a temp file, then confirm it looks OK and rename it | ||||
|                 # This is a fairly basic strategy to deal with the case that the file is corrupted, | ||||
| @@ -412,7 +429,7 @@ class ChangeDetectionStore: | ||||
|                 with open(self.json_store_path+".tmp", 'w') as json_file: | ||||
|                     # Use compact JSON in production for better performance | ||||
|                     json.dump(data, json_file, indent=2) | ||||
|                     os.replace(self.json_store_path+".tmp", self.json_store_path) | ||||
|                 os.replace(self.json_store_path+".tmp", self.json_store_path) | ||||
|             except Exception as e: | ||||
|                 logger.error(f"Error writing JSON!! (Main JSON file save was skipped) : {str(e)}") | ||||
|  | ||||
| @@ -434,7 +451,7 @@ class ChangeDetectionStore: | ||||
|                 logger.remove() | ||||
|                 logger.add(sys.stderr) | ||||
|  | ||||
|                 logger.critical("Shutting down datastore thread") | ||||
|                 logger.info(f"Shutting down datastore '{self.datastore_path}' thread") | ||||
|                 return | ||||
|  | ||||
|             if self.needs_write or self.needs_write_urgent: | ||||
| @@ -973,6 +990,55 @@ class ChangeDetectionStore: | ||||
|                         f_d.write(zlib.compress(f_j.read())) | ||||
|                         os.unlink(json_path) | ||||
|  | ||||
|     def update_20(self): | ||||
|         for uuid, watch in self.data['watching'].items(): | ||||
|             if self.data['watching'][uuid].get('extract_title_as_title'): | ||||
|                 self.data['watching'][uuid]['use_page_title_in_list'] = self.data['watching'][uuid].get('extract_title_as_title') | ||||
|                 del self.data['watching'][uuid]['extract_title_as_title'] | ||||
|  | ||||
|         if self.data['settings']['application'].get('extract_title_as_title'): | ||||
|             self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title') | ||||
|  | ||||
|     def update_21(self): | ||||
|         if self.data['settings']['application'].get('timezone'): | ||||
|             self.data['settings']['application']['scheduler_timezone_default'] = self.data['settings']['application'].get('timezone') | ||||
|             del self.data['settings']['application']['timezone'] | ||||
|  | ||||
|  | ||||
|     # Some notification formats got the wrong name type | ||||
|     def update_23(self): | ||||
|  | ||||
|         def re_run(formats): | ||||
|             sys_n_format = self.data['settings']['application'].get('notification_format') | ||||
|             key_exists_as_value = next((k for k, v in formats.items() if v == sys_n_format), None) | ||||
|             if key_exists_as_value:  # key of "Plain text" | ||||
|                 logger.success(f"['settings']['application']['notification_format'] '{sys_n_format}' -> '{key_exists_as_value}'") | ||||
|                 self.data['settings']['application']['notification_format'] = key_exists_as_value | ||||
|  | ||||
|             for uuid, watch in self.data['watching'].items(): | ||||
|                 n_format = self.data['watching'][uuid].get('notification_format') | ||||
|                 key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None) | ||||
|                 if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text" | ||||
|                     logger.success(f"['watching'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'") | ||||
|                     self.data['watching'][uuid]['notification_format'] = key_exists_as_value  # should be 'text' or whatever | ||||
|  | ||||
|             for uuid, tag in self.data['settings']['application']['tags'].items(): | ||||
|                 n_format = self.data['settings']['application']['tags'][uuid].get('notification_format') | ||||
|                 key_exists_as_value = next((k for k, v in formats.items() if v == n_format), None) | ||||
|                 if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text" | ||||
|                     logger.success( | ||||
|                         f"['settings']['application']['tags'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'") | ||||
|                     self.data['settings']['application']['tags'][uuid][ | ||||
|                         'notification_format'] = key_exists_as_value  # should be 'text' or whatever | ||||
|  | ||||
|         from .notification import valid_notification_formats | ||||
|         formats = deepcopy(valid_notification_formats) | ||||
|         re_run(formats) | ||||
|         # And in previous versions, it was "text" instead of Plain text, Markdown instead of "Markdown to HTML" | ||||
|         formats['text'] = 'Text' | ||||
|         formats['markdown'] = 'Markdown' | ||||
|         re_run(formats) | ||||
|  | ||||
|     def add_notification_url(self, notification_url): | ||||
|          | ||||
|         logger.debug(f">>> Adding new notification_url - '{notification_url}'") | ||||
|   | ||||
| @@ -33,7 +33,7 @@ | ||||
|                                 <div id="notification-test-log" style="display: none;"><span class="pure-form-message-inline">Processing..</span></div> | ||||
|                             </div> | ||||
|                         </div> | ||||
|                         <div id="notification-customisation" class="pure-control-group"> | ||||
|                         <div class="pure-control-group grey-form-border"> | ||||
|                             <div class="pure-control-group"> | ||||
|                                 {{ render_field(form.notification_title, class="m-d notification-title", placeholder=settings_application['notification_title']) }} | ||||
|                                 <span class="pure-form-message-inline">Title for all notifications</span> | ||||
| @@ -70,7 +70,7 @@ | ||||
|                                     </tr> | ||||
|                                     <tr> | ||||
|                                         <td><code>{{ '{{watch_title}}' }}</code></td> | ||||
|                                         <td>The title of the watch.</td> | ||||
|                                         <td>The page title of the watch, uses <title> if not set, falls back to URL</td> | ||||
|                                     </tr> | ||||
|                                     <tr> | ||||
|                                         <td><code>{{ '{{watch_tag}}' }}</code></td> | ||||
| @@ -134,6 +134,12 @@ | ||||
|                                     <p> | ||||
|                                         URL encoding, use <strong>|urlencode</strong>, for example - <code>gets://hook-website.com/test.php?title={{ '{{ watch_title|urlencode }}' }}</code> | ||||
|                                     </p> | ||||
|                                     <p> | ||||
|                                         Regular-expression replace, use <strong>|regex_replace</strong>, for example -   <code>{{ "{{ \"hello world 123\" | regex_replace('[0-9]+', 'no-more-numbers') }}" }}</code> | ||||
|                                     </p> | ||||
|                                     <p> | ||||
|                                         For a complete reference of all Jinja2 built-in filters, users can refer to the <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters">https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters</a> | ||||
|                                     </p> | ||||
|                                 </div> | ||||
|                             </div> | ||||
|                             <div class="pure-control-group"> | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user