mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 06:37:41 +00:00 
			
		
		
		
	Compare commits
	
		
			113 Commits
		
	
	
		
			browser-no
			...
			parallel-d
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | ca63dad896 | ||
|   | bd9b72dbfa | ||
|   | 8473da4bdb | ||
|   | 762e2dacb2 | ||
|   | 62ea1f9b24 | ||
|   | 14a6ced8f4 | ||
|   | 465e5e2ecc | ||
|   | ada63a3200 | ||
|   | eef5425908 | ||
|   | 096bd21663 | ||
|   | 0f53233272 | ||
|   | faaa9937d6 | ||
|   | 950d59ccfa | ||
|   | bd3f0360e4 | ||
|   | 57347fd55c | ||
|   | 8ef782760a | ||
|   | 4e20fce82c | ||
|   | 7d8c127e1f | ||
|   | 0ca2acd38c | ||
|   | 2a0131d0f4 | ||
|   | 9ed236434e | ||
|   | ab0b85d088 | ||
|   | 66aec365c2 | ||
|   | e09cea60ef | ||
|   | f304ae19db | ||
|   | 2116b2cb93 | ||
|   | 8f580ac96b | ||
|   | a8cadc3d16 | ||
|   | c9290d73e0 | ||
|   | 2db5e906e9 | ||
|   | 0751bd371a | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 3ffa0805e9 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 3335270692 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | a7573b10ec | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | df945ad743 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 4536e95205 | ||
|   | 1479d7bd46 | ||
|   | 9ba2094f75 | ||
|   | 8aa012ba8e | ||
|   | 8bc6b10db1 | ||
|   | 76d799c95b | ||
|   | 7c8bdfcc9f | ||
|   | 01a938d7ce | ||
|   | e44853c439 | ||
|   | 3830bec891 | ||
|   | 88ab663330 | ||
|   | 68335b95c3 | ||
|   | 7bbfa0ef32 | ||
|   | e233d52931 | ||
|   | 181d32e82a | ||
|   | a51614f83d | ||
|   | 07f98d6bd3 | ||
|   | f71550da4d | ||
|   | 8c3d0d7e31 | ||
|   | 46658a85d6 | ||
|   | d699652955 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 9e88db5d9b | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 5d9c102aff | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | cb1c36d97d | ||
|   | cc29ba5ea9 | ||
|   | 6f371b1bc6 | ||
|   | 785dabd071 | ||
|   | 09914d54a0 | ||
|   | 58b5586674 | ||
|   | cb02ccc8b4 | ||
|   | ec692ed727 | ||
|   | 2fb2ea573e | ||
|   | ada2dc6112 | ||
|   | ad9024a4f0 | ||
|   | 047c10e23c | ||
|   | 4f83164544 | ||
|   | 6f926ed595 | ||
|   | 249dc55212 | ||
|   | 46252bc6f3 | ||
|   | 64350a2e78 | ||
|   | 2902c63a3b | ||
|   | 55b8588f1f | ||
|   | 02ecc4ae9a | ||
|   | 3ee50b7832 | ||
|   | 66ddd87ee4 | ||
|   | 233189e4f7 | ||
|   | b237fd7201 | ||
|   | 3c81efe2f4 | ||
|   | 0fcfb94690 | ||
|   | bb6d4c2756 | ||
|   | b59ce190ac | ||
|   | 80be1a30f2 | ||
|   | 93b4f79006 | ||
|   | 3009e46617 | ||
|   | 8f040a1a84 | ||
|   | 4dbab8d77a | ||
|   | cde42c8a49 | ||
|   | 3b9d19df43 | ||
|   | 6ad4acc9fc | ||
|   | 3e59521f48 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 0970c087c8 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 676c550e6e | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 78fa47f6f8 | ||
|   | 4aa5bb6da3 | ||
|   | f7dfc9bbb8 | ||
|   | 584b6e378d | ||
|   | 754febfd33 | ||
|   | 0c9c475f32 | ||
|   | e4baca1127 | ||
|   | bb61a35a54 | ||
|   | 4b9ae5a97c | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | c8caa0662d | ||
|   | f4e8d1963f | ||
|   | 45d5e961dc | ||
|   | 45f2863966 | ||
|   | 01c1ac4c0c | ||
|   | b2f9aec383 | ||
|   | a95aa67aef | 
							
								
								
									
										51
									
								
								.github/actions/extract-memory-report/action.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								.github/actions/extract-memory-report/action.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,51 @@ | ||||
| name: 'Extract Memory Test Report' | ||||
| description: 'Extracts and displays memory test report from a container' | ||||
| inputs: | ||||
|   container-name: | ||||
|     description: 'Name of the container to extract logs from' | ||||
|     required: true | ||||
|   python-version: | ||||
|     description: 'Python version for artifact naming' | ||||
|     required: true | ||||
|   output-dir: | ||||
|     description: 'Directory to store output logs' | ||||
|     required: false | ||||
|     default: 'output-logs' | ||||
|  | ||||
| runs: | ||||
|   using: "composite" | ||||
|   steps: | ||||
|     - name: Create output directory | ||||
|       shell: bash | ||||
|       run: | | ||||
|         mkdir -p ${{ inputs.output-dir }} | ||||
|  | ||||
|     - name: Dump container log | ||||
|       shell: bash | ||||
|       run: | | ||||
|         echo "Disabled for now" | ||||
| #        return | ||||
| #        docker logs ${{ inputs.container-name }} > ${{ inputs.output-dir }}/${{ inputs.container-name }}-stdout-${{ inputs.python-version }}.txt 2>&1 || echo "Could not get stdout" | ||||
| #        docker logs ${{ inputs.container-name }} 2> ${{ inputs.output-dir }}/${{ inputs.container-name }}-stderr-${{ inputs.python-version }}.txt || echo "Could not get stderr" | ||||
|  | ||||
|     - name: Extract and display memory test report | ||||
|       shell: bash | ||||
|       run: | | ||||
|         echo "Disabled for now" | ||||
| #        echo "Extracting test-memory.log from container..." | ||||
| #        docker cp ${{ inputs.container-name }}:/app/changedetectionio/test-memory.log ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log || echo "test-memory.log not found in container" | ||||
| # | ||||
| #        echo "=== Top 10 Highest Peak Memory Tests ===" | ||||
| #        if [ -f ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log ]; then | ||||
| #          grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | \ | ||||
| #            sed 's/.*Peak memory: //' | \ | ||||
| #            paste -d'|' - <(grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log) | \ | ||||
| #            sort -t'|' -k1 -nr | \ | ||||
| #            cut -d'|' -f2 | \ | ||||
| #            head -10 | ||||
| #          echo "" | ||||
| #          echo "=== Full Memory Test Report ===" | ||||
| #          cat ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | ||||
| #        else | ||||
| #          echo "No memory log available" | ||||
| #        fi | ||||
							
								
								
									
										8
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							| @@ -4,11 +4,11 @@ updates: | ||||
|     directory: / | ||||
|     schedule: | ||||
|       interval: "weekly" | ||||
|     "caronc/apprise": | ||||
|       versioning-strategy: "increase" | ||||
|       schedule: | ||||
|         interval: "daily" | ||||
|     groups: | ||||
|       all: | ||||
|         patterns: | ||||
|         - "*" | ||||
|   - package-ecosystem: pip | ||||
|     directory: / | ||||
|     schedule: | ||||
|       interval: "weekly" | ||||
|   | ||||
							
								
								
									
										6
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							| @@ -34,7 +34,7 @@ jobs: | ||||
|  | ||||
|     # Initializes the CodeQL tools for scanning. | ||||
|     - name: Initialize CodeQL | ||||
|       uses: github/codeql-action/init@v3 | ||||
|       uses: github/codeql-action/init@v4 | ||||
|       with: | ||||
|         languages: ${{ matrix.language }} | ||||
|         # If you wish to specify custom queries, you can do so here or in a config file. | ||||
| @@ -45,7 +45,7 @@ jobs: | ||||
|     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java). | ||||
|     # If this step fails, then you should remove it and run the build manually (see below) | ||||
|     - name: Autobuild | ||||
|       uses: github/codeql-action/autobuild@v3 | ||||
|       uses: github/codeql-action/autobuild@v4 | ||||
|  | ||||
|     # ℹ️ Command-line programs to run using the OS shell. | ||||
|     # 📚 https://git.io/JvXDl | ||||
| @@ -59,4 +59,4 @@ jobs: | ||||
|     #   make release | ||||
|  | ||||
|     - name: Perform CodeQL Analysis | ||||
|       uses: github/codeql-action/analyze@v3 | ||||
|       uses: github/codeql-action/analyze@v4 | ||||
|   | ||||
							
								
								
									
										12
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							| @@ -45,6 +45,14 @@ jobs: | ||||
|         with: | ||||
|           python-version: 3.11 | ||||
|  | ||||
|       - name: Cache pip packages | ||||
|         uses: actions/cache@v4 | ||||
|         with: | ||||
|           path: ~/.cache/pip | ||||
|           key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} | ||||
|           restore-keys: | | ||||
|             ${{ runner.os }}-pip- | ||||
|  | ||||
|       - name: Install dependencies | ||||
|         run: | | ||||
|           python -m pip install --upgrade pip | ||||
| @@ -95,7 +103,7 @@ jobs: | ||||
|           push: true | ||||
|           tags: | | ||||
|             ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8 | ||||
|           cache-from: type=gha | ||||
|           cache-to: type=gha,mode=max | ||||
|  | ||||
| @@ -133,7 +141,7 @@ jobs: | ||||
|           file: ./Dockerfile | ||||
|           push: true | ||||
|           tags: ${{ steps.meta.outputs.tags }} | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8 | ||||
|           cache-from: type=gha | ||||
|           cache-to: type=gha,mode=max | ||||
| # Looks like this was disabled | ||||
|   | ||||
							
								
								
									
										33
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										33
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							| @@ -21,20 +21,20 @@ jobs: | ||||
|     - name: Build a binary wheel and a source tarball | ||||
|       run: python3 -m build | ||||
|     - name: Store the distribution packages | ||||
|       uses: actions/upload-artifact@v4 | ||||
|       uses: actions/upload-artifact@v5 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|  | ||||
|  | ||||
|   test-pypi-package: | ||||
|     name: Test the built 📦 package works basically. | ||||
|     name: Test the built package works basically. | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: | ||||
|     - build | ||||
|     steps: | ||||
|     - name: Download all the dists | ||||
|       uses: actions/download-artifact@v5 | ||||
|       uses: actions/download-artifact@v6 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
| @@ -42,18 +42,39 @@ jobs: | ||||
|       uses: actions/setup-python@v6 | ||||
|       with: | ||||
|         python-version: '3.11' | ||||
|  | ||||
|     - name: Test that the basic pip built package runs without error | ||||
|       run: | | ||||
|         set -ex | ||||
|         ls -alR  | ||||
|          | ||||
|         # Find and install the first .whl file | ||||
|         find dist -type f -name "*.whl" -exec pip3 install {} \; -quit | ||||
|         # Install the first wheel found in dist/ | ||||
|         WHEEL=$(find dist -type f -name "*.whl" -print -quit) | ||||
|         echo Installing $WHEEL | ||||
|         python3 -m pip install --upgrade pip | ||||
|         python3 -m pip install "$WHEEL" | ||||
|         changedetection.io -d /tmp -p 10000 & | ||||
|          | ||||
|         sleep 3 | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null | ||||
|          | ||||
|         # --- API test --- | ||||
|         # This also means that the docs/api-spec.yml was shipped and could be read | ||||
|         test -f /tmp/url-watches.json | ||||
|         API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json) | ||||
|         echo Test API KEY is $API_KEY | ||||
|         curl -X POST "http://127.0.0.1:10000/api/v1/watch" \ | ||||
|           -H "x-api-key: ${API_KEY}" \ | ||||
|           -H "Content-Type: application/json" \ | ||||
|           --show-error --fail \ | ||||
|           --retry 6 --retry-delay 1 --retry-connrefused \ | ||||
|           -d '{ | ||||
|             "url": "https://example.com", | ||||
|             "title": "Example Site Monitor", | ||||
|             "time_between_check": { "hours": 1 } | ||||
|           }' | ||||
|            | ||||
|         killall changedetection.io | ||||
|  | ||||
|  | ||||
| @@ -72,7 +93,7 @@ jobs: | ||||
|  | ||||
|     steps: | ||||
|     - name: Download all the dists | ||||
|       uses: actions/download-artifact@v5 | ||||
|       uses: actions/download-artifact@v6 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|   | ||||
							
								
								
									
										12
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							| @@ -38,8 +38,6 @@ jobs: | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm/v8 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm64/v8 | ||||
|             dockerfile: ./Dockerfile | ||||
|           # Alpine Dockerfile platforms (musl via alpine check) | ||||
|           - platform: linux/amd64 | ||||
|             dockerfile: ./.github/test/Dockerfile-alpine | ||||
| @@ -52,6 +50,14 @@ jobs: | ||||
|           with: | ||||
|             python-version: 3.11 | ||||
|  | ||||
|         - name: Cache pip packages | ||||
|           uses: actions/cache@v4 | ||||
|           with: | ||||
|             path: ~/.cache/pip | ||||
|             key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }} | ||||
|             restore-keys: | | ||||
|               ${{ runner.os }}-pip- | ||||
|  | ||||
|         # Just test that the build works, some libraries won't compile on ARM/rPi etc | ||||
|         - name: Set up QEMU | ||||
|           uses: docker/setup-qemu-action@v3 | ||||
| @@ -76,5 +82,5 @@ jobs: | ||||
|             file: ${{ matrix.dockerfile }} | ||||
|             platforms: ${{ matrix.platform }} | ||||
|             cache-from: type=gha | ||||
|             cache-to: type=gha,mode=max | ||||
|             cache-to: type=gha,mode=min | ||||
|  | ||||
|   | ||||
							
								
								
									
										443
									
								
								.github/workflows/test-stack-reusable-workflow.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										443
									
								
								.github/workflows/test-stack-reusable-workflow.yml
									
									
									
									
										vendored
									
									
								
							| @@ -15,138 +15,294 @@ on: | ||||
|         default: false | ||||
|  | ||||
| jobs: | ||||
|   test-application: | ||||
|   # Build the Docker image once and share it with all test jobs | ||||
|   build: | ||||
|     runs-on: ubuntu-latest | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       # Mainly just for link/flake8 | ||||
|       - name: Set up Python ${{ env.PYTHON_VERSION }} | ||||
|         uses: actions/setup-python@v6 | ||||
|         with: | ||||
|           python-version: ${{ env.PYTHON_VERSION }} | ||||
|  | ||||
|       - name: Cache pip packages | ||||
|         uses: actions/cache@v4 | ||||
|         with: | ||||
|           path: ~/.cache/pip | ||||
|           key: ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt') }} | ||||
|           restore-keys: | | ||||
|             ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}- | ||||
|             ${{ runner.os }}-pip- | ||||
|  | ||||
|       - name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }} | ||||
|         run: | | ||||
|           echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----" | ||||
|           # Build a changedetection.io container and start testing inside | ||||
|           docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio . | ||||
|           # Debug info | ||||
|           docker run test-changedetectionio  bash -c 'pip list'          | ||||
|           docker run test-changedetectionio bash -c 'pip list' | ||||
|  | ||||
|       - name: We should be Python ${{ env.PYTHON_VERSION }} ... | ||||
|         run: |          | ||||
|           docker run test-changedetectionio  bash -c 'python3 --version' | ||||
|  | ||||
|       - name: Spin up ancillary testable services | ||||
|         run: | | ||||
|            | ||||
|           docker network create changedet-network | ||||
|            | ||||
|           # Selenium | ||||
|           docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4 | ||||
|            | ||||
|           # SocketPuppetBrowser + Extra for custom browser test | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest                     | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url  -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest | ||||
|           docker run test-changedetectionio bash -c 'python3 --version' | ||||
|  | ||||
|       - name: Spin up ancillary SMTP+Echo message test server | ||||
|       - name: Save Docker image | ||||
|         run: | | ||||
|           # Debug SMTP server/echo message back server | ||||
|           docker run --network changedet-network -d -p 11025:11025 -p 11080:11080  --hostname mailserver test-changedetectionio  bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py' | ||||
|           docker ps | ||||
|           docker save test-changedetectionio -o /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Show docker container state and other debug info | ||||
|       - name: Upload Docker image artifact | ||||
|         uses: actions/upload-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp/test-changedetectionio.tar | ||||
|           retention-days: 1 | ||||
|  | ||||
|   # Unit tests (lightweight, no ancillary services needed) | ||||
|   unit-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           set -x | ||||
|           echo "Running processes in docker..." | ||||
|           docker ps | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Run Unit Tests | ||||
|         run: | | ||||
|           # Unit tests | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff' | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model' | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security' | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver' | ||||
|           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff' | ||||
|           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model' | ||||
|           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security' | ||||
|           docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver' | ||||
|  | ||||
|       - name: Test built container with Pytest (generally as requests/plaintext fetching) | ||||
|   # Basic pytest tests with ancillary services | ||||
|   basic-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 25 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           # All tests | ||||
|           echo "run test with pytest" | ||||
|           # The default pytest logger_level is TRACE | ||||
|           # To change logger_level for pytest(test/conftest.py), | ||||
|           # append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG' | ||||
|           docker run --name test-cdio-basic-tests --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio && ./run_basic_tests.sh' | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
| # PLAYWRIGHT/NODE-> CDP | ||||
|       - name: Playwright and SocketPuppetBrowser - Specific tests in built container | ||||
|       - name: Test built container with Pytest | ||||
|         run: | | ||||
|           # Playwright via Sockpuppetbrowser fetch | ||||
|           # tests/visualselector/test_fetch_data.py will do browser steps   | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest  -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|           docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network | ||||
|           docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh' | ||||
|  | ||||
|       - name: Extract memory report and logs | ||||
|         if: always() | ||||
|         uses: ./.github/actions/extract-memory-report | ||||
|         with: | ||||
|           container-name: test-cdio-basic-tests | ||||
|           python-version: ${{ env.PYTHON_VERSION }} | ||||
|  | ||||
|       - name: Playwright and SocketPuppetBrowser - Headers and requests | ||||
|         run: |        | ||||
|           # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py; pwd;find .' | ||||
|       - name: Store test artifacts | ||||
|         if: always() | ||||
|         uses: actions/upload-artifact@v5 | ||||
|         with: | ||||
|           name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }} | ||||
|           path: output-logs | ||||
|  | ||||
|       - name: Playwright and SocketPuppetBrowser - Restock detection | ||||
|         run: |                             | ||||
|           # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|   # Playwright tests | ||||
|   playwright-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
| # STRAIGHT TO CDP | ||||
|       - name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container | ||||
|         if: ${{ inputs.skip-pypuppeteer == false }} | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           # Playwright via Sockpuppetbrowser fetch  | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks | ||||
|         if: ${{ inputs.skip-pypuppeteer == false }} | ||||
|       - name: Spin up ancillary services | ||||
|         run: | | ||||
|           # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers | ||||
|           docker run --name "changedet" --hostname changedet --rm  -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest | ||||
|  | ||||
|       - name: Pyppeteer and SocketPuppetBrowser - Restock detection | ||||
|         if: ${{ inputs.skip-pypuppeteer == false }} | ||||
|         run: |                             | ||||
|           # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet"  -e "FAST_PUPPETEER_CHROME_FETCHER=True"  -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|       - name: Playwright - Specific tests in built container | ||||
|         run: | | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|  | ||||
|       - name: Playwright - Headers and requests | ||||
|         run: | | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .' | ||||
|  | ||||
|       - name: Playwright - Restock detection | ||||
|         run: | | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|  | ||||
|   # Pyppeteer tests | ||||
|   pyppeteer-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     if: ${{ inputs.skip-pypuppeteer == false }} | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Spin up ancillary services | ||||
|         run: | | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest | ||||
|  | ||||
|       - name: Pyppeteer - Specific tests in built container | ||||
|         run: | | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|  | ||||
|       - name: Pyppeteer - Headers and requests checks | ||||
|         run: | | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
|       - name: Pyppeteer - Restock detection | ||||
|         run: | | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|  | ||||
|   # Selenium tests | ||||
|   selenium-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Spin up ancillary services | ||||
|         run: | | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4 | ||||
|           sleep 3 | ||||
|  | ||||
|       - name: Specific tests for headers and requests checks with Selenium | ||||
|         run: | | ||||
|  | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
| # SELENIUM | ||||
|       - name: Specific tests in built container for Selenium | ||||
|         run: | | ||||
|           # Selenium fetch | ||||
          docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
|  | ||||
|   # SMTP tests | ||||
|   smtp-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
          docker load -i /tmp/test-changedetectionio.tar
|  | ||||
|       - name: Spin up SMTP test server | ||||
|         run: | | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py' | ||||
|  | ||||
| # OTHER STUFF | ||||
|       - name: Test SMTP notification mime types | ||||
|         run: | | ||||
|           # SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above | ||||
|           # "mailserver" hostname defined above | ||||
|           docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py' | ||||
|  | ||||
|   # Proxy tests | ||||
|   proxy-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Spin up services | ||||
|         run: | | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4 | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest | ||||
|  | ||||
|       - name: Test proxy Squid style interaction | ||||
|         run: | | ||||
|           cd changedetectionio | ||||
|           ./run_proxy_tests.sh | ||||
|           docker ps | ||||
|           cd .. | ||||
|  | ||||
|       - name: Test proxy SOCKS5 style interaction | ||||
| @@ -155,28 +311,65 @@ jobs: | ||||
|           ./run_socks_proxy_tests.sh | ||||
|           cd .. | ||||
|  | ||||
|   # Custom browser URL tests | ||||
|   custom-browser-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Spin up ancillary services | ||||
|         run: | | ||||
|           docker network create changedet-network | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest | ||||
|  | ||||
|       - name: Test custom browser URL | ||||
|         run: | | ||||
|           cd changedetectionio | ||||
|           ./run_custom_browser_url_tests.sh | ||||
|           cd .. | ||||
|  | ||||
|   # Container startup tests | ||||
|   container-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
          docker load -i /tmp/test-changedetectionio.tar
|  | ||||
|       - name: Test container starts+runs basically without error | ||||
|         run: | | ||||
|           docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it | ||||
|           curl --retry-connrefused --retry 6  -s http://localhost:5556 |grep -q checkbox-uuid | ||||
|            | ||||
|           # and IPv6 | ||||
|           curl --retry-connrefused --retry 6  -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|  | ||||
          # Check whether TRACE log is enabled.
          # Also, check whether TRACE came from STDOUT
|           docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1 | ||||
|           # Check whether DEBUG is came from STDOUT | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 | ||||
|  | ||||
|           docker kill test-changedetectionio | ||||
|  | ||||
|       - name: Test HTTPS SSL mode | ||||
| @@ -184,78 +377,66 @@ jobs: | ||||
|           openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost" | ||||
|           docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it | ||||
|           # -k because its self-signed | ||||
|           curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid | ||||
|        | ||||
|           docker kill test-changedetectionio-ssl | ||||
|  | ||||
|       - name: Test IPv6 Mode | ||||
|         run: | | ||||
|           # IPv6 - :: bind to all interfaces inside container (like 0.0.0.0), ::1 would be localhost only | ||||
|           docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it on localhost | ||||
|           curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid | ||||
|           docker kill test-changedetectionio-ipv6 | ||||
|  | ||||
|   # Signal tests | ||||
|   signal-tests: | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: build | ||||
|     timeout-minutes: 10 | ||||
|     env: | ||||
|       PYTHON_VERSION: ${{ inputs.python-version }} | ||||
|     steps: | ||||
|       - uses: actions/checkout@v5 | ||||
|  | ||||
|       - name: Download Docker image artifact | ||||
|         uses: actions/download-artifact@v5 | ||||
|         with: | ||||
|           name: test-changedetectionio-${{ env.PYTHON_VERSION }} | ||||
|           path: /tmp | ||||
|  | ||||
|       - name: Load Docker image | ||||
|         run: | | ||||
|           docker load -i /tmp/test-changedetectionio.tar | ||||
|  | ||||
|       - name: Test SIGTERM and SIGINT signal shutdown | ||||
|         run: | | ||||
|            | ||||
|           echo SIGINT Shutdown request test | ||||
|           docker run --name sig-test -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           echo ">>> Sending SIGINT to sig-test container" | ||||
|           docker kill --signal=SIGINT sig-test | ||||
|           sleep 3 | ||||
|           # invert the check (it should be not 0/not running) | ||||
|           docker ps | ||||
|           # check signal catch(STDERR) log. Because of | ||||
|           # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level) | ||||
|           docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1 | ||||
|           test -z "`docker ps|grep sig-test`" | ||||
          if [ $? -ne 0 ]; then
|             echo "Looks like container was running when it shouldnt be" | ||||
|             docker ps | ||||
|             exit 1 | ||||
|           fi | ||||
|            | ||||
|           # @todo - scan the container log to see the right "graceful shutdown" text exists  | ||||
|           docker rm sig-test | ||||
|            | ||||
|  | ||||
|           echo SIGTERM Shutdown request test | ||||
|           docker run --name sig-test -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           echo ">>> Sending SIGTERM to sig-test container" | ||||
|           docker kill --signal=SIGTERM sig-test | ||||
|           sleep 3 | ||||
|           # invert the check (it should be not 0/not running) | ||||
|           docker ps | ||||
|           # check signal catch(STDERR) log. Because of | ||||
|           # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level) | ||||
|           docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1 | ||||
|           test -z "`docker ps|grep sig-test`" | ||||
          if [ $? -ne 0 ]; then
|             echo "Looks like container was running when it shouldnt be" | ||||
|             docker ps | ||||
|             exit 1 | ||||
|           fi | ||||
|            | ||||
|           # @todo - scan the container log to see the right "graceful shutdown" text exists            | ||||
|           docker rm sig-test | ||||
|  | ||||
|       - name: Dump container log | ||||
|         if: always() | ||||
|         run: | | ||||
|           mkdir output-logs | ||||
          # NOTE(review): this job's container is named "sig-test" and is removed on success;
          # guard so the always() log-dump step cannot itself fail the job — confirm intended container name.
          docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt || true
          docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt || true
|  | ||||
|       - name: Store everything including test-datastore | ||||
|         if: always() | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }} | ||||
|           path: . | ||||
|   | ||||
| @@ -36,6 +36,7 @@ ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl" | ||||
| ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1 | ||||
| RUN --mount=type=cache,target=/tmp/pip-cache \ | ||||
|     pip install \ | ||||
|     --prefer-binary \ | ||||
|     --extra-index-url https://www.piwheels.org/simple \ | ||||
|     --extra-index-url https://pypi.anaconda.org/ARM-software/simple \ | ||||
|     --cache-dir=/tmp/pip-cache \ | ||||
| @@ -47,6 +48,7 @@ RUN --mount=type=cache,target=/tmp/pip-cache \ | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported) | ||||
| RUN --mount=type=cache,target=/tmp/pip-cache \ | ||||
|     pip install \ | ||||
|     --prefer-binary \ | ||||
|     --cache-dir=/tmp/pip-cache \ | ||||
|     --target=/dependencies \ | ||||
|     playwright~=1.48.0 \ | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| recursive-include changedetectionio/api * | ||||
| include docs/api-spec.yaml | ||||
| recursive-include changedetectionio/blueprint * | ||||
| recursive-include changedetectionio/conditions * | ||||
| recursive-include changedetectionio/content_fetchers * | ||||
| recursive-include changedetectionio/jinja2_custom * | ||||
| recursive-include changedetectionio/model * | ||||
| recursive-include changedetectionio/notification * | ||||
| recursive-include changedetectionio/processors * | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| # Read more https://github.com/dgtlmoon/changedetection.io/wiki | ||||
|  | ||||
| __version__ = '0.50.14' | ||||
| __version__ = '0.50.33' | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from json.decoder import JSONDecodeError | ||||
|   | ||||
| @@ -1,9 +1,22 @@ | ||||
| import os | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request | ||||
| import validators | ||||
| from functools import wraps | ||||
| from . import auth, validate_openapi_request | ||||
| from ..validate_url import is_safe_valid_url | ||||
|  | ||||
|  | ||||
def default_content_type(content_type='text/plain'):
    """Decorator factory: default the request's Content-Type when the client sent none.

    Some clients POST bodies without any Content-Type header, which makes
    downstream OpenAPI request validation reject them (see #3547 / #3542 where
    this decorator is applied). Wrapping a resource method with this decorator
    patches the WSGI environ so later code sees *content_type* instead of an
    empty value.

    Args:
        content_type: Fallback MIME type applied when the incoming request
            carries no Content-Type header. Defaults to ``'text/plain'``.

    Returns:
        A decorator that wraps a Flask view/resource method.
    """
    def decorator(f):
        @wraps(f)
        def wrapper(*args, **kwargs):
            if not request.content_type:
                # flask.request.content_type is read-only but derived from the
                # WSGI environ, so patch the environ key directly.
                request.environ['CONTENT_TYPE'] = content_type
            return f(*args, **kwargs)
        return wrapper
    return decorator
|  | ||||
|  | ||||
| class Import(Resource): | ||||
| @@ -12,6 +25,7 @@ class Import(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @default_content_type('text/plain') #3547 #3542 | ||||
|     @validate_openapi_request('importWatches') | ||||
|     def post(self): | ||||
|         """Import a list of watched URLs.""" | ||||
| @@ -35,14 +49,13 @@ class Import(Resource): | ||||
|  | ||||
|         urls = request.get_data().decode('utf8').splitlines() | ||||
|         added = [] | ||||
|         allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
|         for url in urls: | ||||
|             url = url.strip() | ||||
|             if not len(url): | ||||
|                 continue | ||||
|  | ||||
|             # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|             if not validators.url(url, simple_host=allow_simplehost): | ||||
|             if not is_safe_valid_url(url): | ||||
|                 return f"Invalid or unsupported URL - {url}", 400 | ||||
|  | ||||
|             if dedupe and self.datastore.url_exists(url): | ||||
|   | ||||
| @@ -1,12 +1,12 @@ | ||||
| import os | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from changedetectionio.validate_url import is_safe_valid_url | ||||
|  | ||||
| from flask_expects_json import expects_json | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio import worker_handler | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request, make_response, send_from_directory | ||||
| import validators | ||||
| from . import auth | ||||
| import copy | ||||
|  | ||||
| @@ -121,6 +121,10 @@ class Watch(Resource): | ||||
|         if validation_error: | ||||
|             return validation_error, 400 | ||||
|  | ||||
|         # XSS etc protection | ||||
|         if request.json.get('url') and not is_safe_valid_url(request.json.get('url')): | ||||
|             return "Invalid URL", 400 | ||||
|  | ||||
|         watch.update(request.json) | ||||
|  | ||||
|         return "OK", 200 | ||||
| @@ -226,9 +230,7 @@ class CreateWatch(Resource): | ||||
|         json_data = request.get_json() | ||||
|         url = json_data['url'].strip() | ||||
|  | ||||
|         # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|         allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
|         if not validators.url(url, simple_host=allow_simplehost): | ||||
|         if not is_safe_valid_url(url): | ||||
|             return "Invalid or unsupported URL", 400 | ||||
|  | ||||
|         if json_data.get('proxy'): | ||||
|   | ||||
| @@ -1,10 +1,7 @@ | ||||
| import copy | ||||
| import yaml | ||||
| import functools | ||||
| from flask import request, abort | ||||
| from loguru import logger | ||||
| from openapi_core import OpenAPI | ||||
| from openapi_core.contrib.flask import FlaskOpenAPIRequest | ||||
| from . import api_schema | ||||
| from ..model import watch_base | ||||
|  | ||||
| @@ -34,8 +31,16 @@ schema_delete_notification_urls['required'] = ['notification_urls'] | ||||
|  | ||||
| @functools.cache | ||||
| def get_openapi_spec(): | ||||
|     """Lazy load OpenAPI spec and dependencies only when validation is needed.""" | ||||
|     import os | ||||
|     import yaml  # Lazy import - only loaded when API validation is actually used | ||||
|     from openapi_core import OpenAPI  # Lazy import - saves ~10.7 MB on startup | ||||
|  | ||||
|     spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml') | ||||
|     if not os.path.exists(spec_path): | ||||
|         # Possibly for pip3 packages | ||||
|         spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml') | ||||
|  | ||||
|     with open(spec_path, 'r') as f: | ||||
|         spec_dict = yaml.safe_load(f) | ||||
|     _openapi_spec = OpenAPI.from_dict(spec_dict) | ||||
| @@ -49,6 +54,9 @@ def validate_openapi_request(operation_id): | ||||
|             try: | ||||
|                 # Skip OpenAPI validation for GET requests since they don't have request bodies | ||||
|                 if request.method.upper() != 'GET': | ||||
|                     # Lazy import - only loaded when actually validating a request | ||||
|                     from openapi_core.contrib.flask import FlaskOpenAPIRequest | ||||
|  | ||||
|                     spec = get_openapi_spec() | ||||
|                     openapi_request = FlaskOpenAPIRequest(request) | ||||
|                     result = spec.unmarshal_request(openapi_request) | ||||
|   | ||||
| @@ -334,6 +334,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                             if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change): | ||||
|                                 watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time)) | ||||
|  | ||||
|                             # Explicitly delete large content variables to free memory IMMEDIATELY after saving | ||||
|                             # These are no longer needed after being saved to history | ||||
|                             del contents | ||||
|  | ||||
|                             # Send notifications on second+ check | ||||
|                             if watch.history_n >= 2: | ||||
|                                 logger.info(f"Change detected in UUID {uuid} - {watch['url']}") | ||||
| @@ -372,6 +376,12 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                 datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3), | ||||
|                                                                'check_count': count}) | ||||
|  | ||||
|                 # NOW clear fetcher content - after all processing is complete | ||||
|                 # This is the last point where we need the fetcher data | ||||
|                 if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher: | ||||
|                     update_handler.fetcher.clear_content() | ||||
|                     logger.debug(f"Cleared fetcher content for UUID {uuid}") | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}") | ||||
|             logger.error(f"Worker {worker_id} traceback:", exc_info=True) | ||||
| @@ -392,7 +402,28 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                         #logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}") | ||||
|                         watch_check_update.send(watch_uuid=watch['uuid']) | ||||
|  | ||||
|                     update_handler = None | ||||
|                     # Explicitly clean up update_handler and all its references | ||||
|                     if update_handler: | ||||
|                         # Clear fetcher content using the proper method | ||||
|                         if hasattr(update_handler, 'fetcher') and update_handler.fetcher: | ||||
|                             update_handler.fetcher.clear_content() | ||||
|  | ||||
|                         # Clear processor references | ||||
|                         if hasattr(update_handler, 'content_processor'): | ||||
|                             update_handler.content_processor = None | ||||
|  | ||||
|                         update_handler = None | ||||
|  | ||||
|                     # Clear local contents variable if it still exists | ||||
|                     if 'contents' in locals(): | ||||
|                         del contents | ||||
|  | ||||
|                     # Note: We don't set watch = None here because: | ||||
|                     # 1. watch is just a local reference to datastore.data['watching'][uuid] | ||||
|                     # 2. Setting it to None doesn't affect the datastore | ||||
|                     # 3. GC can't collect the object anyway (still referenced by datastore) | ||||
|                     # 4. It would just cause confusion | ||||
|  | ||||
|                     logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s") | ||||
|                 except Exception as cleanup_error: | ||||
|                     logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}") | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT | ||||
| from changedetectionio.content_fetchers.base import manage_user_agent | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -33,7 +33,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     def long_task(uuid, preferred_proxy): | ||||
|         import time | ||||
|         from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|         status = {'status': '', 'length': 0, 'text': ''} | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
|  | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from changedetectionio.jinja2_custom import render as jinja_render | ||||
| from changedetectionio.notification.handler import apply_service_tweaks | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from feedgen.feed import FeedGenerator | ||||
| from flask import Blueprint, make_response, request, url_for, redirect | ||||
| @@ -120,9 +121,13 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                     html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]), | ||||
|                                                  newest_version_file_contents=watch.get_history_snapshot(dates[-1]), | ||||
|                                                  include_equal=False, | ||||
|                                                  line_feed_sep="<br>", | ||||
|                                                  html_colour=html_colour_enable | ||||
|                                                  line_feed_sep="<br>" | ||||
|                                                  ) | ||||
|  | ||||
|  | ||||
|                     requested_output_format = 'htmlcolor' if html_colour_enable else 'html' | ||||
|                     html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format) | ||||
|  | ||||
|                 except FileNotFoundError as e: | ||||
|                     html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found." | ||||
|  | ||||
|   | ||||
| @@ -119,7 +119,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                                 hide_remove_pass=os.getenv("SALTED_PASS", False), | ||||
|                                 min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)), | ||||
|                                 settings_application=datastore.data['settings']['application'], | ||||
|                                 timezone_default_config=datastore.data['settings']['application'].get('timezone'), | ||||
|                                 timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'), | ||||
|                                 utc_time=utc_time, | ||||
|                                 ) | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| {% extends 'base.html' %} | ||||
|  | ||||
| {% block content %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script> | ||||
|     const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}"; | ||||
| @@ -72,25 +72,23 @@ | ||||
|                         <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page) | ||||
|                         </span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.application.form.rss_content_format) }} | ||||
|                         <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }} | ||||
|                         <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span> | ||||
|                     </div> | ||||
|                 {% if form.requests.proxy %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                         {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                         Choose a default proxy for all watches | ||||
|                         </span> | ||||
|                     <div class="grey-form-border"> | ||||
|                         <div class="pure-control-group"> | ||||
|                             {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }} | ||||
|                         </div> | ||||
|                         <div class="pure-control-group"> | ||||
|                             {{ render_field(form.application.form.rss_content_format) }} | ||||
|                             <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span> | ||||
|                         </div> | ||||
|                         <div class="pure-control-group"> | ||||
|                             {{ render_checkbox_field(form.application.form.rss_reader_mode) }} | ||||
|                             <span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span> | ||||
|                         </div> | ||||
|                     </div> | ||||
|                 {% endif %} | ||||
|                 </fieldset> | ||||
|             </div> | ||||
|  | ||||
| @@ -133,6 +131,10 @@ | ||||
|                     <span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br> | ||||
|                     Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_field(form.requests.form.timeout) }} | ||||
|                     <span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group inline-radio"> | ||||
|                     {{ render_field(form.requests.form.default_ua) }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
| @@ -191,6 +193,12 @@ nav | ||||
|                         </ul> | ||||
|                      </span> | ||||
|                     </fieldset> | ||||
|                     <fieldset class="pure-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.strip_ignored_lines) }} | ||||
|                         <span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br> | ||||
|                         <i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc. | ||||
|                         </span> | ||||
|                     </fieldset> | ||||
|            </div> | ||||
|  | ||||
|             <div class="tab-pane-inner" id="api"> | ||||
| @@ -230,11 +238,9 @@ nav | ||||
|                     <p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p> | ||||
|                     <p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p> | ||||
|                     <p> | ||||
|                        {{ render_field(form.application.form.timezone) }} | ||||
|                        {{ render_field(form.application.form.scheduler_timezone_default) }} | ||||
|                         <datalist id="timezones" style="display: none;"> | ||||
|                             {% for tz_name in available_timezones %} | ||||
|                                 <option value="{{ tz_name }}">{{ tz_name }}</option> | ||||
|                             {% endfor %} | ||||
|                             {%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%} | ||||
|                         </datalist> | ||||
|                     </p> | ||||
|                 </div> | ||||
| @@ -308,17 +314,27 @@ nav | ||||
|                <p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites. | ||||
|  | ||||
|                 <div class="pure-control-group" id="extra-proxies-setting"> | ||||
|                 {{ render_field(form.requests.form.extra_proxies) }} | ||||
|                 {{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }} | ||||
|                 <span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br> | ||||
|                 <span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span> | ||||
|                 {% if form.requests.proxy %} | ||||
|                 <div> | ||||
|                 <br> | ||||
|                     <div class="inline-radio"> | ||||
|                         {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }} | ||||
|                         <span class="pure-form-message-inline">Choose a default proxy for all watches</span> | ||||
|                     </div> | ||||
|                 </div> | ||||
|                 {% endif %} | ||||
|                 </div> | ||||
|                 <div class="pure-control-group" id="extra-browsers-setting"> | ||||
|                     <p> | ||||
|                     <span class="pure-form-message-inline"><i>Extra Browsers</i> can be attached to further defeat CAPTCHA's on websites that are particularly hard to scrape.</span><br> | ||||
|                     <span class="pure-form-message-inline">Simply paste the connection address into the box, <a href="https://changedetection.io/tutorial/using-bright-datas-scraping-browser-pass-captchas-and-other-protection-when-monitoring">More instructions and examples here</a> </span> | ||||
|                     </p> | ||||
|                     {{ render_field(form.requests.form.extra_browsers) }} | ||||
|                     {{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }} | ||||
|                 </div> | ||||
|              | ||||
|             </div> | ||||
|             <div id="actions"> | ||||
|                 <div class="pure-control-group"> | ||||
|   | ||||
| @@ -76,14 +76,14 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat | ||||
|  | ||||
|     elif (op == 'notification-default'): | ||||
|         from changedetectionio.notification import ( | ||||
|             default_notification_format_for_watch | ||||
|             USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|         ) | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.data['watching'][uuid]['notification_title'] = None | ||||
|                 datastore.data['watching'][uuid]['notification_body'] = None | ||||
|                 datastore.data['watching'][uuid]['notification_urls'] = [] | ||||
|                 datastore.data['watching'][uuid]['notification_format'] = default_notification_format_for_watch | ||||
|                 datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches set to use default notification settings") | ||||
|  | ||||
|   | ||||
| @@ -187,7 +187,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|  | ||||
|             tz_name = time_schedule_limit.get('timezone') | ||||
|             if not tz_name: | ||||
|                 tz_name = datastore.data['settings']['application'].get('timezone', 'UTC') | ||||
|                 tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip()) | ||||
|  | ||||
|             if time_schedule_limit and time_schedule_limit.get('enabled'): | ||||
|                 try: | ||||
| @@ -257,7 +257,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'), | ||||
|                 'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch), | ||||
|                 'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid), | ||||
|                 'timezone_default_config': datastore.data['settings']['application'].get('timezone'), | ||||
|                 'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'), | ||||
|                 'using_global_webdriver_wait': not default['webdriver_delay'], | ||||
|                 'uuid': uuid, | ||||
|                 'watch': watch, | ||||
|   | ||||
| @@ -2,6 +2,7 @@ from flask import Blueprint, request, make_response | ||||
| import random | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.notification_service import NotificationContextData | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
|  | ||||
| @@ -19,6 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|         import apprise | ||||
|         from changedetectionio.notification.handler import process_notification | ||||
|         from changedetectionio.notification.apprise_plugin.assets import apprise_asset | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|         from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler | ||||
|  | ||||
| @@ -61,16 +63,20 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|             return 'Error: No Notification URLs set/found' | ||||
|  | ||||
|         for n_url in notification_urls: | ||||
|             # We are ONLY validating the apprise:// part here, convert all tags to something so as not to break apprise URLs | ||||
|             generic_notification_context_data = NotificationContextData() | ||||
|             generic_notification_context_data.set_random_for_validation() | ||||
|             n_url = jinja_render(template_str=n_url, **generic_notification_context_data).strip() | ||||
|             if len(n_url.strip()): | ||||
|                 if not apobj.add(n_url): | ||||
|                     return f'Error:  {n_url} is not a valid AppRise URL.' | ||||
|  | ||||
|         try: | ||||
|             # use the same as when it is triggered, but then override it with the form test values | ||||
|             n_object = { | ||||
|             n_object = NotificationContextData({ | ||||
|                 'watch_url': request.form.get('window_url', "https://changedetection.io"), | ||||
|                 'notification_urls': notification_urls | ||||
|             } | ||||
|             }) | ||||
|  | ||||
|             # Only use if present, if not set in n_object it should use the default system value | ||||
|             if 'notification_format' in request.form and request.form['notification_format'].strip(): | ||||
|   | ||||
| @@ -87,7 +87,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             form=form, | ||||
|             guid=datastore.data['app_guid'], | ||||
|             has_proxies=datastore.proxy_list, | ||||
|             has_unviewed=datastore.has_unviewed, | ||||
|             hosted_sticky=os.getenv("SALTED_PASS", False) == False, | ||||
|             now_time_server=round(time.time()), | ||||
|             pagination=pagination, | ||||
| @@ -97,6 +96,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'), | ||||
|             system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'), | ||||
|             tags=sorted_tags, | ||||
|             unread_changes_count=datastore.unread_changes_count, | ||||
|             watches=sorted_watches | ||||
|         ) | ||||
|  | ||||
|   | ||||
| @@ -82,8 +82,11 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|         {%- set cols_required = cols_required + 1 -%} | ||||
|     {%- endif -%} | ||||
|     {%- set ui_settings = datastore.data['settings']['application']['ui'] -%} | ||||
|  | ||||
|     <div id="watch-table-wrapper"> | ||||
|     {%- set wrapper_classes = [ | ||||
|         'has-unread-changes' if unread_changes_count else '', | ||||
|         'has-error' if errored_count else '', | ||||
|     ] -%} | ||||
|     <div id="watch-table-wrapper" class="{{ wrapper_classes | reject('equalto', '') | join(' ') }}"> | ||||
|         {%- set table_classes = [ | ||||
|             'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled', | ||||
|         ] -%} | ||||
| @@ -241,10 +244,10 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|             </tbody> | ||||
|         </table> | ||||
|         <ul id="post-list-buttons"> | ||||
|             <li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" > | ||||
|             <li id="post-list-with-errors" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a> | ||||
|             </li> | ||||
|             <li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" > | ||||
|             <li id="post-list-mark-views" style="display: none;" > | ||||
|                 <a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a> | ||||
|             </li> | ||||
|         {%-  if active_tag_uuid -%} | ||||
| @@ -252,8 +255,8 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|                 <a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a> | ||||
|             </li> | ||||
|         {%-  endif -%} | ||||
|             <li id="post-list-unread" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread</a> | ||||
|             <li id="post-list-unread" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a> | ||||
|             </li> | ||||
|             <li> | ||||
|                <a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck | ||||
|   | ||||
| @@ -64,6 +64,18 @@ class Fetcher(): | ||||
|     # Time ONTOP of the system defined env minimum time | ||||
|     render_extract_delay = 0 | ||||
|  | ||||
|     def clear_content(self): | ||||
|         """ | ||||
|         Explicitly clear all content from memory to free up heap space. | ||||
|         Call this after content has been saved to disk. | ||||
|         """ | ||||
|         self.content = None | ||||
|         if hasattr(self, 'raw_content'): | ||||
|             self.raw_content = None | ||||
|         self.screenshot = None | ||||
|         self.xpath_data = None | ||||
|         # Keep headers and status_code as they're small | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_error(self): | ||||
|         return self.error | ||||
| @@ -128,7 +140,7 @@ class Fetcher(): | ||||
|     async def iterate_browser_steps(self, start_url=None): | ||||
|         from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|         from playwright._impl._errors import TimeoutError, Error | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|         step_n = 0 | ||||
|  | ||||
|         if self.browser_steps is not None and len(self.browser_steps): | ||||
|   | ||||
| @@ -51,6 +51,7 @@ class fetcher(Fetcher): | ||||
|  | ||||
|         session = requests.Session() | ||||
|  | ||||
|  | ||||
|         if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'): | ||||
|             from requests_file import FileAdapter | ||||
|             session.mount('file://', FileAdapter()) | ||||
|   | ||||
| @@ -1,8 +1,32 @@ | ||||
| import difflib | ||||
| from typing import List, Iterator, Union | ||||
|  | ||||
| REMOVED_STYLE = "background-color: #fadad7; color: #b30000;" | ||||
| ADDED_STYLE = "background-color: #eaf2c2; color: #406619;" | ||||
| # https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050 | ||||
| #HTML_ADDED_STYLE = "background-color: #d2f7c2; color: #255d00;" | ||||
| #HTML_CHANGED_INTO_STYLE = "background-color: #dafbe1; color: #116329;" | ||||
| #HTML_CHANGED_STYLE = "background-color: #ffd6cc; color: #7a2000;" | ||||
| #HTML_REMOVED_STYLE = "background-color: #ffebe9; color: #82071e;" | ||||
|  | ||||
| # @todo - In the future we can make this configurable | ||||
| HTML_ADDED_STYLE = "background-color: #eaf2c2; color: #406619" | ||||
| HTML_REMOVED_STYLE = "background-color: #fadad7; color: #b30000" | ||||
| HTML_CHANGED_STYLE = HTML_REMOVED_STYLE | ||||
| HTML_CHANGED_INTO_STYLE = HTML_ADDED_STYLE | ||||
|  | ||||
|  | ||||
| # These get set to html or telegram type or discord compatible or whatever in handler.py | ||||
| # Something that cant get escaped to HTML by accident | ||||
| REMOVED_PLACEMARKER_OPEN = '@removed_PLACEMARKER_OPEN' | ||||
| REMOVED_PLACEMARKER_CLOSED = '@removed_PLACEMARKER_CLOSED' | ||||
|  | ||||
| ADDED_PLACEMARKER_OPEN = '@added_PLACEMARKER_OPEN' | ||||
| ADDED_PLACEMARKER_CLOSED = '@added_PLACEMARKER_CLOSED' | ||||
|  | ||||
| CHANGED_PLACEMARKER_OPEN = '@changed_PLACEMARKER_OPEN' | ||||
| CHANGED_PLACEMARKER_CLOSED = '@changed_PLACEMARKER_CLOSED' | ||||
|  | ||||
| CHANGED_INTO_PLACEMARKER_OPEN = '@changed_into_PLACEMARKER_OPEN' | ||||
| CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED' | ||||
|  | ||||
| def same_slicer(lst: List[str], start: int, end: int) -> List[str]: | ||||
|     """Return a slice of the list, or a single element if start == end.""" | ||||
| @@ -15,8 +39,7 @@ def customSequenceMatcher( | ||||
|     include_removed: bool = True, | ||||
|     include_added: bool = True, | ||||
|     include_replaced: bool = True, | ||||
|     include_change_type_prefix: bool = True, | ||||
|     html_colour: bool = False | ||||
|     include_change_type_prefix: bool = True | ||||
| ) -> Iterator[List[str]]: | ||||
|     """ | ||||
|     Compare two sequences and yield differences based on specified parameters. | ||||
| @@ -29,8 +52,6 @@ def customSequenceMatcher( | ||||
|         include_added (bool): Include added parts | ||||
|         include_replaced (bool): Include replaced parts | ||||
|         include_change_type_prefix (bool): Add prefixes to indicate change types | ||||
|         html_colour (bool): Use HTML background colors for differences | ||||
|  | ||||
|     Yields: | ||||
|         List[str]: Differences between sequences | ||||
|     """ | ||||
| @@ -42,22 +63,22 @@ def customSequenceMatcher( | ||||
|         if include_equal and tag == 'equal': | ||||
|             yield before[alo:ahi] | ||||
|         elif include_removed and tag == 'delete': | ||||
|             if html_colour: | ||||
|                 yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] | ||||
|             if include_change_type_prefix: | ||||
|                 yield [f'{REMOVED_PLACEMARKER_OPEN}{line}{REMOVED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)] | ||||
|             else: | ||||
|                 yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi) | ||||
|                 yield same_slicer(before, alo, ahi) | ||||
|         elif include_replaced and tag == 'replace': | ||||
|             if html_colour: | ||||
|                 yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] + \ | ||||
|                       [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)] | ||||
|             if include_change_type_prefix: | ||||
|                 yield [f'{CHANGED_PLACEMARKER_OPEN}{line}{CHANGED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)] + \ | ||||
|                       [f'{CHANGED_INTO_PLACEMARKER_OPEN}{line}{CHANGED_INTO_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)] | ||||
|             else: | ||||
|                 yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \ | ||||
|                       [f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi) | ||||
|                 yield same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi) | ||||
|         elif include_added and tag == 'insert': | ||||
|             if html_colour: | ||||
|                 yield [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)] | ||||
|             if include_change_type_prefix: | ||||
|                 yield [f'{ADDED_PLACEMARKER_OPEN}{line}{ADDED_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)] | ||||
|             else: | ||||
|                 yield [f"(added) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi) | ||||
|                 yield same_slicer(after, blo, bhi) | ||||
|  | ||||
|  | ||||
| def render_diff( | ||||
|     previous_version_file_contents: str, | ||||
| @@ -68,8 +89,7 @@ def render_diff( | ||||
|     include_replaced: bool = True, | ||||
|     line_feed_sep: str = "\n", | ||||
|     include_change_type_prefix: bool = True, | ||||
|     patch_format: bool = False, | ||||
|     html_colour: bool = False | ||||
|     patch_format: bool = False | ||||
| ) -> str: | ||||
|     """ | ||||
|     Render the difference between two file contents. | ||||
| @@ -84,8 +104,6 @@ def render_diff( | ||||
|         line_feed_sep (str): Separator for lines in output | ||||
|         include_change_type_prefix (bool): Add prefixes to indicate change types | ||||
|         patch_format (bool): Use patch format for output | ||||
|         html_colour (bool): Use HTML background colors for differences | ||||
|  | ||||
|     Returns: | ||||
|         str: Rendered difference | ||||
|     """ | ||||
| @@ -103,8 +121,7 @@ def render_diff( | ||||
|         include_removed=include_removed, | ||||
|         include_added=include_added, | ||||
|         include_replaced=include_replaced, | ||||
|         include_change_type_prefix=include_change_type_prefix, | ||||
|         html_colour=html_colour | ||||
|         include_change_type_prefix=include_change_type_prefix | ||||
|     ) | ||||
|  | ||||
|     def flatten(lst: List[Union[str, List[str]]]) -> str: | ||||
|   | ||||
| @@ -133,6 +133,11 @@ def get_socketio_path(): | ||||
|     # Socket.IO will be available at {prefix}/socket.io/ | ||||
|     return prefix | ||||
|  | ||||
| @app.template_global('is_safe_valid_url') | ||||
| def _is_safe_valid_url(test_url): | ||||
|     from .validate_url import is_safe_valid_url | ||||
|     return is_safe_valid_url(test_url) | ||||
|  | ||||
|  | ||||
| @app.template_filter('format_number_locale') | ||||
| def _jinja2_filter_format_number_locale(value: float) -> str: | ||||
| @@ -382,7 +387,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # We would sometimes get login loop errors on sites hosted in sub-paths | ||||
|  | ||||
|             # note for the future: | ||||
|             #            if not is_safe_url(next): | ||||
|             #            if not is_safe_valid_url(next): | ||||
|             #                return flask.abort(400) | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
| @@ -795,7 +800,7 @@ def ticker_thread_check_time_launch_checks(): | ||||
|             else: | ||||
|                 time_schedule_limit = watch.get('time_schedule_limit') | ||||
|                 logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)") | ||||
|             tz_name = datastore.data['settings']['application'].get('timezone', 'UTC') | ||||
|             tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip()) | ||||
|  | ||||
|             if time_schedule_limit and time_schedule_limit.get('enabled'): | ||||
|                 try: | ||||
|   | ||||
| @@ -5,6 +5,7 @@ from wtforms.widgets.core import TimeInput | ||||
|  | ||||
| from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES | ||||
| from changedetectionio.conditions.form import ConditionFormRow | ||||
| from changedetectionio.notification_service import NotificationContextData | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from wtforms import ( | ||||
| @@ -27,11 +28,8 @@ from wtforms.utils import unset_value | ||||
|  | ||||
| from wtforms.validators import ValidationError | ||||
|  | ||||
| from validators.url import url as url_validator | ||||
|  | ||||
| from changedetectionio.widgets import TernaryNoneBooleanField | ||||
|  | ||||
|  | ||||
| # default | ||||
| # each select <option data-enabled="enabled-0-0" | ||||
| from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config | ||||
| @@ -469,11 +467,16 @@ class ValidateAppRiseServers(object): | ||||
|         import apprise | ||||
|         from .notification.apprise_plugin.assets import apprise_asset | ||||
|         from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler  # noqa: F401 | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|         apobj = apprise.Apprise(asset=apprise_asset) | ||||
|  | ||||
|         for server_url in field.data: | ||||
|             url = server_url.strip() | ||||
|             generic_notification_context_data = NotificationContextData() | ||||
|             # Make sure something is atleast in all those regular token fields | ||||
|             generic_notification_context_data.set_random_for_validation() | ||||
|  | ||||
|             url = jinja_render(template_str=server_url.strip(), **generic_notification_context_data).strip() | ||||
|             if url.startswith("#"): | ||||
|                 continue | ||||
|  | ||||
| @@ -487,9 +490,8 @@ class ValidateJinja2Template(object): | ||||
|     """ | ||||
|     def __call__(self, form, field): | ||||
|         from changedetectionio import notification | ||||
|  | ||||
|         from changedetectionio.jinja2_custom import create_jinja_env | ||||
|         from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError | ||||
|         from jinja2.sandbox import ImmutableSandboxedEnvironment | ||||
|         from jinja2.meta import find_undeclared_variables | ||||
|         import jinja2.exceptions | ||||
|  | ||||
| @@ -497,9 +499,11 @@ class ValidateJinja2Template(object): | ||||
|         joined_data = ' '.join(map(str, field.data)) if isinstance(field.data, list) else f"{field.data}" | ||||
|  | ||||
|         try: | ||||
|             jinja2_env = ImmutableSandboxedEnvironment(loader=BaseLoader, extensions=['jinja2_time.TimeExtension']) | ||||
|             jinja2_env.globals.update(notification.valid_tokens) | ||||
|             # Extra validation tokens provided on the form_class(... extra_tokens={}) setup | ||||
|             # Use the shared helper to create a properly configured environment | ||||
|             jinja2_env = create_jinja_env(loader=BaseLoader) | ||||
|  | ||||
|             # Add notification tokens for validation | ||||
|             jinja2_env.globals.update(NotificationContextData()) | ||||
|             if hasattr(field, 'extra_notification_tokens'): | ||||
|                 jinja2_env.globals.update(field.extra_notification_tokens) | ||||
|  | ||||
| @@ -511,6 +515,7 @@ class ValidateJinja2Template(object): | ||||
|         except jinja2.exceptions.SecurityError as e: | ||||
|             raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e | ||||
|  | ||||
|         # Check for undeclared variables | ||||
|         ast = jinja2_env.parse(joined_data) | ||||
|         undefined = ", ".join(find_undeclared_variables(ast)) | ||||
|         if undefined: | ||||
| @@ -533,19 +538,10 @@ class validateURL(object): | ||||
|  | ||||
|  | ||||
| def validate_url(test_url): | ||||
|     # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|     try: | ||||
|         url_validator(test_url, simple_host=allow_simplehost) | ||||
|     except validators.ValidationError: | ||||
|         #@todo check for xss | ||||
|         message = f"'{test_url}' is not a valid URL." | ||||
|     from changedetectionio.validate_url import is_safe_valid_url | ||||
|     if not is_safe_valid_url(test_url): | ||||
|         # This should be wtforms.validators. | ||||
|         raise ValidationError(message) | ||||
|  | ||||
|     from .model.Watch import is_safe_url | ||||
|     if not is_safe_url(test_url): | ||||
|         # This should be wtforms.validators. | ||||
|         raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format') | ||||
|         raise ValidationError('Watch protocol is not permitted or invalid URL format') | ||||
|  | ||||
|  | ||||
| class ValidateSinglePythonRegexString(object): | ||||
| @@ -678,6 +674,51 @@ class ValidateCSSJSONXPATHInput(object): | ||||
|                 except: | ||||
|                     raise ValidationError("A system-error occurred when validating your jq expression") | ||||
|  | ||||
| class ValidateSimpleURL: | ||||
|     """Validate that the value can be parsed by urllib.parse.urlparse() and has a scheme/netloc.""" | ||||
|     def __init__(self, message=None): | ||||
|         self.message = message or "Invalid URL." | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         data = (field.data or "").strip() | ||||
|         if not data: | ||||
|             return  # empty is OK — pair with validators.Optional() | ||||
|         from urllib.parse import urlparse | ||||
|  | ||||
|         parsed = urlparse(data) | ||||
|         if not parsed.scheme or not parsed.netloc: | ||||
|             raise ValidationError(self.message) | ||||
|  | ||||
| class ValidateStartsWithRegex(object): | ||||
|     def __init__(self, regex, *, flags=0, message=None, allow_empty=True, split_lines=True): | ||||
|         # compile with given flags (we’ll pass re.IGNORECASE below) | ||||
|         self.pattern = re.compile(regex, flags) if isinstance(regex, str) else regex | ||||
|         self.message = message | ||||
|         self.allow_empty = allow_empty | ||||
|         self.split_lines = split_lines | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         data = field.data | ||||
|         if not data: | ||||
|             return | ||||
|  | ||||
|         # normalize into list of lines | ||||
|         if isinstance(data, str) and self.split_lines: | ||||
|             lines = data.splitlines() | ||||
|         elif isinstance(data, (list, tuple)): | ||||
|             lines = data | ||||
|         else: | ||||
|             lines = [data] | ||||
|  | ||||
|         for line in lines: | ||||
|             stripped = line.strip() | ||||
|             if not stripped: | ||||
|                 if self.allow_empty: | ||||
|                     continue | ||||
|                 raise ValidationError(self.message or "Empty value not allowed.") | ||||
|             if not self.pattern.match(stripped): | ||||
|                 raise ValidationError(self.message or "Invalid value.") | ||||
|  | ||||
| class quickWatchForm(Form): | ||||
|     from . import processors | ||||
|  | ||||
| @@ -688,7 +729,6 @@ class quickWatchForm(Form): | ||||
|     edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|  | ||||
|  | ||||
| # Common to a single watch and the global settings | ||||
| class commonSettingsForm(Form): | ||||
|     from . import processors | ||||
| @@ -701,13 +741,21 @@ class commonSettingsForm(Form): | ||||
|  | ||||
|     fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()]) | ||||
|     notification_format = SelectField('Notification format', choices=valid_notification_formats.keys()) | ||||
|     notification_format = SelectField('Notification format', choices=list(valid_notification_formats.items())) | ||||
|     notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()]) | ||||
|     notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()]) | ||||
|     processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff") | ||||
|     timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()]) | ||||
|     scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()]) | ||||
|     webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")]) | ||||
|  | ||||
| # Not true anymore but keep the validate_ hook for future use, we convert color tags | ||||
| #    def validate_notification_urls(self, field): | ||||
| #        """Validate that HTML Color format is not used with Telegram""" | ||||
| #        if self.notification_format.data == 'HTML Color' and field.data: | ||||
| #            for url in field.data: | ||||
| #                if url and ('tgram://' in url or 'discord://' in url or 'discord.com/api/webhooks' in url): | ||||
| #                    raise ValidationError('HTML Color format is not supported by Telegram and Discord. Please choose another Notification Format (Plain Text, HTML, or Markdown to HTML).') | ||||
|  | ||||
|  | ||||
| class importForm(Form): | ||||
|     from . import processors | ||||
| @@ -759,6 +807,7 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False) | ||||
|     remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False) | ||||
|     sort_text_alphabetically =  BooleanField('Sort text alphabetically', default=False) | ||||
|     strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None) | ||||
|     trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False) | ||||
|  | ||||
|     filter_text_added = BooleanField('Added lines', default=True) | ||||
| @@ -794,7 +843,7 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|         if not super().validate(): | ||||
|             return False | ||||
|  | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|         result = True | ||||
|  | ||||
|         # Fail form validation when a body is set for a GET | ||||
| @@ -857,23 +906,36 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     ): | ||||
|         super().__init__(formdata, obj, prefix, data, meta, **kwargs) | ||||
|         if kwargs and kwargs.get('default_system_settings'): | ||||
|             default_tz = kwargs.get('default_system_settings').get('application', {}).get('timezone') | ||||
|             default_tz = kwargs.get('default_system_settings').get('application', {}).get('scheduler_timezone_default') | ||||
|             if default_tz: | ||||
|                 self.time_schedule_limit.form.timezone.render_kw['placeholder'] = default_tz | ||||
|  | ||||
|  | ||||
|  | ||||
| class SingleExtraProxy(Form): | ||||
|  | ||||
|     # maybe better to set some <script>var.. | ||||
|     proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"}) | ||||
|     proxy_url = StringField('Proxy URL', [validators.Optional()], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50}) | ||||
|     # @todo do the validation here instead | ||||
|     proxy_url = StringField('Proxy URL', [ | ||||
|         validators.Optional(), | ||||
|         ValidateStartsWithRegex( | ||||
|             regex=r'^(https?|socks5)://',  # ✅ main pattern | ||||
|             flags=re.IGNORECASE,  # ✅ makes it case-insensitive | ||||
|             message='Proxy URLs must start with http://, https:// or socks5://', | ||||
|         ), | ||||
|         ValidateSimpleURL() | ||||
|     ], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50}) | ||||
|  | ||||
| class SingleExtraBrowser(Form): | ||||
|     browser_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"}) | ||||
|     browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50}) | ||||
|     # @todo do the validation here instead | ||||
|     browser_connection_url = StringField('Browser connection URL', [ | ||||
|         validators.Optional(), | ||||
|         ValidateStartsWithRegex( | ||||
|             regex=r'^(wss?|ws)://', | ||||
|             flags=re.IGNORECASE, | ||||
|             message='Browser URLs must start with wss:// or ws://' | ||||
|         ), | ||||
|         ValidateSimpleURL() | ||||
|     ], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50}) | ||||
|  | ||||
| class DefaultUAInputForm(Form): | ||||
|     html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"}) | ||||
| @@ -884,7 +946,7 @@ class DefaultUAInputForm(Form): | ||||
| class globalSettingsRequestForm(Form): | ||||
|     time_between_check = RequiredFormField(TimeBetweenCheckForm) | ||||
|     time_schedule_limit = FormField(ScheduleLimitForm) | ||||
|     proxy = RadioField('Proxy') | ||||
|     proxy = RadioField('Default proxy') | ||||
|     jitter_seconds = IntegerField('Random jitter seconds ± check', | ||||
|                                   render_kw={"style": "width: 5em;"}, | ||||
|                                   validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
| @@ -893,7 +955,12 @@ class globalSettingsRequestForm(Form): | ||||
|                           render_kw={"style": "width: 5em;"}, | ||||
|                           validators=[validators.NumberRange(min=1, max=50, | ||||
|                                                              message="Should be between 1 and 50")]) | ||||
|      | ||||
|  | ||||
|     timeout = IntegerField('Requests timeout in seconds', | ||||
|                            render_kw={"style": "width: 5em;"}, | ||||
|                            validators=[validators.NumberRange(min=1, max=999, | ||||
|                                                               message="Should be between 1 and 999")]) | ||||
|  | ||||
|     extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5) | ||||
|     extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5) | ||||
|  | ||||
| @@ -936,8 +1003,13 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|     removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) | ||||
|     shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()]) | ||||
|     strip_ignored_lines = BooleanField('Strip ignored lines') | ||||
|     rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True, | ||||
|                                       validators=[validators.Optional()]) | ||||
|  | ||||
|     rss_reader_mode = BooleanField('RSS reader mode ', default=False, | ||||
|                                       validators=[validators.Optional()]) | ||||
|  | ||||
|     filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification', | ||||
|                                                                   render_kw={"style": "width: 5em;"}, | ||||
|                                                                   validators=[validators.NumberRange(min=0, | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| from functools import lru_cache | ||||
|  | ||||
| from loguru import logger | ||||
| from lxml import etree | ||||
| from typing import List | ||||
| import html | ||||
| import json | ||||
| @@ -14,7 +15,6 @@ TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S) | ||||
| META_CS  = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I) | ||||
| META_CT  = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I) | ||||
|  | ||||
|  | ||||
| # 'price' , 'lowPrice', 'highPrice' are usually under here | ||||
| # All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here | ||||
| LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"] | ||||
| @@ -23,9 +23,9 @@ class JSONNotFound(ValueError): | ||||
|     def __init__(self, msg): | ||||
|         ValueError.__init__(self, msg) | ||||
|  | ||||
|  | ||||
| # Doesn't look like python supports forward slash auto enclosure in re.findall | ||||
| # So convert it to inline flag "(?i)foobar" type configuration | ||||
| @lru_cache(maxsize=100) | ||||
| def perl_style_slash_enclosed_regex_to_options(regex): | ||||
|  | ||||
|     res = re.search(PERL_STYLE_REGEX, regex, re.IGNORECASE) | ||||
| @@ -58,13 +58,17 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting | ||||
|  | ||||
|     return html_block | ||||
|  | ||||
| def subtractive_css_selector(css_selector, html_content): | ||||
| def subtractive_css_selector(css_selector, content): | ||||
|     from bs4 import BeautifulSoup | ||||
|     soup = BeautifulSoup(html_content, "html.parser") | ||||
|     soup = BeautifulSoup(content, "html.parser") | ||||
|  | ||||
|     # So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM | ||||
|     elements_to_remove = soup.select(css_selector) | ||||
|  | ||||
|     if not elements_to_remove: | ||||
|         # Better to return the original that rebuild with BeautifulSoup | ||||
|         return content | ||||
|  | ||||
|     # Then, remove them in a separate loop | ||||
|     for item in elements_to_remove: | ||||
|         item.decompose() | ||||
| @@ -72,6 +76,7 @@ def subtractive_css_selector(css_selector, html_content): | ||||
|     return str(soup) | ||||
|  | ||||
| def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str: | ||||
|     from lxml import etree | ||||
|     # Parse the HTML content using lxml | ||||
|     html_tree = etree.HTML(html_content) | ||||
|  | ||||
| @@ -83,6 +88,10 @@ def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str: | ||||
|         # Collect elements for each selector | ||||
|         elements_to_remove.extend(html_tree.xpath(selector)) | ||||
|  | ||||
|     # If no elements were found, return the original HTML content | ||||
|     if not elements_to_remove: | ||||
|         return html_content | ||||
|  | ||||
|     # Then, remove them in a separate loop | ||||
|     for element in elements_to_remove: | ||||
|         if element.getparent() is not None:  # Ensure the element has a parent before removing | ||||
| @@ -100,7 +109,7 @@ def element_removal(selectors: List[str], html_content): | ||||
|     xpath_selectors = [] | ||||
|  | ||||
|     for selector in selectors: | ||||
|         if selector.startswith(('xpath:', 'xpath1:', '//')): | ||||
|         if selector.strip().startswith(('xpath:', 'xpath1:', '//')): | ||||
|             # Handle XPath selectors separately | ||||
|             xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:') | ||||
|             xpath_selectors.append(xpath_selector) | ||||
| @@ -177,8 +186,21 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False | ||||
|     tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser) | ||||
|     html_block = "" | ||||
|  | ||||
|     r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser) | ||||
|     #@note: //title/text() wont work where <title>CDATA.. | ||||
|     # Build namespace map for XPath queries | ||||
|     namespaces = {'re': 'http://exslt.org/regular-expressions'} | ||||
|  | ||||
|     # Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML) | ||||
|     # XPath spec: unprefixed element names have no namespace, not the default namespace | ||||
|     # Solution: Register the default namespace with empty string prefix in elementpath | ||||
|     # This is primarily for RSS/Atom feeds but works for any XML with default namespace | ||||
|     if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap: | ||||
|         # Register the default namespace with empty string prefix for elementpath | ||||
|         # This allows //title to match elements in the default namespace | ||||
|         namespaces[''] = tree.nsmap[None] | ||||
|  | ||||
|     r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser) | ||||
|     #@note: //title/text() now works with default namespaces (fixed by registering '' prefix) | ||||
|     #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first) | ||||
|  | ||||
|     if type(r) != list: | ||||
|         r = [r] | ||||
| @@ -213,8 +235,19 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals | ||||
|     tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser) | ||||
|     html_block = "" | ||||
|  | ||||
|     r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}) | ||||
|     #@note: //title/text() wont work where <title>CDATA.. | ||||
|     # Build namespace map for XPath queries | ||||
|     namespaces = {'re': 'http://exslt.org/regular-expressions'} | ||||
|  | ||||
|     # NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace | ||||
|     # For documents with default namespace (RSS/Atom feeds), users must use: | ||||
|     #   - local-name(): //*[local-name()='title']/text() | ||||
|     #   - Or use xpath_filter (not xpath1_filter) which supports default namespaces | ||||
|     # XPath spec: unprefixed element names have no namespace, not the default namespace | ||||
|  | ||||
|     r = tree.xpath(xpath_filter.strip(), namespaces=namespaces) | ||||
|     #@note: xpath1 (lxml) does NOT automatically handle default namespaces | ||||
|     #@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support | ||||
|     #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first) | ||||
|  | ||||
|     for element in r: | ||||
|         # When there's more than 1 match, then add the suffix to separate each line | ||||
| @@ -295,70 +328,92 @@ def _get_stripped_text_from_json_match(match): | ||||
|  | ||||
|     return stripped_text_from_html | ||||
|  | ||||
| def extract_json_blob_from_html(content, ensure_is_ldjson_info_type, json_filter): | ||||
|     from bs4 import BeautifulSoup | ||||
|     stripped_text_from_html = '' | ||||
|  | ||||
|     # Foreach <script json></script> blob.. just return the first that matches json_filter | ||||
|     # As a last resort, try to parse the whole <body> | ||||
|     soup = BeautifulSoup(content, 'html.parser') | ||||
|  | ||||
|     if ensure_is_ldjson_info_type: | ||||
|         bs_result = soup.find_all('script', {"type": "application/ld+json"}) | ||||
|     else: | ||||
|         bs_result = soup.find_all('script') | ||||
|     bs_result += soup.find_all('body') | ||||
|  | ||||
|     bs_jsons = [] | ||||
|  | ||||
|     for result in bs_result: | ||||
|         # result.text is how bs4 magically strips JSON from the body | ||||
|         content_start = result.text.lstrip("\ufeff").strip()[:100] if result.text else '' | ||||
|         # Skip empty tags, and things that dont even look like JSON | ||||
|         if not result.text or not (content_start[0] == '{' or content_start[0] == '['): | ||||
|             continue | ||||
|         try: | ||||
|             json_data = json.loads(result.text) | ||||
|             bs_jsons.append(json_data) | ||||
|         except json.JSONDecodeError: | ||||
|             # Skip objects which cannot be parsed | ||||
|             continue | ||||
|  | ||||
|     if not bs_jsons: | ||||
|         raise JSONNotFound("No parsable JSON found in this document") | ||||
|  | ||||
|     for json_data in bs_jsons: | ||||
|         stripped_text_from_html = _parse_json(json_data, json_filter) | ||||
|  | ||||
|         if ensure_is_ldjson_info_type: | ||||
|             # Could sometimes be list, string or something else random | ||||
|             if isinstance(json_data, dict): | ||||
|                 # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search | ||||
|                 # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part) | ||||
|                 # @type could also be a list although non-standard ("@type": ["Product", "SubType"],) | ||||
|                 # LD_JSON auto-extract also requires some content PLUS the ldjson to be present | ||||
|                 # 1833 - could be either str or dict, should not be anything else | ||||
|  | ||||
|                 t = json_data.get('@type') | ||||
|                 if t and stripped_text_from_html: | ||||
|  | ||||
|                     if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower(): | ||||
|                         break | ||||
|                     # The non-standard part, some have a list | ||||
|                     elif isinstance(t, list): | ||||
|                         if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]: | ||||
|                             break | ||||
|  | ||||
|         elif stripped_text_from_html: | ||||
|             break | ||||
|  | ||||
|     return stripped_text_from_html | ||||
|  | ||||
| # content - json | ||||
| # json_filter - ie json:$..price | ||||
| # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector) | ||||
| def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None): | ||||
|     from bs4 import BeautifulSoup | ||||
|  | ||||
|     stripped_text_from_html = False | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w | ||||
|     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags | ||||
|     try: | ||||
|         # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work | ||||
|         stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter) | ||||
|     except json.JSONDecodeError as e: | ||||
|         logger.warning(str(e)) | ||||
|  | ||||
|         # Foreach <script json></script> blob.. just return the first that matches json_filter | ||||
|         # As a last resort, try to parse the whole <body> | ||||
|         soup = BeautifulSoup(content, 'html.parser') | ||||
|     # Looks like clean JSON, dont bother extracting from HTML | ||||
|  | ||||
|         if ensure_is_ldjson_info_type: | ||||
|             bs_result = soup.find_all('script', {"type": "application/ld+json"}) | ||||
|         else: | ||||
|             bs_result = soup.find_all('script') | ||||
|         bs_result += soup.find_all('body') | ||||
|     content_start = content.lstrip("\ufeff").strip()[:100] | ||||
|  | ||||
|         bs_jsons = [] | ||||
|         for result in bs_result: | ||||
|             # Skip empty tags, and things that dont even look like JSON | ||||
|             if not result.text or '{' not in result.text: | ||||
|                 continue | ||||
|             try: | ||||
|                 json_data = json.loads(result.text) | ||||
|                 bs_jsons.append(json_data) | ||||
|             except json.JSONDecodeError: | ||||
|                 # Skip objects which cannot be parsed | ||||
|                 continue | ||||
|  | ||||
|         if not bs_jsons: | ||||
|             raise JSONNotFound("No parsable JSON found in this document") | ||||
|          | ||||
|         for json_data in bs_jsons: | ||||
|             stripped_text_from_html = _parse_json(json_data, json_filter) | ||||
|  | ||||
|             if ensure_is_ldjson_info_type: | ||||
|                 # Could sometimes be list, string or something else random | ||||
|                 if isinstance(json_data, dict): | ||||
|                     # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search | ||||
|                     # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part) | ||||
|                     # @type could also be a list although non-standard ("@type": ["Product", "SubType"],) | ||||
|                     # LD_JSON auto-extract also requires some content PLUS the ldjson to be present | ||||
|                     # 1833 - could be either str or dict, should not be anything else | ||||
|  | ||||
|                     t = json_data.get('@type') | ||||
|                     if t and stripped_text_from_html: | ||||
|  | ||||
|                         if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower(): | ||||
|                             break | ||||
|                         # The non-standard part, some have a list | ||||
|                         elif isinstance(t, list): | ||||
|                             if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]: | ||||
|                                 break | ||||
|  | ||||
|             elif stripped_text_from_html: | ||||
|                 break | ||||
|     if content_start[0] == '{' or content_start[0] == '[': | ||||
|         try: | ||||
|             # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work | ||||
|             stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter) | ||||
|         except json.JSONDecodeError as e: | ||||
|             logger.warning(f"Error processing JSON {content[:20]}...{str(e)})") | ||||
|     else: | ||||
|         # Probably something else, go fish inside for it | ||||
|         try: | ||||
|             stripped_text_from_html = extract_json_blob_from_html(content=content, | ||||
|                                                                   ensure_is_ldjson_info_type=ensure_is_ldjson_info_type, | ||||
|                                                                   json_filter=json_filter                                                                  ) | ||||
|         except json.JSONDecodeError as e: | ||||
|             logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})") | ||||
|  | ||||
|     if not stripped_text_from_html: | ||||
|         # Re 265 - Just return an empty string when filter not found | ||||
| @@ -378,6 +433,9 @@ def strip_ignore_text(content, wordlist, mode="content"): | ||||
|     ignored_lines = [] | ||||
|  | ||||
|     for k in wordlist: | ||||
|         # Skip empty strings to avoid matching everything | ||||
|         if not k or not k.strip(): | ||||
|             continue | ||||
|         # Is it a regex? | ||||
|         res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE) | ||||
|         if res: | ||||
|   | ||||
							
								
								
									
										22
									
								
								changedetectionio/jinja2_custom/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								changedetectionio/jinja2_custom/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| """ | ||||
| Jinja2 custom extensions and safe rendering utilities. | ||||
| """ | ||||
| from .extensions.TimeExtension import TimeExtension | ||||
| from .safe_jinja import ( | ||||
|     render, | ||||
|     render_fully_escaped, | ||||
|     create_jinja_env, | ||||
|     JINJA2_MAX_RETURN_PAYLOAD_SIZE, | ||||
|     DEFAULT_JINJA2_EXTENSIONS, | ||||
| ) | ||||
| from .plugins.regex import regex_replace | ||||
|  | ||||
| __all__ = [ | ||||
|     'TimeExtension', | ||||
|     'render', | ||||
|     'render_fully_escaped', | ||||
|     'create_jinja_env', | ||||
|     'JINJA2_MAX_RETURN_PAYLOAD_SIZE', | ||||
|     'DEFAULT_JINJA2_EXTENSIONS', | ||||
|     'regex_replace', | ||||
| ] | ||||
							
								
								
									
										221
									
								
								changedetectionio/jinja2_custom/extensions/TimeExtension.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										221
									
								
								changedetectionio/jinja2_custom/extensions/TimeExtension.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,221 @@ | ||||
| """ | ||||
| Jinja2 TimeExtension - Custom date/time handling for templates. | ||||
|  | ||||
| This extension provides the {% now %} tag for Jinja2 templates, offering timezone-aware | ||||
| date/time formatting with support for time offsets. | ||||
|  | ||||
| Why This Extension Exists: | ||||
|     The Arrow library has a now() function (arrow.now()), but Jinja2 templates cannot | ||||
|     directly call Python functions - they need extensions or filters to expose functionality. | ||||
|  | ||||
|     This TimeExtension serves as a Jinja2-to-Arrow bridge that: | ||||
|  | ||||
|     1. Makes Arrow accessible in templates - Jinja2 requires registering functions/tags | ||||
|        through extensions. You cannot use arrow.now() directly in a template. | ||||
|  | ||||
|     2. Provides template-friendly syntax - Instead of complex Python code, you get clean tags: | ||||
|        {% now 'UTC' %} | ||||
|        {% now 'UTC' + 'hours=2' %} | ||||
|        {% now 'Europe/London', '%Y-%m-%d' %} | ||||
|  | ||||
|     3. Adds convenience features on top of Arrow: | ||||
|        - Default timezone from environment variable (TZ) or config | ||||
|        - Default datetime format configuration | ||||
|        - Offset syntax parsing: 'hours=2,minutes=30' → shift(hours=2, minutes=30) | ||||
|        - Empty string timezone support to use configured defaults | ||||
|  | ||||
|     4. Maintains security - Works within Jinja2's sandboxed environment so users | ||||
|        cannot access arbitrary Python code or objects. | ||||
|  | ||||
|     Essentially, this is a Jinja2 wrapper around arrow.now() and arrow.shift() that | ||||
|     provides user-friendly template syntax while maintaining security. | ||||
|  | ||||
| Basic Usage: | ||||
|     {% now 'UTC' %} | ||||
|     # Output: Wed, 09 Dec 2015 23:33:01 | ||||
|  | ||||
| Custom Format: | ||||
|     {% now 'UTC', '%Y-%m-%d %H:%M:%S' %} | ||||
|     # Output: 2015-12-09 23:33:01 | ||||
|  | ||||
| Timezone Support: | ||||
|     {% now 'America/New_York' %} | ||||
|     {% now 'Europe/London' %} | ||||
|     {% now '' %}  # Uses default timezone from environment.default_timezone | ||||
|  | ||||
| Time Offsets (Addition): | ||||
|     {% now 'UTC' + 'hours=2' %} | ||||
|     {% now 'UTC' + 'hours=2,minutes=30' %} | ||||
|     {% now 'UTC' + 'days=1,hours=2,minutes=15,seconds=10' %} | ||||
|  | ||||
| Time Offsets (Subtraction): | ||||
|     {% now 'UTC' - 'minutes=11' %} | ||||
|     {% now 'UTC' - 'days=2,minutes=33,seconds=1' %} | ||||
|  | ||||
| Time Offsets with Custom Format: | ||||
|     {% now 'UTC' + 'hours=2', '%Y-%m-%d %H:%M:%S' %} | ||||
|     # Output: 2015-12-10 01:33:01 | ||||
|  | ||||
| Weekday Support (for finding next/previous weekday): | ||||
|     {% now 'UTC' + 'weekday=0' %}  # Next Monday (0=Monday, 6=Sunday) | ||||
|     {% now 'UTC' + 'weekday=4' %}  # Next Friday | ||||
|  | ||||
| Configuration: | ||||
|     - Default timezone: Set via TZ environment variable or override environment.default_timezone | ||||
|     - Default format: '%a, %d %b %Y %H:%M:%S' (can be overridden via environment.datetime_format) | ||||
|  | ||||
| Environment Customization: | ||||
|     from changedetectionio.jinja2_custom import create_jinja_env | ||||
|  | ||||
|     jinja2_env = create_jinja_env() | ||||
|     jinja2_env.default_timezone = 'America/New_York'  # Override default timezone | ||||
|     jinja2_env.datetime_format = '%Y-%m-%d %H:%M'      # Override default format | ||||
|  | ||||
| Supported Offset Parameters: | ||||
|     - years, months, weeks, days | ||||
|     - hours, minutes, seconds, microseconds | ||||
|     - weekday (0=Monday through 6=Sunday, must be integer) | ||||
|  | ||||
| Note: | ||||
|     This extension uses the Arrow library for timezone-aware datetime handling. | ||||
|     All timezone names should be valid IANA timezone identifiers (e.g., 'America/New_York'). | ||||
| """ | ||||
| import arrow | ||||
|  | ||||
| from jinja2 import nodes | ||||
| from jinja2.ext import Extension | ||||
| import os | ||||
|  | ||||
class TimeExtension(Extension):
    """
    Jinja2 Extension providing the {% now %} tag for timezone-aware date/time rendering.

    This extension adds two attributes to the Jinja2 environment:
    - datetime_format: Default strftime format string (default: '%a, %d %b %Y %H:%M:%S')
    - default_timezone: Default timezone for rendering (default: TZ env var or 'UTC')

    Both can be overridden after environment creation by setting the attributes directly.
    """

    tags = {'now'}

    def __init__(self, environment):
        """Jinja2 Extension constructor - registers the environment defaults."""
        super().__init__(environment)

        environment.extend(
            datetime_format='%a, %d %b %Y %H:%M:%S',
            default_timezone=os.getenv('TZ', 'UTC').strip()
        )

    def _resolve_defaults(self, timezone, datetime_format):
        """
        Apply environment defaults to missing arguments.

        Args:
            timezone: IANA timezone identifier, or a falsy value to use
                      environment.default_timezone
            datetime_format: strftime format string, or None to use
                             environment.datetime_format

        Returns:
            (timezone, datetime_format) tuple with defaults filled in
        """
        if not timezone:
            timezone = self.environment.default_timezone
        if datetime_format is None:
            datetime_format = self.environment.datetime_format
        return timezone, datetime_format

    def _datetime(self, timezone, operator, offset, datetime_format):
        """
        Get current datetime with time offset applied.

        Args:
            timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
            operator: '+' for addition or '-' for subtraction
            offset: Comma-separated offset parameters (e.g., 'hours=2,minutes=30')
            datetime_format: strftime format string or None to use environment default

        Returns:
            Formatted datetime string with offset applied

        Example:
            _datetime('UTC', '+', 'hours=2,minutes=30', '%Y-%m-%d %H:%M:%S')
            # Returns current time + 2.5 hours
        """
        timezone, datetime_format = self._resolve_defaults(timezone, datetime_format)

        d = arrow.now(timezone)

        # Parse shift params from the offset string, folding the +/- operator
        # into each value. Empty segments (e.g. a trailing comma in
        # 'hours=2,') are skipped instead of raising ValueError.
        shift_params = {}
        for param in offset.split(','):
            if not param.strip():
                continue
            interval, value = param.split('=')
            shift_params[interval.strip()] = float(operator + value.strip())

        # arrow.shift() requires weekday to be an int (0=Monday .. 6=Sunday)
        if 'weekday' in shift_params:
            shift_params['weekday'] = int(shift_params['weekday'])

        d = d.shift(**shift_params)

        return d.strftime(datetime_format)

    def _now(self, timezone, datetime_format):
        """
        Get current datetime without any offset.

        Args:
            timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
            datetime_format: strftime format string or None to use environment default

        Returns:
            Formatted datetime string for current time

        Example:
            _now('America/New_York', '%Y-%m-%d %H:%M:%S')
            # Returns current time in New York timezone
        """
        timezone, datetime_format = self._resolve_defaults(timezone, datetime_format)
        return arrow.now(timezone).strftime(datetime_format)

    def parse(self, parser):
        """
        Parse the {% now %} tag and generate appropriate AST nodes.

        This method is called by Jinja2 when it encounters a {% now %} tag.
        It parses the tag syntax and determines whether to call _now() or _datetime()
        based on whether offset operations (+ or -) are present.

        Supported syntax:
            {% now 'timezone' %}                              -> calls _now()
            {% now 'timezone', 'format' %}                    -> calls _now()
            {% now 'timezone' + 'offset' %}                   -> calls _datetime()
            {% now 'timezone' + 'offset', 'format' %}         -> calls _datetime()
            {% now 'timezone' - 'offset', 'format' %}         -> calls _datetime()

        Args:
            parser: Jinja2 parser instance

        Returns:
            nodes.Output: AST output node containing the formatted datetime string
        """
        lineno = next(parser.stream).lineno

        node = parser.parse_expression()

        # An optional trailing ', format' expression overrides the default format
        if parser.stream.skip_if('comma'):
            datetime_format = parser.parse_expression()
        else:
            datetime_format = nodes.Const(None)

        if isinstance(node, nodes.Add):
            call_method = self.call_method(
                '_datetime',
                [node.left, nodes.Const('+'), node.right, datetime_format],
                lineno=lineno,
            )
        elif isinstance(node, nodes.Sub):
            call_method = self.call_method(
                '_datetime',
                [node.left, nodes.Const('-'), node.right, datetime_format],
                lineno=lineno,
            )
        else:
            call_method = self.call_method(
                '_now',
                [node, datetime_format],
                lineno=lineno,
            )
        return nodes.Output([call_method], lineno=lineno)
							
								
								
									
										6
									
								
								changedetectionio/jinja2_custom/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								changedetectionio/jinja2_custom/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
| """ | ||||
| Jinja2 custom filter plugins for changedetection.io | ||||
| """ | ||||
| from .regex import regex_replace | ||||
|  | ||||
| __all__ = ['regex_replace'] | ||||
							
								
								
									
										98
									
								
								changedetectionio/jinja2_custom/plugins/regex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								changedetectionio/jinja2_custom/plugins/regex.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| """ | ||||
| Regex filter plugin for Jinja2 templates. | ||||
|  | ||||
| Provides regex_replace filter for pattern-based string replacements in templates. | ||||
| """ | ||||
import re
import signal
import threading

from loguru import logger
|  | ||||
|  | ||||
def regex_replace(value: str, pattern: str, replacement: str = '', count: int = 0) -> str:
    """
    Replace occurrences of a regex pattern in a string.

    Security: Protected against ReDoS (Regular Expression Denial of Service) attacks:
    - Limits input value size to prevent excessive processing
    - Uses timeout mechanism to prevent runaway regex operations
    - Validates pattern complexity to prevent catastrophic backtracking

    Args:
        value: The input string to perform replacements on
        pattern: The regex pattern to search for
        replacement: The replacement string (default: '')
        count: Maximum number of replacements (0 = replace all, default: 0)

    Returns:
        String with replacements applied, or original value on error

    Example:
        {{ "hello world" | regex_replace("world", "universe") }}
        {{ diff | regex_replace("<td>([^<]+)</td><td>([^<]+)</td>", "Label1: \\1\\nLabel2: \\2") }}

    Security limits:
        - Maximum input size: 10MB
        - Maximum pattern length: 500 characters
        - Operation timeout: 10 seconds (main thread on Unix only)
        - Dangerous nested quantifier patterns are rejected
    """
    # Security limits
    MAX_INPUT_SIZE = 1024 * 1024 * 10  # 10MB max input size (characters)
    MAX_PATTERN_LENGTH = 500  # Maximum regex pattern length
    REGEX_TIMEOUT_SECONDS = 10  # Maximum time for regex operation

    # Validate input sizes
    value_str = str(value)
    if len(value_str) > MAX_INPUT_SIZE:
        logger.warning(f"regex_replace: Input too large ({len(value_str)} chars), truncating")
        value_str = value_str[:MAX_INPUT_SIZE]

    if len(pattern) > MAX_PATTERN_LENGTH:
        logger.warning(f"regex_replace: Pattern too long ({len(pattern)} chars), rejecting")
        return value_str

    # Check for potentially dangerous patterns (basic checks)
    # Nested quantifiers like (a+)+ can cause catastrophic backtracking
    dangerous_patterns = [
        r'\([^)]*\+[^)]*\)\+',  # (x+)+
        r'\([^)]*\*[^)]*\)\+',  # (x*)+
        r'\([^)]*\+[^)]*\)\*',  # (x+)*
        r'\([^)]*\*[^)]*\)\*',  # (x*)*
    ]

    for dangerous in dangerous_patterns:
        if re.search(dangerous, pattern):
            logger.warning(f"regex_replace: Potentially dangerous pattern detected: {pattern}")
            return value_str

    def timeout_handler(signum, frame):
        raise TimeoutError("Regex operation timed out")

    # SIGALRM handlers can only be installed from the main thread of the main
    # interpreter. In worker threads signal.signal() raises ValueError, which
    # previously fell through to the broad `except Exception` below and made
    # this filter silently return the input UNMODIFIED. Instead, only arm the
    # alarm when it is actually possible, and otherwise run without a timeout
    # (the pattern/length checks above still apply).
    use_alarm = (
        hasattr(signal, 'SIGALRM')
        and threading.current_thread() is threading.main_thread()
    )

    try:
        # Set up timeout for regex operation (Unix-like systems, main thread only)
        # This prevents ReDoS attacks
        old_handler = None
        if use_alarm:
            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(REGEX_TIMEOUT_SECONDS)

        try:
            result = re.sub(pattern, replacement, value_str, count=count)
        finally:
            # Cancel the alarm and restore the previous handler
            if use_alarm:
                signal.alarm(0)
                if old_handler is not None:
                    signal.signal(signal.SIGALRM, old_handler)

        return result

    except TimeoutError:
        logger.error(f"regex_replace: Regex operation timed out - possible ReDoS attack. Pattern: {pattern}")
        return value_str
    except re.error as e:
        logger.warning(f"regex_replace: Invalid regex pattern: {e}")
        return value_str
    except Exception as e:
        logger.error(f"regex_replace: Unexpected error: {e}")
        return value_str
							
								
								
									
										58
									
								
								changedetectionio/jinja2_custom/safe_jinja.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								changedetectionio/jinja2_custom/safe_jinja.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| """ | ||||
| Safe Jinja2 render with max payload sizes | ||||
|  | ||||
| See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations | ||||
| """ | ||||
|  | ||||
| import jinja2.sandbox | ||||
| import typing as t | ||||
| import os | ||||
| from .extensions.TimeExtension import TimeExtension | ||||
| from .plugins import regex_replace | ||||
|  | ||||
| JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10)) | ||||
|  | ||||
| # Default extensions - can be overridden in create_jinja_env() | ||||
| DEFAULT_JINJA2_EXTENSIONS = [TimeExtension] | ||||
|  | ||||
def create_jinja_env(extensions=None, **kwargs) -> jinja2.sandbox.ImmutableSandboxedEnvironment:
    """
    Build a sandboxed Jinja2 environment wired up with our custom extensions,
    the default timezone and our custom filters.

    Args:
        extensions: Extension classes to load (DEFAULT_JINJA2_EXTENSIONS when None)
        **kwargs: Forwarded verbatim to ImmutableSandboxedEnvironment

    Returns:
        Configured Jinja2 environment
    """
    env = jinja2.sandbox.ImmutableSandboxedEnvironment(
        extensions=DEFAULT_JINJA2_EXTENSIONS if extensions is None else extensions,
        **kwargs,
    )

    # Default timezone comes from the TZ environment variable (falls back to UTC)
    env.default_timezone = os.getenv('TZ', 'UTC').strip()

    # Expose our custom filters to templates
    env.filters['regex_replace'] = regex_replace

    return env
|  | ||||
|  | ||||
# This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available.
# (Which also limits available functions that could be called)
def render(template_str, **args: t.Any) -> str:
    """Render template_str in a fresh sandboxed environment, capping the output size."""
    rendered = create_jinja_env().from_string(template_str).render(args)
    # Hard cap on the rendered payload size
    return rendered[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|  | ||||
def render_fully_escaped(content):
    """Return content with every HTML special character escaped (no template logic)."""
    sandbox = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
    return sandbox.from_string("{{ some_html|e }}").render(some_html=content)
|  | ||||
| @@ -1,4 +1,5 @@ | ||||
| from os import getenv | ||||
| from copy import deepcopy | ||||
|  | ||||
| from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES | ||||
|  | ||||
| @@ -55,10 +56,12 @@ class model(dict): | ||||
|                     'rss_access_token': None, | ||||
|                     'rss_content_format': RSS_FORMAT_TYPES[0][0], | ||||
|                     'rss_hide_muted_watches': True, | ||||
|                     'rss_reader_mode': False, | ||||
|                     'scheduler_timezone_default': None,  # Default IANA timezone name | ||||
|                     'schema_version' : 0, | ||||
|                     'shared_diff_access': False, | ||||
|                     'strip_ignored_lines': False, | ||||
|                     'tags': {}, #@todo use Tag.model initialisers | ||||
|                     'timezone': None, # Default IANA timezone name | ||||
|                     'webdriver_delay': None , # Extra delay in seconds before extracting text | ||||
|                     'ui': { | ||||
|                         'use_page_title_in_list': True, | ||||
| @@ -72,7 +75,8 @@ class model(dict): | ||||
|  | ||||
|     def __init__(self, *arg, **kw): | ||||
|         super(model, self).__init__(*arg, **kw) | ||||
|         self.update(self.base_config) | ||||
|         # CRITICAL: deepcopy to avoid sharing mutable objects between instances | ||||
|         self.update(deepcopy(self.base_config)) | ||||
|  | ||||
|  | ||||
| def parse_headers_from_text_file(filepath): | ||||
|   | ||||
| @@ -1,42 +1,24 @@ | ||||
| from blinker import signal | ||||
| from changedetectionio.validate_url import is_safe_valid_url | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from changedetectionio.jinja2_custom import render as jinja_render | ||||
| from . import watch_base | ||||
| import os | ||||
| import re | ||||
| from pathlib import Path | ||||
| from loguru import logger | ||||
|  | ||||
| from .. import safe_jinja | ||||
| from .. import jinja2_custom as safe_jinja | ||||
| from ..diff import ADDED_PLACEMARKER_OPEN | ||||
| from ..html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
|  | ||||
| # Allowable protocols, protects against javascript: etc | ||||
| # file:// is further checked by ALLOW_FILE_URI | ||||
| SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):' | ||||
| FAVICON_RESAVE_THRESHOLD_SECONDS=86400 | ||||
|  | ||||
|  | ||||
| minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)) | ||||
| mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} | ||||
|  | ||||
|  | ||||
| def is_safe_url(test_url): | ||||
|     # See https://github.com/dgtlmoon/changedetection.io/issues/1358 | ||||
|  | ||||
|     # Remove 'source:' prefix so we dont get 'source:javascript:' etc | ||||
|     # 'source:' is a valid way to tell us to return the source | ||||
|  | ||||
|     r = re.compile(re.escape('source:'), re.IGNORECASE) | ||||
|     test_url = r.sub('', test_url) | ||||
|  | ||||
|     pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE) | ||||
|     if not pattern.match(test_url.strip()): | ||||
|         return False | ||||
|  | ||||
|     return True | ||||
|  | ||||
|  | ||||
| class model(watch_base): | ||||
|     __newest_history_key = None | ||||
|     __history_n = 0 | ||||
| @@ -79,7 +61,7 @@ class model(watch_base): | ||||
|     def link(self): | ||||
|  | ||||
|         url = self.get('url', '') | ||||
|         if not is_safe_url(url): | ||||
|         if not is_safe_valid_url(url): | ||||
|             return 'DISABLED' | ||||
|  | ||||
|         ready_url = url | ||||
| @@ -89,9 +71,8 @@ class model(watch_base): | ||||
|                 ready_url = jinja_render(template_str=url) | ||||
|             except Exception as e: | ||||
|                 logger.critical(f"Invalid URL template for: '{url}' - {str(e)}") | ||||
|                 from flask import ( | ||||
|                     flash, Markup, url_for | ||||
|                 ) | ||||
|                 from flask import flash, url_for | ||||
|                 from markupsafe import Markup | ||||
|                 message = Markup('<a href="{}#general">The URL {} is invalid and cannot be used, click to edit</a>'.format( | ||||
|                     url_for('ui.ui_edit.edit_page', uuid=self.get('uuid')), self.get('url', ''))) | ||||
|                 flash(message, 'error') | ||||
| @@ -101,7 +82,7 @@ class model(watch_base): | ||||
|             ready_url=ready_url.replace('source:', '') | ||||
|  | ||||
|         # Also double check it after any Jinja2 formatting just incase | ||||
|         if not is_safe_url(ready_url): | ||||
|         if not is_safe_valid_url(ready_url): | ||||
|             return 'DISABLED' | ||||
|         return ready_url | ||||
|  | ||||
|   | ||||
| @@ -2,7 +2,7 @@ import os | ||||
| import uuid | ||||
|  | ||||
| from changedetectionio import strtobool | ||||
| default_notification_format_for_watch = 'System default' | ||||
| USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default' | ||||
| CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL' | ||||
|  | ||||
| class watch_base(dict): | ||||
| @@ -44,7 +44,7 @@ class watch_base(dict): | ||||
|             'method': 'GET', | ||||
|             'notification_alert_count': 0, | ||||
|             'notification_body': None, | ||||
|             'notification_format': default_notification_format_for_watch, | ||||
|             'notification_format': USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, | ||||
|             'notification_muted': False, | ||||
|             'notification_screenshot': False,  # Include the latest screenshot if available and supported by the apprise URL | ||||
|             'notification_title': None, | ||||
| @@ -58,6 +58,7 @@ class watch_base(dict): | ||||
|             'proxy': None,  # Preferred proxy connection | ||||
|             'remote_server_reply': None,  # From 'server' reply header | ||||
|             'sort_text_alphabetically': False, | ||||
|             'strip_ignored_lines': None, | ||||
|             'subtractive_selectors': [], | ||||
|             'tag': '',  # Old system of text name for a tag, to be removed | ||||
|             'tags': [],  # list of UUIDs to App.Tags | ||||
|   | ||||
| @@ -1,35 +1,16 @@ | ||||
| from changedetectionio.model import default_notification_format_for_watch | ||||
| from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|  | ||||
| ult_notification_format_for_watch = 'System default' | ||||
| default_notification_format = 'HTML Color' | ||||
| default_notification_format = 'htmlcolor' | ||||
| default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n' | ||||
| default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}' | ||||
|  | ||||
| # The values (markdown etc) are from apprise NotifyFormat, | ||||
| # But to avoid importing the whole heavy module just use the same strings here. | ||||
| valid_notification_formats = { | ||||
|     'Text': 'text', | ||||
|     'Markdown': 'markdown', | ||||
|     'HTML': 'html', | ||||
|     'HTML Color': 'htmlcolor', | ||||
|     'text': 'Plain Text', | ||||
|     'html': 'HTML', | ||||
|     'htmlcolor': 'HTML Color', | ||||
|     'markdown': 'Markdown to HTML', | ||||
|     # Used only for editing a watch (not for global) | ||||
|     default_notification_format_for_watch: default_notification_format_for_watch | ||||
| } | ||||
|  | ||||
|  | ||||
| valid_tokens = { | ||||
|     'base_url': '', | ||||
|     'current_snapshot': '', | ||||
|     'diff': '', | ||||
|     'diff_added': '', | ||||
|     'diff_full': '', | ||||
|     'diff_patch': '', | ||||
|     'diff_removed': '', | ||||
|     'diff_url': '', | ||||
|     'preview_url': '', | ||||
|     'triggered_text': '', | ||||
|     'watch_tag': '', | ||||
|     'watch_title': '', | ||||
|     'watch_url': '', | ||||
|     'watch_uuid': '', | ||||
|     USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
| } | ||||
|   | ||||
| @@ -1,10 +1,61 @@ | ||||
| """ | ||||
| Custom Apprise HTTP Handlers with format= Parameter Support | ||||
|  | ||||
| IMPORTANT: This module works around a limitation in Apprise's @notify decorator. | ||||
|  | ||||
| THE PROBLEM: | ||||
| ------------- | ||||
| When using Apprise's @notify decorator to create custom notification handlers, the | ||||
| decorator creates a CustomNotifyPlugin that uses parse_url(..., simple=True) to parse | ||||
| URLs. This simple parsing mode does NOT extract the format= query parameter from the URL | ||||
| and set it as a top-level parameter that NotifyBase.__init__ can use to set notify_format. | ||||
|  | ||||
| As a result: | ||||
| 1. URL: post://example.com/webhook?format=html | ||||
| 2. Apprise parses this and sees format=html in qsd (query string dictionary) | ||||
| 3. But it does NOT extract it and pass it to NotifyBase.__init__ | ||||
| 4. NotifyBase defaults to notify_format=TEXT | ||||
| 5. When you call apobj.notify(body="<html>...", body_format="html"): | ||||
|    - Apprise sees: input format = html, output format (notify_format) = text | ||||
|    - Apprise calls convert_between("html", "text", body) | ||||
|    - This strips all HTML tags, leaving only plain text | ||||
| 6. Your custom handler receives stripped plain text instead of HTML | ||||
|  | ||||
| THE SOLUTION: | ||||
| ------------- | ||||
| Instead of using the @notify decorator directly, we: | ||||
| 1. Manually register custom plugins using plugins.N_MGR.add() | ||||
| 2. Create a CustomHTTPHandler class that extends CustomNotifyPlugin | ||||
| 3. Override __init__ to extract format= from qsd and set it as kwargs['format'] | ||||
| 4. Call NotifyBase.__init__ which properly sets notify_format from kwargs['format'] | ||||
| 5. Set up _default_args like CustomNotifyPlugin does for compatibility | ||||
|  | ||||
| This ensures that when format=html is in the URL: | ||||
| - notify_format is set to HTML | ||||
| - Apprise sees: input format = html, output format = html | ||||
| - No conversion happens (convert_between returns content unchanged) | ||||
| - Your custom handler receives the original HTML intact | ||||
|  | ||||
| TESTING: | ||||
| -------- | ||||
| To verify this works: | ||||
| >>> apobj = apprise.Apprise() | ||||
| >>> apobj.add('post://localhost:5005/test?format=html') | ||||
| >>> for server in apobj: | ||||
| ...     print(server.notify_format)  # Should print: html (not text) | ||||
| >>> apobj.notify(body='<span>Test</span>', body_format='html') | ||||
| # Your handler should receive '<span>Test</span>' not 'Test' | ||||
| """ | ||||
|  | ||||
| import json | ||||
| import re | ||||
| from urllib.parse import unquote_plus | ||||
|  | ||||
| import requests | ||||
| from apprise.decorators import notify | ||||
| from apprise.utils.parse import parse_url as apprise_parse_url | ||||
| from apprise import plugins | ||||
| from apprise.decorators.base import CustomNotifyPlugin | ||||
| from apprise.utils.parse import parse_url as apprise_parse_url, url_assembly | ||||
| from apprise.utils.logic import dict_full_update | ||||
| from loguru import logger | ||||
| from requests.structures import CaseInsensitiveDict | ||||
|  | ||||
| @@ -12,13 +63,66 @@ SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"} | ||||
|  | ||||
|  | ||||
| def notify_supported_methods(func): | ||||
|     """Register custom HTTP method handlers that properly support format= parameter.""" | ||||
|     for method in SUPPORTED_HTTP_METHODS: | ||||
|         func = notify(on=method)(func) | ||||
|         # Add support for https, for each supported http method | ||||
|         func = notify(on=f"{method}s")(func) | ||||
|         _register_http_handler(method, func) | ||||
|         _register_http_handler(f"{method}s", func) | ||||
|     return func | ||||
|  | ||||
|  | ||||
def _register_http_handler(schema, send_func):
    """Register a custom HTTP handler that extracts format= from URL query parameters.

    Builds a per-schema CustomNotifyPlugin subclass and registers it with
    apprise's plugin manager (N_MGR), so URLs like ``post://host?format=html``
    set ``notify_format`` correctly instead of defaulting to text.

    :param schema: Apprise URL schema to claim (e.g. "post", "posts").
    :param send_func: Callable invoked with (body, title, notify_type, ...,
                      meta=<parsed url args>) when a notification fires.
    """

    # Parse base URL once; the result is shared by every instance via _base_args
    base_url = f"{schema}://"
    base_args = apprise_parse_url(base_url, default_schema=schema, verify_host=False, simple=True)

    class CustomHTTPHandler(CustomNotifyPlugin):
        secure_protocol = schema
        service_name = f"Custom HTTP - {schema.upper()}"
        _base_args = base_args

        def __init__(self, **kwargs):
            # Extract format from qsd and set it as a top-level kwarg
            # This allows NotifyBase.__init__ to properly set notify_format
            if 'qsd' in kwargs and 'format' in kwargs['qsd']:
                kwargs['format'] = kwargs['qsd']['format']

            # Call NotifyBase.__init__ (skip CustomNotifyPlugin.__init__)
            # super(CustomNotifyPlugin, ...) starts the MRO lookup *above*
            # CustomNotifyPlugin, deliberately bypassing its __init__ which
            # would otherwise clobber the format handling done here.
            super(CustomNotifyPlugin, self).__init__(**kwargs)

            # Set up _default_args like CustomNotifyPlugin does
            # (base args first, then per-URL kwargs override them)
            self._default_args = {}
            kwargs.pop("secure", None)
            dict_full_update(self._default_args, self._base_args)
            dict_full_update(self._default_args, kwargs)
            self._default_args["url"] = url_assembly(**self._default_args)

        # Name-mangled to _CustomHTTPHandler__send; staticmethod wrapper keeps
        # the closure's send_func from being bound as an instance method.
        __send = staticmethod(send_func)

        def send(self, body, title="", notify_type="info", *args, **kwargs):
            """Call the custom send function."""
            try:
                result = self.__send(
                    body, title, notify_type,
                    *args,
                    meta=self._default_args,
                    **kwargs
                )
                # A handler returning None is treated as success (apprise convention)
                return True if result is None else bool(result)
            except Exception as e:
                self.logger.warning(f"Exception in custom HTTP handler: {e}")
                return False

    # Register the plugin with apprise's global plugin manager
    plugins.N_MGR.add(
        plugin=CustomHTTPHandler,
        schemas=schema,
        send_func=send_func,
        url=base_url,
    )
|  | ||||
|  | ||||
| def _get_auth(parsed_url: dict) -> str | tuple[str, str]: | ||||
|     user: str | None = parsed_url.get("user") | ||||
|     password: str | None = parsed_url.get("password") | ||||
| @@ -70,9 +174,12 @@ def apprise_http_custom_handler( | ||||
|     title: str, | ||||
|     notify_type: str, | ||||
|     meta: dict, | ||||
|     body_format: str = None, | ||||
|     *args, | ||||
|     **kwargs, | ||||
| ) -> bool: | ||||
|  | ||||
|  | ||||
|     url: str = meta.get("url") | ||||
|     schema: str = meta.get("schema") | ||||
|     method: str = re.sub(r"s$", "", schema).upper() | ||||
| @@ -88,25 +195,16 @@ def apprise_http_custom_handler( | ||||
|  | ||||
|     url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url")) | ||||
|  | ||||
|     try: | ||||
|         response = requests.request( | ||||
|             method=method, | ||||
|             url=url, | ||||
|             auth=auth, | ||||
|             headers=headers, | ||||
|             params=params, | ||||
|             data=body.encode("utf-8") if isinstance(body, str) else body, | ||||
|         ) | ||||
|     response = requests.request( | ||||
|         method=method, | ||||
|         url=url, | ||||
|         auth=auth, | ||||
|         headers=headers, | ||||
|         params=params, | ||||
|         data=body.encode("utf-8") if isinstance(body, str) else body, | ||||
|     ) | ||||
|  | ||||
|         response.raise_for_status() | ||||
|     response.raise_for_status() | ||||
|  | ||||
|         logger.info(f"Successfully sent custom notification to {url}") | ||||
|         return True | ||||
|  | ||||
|     except requests.RequestException as e: | ||||
|         logger.error(f"Remote host error while sending custom notification to {url}: {e}") | ||||
|         return False | ||||
|  | ||||
|     except Exception as e: | ||||
|         logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}") | ||||
|         return False | ||||
|     logger.info(f"Successfully sent custom notification to {url}") | ||||
|     return True | ||||
|   | ||||
							
								
								
									
										286
									
								
								changedetectionio/notification/apprise_plugin/discord.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								changedetectionio/notification/apprise_plugin/discord.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,286 @@ | ||||
| """ | ||||
| Custom Discord plugin for changedetection.io | ||||
| Extends Apprise's Discord plugin to support custom colored embeds for removed/added content | ||||
| """ | ||||
| from apprise.plugins.discord import NotifyDiscord | ||||
| from apprise.decorators import notify | ||||
| from apprise.common import NotifyFormat | ||||
| from loguru import logger | ||||
|  | ||||
| # Import placeholders from changedetection's diff module | ||||
| from ...diff import ( | ||||
|     REMOVED_PLACEMARKER_OPEN, | ||||
|     REMOVED_PLACEMARKER_CLOSED, | ||||
|     ADDED_PLACEMARKER_OPEN, | ||||
|     ADDED_PLACEMARKER_CLOSED, | ||||
|     CHANGED_PLACEMARKER_OPEN, | ||||
|     CHANGED_PLACEMARKER_CLOSED, | ||||
|     CHANGED_INTO_PLACEMARKER_OPEN, | ||||
|     CHANGED_INTO_PLACEMARKER_CLOSED, | ||||
| ) | ||||
|  | ||||
# Discord embed sidebar colors for different change types
DISCORD_COLOR_UNCHANGED = 8421504   # Gray (#808080)
DISCORD_COLOR_REMOVED = 16711680    # Red (#FF0000)
DISCORD_COLOR_ADDED = 65280         # Green (#00FF00)
DISCORD_COLOR_CHANGED = 16753920    # Orange (#FFA500)
DISCORD_COLOR_CHANGED_INTO = 3447003  # Blue (#3498DB)
DISCORD_COLOR_WARNING = 16776960    # Yellow (#FFFF00)
|  | ||||
|  | ||||
class NotifyDiscordCustom(NotifyDiscord):
    """
    Custom Discord notification handler that supports multiple colored embeds
    for showing removed (red) and added (green) content separately.

    When the notification body contains changedetection diff placeholders and
    the format is markdown/html, the message is rendered as a sequence of
    color-coded embeds; otherwise the stock NotifyDiscord behaviour is used.
    """

    def send(self, body, title="", notify_type=None, attach=None, **kwargs):
        """
        Override send method to create custom embeds with red/green colors
        for removed/added content when placeholders are present.
        """

        # Check if body contains our diff placeholders
        has_removed = REMOVED_PLACEMARKER_OPEN in body
        has_added = ADDED_PLACEMARKER_OPEN in body
        has_changed = CHANGED_PLACEMARKER_OPEN in body
        has_changed_into = CHANGED_INTO_PLACEMARKER_OPEN in body

        # If we have diff placeholders and we're in markdown/html format, create custom embeds
        if (has_removed or has_added or has_changed or has_changed_into) and self.notify_format in (NotifyFormat.MARKDOWN, NotifyFormat.HTML):
            return self._send_with_colored_embeds(body, title, notify_type, attach, **kwargs)

        # Otherwise, use the parent class's default behavior
        return super().send(body, title, notify_type, attach, **kwargs)

    def _send_with_colored_embeds(self, body, title, notify_type, attach, **kwargs):
        """
        Send Discord message with embeds in the original diff order.
        Preserves the sequence: unchanged -> removed -> added -> unchanged, etc.

        Returns True on success, False if any webhook POST fails.
        """
        payload = {
            "tts": self.tts,
            "wait": self.tts is False,
        }

        if self.flags:
            payload["flags"] = self.flags

        # Acquire image_url
        image_url = self.image_url(notify_type)

        if self.avatar and (image_url or self.avatar_url):
            payload["avatar_url"] = self.avatar_url if self.avatar_url else image_url

        if self.user:
            payload["username"] = self.user

        # Associate our thread_id with our message
        params = {"thread_id": self.thread_id} if self.thread_id else None

        # Build embeds array preserving order
        embeds = []

        # Add title as plain bold text in message content (not an embed)
        if title:
            payload["content"] = f"**{title}**"

        # Parse the body into ordered chunks
        chunks = self._parse_body_into_chunks(body)

        # Discord limits:
        # - Max 10 embeds per message
        # - Max 6000 characters total across all embeds
        # - Max 4096 characters per embed description
        max_embeds = 10
        max_total_chars = 6000
        max_embed_description = 4096

        # All 10 embed slots are available for content
        max_content_embeds = max_embeds

        # Start character count
        total_chars = 0

        # Create embeds from chunks in order (no titles, just color coding)
        for chunk_type, content in chunks:
            if not content.strip():
                continue

            # Truncate individual embed description if needed
            if len(content) > max_embed_description:
                content = content[:max_embed_description - 3] + "..."

            # Check if we're approaching the embed count limit
            # We need room for the warning embed, so stop at max_content_embeds - 1
            current_content_embeds = len(embeds)
            if current_content_embeds >= max_content_embeds - 1:
                # Add a truncation notice (this will be the 10th embed)
                embeds.append({
                    "description": "⚠️ Content truncated (Discord 10 embed limit reached) - Tip: Select 'Plain Text' or 'HTML' format for longer diffs",
                    "color": DISCORD_COLOR_WARNING,
                })
                break

            # Check if adding this embed would exceed total character limit
            if total_chars + len(content) > max_total_chars:
                # Add a truncation notice
                remaining_chars = max_total_chars - total_chars
                if remaining_chars > 100:
                    # Add partial content if we have room
                    truncated_content = content[:remaining_chars - 100] + "..."
                    embeds.append({
                        "description": truncated_content,
                        "color": (DISCORD_COLOR_UNCHANGED if chunk_type == "unchanged"
                                 else DISCORD_COLOR_REMOVED if chunk_type == "removed"
                                 else DISCORD_COLOR_ADDED),
                    })
                embeds.append({
                    "description": "⚠️ Content truncated (Discord 6000 char limit reached)\nTip: Select 'Plain Text' or 'HTML' format for longer diffs",
                    "color": DISCORD_COLOR_WARNING,
                })
                break

            if chunk_type == "unchanged":
                embeds.append({
                    "description": content,
                    "color": DISCORD_COLOR_UNCHANGED,
                })
            elif chunk_type == "removed":
                embeds.append({
                    "description": content,
                    "color": DISCORD_COLOR_REMOVED,
                })
            elif chunk_type == "added":
                embeds.append({
                    "description": content,
                    "color": DISCORD_COLOR_ADDED,
                })
            elif chunk_type == "changed":
                # Changed (old value) - use orange to distinguish from pure removal
                embeds.append({
                    "description": content,
                    "color": DISCORD_COLOR_CHANGED,
                })
            elif chunk_type == "changed_into":
                # Changed into (new value) - use blue to distinguish from pure addition
                embeds.append({
                    "description": content,
                    "color": DISCORD_COLOR_CHANGED_INTO,
                })

            total_chars += len(content)

        if embeds:
            payload["embeds"] = embeds

        # Send the payload using parent's _send method
        if not self._send(payload, params=params):
            return False

        # Handle attachments if present
        if attach and self.attachment_support:
            payload.update({
                "tts": False,
                "wait": True,
            })
            # Attachment posts carry no embeds/content of their own
            payload.pop("embeds", None)
            payload.pop("content", None)
            payload.pop("allow_mentions", None)

            for attachment in attach:
                self.logger.info(f"Posting Discord Attachment {attachment.name}")
                if not self._send(payload, params=params, attach=attachment):
                    return False

        return True

    def _parse_body_into_chunks(self, body):
        """
        Parse the body into ordered chunks of (type, content) tuples.
        Types: "unchanged", "removed", "added", "changed", "changed_into"
        Preserves the original order of the diff.
        """
        chunks = []
        position = 0

        while position < len(body):
            # Find the next marker
            next_removed = body.find(REMOVED_PLACEMARKER_OPEN, position)
            next_added = body.find(ADDED_PLACEMARKER_OPEN, position)
            next_changed = body.find(CHANGED_PLACEMARKER_OPEN, position)
            next_changed_into = body.find(CHANGED_INTO_PLACEMARKER_OPEN, position)

            # Determine which marker comes first
            if next_removed == -1 and next_added == -1 and next_changed == -1 and next_changed_into == -1:
                # No more markers, rest is unchanged
                if position < len(body):
                    chunks.append(("unchanged", body[position:]))
                break

            # Find the earliest marker
            next_marker_pos = None
            next_marker_type = None

            # Compare all marker positions to find the earliest
            markers = []
            if next_removed != -1:
                markers.append((next_removed, "removed"))
            if next_added != -1:
                markers.append((next_added, "added"))
            if next_changed != -1:
                markers.append((next_changed, "changed"))
            if next_changed_into != -1:
                markers.append((next_changed_into, "changed_into"))

            if markers:
                next_marker_pos, next_marker_type = min(markers, key=lambda x: x[0])

            # Add unchanged content before the marker
            if next_marker_pos > position:
                chunks.append(("unchanged", body[position:next_marker_pos]))

            # Find the closing marker
            if next_marker_type == "removed":
                open_marker = REMOVED_PLACEMARKER_OPEN
                close_marker = REMOVED_PLACEMARKER_CLOSED
            elif next_marker_type == "added":
                open_marker = ADDED_PLACEMARKER_OPEN
                close_marker = ADDED_PLACEMARKER_CLOSED
            elif next_marker_type == "changed":
                open_marker = CHANGED_PLACEMARKER_OPEN
                close_marker = CHANGED_PLACEMARKER_CLOSED
            else:  # changed_into
                open_marker = CHANGED_INTO_PLACEMARKER_OPEN
                close_marker = CHANGED_INTO_PLACEMARKER_CLOSED

            close_pos = body.find(close_marker, next_marker_pos)

            if close_pos == -1:
                # No closing marker, take rest as this type
                content = body[next_marker_pos + len(open_marker):]
                chunks.append((next_marker_type, content))
                break
            else:
                # Extract content between markers
                content = body[next_marker_pos + len(open_marker):close_pos]
                chunks.append((next_marker_type, content))
                position = close_pos + len(close_marker)

        return chunks
|  | ||||
|  | ||||
# Register the custom Discord handler with Apprise so discord:// URLs can
# route through here (intended to shadow the built-in handler)
@notify(on="discord")
def discord_custom_wrapper(body, title, notify_type, meta, body_format=None, *args, **kwargs):
    """
    Decorator-registered shim for the custom Discord handler.

    Apprise's @notify decorator may not actually be able to override a
    built-in plugin, so the NotifyDiscordCustom class above remains the
    supported way to extend NotifyDiscord. This stub only logs that it was
    invoked and reports success.
    """
    logger.info("Custom Discord handler called")
    # Kept around in case decorator-based registration is wired up later
    return True
							
								
								
									
										42
									
								
								changedetectionio/notification/email_helpers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								changedetectionio/notification/email_helpers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
def as_monospaced_html_email(content: str, title: str) -> str:
    """
    Build a complete, email-client-safe HTML document that renders `content`
    inside a monospace <pre> block (Gmail, Hotmail, Apple Mail, etc.).

    Args:
        content: The body text (plain text or HTML-like).
        title: The title plaintext
    Returns:
        A complete HTML document string suitable for sending as an email body.
    """
    import html

    # Strip every CR and LF so callers feed us <br> tags only; otherwise the
    # <pre> styling below would render doubled line breaks.
    content = content.replace('\r', '').replace('\n', '')

    # Escape the title for safe embedding in <title>; empty/None becomes ''
    title = html.escape(title) if title else ''

    # Email-safe HTML (the MSO conditional keeps Outlook monospaced too)
    html_email = f"""<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="x-apple-disable-message-reformatting">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <!--[if mso]>
    <style>
      body, div, pre, td {{ font-family: "Courier New", Courier, monospace !important; }}
    </style>
  <![endif]-->
  <title>{title}</title>
</head>
<body style="-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;">
  <pre role="article" aria-roledescription="email" lang="en"
       style="font-family: monospace, 'Courier New', Courier; font-size: 0.8em;
              white-space: pre-wrap; word-break: break-word;">{content}</pre>
</body>
</html>"""
    return html_email
| @@ -1,30 +1,274 @@ | ||||
|  | ||||
| import time | ||||
| import apprise | ||||
| from apprise import NotifyFormat | ||||
| from loguru import logger | ||||
| from urllib.parse import urlparse | ||||
| from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL | ||||
| from .apprise_plugin.custom_handlers import SUPPORTED_HTTP_METHODS | ||||
| from .email_helpers import as_monospaced_html_email | ||||
| from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, ADDED_PLACEMARKER_OPEN, HTML_ADDED_STYLE, \ | ||||
|     ADDED_PLACEMARKER_CLOSED, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, CHANGED_PLACEMARKER_OPEN, \ | ||||
|     CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE | ||||
| from ..notification_service import NotificationContextData, CUSTOM_LINEBREAK_PLACEHOLDER | ||||
|  | ||||
| def process_notification(n_object, datastore): | ||||
|     from changedetectionio.safe_jinja import render as jinja_render | ||||
|     from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats | ||||
|  | ||||
|  | ||||
def markup_text_links_to_html(body):
    """
    Convert plaintext to HTML with clickable links.
    Uses Jinja2's escape and Markup for XSS safety.

    Args:
        body: Plain text which may contain URLs.
    Returns:
        str: HTML-escaped text with each URL wrapped in an <a> tag.
             (Fix: previously the no-URL branch returned a Markup object while
             the URL branch returned a plain str; both now return plain str.)
    """
    from linkify_it import LinkifyIt
    from markupsafe import Markup, escape

    linkify = LinkifyIt()

    # Match URLs in the ORIGINAL text (before escaping) so offsets line up
    matches = linkify.match(body)

    if not matches:
        # No URLs - escape everything; str() keeps the return type consistent
        # with the URL branch below
        return str(escape(body))

    parts = []
    cursor = 0

    # Process each URL match in order
    for match in matches:
        # Escaped plain text between the previous URL and this one
        if match.index > cursor:
            parts.append(escape(body[cursor:match.index]))

        # The link itself - escaped both as href and as display text
        url = match.url
        parts.append(Markup(f'<a href="{escape(url)}">{escape(url)}</a>'))

        cursor = match.last_index

    # Remaining escaped text after the final URL
    if cursor < len(body):
        parts.append(escape(body[cursor:]))

    # Join all parts into one plain string
    return ''.join(str(part) for part in parts)
|  | ||||
def notification_format_align_with_apprise(n_format : str):
    """
    Map a changedetection notification format name onto the matching apprise
    NotifyFormat value. These set the expected OUTPUT format type.
    They are probably identical strings already, but this keeps them aligned
    for certain.
    :param n_format:
    :return:
    """
    # Variants such as 'htmlcolor' collapse onto the base apprise format
    # (apprise only knows 'html'); anything unrecognised falls back to text.
    prefix_to_format = (
        ('html', NotifyFormat.HTML),
        ('markdown', NotifyFormat.MARKDOWN),
        ('text', NotifyFormat.TEXT),
    )
    for prefix, apprise_format in prefix_to_format:
        if n_format.startswith(prefix):
            return apprise_format.value

    return NotifyFormat.TEXT.value
|  | ||||
def apply_discord_markdown_to_body(n_body):
    """
    Discord does not support <del> but it supports non-standard ~~strikethrough~~
    :param n_body:
    :return:
    """
    import re

    # Placeholder pairs and the Discord markdown that replaces each of them
    marker_map = (
        (REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, '~~', '~~'),
        (ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED, '**', '**'),
        (CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED, '~~', '~~'),
        (CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, '**', '**'),
    )

    for opener, closer, md_open, md_close in marker_map:
        # Capture any whitespace just inside the placeholders and re-emit it
        # OUTSIDE the markdown markers - whitespace touching the markers
        # would stop Discord from rendering them.
        rx = re.compile(
            re.escape(opener) + r'(\s*)(.*?)?(\s*)' + re.escape(closer),
            flags=re.DOTALL,
        )
        n_body = rx.sub(lambda m: f"{m.group(1)}{md_open}{m.group(2)}{md_close}{m.group(3)}", n_body)

    return n_body
|  | ||||
def apply_standard_markdown_to_body(n_body):
    """
    Apprise does not support ~~strikethrough~~ but it will convert <del> to HTML strikethrough.
    :param n_body:
    :return:
    """
    import re

    # Placeholder pairs and the markup that replaces each of them
    marker_map = (
        (REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, '<del>', '</del>'),
        (ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED, '**', '**'),
        (CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED, '<del>', '</del>'),
        (CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, '**', '**'),
    )

    for opener, closer, md_open, md_close in marker_map:
        # Capture any whitespace just inside the placeholders and re-emit it
        # OUTSIDE the replacement markers so the markdown is not broken by
        # adjacent whitespace.
        rx = re.compile(
            re.escape(opener) + r'(\s*)(.*?)?(\s*)' + re.escape(closer),
            flags=re.DOTALL,
        )
        n_body = rx.sub(lambda m: f"{m.group(1)}{md_open}{m.group(2)}{md_close}{m.group(3)}", n_body)

    return n_body
|  | ||||
|  | ||||
| def apply_service_tweaks(url, n_body, n_title, requested_output_format): | ||||
|  | ||||
|     # Re 323 - Limit discord length to their 2000 char limit total or it wont send. | ||||
|     # Because different notifications may require different pre-processing, run each sequentially :( | ||||
|     # 2000 bytes minus - | ||||
|     #     200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers | ||||
|     #     Length of URL - In case they specify a longer custom avatar_url | ||||
|  | ||||
|     if not n_body or not n_body.strip(): | ||||
|         return url, n_body, n_title | ||||
|  | ||||
|     # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload | ||||
|     parsed = urlparse(url) | ||||
|     k = '?' if not parsed.query else '&' | ||||
|     if url and not 'avatar_url' in url \ | ||||
|             and not url.startswith('mail') \ | ||||
|             and not url.startswith('post') \ | ||||
|             and not url.startswith('get') \ | ||||
|             and not url.startswith('delete') \ | ||||
|             and not url.startswith('put'): | ||||
|         url += k + f"avatar_url={APPRISE_AVATAR_URL}" | ||||
|  | ||||
|     if url.startswith('tgram://'): | ||||
|         # Telegram only supports a limited subset of HTML, remove the '<br>' we place in. | ||||
|         # re https://github.com/dgtlmoon/changedetection.io/issues/555 | ||||
|         # @todo re-use an existing library we have already imported to strip all non-allowed tags | ||||
|         n_body = n_body.replace('<br>', '\n') | ||||
|         n_body = n_body.replace('</br>', '\n') | ||||
|         n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n') | ||||
|  | ||||
|         # Use strikethrough for removed content, bold for added content | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '<s>') | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '</s>') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '<b>') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '</b>') | ||||
|         # Handle changed/replaced lines (old → new) | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, '<s>') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, '</s>') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>') | ||||
|  | ||||
|         # real limit is 4096, but minus some for extra metadata | ||||
|         payload_max_size = 3600 | ||||
|         body_limit = max(0, payload_max_size - len(n_title)) | ||||
|         n_title = n_title[0:payload_max_size] | ||||
|         n_body = n_body[0:body_limit] | ||||
|  | ||||
|     elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') | ||||
|           or url.startswith('https://discord.com/api'))\ | ||||
|             and 'html' in requested_output_format: | ||||
|         # Discord doesn't support HTML, replace <br> with newlines | ||||
|         n_body = n_body.strip().replace('<br>', '\n') | ||||
|         n_body = n_body.replace('</br>', '\n') | ||||
|         n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n') | ||||
|  | ||||
|         # Don't replace placeholders or truncate here - let the custom Discord plugin handle it | ||||
|         # The plugin will use embeds (6000 char limit across all embeds) if placeholders are present, | ||||
|         # or plain content (2000 char limit) otherwise | ||||
|  | ||||
|         # Only do placeholder replacement if NOT using htmlcolor (which triggers embeds in custom plugin) | ||||
|         if requested_output_format == 'html': | ||||
|             # No diff placeholders, use Discord markdown for any other formatting | ||||
|             # Use Discord markdown: strikethrough for removed, bold for added | ||||
|             n_body = apply_discord_markdown_to_body(n_body=n_body) | ||||
|  | ||||
|             # Apply 2000 char limit for plain content | ||||
|             payload_max_size = 1700 | ||||
|             body_limit = max(0, payload_max_size - len(n_title)) | ||||
|             n_title = n_title[0:payload_max_size] | ||||
|             n_body = n_body[0:body_limit] | ||||
|         # else: our custom Discord plugin will convert any placeholders left over into embeds with color bars | ||||
|  | ||||
|     # Is not discord/tgram and they want htmlcolor | ||||
|     elif requested_output_format == 'htmlcolor': | ||||
|         # https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050 | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">') | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, f'</span>') | ||||
|         # Handle changed/replaced lines (old → new) | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>') | ||||
|         n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n') | ||||
|     elif requested_output_format == 'html': | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ') | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'') | ||||
|         n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n') | ||||
|     elif requested_output_format == 'markdown': | ||||
|         # Markdown to HTML - Apprise will convert this to HTML | ||||
|         n_body = apply_standard_markdown_to_body(n_body=n_body) | ||||
|  | ||||
|     else: #plaintext etc default | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ') | ||||
|         n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ') | ||||
|         n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ') | ||||
|         n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ') | ||||
|         n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'') | ||||
|  | ||||
|     return url, n_body, n_title | ||||
|  | ||||
|  | ||||
| def process_notification(n_object: NotificationContextData, datastore): | ||||
|     from changedetectionio.jinja2_custom import render as jinja_render | ||||
|     from . import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, default_notification_format, valid_notification_formats | ||||
|     # be sure its registered | ||||
|     from .apprise_plugin.custom_handlers import apprise_http_custom_handler | ||||
|     # Register custom Discord plugin | ||||
|     from .apprise_plugin.discord import NotifyDiscordCustom | ||||
|  | ||||
|     if not isinstance(n_object, NotificationContextData): | ||||
|         raise TypeError(f"Expected NotificationContextData, got {type(n_object)}") | ||||
|  | ||||
|     now = time.time() | ||||
|     if n_object.get('notification_timestamp'): | ||||
|         logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s") | ||||
|  | ||||
|     # Insert variables into the notification content | ||||
|     notification_parameters = create_notification_parameters(n_object, datastore) | ||||
|  | ||||
|     n_format = valid_notification_formats.get( | ||||
|         n_object.get('notification_format', default_notification_format), | ||||
|         valid_notification_formats[default_notification_format], | ||||
|     ) | ||||
|     requested_output_format = n_object.get('notification_format', default_notification_format) | ||||
|     logger.debug(f"Requested notification output format: '{requested_output_format}'") | ||||
|  | ||||
|     # If we arrived with 'System default' then look it up | ||||
|     if n_format == default_notification_format_for_watch and datastore.data['settings']['application'].get('notification_format') != default_notification_format_for_watch: | ||||
|     if requested_output_format == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: | ||||
|         # Initially text or whatever | ||||
|         n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format]) | ||||
|         requested_output_format = datastore.data['settings']['application'].get('notification_format', default_notification_format) | ||||
|  | ||||
|     requested_output_format_original = requested_output_format | ||||
|  | ||||
|     # Now clean it up so it fits perfectly with apprise | ||||
|     requested_output_format = notification_format_align_with_apprise(n_format=requested_output_format) | ||||
|  | ||||
|     logger.trace(f"Complete notification body including Jinja and placeholders calculated in  {time.time() - now:.2f}s") | ||||
|  | ||||
| @@ -39,16 +283,23 @@ def process_notification(n_object, datastore): | ||||
|  | ||||
|     apobj = apprise.Apprise(debug=True, asset=apprise_asset) | ||||
|  | ||||
|     # Override Apprise's built-in Discord plugin with our custom one | ||||
|     # This allows us to use colored embeds for diff content | ||||
|     # First remove the built-in discord plugin, then add our custom one | ||||
|     apprise.plugins.N_MGR.remove('discord') | ||||
|     apprise.plugins.N_MGR.add(NotifyDiscordCustom, schemas='discord') | ||||
|  | ||||
|     if not n_object.get('notification_urls'): | ||||
|         return None | ||||
|  | ||||
|     with apprise.LogCapture(level=apprise.logging.DEBUG) as logs: | ||||
|     with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs): | ||||
|         for url in n_object['notification_urls']: | ||||
|  | ||||
|             # Get the notification body from datastore | ||||
|             n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters) | ||||
|             if n_object.get('notification_format', '').startswith('HTML'): | ||||
|                 n_body = n_body.replace("\n", '<br>') | ||||
|  | ||||
|             if n_object.get('markup_text_links_to_html_links'): | ||||
|                 n_body = markup_text_links_to_html(body=n_body) | ||||
|  | ||||
|             n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters) | ||||
|  | ||||
| @@ -64,74 +315,88 @@ def process_notification(n_object, datastore): | ||||
|             logger.info(f">> Process Notification: AppRise notifying {url}") | ||||
|             url = jinja_render(template_str=url, **notification_parameters) | ||||
|  | ||||
|             # Re 323 - Limit discord length to their 2000 char limit total or it wont send. | ||||
|             # Because different notifications may require different pre-processing, run each sequentially :( | ||||
|             # 2000 bytes minus - | ||||
|             #     200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers | ||||
|             #     Length of URL - Incase they specify a longer custom avatar_url | ||||
|             # If it's a plaintext document, and they want HTML type email/alerts, so it needs to be escaped | ||||
|             watch_mime_type = n_object.get('watch_mime_type') | ||||
|             if watch_mime_type and 'text/' in watch_mime_type.lower() and not 'html' in watch_mime_type.lower(): | ||||
|                 if 'html' in requested_output_format: | ||||
|                     from markupsafe import escape | ||||
|                     n_body = str(escape(n_body)) | ||||
|  | ||||
|             # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload | ||||
|             k = '?' if not '?' in url else '&' | ||||
|             if not 'avatar_url' in url \ | ||||
|                     and not url.startswith('mail') \ | ||||
|                     and not url.startswith('post') \ | ||||
|                     and not url.startswith('get') \ | ||||
|                     and not url.startswith('delete') \ | ||||
|                     and not url.startswith('put'): | ||||
|                 url += k + f"avatar_url={APPRISE_AVATAR_URL}" | ||||
|             if 'html' in requested_output_format: | ||||
|                 # Since the n_body is always some kind of text from the 'diff' engine, attempt to preserve whitespaces that get sent to the HTML output | ||||
|                 # But only where its more than 1 consecutive whitespace, otherwise "and this" becomes "and this" etc which is too much. | ||||
|                 n_body = n_body.replace('  ', '  ') | ||||
|  | ||||
|             if url.startswith('tgram://'): | ||||
|                 # Telegram only supports a limit subset of HTML, remove the '<br>' we place in. | ||||
|                 # re https://github.com/dgtlmoon/changedetection.io/issues/555 | ||||
|                 # @todo re-use an existing library we have already imported to strip all non-allowed tags | ||||
|                 n_body = n_body.replace('<br>', '\n') | ||||
|                 n_body = n_body.replace('</br>', '\n') | ||||
|                 # real limit is 4096, but minus some for extra metadata | ||||
|                 payload_max_size = 3600 | ||||
|                 body_limit = max(0, payload_max_size - len(n_title)) | ||||
|                 n_title = n_title[0:payload_max_size] | ||||
|                 n_body = n_body[0:body_limit] | ||||
|             (url, n_body, n_title) = apply_service_tweaks(url=url, n_body=n_body, n_title=n_title, requested_output_format=requested_output_format_original) | ||||
|  | ||||
|             elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith( | ||||
|                     'https://discord.com/api'): | ||||
|                 # real limit is 2000, but minus some for extra metadata | ||||
|                 payload_max_size = 1700 | ||||
|                 body_limit = max(0, payload_max_size - len(n_title)) | ||||
|                 n_title = n_title[0:payload_max_size] | ||||
|                 n_body = n_body[0:body_limit] | ||||
|             apprise_input_format = "NO-THANKS-WE-WILL-MANAGE-ALL-OF-THIS" | ||||
|  | ||||
|             elif url.startswith('mailto'): | ||||
|                 # Apprise will default to HTML, so we need to override it | ||||
|                 # So that whats' generated in n_body is in line with what is going to be sent. | ||||
|                 # https://github.com/caronc/apprise/issues/633#issuecomment-1191449321 | ||||
|                 if not 'format=' in url and (n_format == 'Text' or n_format == 'Markdown'): | ||||
|                     prefix = '?' if not '?' in url else '&' | ||||
|                     # Apprise format is lowercase text https://github.com/caronc/apprise/issues/633 | ||||
|                     n_format = n_format.lower() | ||||
|                     url = f"{url}{prefix}format={n_format}" | ||||
|                 # If n_format == HTML, then apprise email should default to text/html and we should be sending HTML only | ||||
|             if not 'format=' in url: | ||||
|                 parsed_url = urlparse(url) | ||||
|                 prefix_add_to_url = '?' if not parsed_url.query else '&' | ||||
|  | ||||
|             apobj.add(url) | ||||
|                 # THIS IS THE TRICK HOW TO DISABLE APPRISE DOING WEIRD AUTO-CONVERSION WITH BREAKING BR TAGS ETC | ||||
|                 if 'html' in requested_output_format: | ||||
|                     url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}" | ||||
|                     apprise_input_format = NotifyFormat.HTML.value | ||||
|                 elif 'text' in requested_output_format: | ||||
|                     url = f"{url}{prefix_add_to_url}format={NotifyFormat.TEXT.value}" | ||||
|                     apprise_input_format = NotifyFormat.TEXT.value | ||||
|  | ||||
|                 elif requested_output_format == NotifyFormat.MARKDOWN.value: | ||||
|                     # Convert markdown to HTML ourselves since not all plugins do this | ||||
|                     from apprise.conversion import markdown_to_html | ||||
|                     # Make sure there are paragraph breaks around horizontal rules | ||||
|                     n_body = n_body.replace('---', '\n\n---\n\n') | ||||
|                     n_body = markdown_to_html(n_body) | ||||
|                     url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}" | ||||
|                     requested_output_format = NotifyFormat.HTML.value | ||||
|                     apprise_input_format = NotifyFormat.HTML.value  # Changed from MARKDOWN to HTML | ||||
|  | ||||
|                 # Could have arrived at any stage, so we dont end up running .escape on it | ||||
|                 if 'html' in requested_output_format: | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n') | ||||
|                 else: | ||||
|                     # texty types | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n') | ||||
|  | ||||
|             else: | ||||
|                 # ?format was IN the apprise URL, they are kind of on their own here, we will try our best | ||||
|                 if 'format=html' in url: | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n') | ||||
|                     # This will also prevent apprise from doing conversion | ||||
|                     apprise_input_format = NotifyFormat.HTML.value | ||||
|                     requested_output_format = NotifyFormat.HTML.value | ||||
|                 elif 'format=text' in url: | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n') | ||||
|                     apprise_input_format = NotifyFormat.TEXT.value | ||||
|                     requested_output_format = NotifyFormat.TEXT.value | ||||
|  | ||||
|             sent_objs.append({'title': n_title, | ||||
|                               'body': n_body, | ||||
|                               'url': url, | ||||
|                               'body_format': n_format}) | ||||
|                               'url': url}) | ||||
|             apobj.add(url) | ||||
|  | ||||
|             # Since the output is always based on the plaintext of the 'diff' engine, wrap it nicely. | ||||
|             # It should always be similar to the 'history' part of the UI. | ||||
|             if url.startswith('mail') and 'html' in requested_output_format: | ||||
|                 if not '<pre' in n_body and not '<body' in n_body: # No custom HTML-ish body was setup already | ||||
|                     n_body = as_monospaced_html_email(content=n_body, title=n_title) | ||||
|  | ||||
|         # Blast off the notifications tht are set in .add() | ||||
|         apobj.notify( | ||||
|             title=n_title, | ||||
|             body=n_body, | ||||
|             body_format=n_format, | ||||
|             # `body_format` Tell apprise what format the INPUT is in, specify a wrong/bad type and it will force skip conversion in apprise | ||||
|             # &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between) | ||||
|             body_format=apprise_input_format, | ||||
|             # False is not an option for AppRise, must be type None | ||||
|             attach=n_object.get('screenshot', None) | ||||
|         ) | ||||
|  | ||||
|  | ||||
|         # Returns empty string if nothing found, multi-line string otherwise | ||||
|         log_value = logs.getvalue() | ||||
|  | ||||
|         if log_value and 'WARNING' in log_value or 'ERROR' in log_value: | ||||
|         if log_value and ('WARNING' in log_value or 'ERROR' in log_value): | ||||
|             logger.critical(log_value) | ||||
|             raise Exception(log_value) | ||||
|  | ||||
| @@ -141,17 +406,15 @@ def process_notification(n_object, datastore): | ||||
|  | ||||
| # Notification title + body content parameters get created here. | ||||
| # ( Where we prepare the tokens in the notification to be replaced with actual values ) | ||||
| def create_notification_parameters(n_object, datastore): | ||||
|     from copy import deepcopy | ||||
|     from . import valid_tokens | ||||
| def create_notification_parameters(n_object: NotificationContextData, datastore): | ||||
|     if not isinstance(n_object, NotificationContextData): | ||||
|         raise TypeError(f"Expected NotificationContextData, got {type(n_object)}") | ||||
|  | ||||
|     # in the case we send a test notification from the main settings, there is no UUID. | ||||
|     uuid = n_object['uuid'] if 'uuid' in n_object else '' | ||||
|  | ||||
|     if uuid: | ||||
|         watch_title = datastore.data['watching'][uuid].label | ||||
|     watch = datastore.data['watching'].get(n_object['uuid']) | ||||
|     if watch: | ||||
|         watch_title = datastore.data['watching'][n_object['uuid']].label | ||||
|         tag_list = [] | ||||
|         tags = datastore.get_all_tags_for_watch(uuid) | ||||
|         tags = datastore.get_all_tags_for_watch(n_object['uuid']) | ||||
|         if tags: | ||||
|             for tag_uuid, tag in tags.items(): | ||||
|                 tag_list.append(tag.get('title')) | ||||
| @@ -166,14 +429,10 @@ def create_notification_parameters(n_object, datastore): | ||||
|  | ||||
|     watch_url = n_object['watch_url'] | ||||
|  | ||||
|     diff_url = "{}/diff/{}".format(base_url, uuid) | ||||
|     preview_url = "{}/preview/{}".format(base_url, uuid) | ||||
|     diff_url = "{}/diff/{}".format(base_url, n_object['uuid']) | ||||
|     preview_url = "{}/preview/{}".format(base_url, n_object['uuid']) | ||||
|  | ||||
|     # Not sure deepcopy is needed here, but why not | ||||
|     tokens = deepcopy(valid_tokens) | ||||
|  | ||||
|     # Valid_tokens also used as a field validator | ||||
|     tokens.update( | ||||
|     n_object.update( | ||||
|         { | ||||
|             'base_url': base_url, | ||||
|             'diff_url': diff_url, | ||||
| @@ -181,13 +440,10 @@ def create_notification_parameters(n_object, datastore): | ||||
|             'watch_tag': watch_tag if watch_tag is not None else '', | ||||
|             'watch_title': watch_title if watch_title is not None else '', | ||||
|             'watch_url': watch_url, | ||||
|             'watch_uuid': uuid, | ||||
|             'watch_uuid': n_object['uuid'], | ||||
|         }) | ||||
|  | ||||
|     # n_object will contain diff, diff_added etc etc | ||||
|     tokens.update(n_object) | ||||
|     if watch: | ||||
|         n_object.update(datastore.data['watching'].get(n_object['uuid']).extra_notification_token_values()) | ||||
|  | ||||
|     if uuid: | ||||
|         tokens.update(datastore.data['watching'].get(uuid).extra_notification_token_values()) | ||||
|  | ||||
|     return tokens | ||||
|     return n_object | ||||
|   | ||||
| @@ -6,9 +6,70 @@ Extracted from update_worker.py to provide standalone notification functionality | ||||
| for both sync and async workers | ||||
| """ | ||||
|  | ||||
| import time | ||||
| from loguru import logger | ||||
| import time | ||||
|  | ||||
| from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
| from changedetectionio.notification import default_notification_format, valid_notification_formats | ||||
|  | ||||
| # This gets modified on notification time (handler.py) depending on the required notification output | ||||
| CUSTOM_LINEBREAK_PLACEHOLDER='@BR@' | ||||
|  | ||||
|  | ||||
| # What is passed around as notification context, also used as the complete list of valid {{ tokens }} | ||||
| class NotificationContextData(dict): | ||||
|     def __init__(self, initial_data=None, **kwargs): | ||||
|         super().__init__({ | ||||
|             'base_url': None, | ||||
|             'current_snapshot': None, | ||||
|             'diff': None, | ||||
|             'diff_added': None, | ||||
|             'diff_full': None, | ||||
|             'diff_patch': None, | ||||
|             'diff_removed': None, | ||||
|             'diff_url': None, | ||||
|             'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen | ||||
|             'notification_timestamp': time.time(), | ||||
|             'preview_url': None, | ||||
|             'screenshot': None, | ||||
|             'triggered_text': None, | ||||
|             'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX',  # Converted to 'watch_uuid' in create_notification_parameters | ||||
|             'watch_mime_type': None, | ||||
|             'watch_tag': None, | ||||
|             'watch_title': None, | ||||
|             'watch_url': 'https://WATCH-PLACE-HOLDER/', | ||||
|         }) | ||||
|  | ||||
|         # Apply any initial data passed in | ||||
|         self.update({'watch_uuid': self.get('uuid')}) | ||||
|         if initial_data: | ||||
|             self.update(initial_data) | ||||
|  | ||||
|         # Apply any keyword arguments | ||||
|         if kwargs: | ||||
|             self.update(kwargs) | ||||
|  | ||||
|         n_format = self.get('notification_format') | ||||
|         if n_format and not valid_notification_formats.get(n_format): | ||||
|             raise ValueError(f'Invalid notification format: "{n_format}"') | ||||
|  | ||||
|     def set_random_for_validation(self): | ||||
|         import random, string | ||||
|         """Randomly fills all dict keys with random strings (for validation/testing).  | ||||
|         So we can test the output in the notification body | ||||
|         """ | ||||
|         for key in self.keys(): | ||||
|             if key in ['uuid', 'time', 'watch_uuid']: | ||||
|                 continue | ||||
|             rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12)) | ||||
|             self[key] = rand_str | ||||
|  | ||||
|     def __setitem__(self, key, value): | ||||
|         if key == 'notification_format' and isinstance(value, str) and not value.startswith('RANDOM-PLACEHOLDER-'): | ||||
|             if not valid_notification_formats.get(value): | ||||
|                 raise ValueError(f'Invalid notification format: "{value}"') | ||||
|  | ||||
|         super().__setitem__(key, value) | ||||
|  | ||||
| class NotificationService: | ||||
|     """ | ||||
| @@ -20,12 +81,15 @@ class NotificationService: | ||||
|         self.datastore = datastore | ||||
|         self.notification_q = notification_q | ||||
|      | ||||
|     def queue_notification_for_watch(self, n_object, watch): | ||||
|     def queue_notification_for_watch(self, n_object: NotificationContextData, watch): | ||||
|         """ | ||||
|         Queue a notification for a watch with full diff rendering and template variables | ||||
|         """ | ||||
|         from changedetectionio import diff | ||||
|         from changedetectionio.notification import default_notification_format_for_watch | ||||
|         from changedetectionio.notification import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|  | ||||
|         if not isinstance(n_object, NotificationContextData): | ||||
|             raise TypeError(f"Expected NotificationContextData, got {type(n_object)}") | ||||
|  | ||||
|         dates = [] | ||||
|         trigger_text = '' | ||||
| @@ -44,29 +108,16 @@ class NotificationService: | ||||
|             snapshot_contents = "No snapshot/history available, the watch should fetch atleast once." | ||||
|  | ||||
|         # If we ended up here with "System default" | ||||
|         if n_object.get('notification_format') == default_notification_format_for_watch: | ||||
|         if n_object.get('notification_format') == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: | ||||
|             n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format') | ||||
|  | ||||
|         html_colour_enable = False | ||||
|         # HTML needs linebreak, but MarkDown and Text can use a linefeed | ||||
|         if n_object.get('notification_format') == 'HTML': | ||||
|             line_feed_sep = "<br>" | ||||
|             # Snapshot will be plaintext on the disk, convert to some kind of HTML | ||||
|             snapshot_contents = snapshot_contents.replace('\n', line_feed_sep) | ||||
|         elif n_object.get('notification_format') == 'HTML Color': | ||||
|             line_feed_sep = "<br>" | ||||
|             # Snapshot will be plaintext on the disk, convert to some kind of HTML | ||||
|             snapshot_contents = snapshot_contents.replace('\n', line_feed_sep) | ||||
|             html_colour_enable = True | ||||
|         else: | ||||
|             line_feed_sep = "\n" | ||||
|  | ||||
|         triggered_text = '' | ||||
|         if len(trigger_text): | ||||
|             from . import html_tools | ||||
|             triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text) | ||||
|             if triggered_text: | ||||
|                 triggered_text = line_feed_sep.join(triggered_text) | ||||
|                 triggered_text = CUSTOM_LINEBREAK_PLACEHOLDER.join(triggered_text) | ||||
|  | ||||
|         # Could be called as a 'test notification' with only 1 snapshot available | ||||
|         prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n" | ||||
| @@ -78,16 +129,17 @@ class NotificationService: | ||||
|  | ||||
|         n_object.update({ | ||||
|             'current_snapshot': snapshot_contents, | ||||
|             'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable), | ||||
|             'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep), | ||||
|             'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable), | ||||
|             'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True), | ||||
|             'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep), | ||||
|             'notification_timestamp': now, | ||||
|             'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER, patch_format=True), | ||||
|             'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None, | ||||
|             'triggered_text': triggered_text, | ||||
|             'uuid': watch.get('uuid') if watch else None, | ||||
|             'watch_url': watch.get('url') if watch else None, | ||||
|             'watch_uuid': watch.get('uuid') if watch else None, | ||||
|             'watch_mime_type': watch.get('content-type') | ||||
|         }) | ||||
|  | ||||
|         if watch: | ||||
| @@ -103,7 +155,7 @@ class NotificationService: | ||||
|         Individual watch settings > Tag settings > Global settings | ||||
|         """ | ||||
|         from changedetectionio.notification import ( | ||||
|             default_notification_format_for_watch, | ||||
|             USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, | ||||
|             default_notification_body, | ||||
|             default_notification_title | ||||
|         ) | ||||
| @@ -111,7 +163,7 @@ class NotificationService: | ||||
|         # Would be better if this was some kind of Object where Watch can reference the parent datastore etc | ||||
|         v = watch.get(var_name) | ||||
|         if v and not watch.get('notification_muted'): | ||||
|             if var_name == 'notification_format' and v == default_notification_format_for_watch: | ||||
|             if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: | ||||
|                 return self.datastore.data['settings']['application'].get('notification_format') | ||||
|  | ||||
|             return v | ||||
| @@ -128,7 +180,7 @@ class NotificationService: | ||||
|  | ||||
|         # Otherwise could be defaults | ||||
|         if var_name == 'notification_format': | ||||
|             return default_notification_format_for_watch | ||||
|             return USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|         if var_name == 'notification_body': | ||||
|             return default_notification_body | ||||
|         if var_name == 'notification_title': | ||||
| @@ -140,7 +192,7 @@ class NotificationService: | ||||
|         """ | ||||
|         Send notification when content changes are detected | ||||
|         """ | ||||
|         n_object = {} | ||||
|         n_object = NotificationContextData() | ||||
|         watch = self.datastore.data['watching'].get(watch_uuid) | ||||
|         if not watch: | ||||
|             return | ||||
| @@ -183,11 +235,25 @@ class NotificationService: | ||||
|         if not watch: | ||||
|             return | ||||
|  | ||||
|         n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|                     'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( | ||||
|                         ", ".join(watch['include_filters']), | ||||
|                         threshold), | ||||
|                     'notification_format': 'text'} | ||||
|         filter_list = ", ".join(watch['include_filters']) | ||||
|         # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed | ||||
|         body = f"""Hello, | ||||
|  | ||||
| Your configured CSS/xPath filters of '{filter_list}' for {{{{watch_url}}}} did not appear on the page after {threshold} attempts. | ||||
|  | ||||
| It's possible the page changed layout and the filter needs updating ( Try the 'Visual Selector' tab ) | ||||
|  | ||||
| Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}} | ||||
|  | ||||
| Thanks - Your omniscient changedetection.io installation. | ||||
| """ | ||||
|  | ||||
|         n_object = NotificationContextData({ | ||||
|             'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|             'notification_body': body, | ||||
|             'notification_format': self._check_cascading_vars('notification_format', watch), | ||||
|         }) | ||||
|         n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html') | ||||
|  | ||||
|         if len(watch['notification_urls']): | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
| @@ -215,12 +281,28 @@ class NotificationService: | ||||
|         if not watch: | ||||
|             return | ||||
|         threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts') | ||||
|         n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1), | ||||
|                     'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} " | ||||
|                                          "did not appear on the page after {} attempts, did the page change layout? " | ||||
|                                          "Does it need a delay added?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\n" | ||||
|                                          "Thanks - Your omniscient changedetection.io installation :)\n".format(step_n+1, threshold), | ||||
|                     'notification_format': 'text'} | ||||
|  | ||||
|         step = step_n + 1 | ||||
|         # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed | ||||
|  | ||||
|         # {{{{ }}}} because this will be Jinja2 {{ }} tokens | ||||
|         body = f"""Hello, | ||||
|          | ||||
| Your configured browser step at position {step} for the web page watch {{{{watch_url}}}} did not appear on the page after {threshold} attempts, did the page change layout? | ||||
|  | ||||
| The element may have moved and needs editing, or does it need a delay added? | ||||
|  | ||||
| Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}} | ||||
|  | ||||
| Thanks - Your omniscient changedetection.io installation. | ||||
| """ | ||||
|  | ||||
|         n_object = NotificationContextData({ | ||||
|             'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run", | ||||
|             'notification_body': body, | ||||
|             'notification_format': self._check_cascading_vars('notification_format', watch), | ||||
|         }) | ||||
|         n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html') | ||||
|  | ||||
|         if len(watch['notification_urls']): | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
|   | ||||
| @@ -91,6 +91,8 @@ class difference_detection_processor(): | ||||
|             else: | ||||
|                 logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ") | ||||
|  | ||||
|         logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}") | ||||
|  | ||||
|         # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need. | ||||
|         # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc) | ||||
|         self.fetcher = fetcher_obj(proxy_override=proxy_url, | ||||
| @@ -102,7 +104,7 @@ class difference_detection_processor(): | ||||
|             self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid')) | ||||
|  | ||||
|         # Tweak the base config with the per-watch ones | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|         request_headers = CaseInsensitiveDict() | ||||
|  | ||||
|         ua = self.datastore.data['settings']['requests'].get('default_ua') | ||||
|   | ||||
							
								
								
									
										133
									
								
								changedetectionio/processors/magic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								changedetectionio/processors/magic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
| """ | ||||
| Content Type Detection and Stream Classification | ||||
|  | ||||
| This module provides intelligent content-type detection for changedetection.io. | ||||
| It addresses the common problem where HTTP Content-Type headers are missing, incorrect, | ||||
| or too generic, which would otherwise cause the wrong processor to be used. | ||||
|  | ||||
| The guess_stream_type class combines: | ||||
| 1. HTTP Content-Type headers (when available and reliable) | ||||
| 2. Python-magic library for MIME detection (analyzing actual file content) | ||||
| 3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.) | ||||
|  | ||||
| This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF, | ||||
| plain text, CSV, YAML, and XML formats - even when servers provide misleading headers. | ||||
|  | ||||
| Used by: processors/text_json_diff/processor.py and other content processors | ||||
| """ | ||||
|  | ||||
| # When to apply the 'cdata to real HTML' hack | ||||
| RSS_XML_CONTENT_TYPES = [ | ||||
|     "application/rss+xml", | ||||
|     "application/rdf+xml", | ||||
|     "application/atom+xml", | ||||
|     "text/rss+xml",  # rare, non-standard | ||||
|     "application/x-rss+xml",  # legacy (older feed software) | ||||
|     "application/x-atom+xml",  # legacy (older Atom) | ||||
| ] | ||||
|  | ||||
| # JSON Content-types | ||||
| JSON_CONTENT_TYPES = [ | ||||
|     "application/activity+json", | ||||
|     "application/feed+json", | ||||
|     "application/json", | ||||
|     "application/ld+json", | ||||
|     "application/vnd.api+json", | ||||
| ] | ||||
|  | ||||
|  | ||||
| # Generic XML Content-types (non-RSS/Atom) | ||||
| XML_CONTENT_TYPES = [ | ||||
|     "text/xml", | ||||
|     "application/xml", | ||||
| ] | ||||
|  | ||||
| HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div'] | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
class guess_stream_type():
    """Classify fetched content as HTML / JSON / RSS / XML / PDF / plaintext.

    Servers frequently send a missing, generic or wrong Content-Type header,
    so three signals are combined:

    1. the HTTP Content-Type header (trusted first when it is specific),
    2. puremagic MIME sniffing of the first bytes of the body,
    3. content pattern matching ('<html', '<?xml', '%PDF-1', '<rss', ...).

    The verdict is exposed as boolean flags that downstream processors read
    to pick the right parsing pipeline.
    """

    # Classification flags; normally one "main" type is set, though
    # is_plaintext may be set alongside another flag when the header says text/plain.
    is_pdf = False
    is_json = False
    is_html = False
    is_plaintext = False
    is_rss = False
    is_csv = False
    is_xml = False  # Generic XML, not RSS/Atom
    is_yaml = False

    def __init__(self, http_content_header, content):
        """
        :param http_content_header: Content-Type header value, expected lowercased
                                    (may include parameters such as '; charset=utf-8')
        :param content: document body (str or bytes); only the first 200
                        chars/bytes are inspected for sniffing
        """
        import re
        magic_content_header = http_content_header
        test_content = content[:200].lower().strip()

        # Remove whitespace between < and tag name for robust detection (handles '< html', '<\nhtml', etc.)
        test_content_normalized = re.sub(r'<\s+', '<', test_content)

        # Use puremagic for lightweight MIME detection (saves ~14MB vs python-magic)
        magic_result = None
        try:
            import puremagic

            # puremagic needs bytes, so encode if we have a string
            content_bytes = content[:200].encode('utf-8') if isinstance(content, str) else content[:200]

            # puremagic returns a list of PureMagic objects with confidence scores
            detections = puremagic.magic_string(content_bytes)
            if detections:
                # Get the highest confidence detection
                mime = detections[0].mime_type
                logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'")
                if mime and "/" in mime:
                    magic_result = mime
                    # Ignore generic/fallback mime types
                    if mime in ['application/octet-stream', 'application/x-empty', 'binary']:
                        logger.debug(f"Ignoring generic mime type '{mime}' from puremagic library")
                    # Trust puremagic for non-text types immediately
                    elif mime not in ['text/html', 'text/plain']:
                        magic_content_header = mime

        except Exception as e:
            logger.warning(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection")

        # Content-based detection (most reliable for text formats)
        # Check for HTML patterns first - if found, override magic's text/plain
        has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS)

        # Always trust headers first
        if 'text/plain' in http_content_header:
            self.is_plaintext = True
        if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
            self.is_rss = True
        elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
            self.is_json = True
        elif 'pdf' in magic_content_header:
            self.is_pdf = True
        # Bugfix: use a substring test so headers like 'text/html; charset=utf-8'
        # are recognised as HTML too - the previous strict equality check missed
        # any header carrying parameters, which then fell through to plaintext.
        # (All the other header tests in this method already use containment.)
        elif has_html_patterns or 'text/html' in http_content_header:
            self.is_html = True
        elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
            self.is_json = True
        # magic will call a rss document 'xml'
        # Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss
        # This also triggers the automatic CDATA text parser so the RSS goes back a nice content list
        elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized:
            self.is_rss = True
        elif any(s in http_content_header for s in XML_CONTENT_TYPES):
            # Only mark as generic XML if not already detected as RSS
            if not self.is_rss:
                self.is_xml = True
        elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES):
            # Generic XML that's not RSS/Atom (RSS/Atom checked above)
            self.is_xml = True
        elif '%pdf-1' in test_content:
            self.is_pdf = True
        elif http_content_header.startswith('text/'):
            self.is_plaintext = True
        # Only trust magic for 'text' if no other patterns matched
        elif 'text' in magic_content_header:
            self.is_plaintext = True
        # If magic says text/plain and we found no HTML patterns, trust it
        elif magic_result == 'text/plain':
            self.is_plaintext = True
            logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)")
|  | ||||
| @@ -32,7 +32,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data): | ||||
|     '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])''' | ||||
|     from changedetectionio import forms, html_tools | ||||
|     from changedetectionio.model.Watch import model as watch_model | ||||
|     from concurrent.futures import ProcessPoolExecutor | ||||
|     from concurrent.futures import ThreadPoolExecutor | ||||
|     from copy import deepcopy | ||||
|     from flask import request | ||||
|     import brotli | ||||
| @@ -76,13 +76,16 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data): | ||||
|             update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type') | ||||
|  | ||||
|             # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk | ||||
|             # Do this as a parallel process because it could take some time | ||||
|             with ProcessPoolExecutor(max_workers=2) as executor: | ||||
|                 future1 = executor.submit(_task, tmp_watch, update_handler) | ||||
|                 future2 = executor.submit(_task, blank_watch_no_filters, update_handler) | ||||
|             # Do this as parallel threads (not processes) to avoid pickle issues with Lock objects | ||||
|             try: | ||||
|                 with ThreadPoolExecutor(max_workers=2) as executor: | ||||
|                     future1 = executor.submit(_task, tmp_watch, update_handler) | ||||
|                     future2 = executor.submit(_task, blank_watch_no_filters, update_handler) | ||||
|  | ||||
|                 text_after_filter = future1.result() | ||||
|                 text_before_filter = future2.result() | ||||
|                     text_after_filter = future1.result() | ||||
|                     text_before_filter = future2.result() | ||||
|             except Exception as e: | ||||
|                 x=1 | ||||
|  | ||||
|     try: | ||||
|         trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter, | ||||
|   | ||||
| @@ -7,18 +7,24 @@ import re | ||||
| import urllib3 | ||||
|  | ||||
| from changedetectionio.conditions import execute_ruleset_against_all_plugins | ||||
| from changedetectionio.diff import ADDED_PLACEMARKER_OPEN | ||||
| from changedetectionio.processors import difference_detection_processor | ||||
| from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE | ||||
| from changedetectionio import html_tools, content_fetchers | ||||
| from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.processors.magic import guess_stream_type | ||||
|  | ||||
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | ||||
|  | ||||
| name = 'Webpage Text/HTML, JSON and PDF changes' | ||||
| description = 'Detects all text changes where possible' | ||||
|  | ||||
| json_filter_prefixes = ['json:', 'jq:', 'jqraw:'] | ||||
| JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:'] | ||||
|  | ||||
| # Assume it's this type if the server says nothing on content-type | ||||
| DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER = 'text/html' | ||||
|  | ||||
| class FilterNotFoundInResponse(ValueError): | ||||
|     def __init__(self, msg, screenshot=None, xpath_data=None): | ||||
| @@ -32,353 +38,560 @@ class PDFToHTMLToolNotFound(ValueError): | ||||
|         ValueError.__init__(self, msg) | ||||
|  | ||||
|  | ||||
class FilterConfig:
    """Merged view of all filter and rule settings for one watch.

    Rules can come from three layers - the watch itself, any tags attached
    to it, and (for some attributes) the global application settings - and
    are combined here with order-preserving de-duplication.
    """

    def __init__(self, watch, datastore):
        self.watch = watch
        self.datastore = datastore
        self.watch_uuid = watch.get('uuid')
        # Lazily built caches so the merge work only happens once per access pattern
        self._include_filters_cache = None
        self._subtractive_selectors_cache = None

    @staticmethod
    def _dedupe(items):
        """Drop duplicates while keeping first-seen order."""
        return list(dict.fromkeys(items))

    def _get_merged_rules(self, attr, include_global=False):
        """Combine watch-level and tag-level rules for *attr*, optionally adding globals."""
        merged = self._dedupe(
            self.watch.get(attr, [])
            + self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr=attr)
        )
        if include_global:
            global_rules = self.datastore.data['settings']['application'].get(f'global_{attr}', [])
            merged = self._dedupe(merged + global_rules)
        return merged

    @property
    def include_filters(self):
        if self._include_filters_cache is None:
            merged = self._get_merged_rules('include_filters')
            # When the price tracker is accepted, transparently add the LD+JSON selectors
            if self.watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
                merged = merged + html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
            self._include_filters_cache = merged
        return self._include_filters_cache

    @property
    def subtractive_selectors(self):
        if self._subtractive_selectors_cache is None:
            # Order matters and is preserved: tag selectors, then watch, then global
            from_tags = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr='subtractive_selectors')
            from_watch = self.watch.get("subtractive_selectors", [])
            from_global = self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
            self._subtractive_selectors_cache = [*from_tags, *from_watch, *from_global]
        return self._subtractive_selectors_cache

    @property
    def extract_text(self):
        return self._get_merged_rules('extract_text')

    @property
    def ignore_text(self):
        return self._get_merged_rules('ignore_text', include_global=True)

    @property
    def trigger_text(self):
        return self._get_merged_rules('trigger_text')

    @property
    def text_should_not_be_present(self):
        return self._get_merged_rules('text_should_not_be_present')

    @property
    def has_include_filters(self):
        filters = self.include_filters
        return bool(filters) and bool(filters[0].strip())

    @property
    def has_include_json_filters(self):
        prefixes = tuple(JSON_FILTER_PREFIXES)
        return any(f.strip().startswith(prefixes) for f in self.include_filters)

    @property
    def has_subtractive_selectors(self):
        selectors = self.subtractive_selectors
        return bool(selectors) and bool(selectors[0].strip())
|  | ||||
|  | ||||
class ContentTransformer:
    """Line-oriented text transformations: trimming, de-duplication, sorting, regex extraction."""

    @staticmethod
    def trim_whitespace(text):
        """Strip leading/trailing whitespace from every line (double newlines collapsed first)."""
        return '\n'.join(map(str.strip, text.replace("\n\n", "\n").splitlines()))

    @staticmethod
    def remove_duplicate_lines(text):
        """Drop repeated lines, keeping the first occurrence and the original order."""
        unique = []
        seen = set()
        for line in text.replace("\n\n", "\n").splitlines():
            if line not in seen:
                seen.add(line)
                unique.append(line)
        return '\n'.join(unique)

    @staticmethod
    def sort_alphabetically(text):
        """Sort lines case-insensitively (double newlines collapsed first)."""
        lines = text.replace("\n\n", "\n").splitlines()
        lines.sort(key=str.lower)
        return '\n'.join(lines)

    @staticmethod
    def extract_by_regex(text, regex_patterns):
        """Return every match of *regex_patterns* (perl-style /../ regexes or
        literal case-insensitive substrings), newline-separated; '' when nothing matches.
        """
        # Collect into a list and join once at the end (avoids O(n^2) concatenation)
        pieces = []

        for pattern in regex_patterns:
            if re.search(PERL_STYLE_REGEX, pattern, re.IGNORECASE):
                # Perl-style /.../flags regex - convert then apply
                compiled = html_tools.perl_style_slash_enclosed_regex_to_options(pattern)
                for found in re.findall(compiled, text):
                    # Grouped patterns come back as tuples; flatten them
                    if type(found) is tuple:
                        pieces.extend(found)
                        pieces.append('\n')
                    else:
                        pieces.append(found)
                        pieces.append('\n')
            else:
                # Plain text search (case-insensitive)
                matcher = re.compile(re.escape(pattern), re.IGNORECASE)
                for found in matcher.findall(text):
                    pieces.append(found)
                    pieces.append('\n')

        return ''.join(pieces) if pieces else ''
|  | ||||
|  | ||||
class RuleEngine:
    """Evaluates rules that can block a detected change from being reported.

    Every method returns True when the change should be BLOCKED and
    False when it is allowed through.
    """

    @staticmethod
    def evaluate_trigger_text(content, trigger_patterns):
        """When trigger text is configured, block unless one of the triggers appears.

        Returns True if blocked, False if allowed.
        """
        if not trigger_patterns:
            return False

        # Configured trigger text means "assume blocked"; finding a trigger unblocks
        hits = html_tools.strip_ignore_text(
            content=str(content),
            wordlist=trigger_patterns,
            mode="line numbers"
        )
        return not bool(hits)

    @staticmethod
    def evaluate_text_should_not_be_present(content, patterns):
        """Block the change when any forbidden text is present.

        Returns True if blocked, False if allowed.
        """
        if not patterns:
            return False

        hits = html_tools.strip_ignore_text(
            content=str(content),
            wordlist=patterns,
            mode="line numbers"
        )
        return bool(hits)

    @staticmethod
    def evaluate_conditions(watch, datastore, content):
        """Run the watch's custom conditions ruleset against *content*.

        Returns True if blocked (conditions not met), False if allowed.
        """
        if not (watch.get('conditions') and watch.get('conditions_match_logic')):
            return False

        outcome = execute_ruleset_against_all_plugins(
            current_watch_uuid=watch.get('uuid'),
            application_datastruct=datastore.data,
            ephemeral_data={'text': content}
        )
        return not outcome.get('result')
|  | ||||
|  | ||||
class ContentProcessor:
    """Handles content preprocessing, filtering, and extraction.

    Wraps the per-fetch pipeline steps: format-specific preprocessing
    (RSS/PDF/JSON), include/subtractive filtering, and HTML-to-text
    conversion. Stateless apart from the collaborators passed to __init__.
    """

    def __init__(self, fetcher, watch, filter_config, datastore):
        # fetcher: provides the fetched body, screenshot and xpath data
        # watch: the watch being checked (dict-like model)
        # filter_config: FilterConfig with merged include/subtractive rules
        # datastore: application datastore (settings are read from it)
        self.fetcher = fetcher
        self.watch = watch
        self.filter_config = filter_config
        self.datastore = datastore

    def preprocess_rss(self, content):
        """
        Convert CDATA/comments in RSS to usable text.

        Supports two RSS processing modes:
        - 'default': Inline CDATA replacement (original behavior)
        - 'formatted': Format RSS items with title, link, guid, pubDate, and description (CDATA unmarked)
        """
        from changedetectionio import rss_tools
        rss_mode = self.datastore.data["settings"]["application"].get("rss_reader_mode")
        if rss_mode:
            # Format RSS items nicely with CDATA content unmarked and converted to text
            return rss_tools.format_rss_items(content)
        else:
            # Default: Original inline CDATA replacement
            return cdata_in_document_to_text(html_content=content)

    def preprocess_pdf(self, raw_content):
        """Convert PDF to HTML using external tool.

        Raises PDFToHTMLToolNotFound when the converter binary
        (default 'pdftohtml', overridable via PDF_TO_HTML_TOOL) is missing.
        """
        from shutil import which
        tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
        if not which(tool):
            raise PDFToHTMLToolNotFound(
                f"Command-line `{tool}` tool was not found in system PATH, was it installed?"
            )

        import subprocess
        # Convert via stdin/stdout; '-i' ignores images, output name 'out.pdf' is nominal
        proc = subprocess.Popen(
            [tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
            stdout=subprocess.PIPE,
            stdin=subprocess.PIPE
        )
        proc.stdin.write(raw_content)
        proc.stdin.close()
        html_content = proc.stdout.read().decode('utf-8')
        proc.wait(timeout=60)

        # Add metadata for change detection: embed the original file's checksum
        # and size so any byte-level change in the PDF is visible in the text diff
        metadata = (
            f"<p>Added by changedetection.io: Document checksum - "
            f"{hashlib.md5(raw_content).hexdigest().upper()} "
            f"Original file size - {len(raw_content)} bytes</p>"
        )
        return html_content.replace('</body>', metadata + '</body>')

    def preprocess_json(self, raw_content):
        """Format and sort JSON content so key reordering does not trigger a change."""
        # Then we re-format it, else it does have filters (later on) which will reformat it anyway
        content = html_tools.extract_json_as_string(content=raw_content, json_filter="json:$")

        # Sort JSON to avoid false alerts from reordering
        try:
            content = json.dumps(json.loads(content), sort_keys=True, indent=4)
        except Exception:
            # Might be malformed JSON, continue anyway
            pass

        return content

    def apply_include_filters(self, content, stream_content_type):
        """Apply CSS, XPath, or JSON filters to extract specific content.

        Each configured filter rule is applied in order and the results
        concatenated. Raises FilterNotFoundInResponse when nothing matched.
        """
        filtered_content = ""

        for filter_rule in self.filter_config.include_filters:
            # XPath filters (leading '/' is shorthand for an xpath expression)
            if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
                filtered_content += html_tools.xpath_filter(
                    xpath_filter=filter_rule.replace('xpath:', ''),
                    html_content=content,
                    append_pretty_line_formatting=not self.watch.is_source_type_url,
                    is_rss=stream_content_type.is_rss
                )

            # XPath1 filters (first match only)
            elif filter_rule.startswith('xpath1:'):
                filtered_content += html_tools.xpath1_filter(
                    xpath_filter=filter_rule.replace('xpath1:', ''),
                    html_content=content,
                    append_pretty_line_formatting=not self.watch.is_source_type_url,
                    is_rss=stream_content_type.is_rss
                )

            # JSON filters ('json:', 'jq:', 'jqraw:' prefixes)
            elif any(filter_rule.startswith(prefix) for prefix in JSON_FILTER_PREFIXES):
                filtered_content += html_tools.extract_json_as_string(
                    content=content,
                    json_filter=filter_rule
                )

            # CSS selectors, default fallback
            else:
                filtered_content += html_tools.include_filters(
                    include_filters=filter_rule,
                    html_content=content,
                    append_pretty_line_formatting=not self.watch.is_source_type_url
                )

        # Raise error if filter returned nothing
        if not filtered_content.strip():
            raise FilterNotFoundInResponse(
                msg=self.filter_config.include_filters,
                screenshot=self.fetcher.screenshot,
                xpath_data=self.fetcher.xpath_data
            )

        return filtered_content

    def apply_subtractive_selectors(self, content):
        """Remove elements matching subtractive selectors."""
        return html_tools.element_removal(self.filter_config.subtractive_selectors, content)

    def extract_text_from_html(self, html_content, stream_content_type):
        """Convert HTML to plain text, honouring the 'render anchor content' setting."""
        do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
        return html_tools.html_to_text(
            html_content=html_content,
            render_anchor_tag_content=do_anchor,
            is_rss=stream_content_type.is_rss
        )
|  | ||||
|  | ||||
class ChecksumCalculator:
    """Builds MD5 checksums of text, optionally ignoring all whitespace."""

    @staticmethod
    def calculate(text, ignore_whitespace=False):
        """Return the hex MD5 digest of *text*; whitespace is stripped first when requested."""
        subject = text.translate(TRANSLATE_WHITESPACE_TABLE) if ignore_whitespace else text
        return hashlib.md5(subject.encode('utf-8')).hexdigest()
|  | ||||
|  | ||||
| # Some common stuff here that can be moved to a base class | ||||
| # (set_proxy_from_list) | ||||
| class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|     def run_changedetection(self, watch): | ||||
|         changed_detected = False | ||||
|         html_content = "" | ||||
|         screenshot = False  # as bytes | ||||
|         stripped_text_from_html = "" | ||||
|  | ||||
|         if not watch: | ||||
|             raise Exception("Watch no longer exists.") | ||||
|  | ||||
|         # Initialize components | ||||
|         filter_config = FilterConfig(watch, self.datastore) | ||||
|         content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore) | ||||
|         transformer = ContentTransformer() | ||||
|         rule_engine = RuleEngine() | ||||
|  | ||||
|         # Get content type and stream info | ||||
|         ctype_header = self.fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower() | ||||
|         stream_content_type = guess_stream_type(http_content_header=ctype_header, content=self.fetcher.content) | ||||
|  | ||||
|         # Unset any existing notification error | ||||
|         update_obj = {'last_notification_error': False, 'last_error': False} | ||||
|  | ||||
|         url = watch.link | ||||
|  | ||||
|         self.screenshot = self.fetcher.screenshot | ||||
|         self.xpath_data = self.fetcher.xpath_data | ||||
|  | ||||
|         # Track the content type | ||||
|         update_obj['content_type'] = self.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|  | ||||
|         # Watches added automatically in the queue manager will skip if its the same checksum as the previous run | ||||
|         # Saves a lot of CPU | ||||
|         # Track the content type and checksum before filters | ||||
|         update_obj['content_type'] = ctype_header | ||||
|         update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         # Fetching complete, now filters | ||||
|         # === CONTENT PREPROCESSING === | ||||
|         # Avoid creating unnecessary intermediate string copies by reassigning only when needed | ||||
|         content = self.fetcher.content | ||||
|  | ||||
|         # @note: I feel like the following should be in a more obvious chain system | ||||
|         #  - Check filter text | ||||
|         #  - Is the checksum different? | ||||
|         #  - Do we convert to JSON? | ||||
|         # https://stackoverflow.com/questions/41817578/basic-method-chaining ? | ||||
|         # return content().textfilter().jsonextract().checksumcompare() ? | ||||
|         # RSS preprocessing | ||||
|         if stream_content_type.is_rss: | ||||
|             content = content_processor.preprocess_rss(content) | ||||
|             if self.datastore.data["settings"]["application"].get("rss_reader_mode"): | ||||
|                 # Now just becomes regular HTML that can have xpath/CSS applied (first of the set etc) | ||||
|                 stream_content_type.is_rss = False | ||||
|                 stream_content_type.is_html = True | ||||
|                 self.fetcher.content = content | ||||
|  | ||||
|         is_json = 'application/json' in self.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|         is_html = not is_json | ||||
|         is_rss = False | ||||
|         # PDF preprocessing | ||||
|         if watch.is_pdf or stream_content_type.is_pdf: | ||||
|             content = content_processor.preprocess_pdf(raw_content=self.fetcher.raw_content) | ||||
|             stream_content_type.is_html = True | ||||
|  | ||||
|         ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|         # Go into RSS preprocess for converting CDATA/comment to usable text | ||||
|         if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']): | ||||
|             if '<rss' in self.fetcher.content[:100].lower(): | ||||
|                 self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content) | ||||
|                 is_rss = True | ||||
|         # JSON - Always reformat it nicely for consistency. | ||||
|  | ||||
|         # source: support, basically treat it as plaintext | ||||
|         if stream_content_type.is_json: | ||||
|             if not filter_config.has_include_json_filters: | ||||
|                 content = content_processor.preprocess_json(raw_content=content) | ||||
|         #else, otherwise it gets sorted/formatted in the filter stage anyway | ||||
|  | ||||
|         # HTML obfuscation workarounds | ||||
|         if stream_content_type.is_html: | ||||
|             content = html_tools.workarounds_for_obfuscations(content) | ||||
|  | ||||
|         # Check for LD+JSON price data (for HTML content) | ||||
|         if stream_content_type.is_html: | ||||
|             update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(content) | ||||
|  | ||||
|         # === FILTER APPLICATION === | ||||
|         # Start with content reference, avoid copy until modification | ||||
|         html_content = content | ||||
|  | ||||
|         # Apply include filters (CSS, XPath, JSON) | ||||
|         # Except for plaintext (incase they tried to confuse the system, it will HTML escape | ||||
|         #if not stream_content_type.is_plaintext: | ||||
|         if filter_config.has_include_filters: | ||||
|             html_content = content_processor.apply_include_filters(content, stream_content_type) | ||||
|  | ||||
|         # Apply subtractive selectors | ||||
|         if filter_config.has_subtractive_selectors: | ||||
|             html_content = content_processor.apply_subtractive_selectors(html_content) | ||||
|  | ||||
|         # === TEXT EXTRACTION === | ||||
|         if watch.is_source_type_url: | ||||
|             is_html = False | ||||
|             is_json = False | ||||
|  | ||||
|         inline_pdf = self.fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in self.fetcher.content[:10] | ||||
|         if watch.is_pdf or 'application/pdf' in self.fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf: | ||||
|             from shutil import which | ||||
|             tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml") | ||||
|             if not which(tool): | ||||
|                 raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool)) | ||||
|  | ||||
|             import subprocess | ||||
|             proc = subprocess.Popen( | ||||
|                 [tool, '-stdout', '-', '-s', 'out.pdf', '-i'], | ||||
|                 stdout=subprocess.PIPE, | ||||
|                 stdin=subprocess.PIPE) | ||||
|             proc.stdin.write(self.fetcher.raw_content) | ||||
|             proc.stdin.close() | ||||
|             self.fetcher.content = proc.stdout.read().decode('utf-8') | ||||
|             proc.wait(timeout=60) | ||||
|  | ||||
|             # Add a little metadata so we know if the file changes (like if an image changes, but the text is the same | ||||
|             # @todo may cause problems with non-UTF8? | ||||
|             metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format( | ||||
|                 hashlib.md5(self.fetcher.raw_content).hexdigest().upper(), | ||||
|                 len(self.fetcher.content)) | ||||
|  | ||||
|             self.fetcher.content = self.fetcher.content.replace('</body>', metadata + '</body>') | ||||
|  | ||||
|         # Better would be if Watch.model could access the global data also | ||||
|         # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__ | ||||
|         # https://realpython.com/inherit-python-dict/ instead of doing it procedurely | ||||
|         include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters') | ||||
|  | ||||
|         # 1845 - remove duplicated filters in both group and watch include filter | ||||
|         include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags)) | ||||
|  | ||||
|         subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'), | ||||
|                                  *watch.get("subtractive_selectors", []), | ||||
|                                  *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", []) | ||||
|                                  ] | ||||
|  | ||||
|         # Inject a virtual LD+JSON price tracker rule | ||||
|         if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT: | ||||
|             include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS | ||||
|  | ||||
|         has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip()) | ||||
|         has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip()) | ||||
|  | ||||
|         if is_json and not has_filter_rule: | ||||
|             include_filters_rule.append("json:$") | ||||
|             has_filter_rule = True | ||||
|  | ||||
|         if is_json: | ||||
|             # Sort the JSON so we dont get false alerts when the content is just re-ordered | ||||
|             try: | ||||
|                 self.fetcher.content = json.dumps(json.loads(self.fetcher.content), sort_keys=True) | ||||
|             except Exception as e: | ||||
|                 # Might have just been a snippet, or otherwise bad JSON, continue | ||||
|                 pass | ||||
|  | ||||
|         if has_filter_rule: | ||||
|             for filter in include_filters_rule: | ||||
|                 if any(prefix in filter for prefix in json_filter_prefixes): | ||||
|                     stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter) | ||||
|                     is_html = False | ||||
|  | ||||
|         if is_html or watch.is_source_type_url: | ||||
|  | ||||
|             # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text | ||||
|             self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content) | ||||
|             html_content = self.fetcher.content | ||||
|  | ||||
|             # If not JSON,  and if it's not text/plain.. | ||||
|             if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower(): | ||||
|                 # Don't run get_text or xpath/css filters on plaintext | ||||
|                 stripped_text_from_html = html_content | ||||
|             # For source URLs, keep raw content | ||||
|             stripped_text = html_content | ||||
|         elif stream_content_type.is_plaintext: | ||||
|             # For plaintext, keep as-is without HTML-to-text conversion | ||||
|             stripped_text = html_content | ||||
|         else: | ||||
|             # Extract text from HTML/RSS content (not generic XML) | ||||
|             if stream_content_type.is_html or stream_content_type.is_rss: | ||||
|                 stripped_text = content_processor.extract_text_from_html(html_content, stream_content_type) | ||||
|             else: | ||||
|                 # Does it have some ld+json price data? used for easier monitoring | ||||
|                 update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(self.fetcher.content) | ||||
|  | ||||
|                 # Then we assume HTML | ||||
|                 if has_filter_rule: | ||||
|                     html_content = "" | ||||
|  | ||||
|                     for filter_rule in include_filters_rule: | ||||
|                         # For HTML/XML we offer xpath as an option, just start a regular xPath "/.." | ||||
|                         if filter_rule[0] == '/' or filter_rule.startswith('xpath:'): | ||||
|                             html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''), | ||||
|                                                                     html_content=self.fetcher.content, | ||||
|                                                                     append_pretty_line_formatting=not watch.is_source_type_url, | ||||
|                                                                     is_rss=is_rss) | ||||
|  | ||||
|                         elif filter_rule.startswith('xpath1:'): | ||||
|                             html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''), | ||||
|                                                                      html_content=self.fetcher.content, | ||||
|                                                                      append_pretty_line_formatting=not watch.is_source_type_url, | ||||
|                                                                      is_rss=is_rss) | ||||
|                         else: | ||||
|                             html_content += html_tools.include_filters(include_filters=filter_rule, | ||||
|                                                                        html_content=self.fetcher.content, | ||||
|                                                                        append_pretty_line_formatting=not watch.is_source_type_url) | ||||
|  | ||||
|                     if not html_content.strip(): | ||||
|                         raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data) | ||||
|  | ||||
|                 if has_subtractive_selectors: | ||||
|                     html_content = html_tools.element_removal(subtractive_selectors, html_content) | ||||
|  | ||||
|                 if watch.is_source_type_url: | ||||
|                     stripped_text_from_html = html_content | ||||
|                 else: | ||||
|                     # extract text | ||||
|                     do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False) | ||||
|                     stripped_text_from_html = html_tools.html_to_text(html_content=html_content, | ||||
|                                                                       render_anchor_tag_content=do_anchor, | ||||
|                                                                       is_rss=is_rss)  # 1874 activate the <title workaround hack | ||||
|                 stripped_text = html_content | ||||
|  | ||||
|         # === TEXT TRANSFORMATIONS === | ||||
|         if watch.get('trim_text_whitespace'): | ||||
|             stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()) | ||||
|             stripped_text = transformer.trim_whitespace(stripped_text) | ||||
|  | ||||
|         # Re #340 - return the content before the 'ignore text' was applied | ||||
|         # Also used to calculate/show what was removed | ||||
|         text_content_before_ignored_filter = stripped_text_from_html | ||||
|  | ||||
|         # @todo whitespace coming from missing rtrim()? | ||||
|         # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about. | ||||
|         # Rewrite's the processing text based on only what diff result they want to see | ||||
|         # Save text before ignore filters (for diff calculation) | ||||
|         text_content_before_ignored_filter = stripped_text | ||||
|  | ||||
|         # === DIFF FILTERING === | ||||
|         # If user wants specific diff types (added/removed/replaced only) | ||||
|         if watch.has_special_diff_filter_options_set() and len(watch.history.keys()): | ||||
|             # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences | ||||
|             from changedetectionio import diff | ||||
|             # needs to not include (added) etc or it may get used twice | ||||
|             # Replace the processed text with the preferred result | ||||
|             rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(), | ||||
|                                              newest_version_file_contents=stripped_text_from_html, | ||||
|                                              include_equal=False,  # not the same lines | ||||
|                                              include_added=watch.get('filter_text_added', True), | ||||
|                                              include_removed=watch.get('filter_text_removed', True), | ||||
|                                              include_replaced=watch.get('filter_text_replaced', True), | ||||
|                                              line_feed_sep="\n", | ||||
|                                              include_change_type_prefix=False) | ||||
|             stripped_text = self._apply_diff_filtering(watch, stripped_text, text_content_before_ignored_filter) | ||||
|             if stripped_text is None: | ||||
|                 # No differences found, but content exists | ||||
|                 c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True) | ||||
|                 return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8') | ||||
|  | ||||
|             watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8')) | ||||
|  | ||||
|             if not rendered_diff and stripped_text_from_html: | ||||
|                 # We had some content, but no differences were found | ||||
|                 # Store our new file as the MD5 so it will trigger in the future | ||||
|                 c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest() | ||||
|                 return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8') | ||||
|             else: | ||||
|                 stripped_text_from_html = rendered_diff | ||||
|  | ||||
|         # Treat pages with no renderable text content as a change? No by default | ||||
|         # === EMPTY PAGE CHECK === | ||||
|         empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|         if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: | ||||
|             raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url, | ||||
|                                                             status_code=self.fetcher.get_last_status_code(), | ||||
|                                                             screenshot=self.fetcher.screenshot, | ||||
|                                                             has_filters=has_filter_rule, | ||||
|                                                             html_content=html_content, | ||||
|                                                             xpath_data=self.fetcher.xpath_data | ||||
|                                                             ) | ||||
|  | ||||
|         # We rely on the actual text in the html output.. many sites have random script vars etc, | ||||
|         # in the future we'll implement other mechanisms. | ||||
|         if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0: | ||||
|             raise content_fetchers.exceptions.ReplyWithContentButNoText( | ||||
|                 url=url, | ||||
|                 status_code=self.fetcher.get_last_status_code(), | ||||
|                 screenshot=self.fetcher.screenshot, | ||||
|                 has_filters=filter_config.has_include_filters, | ||||
|                 html_content=html_content, | ||||
|                 xpath_data=self.fetcher.xpath_data | ||||
|             ) | ||||
|  | ||||
|         update_obj["last_check_status"] = self.fetcher.get_last_status_code() | ||||
|  | ||||
|         # 615 Extract text by regex | ||||
|         extract_text = list(dict.fromkeys(watch.get('extract_text', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text'))) | ||||
|         if len(extract_text) > 0: | ||||
|             regex_matched_output = [] | ||||
|             for s_re in extract_text: | ||||
|                 # incase they specified something in '/.../x' | ||||
|                 if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE): | ||||
|                     regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re) | ||||
|                     result = re.findall(regex, stripped_text_from_html) | ||||
|  | ||||
|                     for l in result: | ||||
|                         if type(l) is tuple: | ||||
|                             # @todo - some formatter option default (between groups) | ||||
|                             regex_matched_output += list(l) + ['\n'] | ||||
|                         else: | ||||
|                             # @todo - some formatter option default (between each ungrouped result) | ||||
|                             regex_matched_output += [l] + ['\n'] | ||||
|                 else: | ||||
|                     # Doesnt look like regex, just hunt for plaintext and return that which matches | ||||
|                     # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes | ||||
|                     r = re.compile(re.escape(s_re), re.IGNORECASE) | ||||
|                     res = r.findall(stripped_text_from_html) | ||||
|                     if res: | ||||
|                         for match in res: | ||||
|                             regex_matched_output += [match] + ['\n'] | ||||
|  | ||||
|             ########################################################## | ||||
|             stripped_text_from_html = '' | ||||
|  | ||||
|             if regex_matched_output: | ||||
|                 # @todo some formatter for presentation? | ||||
|                 stripped_text_from_html = ''.join(regex_matched_output) | ||||
|         # === REGEX EXTRACTION === | ||||
|         if filter_config.extract_text: | ||||
|             extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text) | ||||
|             stripped_text = extracted | ||||
|  | ||||
|         # === MORE TEXT TRANSFORMATIONS === | ||||
|         if watch.get('remove_duplicate_lines'): | ||||
|             stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())) | ||||
|  | ||||
|             stripped_text = transformer.remove_duplicate_lines(stripped_text) | ||||
|  | ||||
|         if watch.get('sort_text_alphabetically'): | ||||
|             # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap | ||||
|             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here. | ||||
|             stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n") | ||||
|             stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower())) | ||||
|             stripped_text = transformer.sort_alphabetically(stripped_text) | ||||
|  | ||||
| ### CALCULATE MD5 | ||||
|         # If there's text to ignore | ||||
|         text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) | ||||
|         text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text') | ||||
|         # === CHECKSUM CALCULATION === | ||||
|         text_for_checksuming = stripped_text | ||||
|  | ||||
|         text_for_checksuming = stripped_text_from_html | ||||
|         if text_to_ignore: | ||||
|             text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore) | ||||
|         # Apply ignore_text for checksum calculation | ||||
|         if filter_config.ignore_text: | ||||
|             text_for_checksuming = html_tools.strip_ignore_text(stripped_text, filter_config.ignore_text) | ||||
|  | ||||
|         # Re #133 - if we should strip whitespaces from triggering the change detected comparison | ||||
|         if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False): | ||||
|             fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest() | ||||
|         else: | ||||
|             fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest() | ||||
|             # Optionally remove ignored lines from output | ||||
|             strip_ignored_lines = watch.get('strip_ignored_lines') | ||||
|             if strip_ignored_lines is None: | ||||
|                 strip_ignored_lines = self.datastore.data['settings']['application'].get('strip_ignored_lines') | ||||
|             if strip_ignored_lines: | ||||
|                 stripped_text = text_for_checksuming | ||||
|  | ||||
|         ############ Blocking rules, after checksum ################# | ||||
|         # Calculate checksum | ||||
|         ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace', False) | ||||
|         fetched_md5 = ChecksumCalculator.calculate(text_for_checksuming, ignore_whitespace=ignore_whitespace) | ||||
|  | ||||
|         # === BLOCKING RULES EVALUATION === | ||||
|         blocked = False | ||||
|         trigger_text = list(dict.fromkeys(watch.get('trigger_text', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text'))) | ||||
|         if len(trigger_text): | ||||
|             # Assume blocked | ||||
|  | ||||
|         # Check trigger_text | ||||
|         if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text): | ||||
|             blocked = True | ||||
|             # Filter and trigger works the same, so reuse it | ||||
|             # It should return the line numbers that match | ||||
|             # Unblock flow if the trigger was found (some text remained after stripped what didnt match) | ||||
|             result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), | ||||
|                                                   wordlist=trigger_text, | ||||
|                                                   mode="line numbers") | ||||
|             # Unblock if the trigger was found | ||||
|             if result: | ||||
|                 blocked = False | ||||
|  | ||||
|         text_should_not_be_present = list(dict.fromkeys(watch.get('text_should_not_be_present', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present'))) | ||||
|         if len(text_should_not_be_present): | ||||
|             # If anything matched, then we should block a change from happening | ||||
|             result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), | ||||
|                                                   wordlist=text_should_not_be_present, | ||||
|                                                   mode="line numbers") | ||||
|             if result: | ||||
|                 blocked = True | ||||
|         # Check text_should_not_be_present | ||||
|         if rule_engine.evaluate_text_should_not_be_present(stripped_text, filter_config.text_should_not_be_present): | ||||
|             blocked = True | ||||
|  | ||||
|         # And check if 'conditions' will let this pass through | ||||
|         if watch.get('conditions') and watch.get('conditions_match_logic'): | ||||
|             conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'), | ||||
|                                                                     application_datastruct=self.datastore.data, | ||||
|                                                                     ephemeral_data={ | ||||
|                                                                         'text': stripped_text_from_html | ||||
|                                                                     } | ||||
|                                                                     ) | ||||
|         # Check custom conditions | ||||
|         if rule_engine.evaluate_conditions(watch, self.datastore, stripped_text): | ||||
|             blocked = True | ||||
|  | ||||
|             if not conditions_result.get('result'): | ||||
|                 # Conditions say "Condition not met" so we block it. | ||||
|                 blocked = True | ||||
|  | ||||
|         # Looks like something changed, but did it match all the rules? | ||||
|         # === CHANGE DETECTION === | ||||
|         if blocked: | ||||
|             changed_detected = False | ||||
|         else: | ||||
|             # The main thing that all this at the moment comes down to :) | ||||
|             # Compare checksums | ||||
|             if watch.get('previous_md5') != fetched_md5: | ||||
|                 changed_detected = True | ||||
|  | ||||
|             # Always record the new checksum | ||||
|             update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|             # On the first run of a site, watch['previous_md5'] will be None, set it the current one. | ||||
|             # On first run, initialize previous_md5 | ||||
|             if not watch.get('previous_md5'): | ||||
|                 watch['previous_md5'] = fetched_md5 | ||||
|  | ||||
|         logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") | ||||
|  | ||||
|         if changed_detected: | ||||
|             if watch.get('check_unique_lines', False): | ||||
|                 ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace') | ||||
|         # === UNIQUE LINES CHECK === | ||||
|         if changed_detected and watch.get('check_unique_lines', False): | ||||
|             has_unique_lines = watch.lines_contain_something_unique_compared_to_history( | ||||
|                 lines=stripped_text.splitlines(), | ||||
|                 ignore_whitespace=ignore_whitespace | ||||
|             ) | ||||
|  | ||||
|                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history( | ||||
|                     lines=stripped_text_from_html.splitlines(), | ||||
|                     ignore_whitespace=ignore_whitespace | ||||
|                 ) | ||||
|             if not has_unique_lines: | ||||
|                 logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False") | ||||
|                 changed_detected = False | ||||
|             else: | ||||
|                 logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content") | ||||
|  | ||||
|                 # One or more lines? unsure? | ||||
|                 if not has_unique_lines: | ||||
|                     logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False") | ||||
|                     changed_detected = False | ||||
|                 else: | ||||
|                     logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content") | ||||
|         # Note: Explicit cleanup is only needed here because text_json_diff handles | ||||
|         # large strings (100KB-300KB for RSS/HTML). The other processors work with | ||||
|         # small strings and don't need this. | ||||
|         # | ||||
|         # Python would clean these up automatically, but explicit `del` frees memory | ||||
|         # immediately rather than waiting for function return, reducing peak memory usage. | ||||
|         del content | ||||
|         if 'html_content' in locals() and html_content is not stripped_text: | ||||
|             del html_content | ||||
|         if 'text_content_before_ignored_filter' in locals() and text_content_before_ignored_filter is not stripped_text: | ||||
|             del text_content_before_ignored_filter | ||||
|         if 'text_for_checksuming' in locals() and text_for_checksuming is not stripped_text: | ||||
|             del text_for_checksuming | ||||
|  | ||||
|         return changed_detected, update_obj, stripped_text | ||||
|  | ||||
|         # stripped_text_from_html - Everything after filters and NO 'ignored' content | ||||
|         return changed_detected, update_obj, stripped_text_from_html | ||||
|     def _apply_diff_filtering(self, watch, stripped_text, text_before_filter): | ||||
|         """Apply user's diff filtering preferences (show only added/removed/replaced lines).""" | ||||
|         from changedetectionio import diff | ||||
|  | ||||
|         rendered_diff = diff.render_diff( | ||||
|             previous_version_file_contents=watch.get_last_fetched_text_before_filters(), | ||||
|             newest_version_file_contents=stripped_text, | ||||
|             include_equal=False, | ||||
|             include_added=watch.get('filter_text_added', True), | ||||
|             include_removed=watch.get('filter_text_removed', True), | ||||
|             include_replaced=watch.get('filter_text_replaced', True), | ||||
|             line_feed_sep="\n", | ||||
|             include_change_type_prefix=False | ||||
|         ) | ||||
|  | ||||
|         watch.save_last_text_fetched_before_filters(text_before_filter.encode('utf-8')) | ||||
|  | ||||
|         if not rendered_diff and stripped_text: | ||||
|             # No differences found | ||||
|             return None | ||||
|  | ||||
|         return rendered_diff | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| [pytest] | ||||
| addopts = --no-start-live-server --live-server-port=5005 | ||||
| addopts = --no-start-live-server --live-server-port=0 | ||||
| #testpaths = tests pytest_invenio | ||||
| #live_server_scope = function | ||||
|  | ||||
|   | ||||
| @@ -243,14 +243,15 @@ def handle_watch_update(socketio, **kwargs): | ||||
|  | ||||
|         general_stats = { | ||||
|             'count_errors': errored_count, | ||||
|             'has_unviewed': datastore.has_unviewed | ||||
|             'unread_changes_count': datastore.unread_changes_count | ||||
|         } | ||||
|  | ||||
|         # Debug what's being emitted | ||||
|         # logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}") | ||||
|  | ||||
|         # Emit to all clients (no 'broadcast' parameter needed - it's the default behavior) | ||||
|         socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats}) | ||||
|         socketio.emit("watch_update", {'watch': watch_data}) | ||||
|         socketio.emit("general_stats_update", general_stats) | ||||
|  | ||||
|         # Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues | ||||
|         logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}") | ||||
|   | ||||
							
								
								
									
										130
									
								
								changedetectionio/rss_tools.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										130
									
								
								changedetectionio/rss_tools.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,130 @@ | ||||
| """ | ||||
| RSS/Atom feed processing tools for changedetection.io | ||||
| """ | ||||
|  | ||||
| from loguru import logger | ||||
| import re | ||||
|  | ||||
|  | ||||
| def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str: | ||||
|     """ | ||||
|     Process CDATA sections in HTML/XML content - inline replacement. | ||||
|  | ||||
|     Args: | ||||
|         html_content: The HTML/XML content to process | ||||
|         render_anchor_tag_content: Whether to render anchor tag content | ||||
|  | ||||
|     Returns: | ||||
|         Processed HTML/XML content with CDATA sections replaced inline | ||||
|     """ | ||||
|     from xml.sax.saxutils import escape as xml_escape | ||||
|     from .html_tools import html_to_text | ||||
|  | ||||
|     pattern = '<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>' | ||||
|  | ||||
|     def repl(m): | ||||
|         text = m.group(1) | ||||
|         return xml_escape(html_to_text(html_content=text, render_anchor_tag_content=render_anchor_tag_content)).strip() | ||||
|  | ||||
|     return re.sub(pattern, repl, html_content) | ||||
|  | ||||
|  | ||||
| def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str: | ||||
|     """ | ||||
|     Format RSS/Atom feed items in a readable text format using feedparser. | ||||
|  | ||||
|     Converts RSS <item> or Atom <entry> elements to formatted text with: | ||||
|     - <title> → <h1>Title</h1> | ||||
|     - <link> → Link: [url] | ||||
|     - <guid> → Guid: [id] | ||||
|     - <pubDate> → PubDate: [date] | ||||
|     - <description> or <content> → Raw HTML content (CDATA and entities automatically handled) | ||||
|  | ||||
|     Args: | ||||
|         rss_content: The RSS/Atom feed content | ||||
|         render_anchor_tag_content: Whether to render anchor tag content in descriptions (unused, kept for compatibility) | ||||
|  | ||||
|     Returns: | ||||
|         Formatted HTML content ready for html_to_text conversion | ||||
|     """ | ||||
|     try: | ||||
|         import feedparser | ||||
|         from xml.sax.saxutils import escape as xml_escape | ||||
|  | ||||
|         # Parse the feed - feedparser handles all RSS/Atom variants, CDATA, entity unescaping, etc. | ||||
|         feed = feedparser.parse(rss_content) | ||||
|  | ||||
|         formatted_items = [] | ||||
|  | ||||
|         # Determine feed type for appropriate labels when fields are missing | ||||
|         # feedparser sets feed.version to things like 'rss20', 'atom10', etc. | ||||
|         is_atom = feed.version and 'atom' in feed.version | ||||
|  | ||||
|         for entry in feed.entries: | ||||
|             item_parts = [] | ||||
|  | ||||
|             # Title - feedparser handles CDATA and entity unescaping automatically | ||||
|             if hasattr(entry, 'title') and entry.title: | ||||
|                 item_parts.append(f'<h1>{xml_escape(entry.title)}</h1>') | ||||
|  | ||||
|             # Link | ||||
|             if hasattr(entry, 'link') and entry.link: | ||||
|                 item_parts.append(f'Link: {xml_escape(entry.link)}<br>') | ||||
|  | ||||
|             # GUID/ID | ||||
|             if hasattr(entry, 'id') and entry.id: | ||||
|                 item_parts.append(f'Guid: {xml_escape(entry.id)}<br>') | ||||
|  | ||||
|             # Date - feedparser normalizes all date field names to 'published' | ||||
|             if hasattr(entry, 'published') and entry.published: | ||||
|                 item_parts.append(f'PubDate: {xml_escape(entry.published)}<br>') | ||||
|  | ||||
|             # Description/Content - feedparser handles CDATA and entity unescaping automatically | ||||
|             # Only add "Summary:" label for Atom <summary> tags | ||||
|             content = None | ||||
|             add_label = False | ||||
|  | ||||
|             if hasattr(entry, 'content') and entry.content: | ||||
|                 # Atom <content> - no label, just content | ||||
|                 content = entry.content[0].value if entry.content[0].value else None | ||||
|             elif hasattr(entry, 'summary'): | ||||
|                 # Could be RSS <description> or Atom <summary> | ||||
|                 # feedparser maps both to entry.summary | ||||
|                 content = entry.summary if entry.summary else None | ||||
|                 # Only add "Summary:" label for Atom feeds (which use <summary> tag) | ||||
|                 if is_atom: | ||||
|                     add_label = True | ||||
|  | ||||
|             # Add content with or without label | ||||
|             if content: | ||||
|                 if add_label: | ||||
|                     item_parts.append(f'Summary:<br>{content}') | ||||
|                 else: | ||||
|                     item_parts.append(content) | ||||
|             else: | ||||
|                 # No content - just show <none> | ||||
|                 item_parts.append('<none>') | ||||
|  | ||||
|             # Join all parts of this item | ||||
|             if item_parts: | ||||
|                 formatted_items.append('\n'.join(item_parts)) | ||||
|  | ||||
|         # Wrap each item in a div with classes (first, last, item-N) | ||||
|         items_html = [] | ||||
|         total_items = len(formatted_items) | ||||
|         for idx, item in enumerate(formatted_items): | ||||
|             classes = ['rss-item'] | ||||
|             if idx == 0: | ||||
|                 classes.append('first') | ||||
|             if idx == total_items - 1: | ||||
|                 classes.append('last') | ||||
|             classes.append(f'item-{idx + 1}') | ||||
|  | ||||
|             class_str = ' '.join(classes) | ||||
|             items_html.append(f'<div class="{class_str}">{item}</div>') | ||||
|         return '<html><body>\n'+"\n<br><br>".join(items_html)+'\n</body></html>' | ||||
|  | ||||
|     except Exception as e: | ||||
|         logger.warning(f"Error formatting RSS items: {str(e)}") | ||||
|         # Fall back to original content | ||||
|         return rss_content | ||||
| @@ -11,32 +11,29 @@ set -e | ||||
|  | ||||
| SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||||
|  | ||||
| find tests/test_*py -type f|while read test_name | ||||
| do | ||||
|   echo "TEST RUNNING $test_name" | ||||
|   # REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser | ||||
|   REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name | ||||
| done | ||||
| # REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -n 30 --dist load  tests/test_*.py | ||||
|  | ||||
| #time pytest -n auto --dist loadfile -vv --tb=long tests/test_*.py | ||||
| echo "RUNNING WITH BASE_URL SET" | ||||
|  | ||||
| # Now re-run some tests with BASE_URL enabled | ||||
| # Re #65 - Ability to include a link back to the installation, in the notification. | ||||
| export BASE_URL="https://really-unique-domain.io" | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv --maxfail=1 tests/test_notification.py | ||||
|  | ||||
|  | ||||
| # Re-run with HIDE_REFERER set - could affect login | ||||
| export HIDE_REFERER=True | ||||
| pytest tests/test_access_control.py | ||||
| pytest -vv -s --maxfail=1 tests/test_access_control.py | ||||
|  | ||||
| # Re-run a few tests that will trigger brotli based storage | ||||
| export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5 | ||||
| pytest tests/test_access_control.py | ||||
| pytest -vv -s --maxfail=1 tests/test_access_control.py | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py | ||||
| pytest tests/test_backend.py | ||||
| pytest tests/test_rss.py | ||||
| pytest tests/test_unique_lines.py | ||||
| pytest -vv -s --maxfail=1 tests/test_backend.py | ||||
| pytest -vv -s --maxfail=1 tests/test_rss.py | ||||
| pytest -vv -s --maxfail=1 tests/test_unique_lines.py | ||||
|  | ||||
| # Try high concurrency | ||||
| FETCH_WORKERS=130 pytest  tests/test_history_consistency.py -v -l | ||||
|   | ||||
| @@ -6,6 +6,8 @@ | ||||
|  | ||||
| # enable debug | ||||
| set -x | ||||
| docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network | ||||
| docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4 | ||||
|  | ||||
| # A extra browser is configured, but we never chose to use it, so it should NOT show in the logs | ||||
| docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url' | ||||
|   | ||||
| @@ -19,12 +19,13 @@ docker run --network changedet-network -d \ | ||||
|   -v `pwd`/tests/proxy_list/squid-passwords.txt:/etc/squid3/passwords \ | ||||
|   ubuntu/squid:4.13-21.10_edge | ||||
|  | ||||
|  | ||||
| sleep 5 | ||||
| ## 2nd test actually choose the preferred proxy from proxies.json | ||||
| # This will force a request via "proxy-two" | ||||
| docker run --network changedet-network \ | ||||
|   -v `pwd`/tests/proxy_list/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   -v `pwd`/tests/proxy_list/proxies.json-example:/tmp/proxies.json \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_multiple_proxy.py' | ||||
|   bash -c 'cd changedetectionio && pytest -s tests/proxy_list/test_multiple_proxy.py --datastore-path /tmp' | ||||
|  | ||||
| set +e | ||||
| echo "- Looking for chosen.changedetection.io request in squid-one - it should NOT be here" | ||||
| @@ -48,8 +49,10 @@ fi | ||||
| # Test the UI configurable proxies | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py' | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py --datastore-path /tmp' | ||||
|  | ||||
| # Give squid proxies a moment to flush their logs | ||||
| sleep 2 | ||||
|  | ||||
| # Should see a request for one.changedetection.io in there | ||||
| echo "- Looking for .changedetection.io request in squid-custom" | ||||
| @@ -63,7 +66,10 @@ fi | ||||
| # Test "no-proxy" option | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py' | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py --datastore-path /tmp' | ||||
|  | ||||
| # Give squid proxies a moment to flush their logs | ||||
| sleep 2 | ||||
|  | ||||
| # We need to handle grep returning 1 | ||||
| set +e | ||||
| @@ -80,6 +86,8 @@ for c in $(echo "squid-one squid-two squid-custom"); do | ||||
|   fi | ||||
| done | ||||
|  | ||||
| echo "docker ps output" | ||||
| docker ps | ||||
|  | ||||
| docker kill squid-one squid-two squid-custom | ||||
|  | ||||
| @@ -88,19 +96,19 @@ docker kill squid-one squid-two squid-custom | ||||
| # Requests | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp' | ||||
|  | ||||
| # Playwright | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp' | ||||
|  | ||||
| # Puppeteer fast | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp' | ||||
|  | ||||
| # Selenium | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp' | ||||
|   | ||||
| @@ -5,22 +5,23 @@ set -e | ||||
| # enable debug | ||||
| set -x | ||||
|  | ||||
| docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network | ||||
|  | ||||
| # SOCKS5 related - start simple Socks5 proxy server | ||||
| # SOCKSTEST=xyz should show in the logs of this service to confirm it fetched | ||||
| docker run --network changedet-network -d --hostname socks5proxy --rm  --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy | ||||
| docker run --network changedet-network -d --hostname socks5proxy-noauth --rm  -p 1081:1080 --name socks5proxy-noauth  serjs/go-socks5-proxy | ||||
| docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth -e REQUIRE_AUTH=false serjs/go-socks5-proxy | ||||
|  | ||||
| echo "---------------------------------- SOCKS5 -------------------" | ||||
| # SOCKS5 related - test from proxies.json | ||||
| docker run --network changedet-network \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example:/tmp/proxies.json \ | ||||
|   --rm \ | ||||
|   -e "FLASK_SERVER_NAME=cdio" \ | ||||
|   --hostname cdio \ | ||||
|   -e "SOCKSTEST=proxiesjson" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py  --datastore-path /tmp' | ||||
|  | ||||
| # SOCKS5 related - by manually entering in UI | ||||
| docker run --network changedet-network \ | ||||
| @@ -29,18 +30,18 @@ docker run --network changedet-network \ | ||||
|   --hostname cdio \ | ||||
|   -e "SOCKSTEST=manual" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy.py' | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy.py --datastore-path /tmp' | ||||
|  | ||||
| # SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY | ||||
| docker run --network changedet-network \ | ||||
|   -e "SOCKSTEST=manual-playwright" \ | ||||
|   --hostname cdio \ | ||||
|   -e "FLASK_SERVER_NAME=cdio" \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/tmp/proxies.json \ | ||||
|   -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \ | ||||
|   --rm \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|   bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py --datastore-path /tmp' | ||||
|  | ||||
| echo "socks5 server logs" | ||||
| docker logs socks5proxy | ||||
|   | ||||
| @@ -1,24 +0,0 @@ | ||||
| """ | ||||
| Safe Jinja2 render with max payload sizes | ||||
|  | ||||
| See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations | ||||
| """ | ||||
|  | ||||
| import jinja2.sandbox | ||||
| import typing as t | ||||
| import os | ||||
|  | ||||
| JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10)) | ||||
|  | ||||
| # This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available. | ||||
| # (Which also limits available functions that could be called) | ||||
| def render(template_str, **args: t.Any) -> str: | ||||
|     jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension']) | ||||
|     output = jinja2_env.from_string(template_str).render(args) | ||||
|     return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE] | ||||
|  | ||||
| def render_fully_escaped(content): | ||||
|     env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True) | ||||
|     template = env.from_string("{{ some_html|e }}") | ||||
|     return template.render(some_html=content) | ||||
|  | ||||
| @@ -29,7 +29,7 @@ $(document).ready(function () { | ||||
|         $(this).text(new Date($(this).data("utc")).toLocaleString()); | ||||
|     }) | ||||
|  | ||||
|     const timezoneInput = $('#application-timezone'); | ||||
|     const timezoneInput = $('#application-scheduler_timezone_default'); | ||||
|     if(timezoneInput.length) { | ||||
|         const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone; | ||||
|         if (!timezoneInput.val().trim()) { | ||||
|   | ||||
| @@ -2,6 +2,13 @@ | ||||
|  | ||||
| $(document).ready(function () { | ||||
|  | ||||
|     function reapplyTableStripes() { | ||||
|         $('.watch-table tbody tr').each(function(index) { | ||||
|             $(this).removeClass('pure-table-odd pure-table-even'); | ||||
|             $(this).addClass(index % 2 === 0 ? 'pure-table-odd' : 'pure-table-even'); | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     function bindSocketHandlerButtonsEvents(socket) { | ||||
|         $('.ajax-op').on('click.socketHandlerNamespace', function (e) { | ||||
|             e.preventDefault(); | ||||
| @@ -101,6 +108,7 @@ $(document).ready(function () { | ||||
|             socket.on('watch_deleted', function (data) { | ||||
|                 $('tr[data-watch-uuid="' + data.uuid + '"] td').fadeOut(500, function () { | ||||
|                     $(this).closest('tr').remove(); | ||||
|                     reapplyTableStripes(); | ||||
|                 }); | ||||
|             }); | ||||
|  | ||||
| @@ -117,15 +125,16 @@ $(document).ready(function () { | ||||
|                 } | ||||
|             }) | ||||
|  | ||||
|             socket.on('general_stats_update', function (general_stats) { | ||||
|                 // Tabs at bottom of list | ||||
|                 $('#watch-table-wrapper').toggleClass("has-unread-changes", general_stats.unread_changes_count !==0) | ||||
|                 $('#watch-table-wrapper').toggleClass("has-error", general_stats.count_errors !== 0) | ||||
|                 $('#post-list-with-errors a').text(`With errors (${ new Intl.NumberFormat(navigator.language).format(general_stats.count_errors) })`); | ||||
|                 $('#unread-tab-counter').text(new Intl.NumberFormat(navigator.language).format(general_stats.unread_changes_count)); | ||||
|             }); | ||||
|  | ||||
|             socket.on('watch_update', function (data) { | ||||
|                 const watch = data.watch; | ||||
|                 const general_stats = data.general_stats; | ||||
|  | ||||
|                 // Log the entire watch object for debugging | ||||
|                 console.log('!!! WATCH UPDATE EVENT RECEIVED !!!'); | ||||
|                 console.log(`${watch.event_timestamp} - Watch update ${watch.uuid} - Checking now - ${watch.checking_now} - UUID in URL ${window.location.href.includes(watch.uuid)}`); | ||||
|                 console.log('Watch data:', watch); | ||||
|                 console.log('General stats:', general_stats); | ||||
|  | ||||
|                 // Updating watch table rows | ||||
|                 const $watchRow = $('tr[data-watch-uuid="' + watch.uuid + '"]'); | ||||
| @@ -150,13 +159,6 @@ $(document).ready(function () { | ||||
|  | ||||
|                     console.log('Updated UI for watch:', watch.uuid); | ||||
|                 } | ||||
|  | ||||
|                 // Tabs at bottom of list | ||||
|                 $('#post-list-mark-views').toggleClass("has-unviewed", general_stats.has_unviewed); | ||||
|                 $('#post-list-unread').toggleClass("has-unviewed", general_stats.has_unviewed); | ||||
|                 $('#post-list-with-errors').toggleClass("has-error", general_stats.count_errors !== 0) | ||||
|                 $('#post-list-with-errors a').text(`With errors (${ general_stats.count_errors })`); | ||||
|  | ||||
|                 $('body').toggleClass('checking-now', watch.checking_now && window.location.href.includes(watch.uuid)); | ||||
|             }); | ||||
|  | ||||
|   | ||||
| @@ -17,18 +17,6 @@ body.checking-now { | ||||
|   position: fixed; | ||||
| } | ||||
|  | ||||
| #post-list-buttons { | ||||
|   #post-list-with-errors.has-error { | ||||
|     display: inline-block !important; | ||||
|   } | ||||
|   #post-list-mark-views.has-unviewed { | ||||
|     display: inline-block !important; | ||||
|   } | ||||
|   #post-list-unread.has-unviewed { | ||||
|     display: inline-block !important; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -127,5 +127,44 @@ | ||||
|       display: inline-block !important; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  | ||||
| } | ||||
|  | ||||
| #watch-table-wrapper { | ||||
|   /* general styling */ | ||||
|   #post-list-buttons { | ||||
|     text-align: right; | ||||
|     padding: 0px; | ||||
|     margin: 0px; | ||||
|  | ||||
|     li { | ||||
|       display: inline-block; | ||||
|     } | ||||
|  | ||||
|     a { | ||||
|       border-top-left-radius: initial; | ||||
|       border-top-right-radius: initial; | ||||
|       border-bottom-left-radius: 5px; | ||||
|       border-bottom-right-radius: 5px; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   /* post list dynamically on/off stuff */ | ||||
|  | ||||
|   &.has-error { | ||||
|     #post-list-buttons { | ||||
|       #post-list-with-errors { | ||||
|         display: inline-block !important; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   &.has-unread-changes { | ||||
|     #post-list-buttons { | ||||
|       #post-list-unread, #post-list-mark-views, #post-list-unread { | ||||
|         display: inline-block !important; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|   | ||||
| @@ -34,7 +34,6 @@ | ||||
|       transition: all 0.2s ease; | ||||
|       cursor: pointer; | ||||
|       display: block; | ||||
|       min-width: 60px; | ||||
|       text-align: center; | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -203,24 +203,6 @@ code { | ||||
| } | ||||
|  | ||||
|  | ||||
| #post-list-buttons { | ||||
|   text-align: right; | ||||
|   padding: 0px; | ||||
|   margin: 0px; | ||||
|  | ||||
|   li { | ||||
|     display: inline-block; | ||||
|   } | ||||
|  | ||||
|   a { | ||||
|     border-top-left-radius: initial; | ||||
|     border-top-right-radius: initial; | ||||
|     border-bottom-left-radius: 5px; | ||||
|     border-bottom-right-radius: 5px; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| body:after { | ||||
|   content: ""; | ||||
|   background: linear-gradient(130deg, var(--color-background-gradient-first), var(--color-background-gradient-second) 41.07%, var(--color-background-gradient-third) 84.05%); | ||||
| @@ -362,7 +344,7 @@ label { | ||||
|  }   | ||||
| } | ||||
|  | ||||
| #notification-customisation { | ||||
| .grey-form-border { | ||||
|   border: 1px solid var(--color-border-notification); | ||||
|   padding: 0.5rem; | ||||
|   border-radius: 5px; | ||||
|   | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -1,11 +1,13 @@ | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from changedetectionio.validate_url import is_safe_valid_url | ||||
|  | ||||
| from flask import ( | ||||
|     flash | ||||
| ) | ||||
|  | ||||
| from .html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
| from . model import App, Watch | ||||
| from .model import App, Watch, USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
| from copy import deepcopy, copy | ||||
| from os import path, unlink | ||||
| from threading import Lock | ||||
| @@ -40,17 +42,24 @@ class ChangeDetectionStore: | ||||
|     needs_write_urgent = False | ||||
|  | ||||
|     __version_check = True | ||||
|     save_data_thread = None | ||||
|  | ||||
|     def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"): | ||||
|         # Should only be active for docker | ||||
|         # logging.basicConfig(filename='/dev/stdout', level=logging.INFO) | ||||
|         self.__data = App.model() | ||||
|         self.datastore_path = datastore_path | ||||
|         self.json_store_path = os.path.join(self.datastore_path, "url-watches.json") | ||||
|         logger.info(f"Datastore path is '{self.json_store_path}'") | ||||
|  | ||||
|         self.needs_write = False | ||||
|         self.start_time = time.time() | ||||
|         self.stop_thread = False | ||||
|         self.reload_state(datastore_path=datastore_path, include_default_watches=include_default_watches, version_tag=version_tag) | ||||
|  | ||||
|  | ||||
|     def reload_state(self, datastore_path, include_default_watches, version_tag): | ||||
|         logger.info(f"Datastore path is '{datastore_path}'") | ||||
|  | ||||
|         self.__data = App.model() | ||||
|         self.datastore_path = datastore_path | ||||
|         self.json_store_path = os.path.join(self.datastore_path, "url-watches.json") | ||||
|         # Base definition for all watchers | ||||
|         # deepcopy part of #569 - not sure why its needed exactly | ||||
|         self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={})) | ||||
| @@ -143,7 +152,10 @@ class ChangeDetectionStore: | ||||
|         self.needs_write = True | ||||
|  | ||||
|         # Finally start the thread that will manage periodic data saves to JSON | ||||
|         save_data_thread = threading.Thread(target=self.save_datastore).start() | ||||
|         # Only start if thread is not already running (reload_state might be called multiple times) | ||||
|         if not self.save_data_thread or not self.save_data_thread.is_alive(): | ||||
|             self.save_data_thread = threading.Thread(target=self.save_datastore) | ||||
|             self.save_data_thread.start() | ||||
|  | ||||
|     def rehydrate_entity(self, uuid, entity, processor_override=None): | ||||
|         """Set the dict back to the dict Watch object""" | ||||
| @@ -202,14 +214,13 @@ class ChangeDetectionStore: | ||||
|         return seconds | ||||
|  | ||||
|     @property | ||||
|     def has_unviewed(self): | ||||
|         if not self.__data.get('watching'): | ||||
|             return None | ||||
|  | ||||
|     def unread_changes_count(self): | ||||
|         unread_changes_count = 0 | ||||
|         for uuid, watch in self.__data['watching'].items(): | ||||
|             if watch.history_n >= 2 and watch.viewed == False: | ||||
|                 return True | ||||
|         return False | ||||
|                 unread_changes_count += 1 | ||||
|  | ||||
|         return unread_changes_count | ||||
|  | ||||
|     @property | ||||
|     def data(self): | ||||
| @@ -229,26 +240,36 @@ class ChangeDetectionStore: | ||||
|         d['settings']['application']['active_base_url'] = active_base_url.strip('" ') | ||||
|         return d | ||||
|  | ||||
|     from pathlib import Path | ||||
|  | ||||
|     def delete_path(self, path: Path): | ||||
|         import shutil | ||||
|         """Delete a file or directory tree, including the path itself.""" | ||||
|         if not path.exists(): | ||||
|             return | ||||
|         if path.is_file() or path.is_symlink(): | ||||
|             path.unlink(missing_ok=True)  # deletes a file or symlink | ||||
|         else: | ||||
|             shutil.rmtree(path, ignore_errors=True)  # deletes dir *and* its contents | ||||
|  | ||||
|     # Delete a single watch by UUID | ||||
|     def delete(self, uuid): | ||||
|         import pathlib | ||||
|         import shutil | ||||
|  | ||||
|         with self.lock: | ||||
|             if uuid == 'all': | ||||
|                 self.__data['watching'] = {} | ||||
|                 time.sleep(1) # Mainly used for testing to allow all items to flush before running next test | ||||
|  | ||||
|                 # GitHub #30 also delete history records | ||||
|                 for uuid in self.data['watching']: | ||||
|                     path = pathlib.Path(os.path.join(self.datastore_path, uuid)) | ||||
|                     if os.path.exists(path): | ||||
|                         shutil.rmtree(path) | ||||
|                         self.delete(uuid) | ||||
|  | ||||
|             else: | ||||
|                 path = pathlib.Path(os.path.join(self.datastore_path, uuid)) | ||||
|                 if os.path.exists(path): | ||||
|                     shutil.rmtree(path) | ||||
|                     self.delete_path(path) | ||||
|  | ||||
|                 del self.data['watching'][uuid] | ||||
|  | ||||
|         self.needs_write_urgent = True | ||||
| @@ -331,9 +352,10 @@ class ChangeDetectionStore: | ||||
|                 logger.error(f"Error fetching metadata for shared watch link {url} {str(e)}") | ||||
|                 flash("Error fetching metadata for {}".format(url), 'error') | ||||
|                 return False | ||||
|         from .model.Watch import is_safe_url | ||||
|         if not is_safe_url(url): | ||||
|             flash('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX', 'error') | ||||
|  | ||||
|         if not is_safe_valid_url(url): | ||||
|             flash('Watch protocol is not permitted or invalid URL format', 'error') | ||||
|  | ||||
|             return None | ||||
|  | ||||
|         if tag and type(tag) == str: | ||||
| @@ -399,7 +421,6 @@ class ChangeDetectionStore: | ||||
|             self.sync_to_json() | ||||
|             return | ||||
|         else: | ||||
|  | ||||
|             try: | ||||
|                 # Re #286  - First write to a temp file, then confirm it looks OK and rename it | ||||
|                 # This is a fairly basic strategy to deal with the case that the file is corrupted, | ||||
| @@ -429,7 +450,7 @@ class ChangeDetectionStore: | ||||
|                 logger.remove() | ||||
|                 logger.add(sys.stderr) | ||||
|  | ||||
|                 logger.critical("Shutting down datastore thread") | ||||
|                 logger.info(f"Shutting down datastore '{self.datastore_path}' thread") | ||||
|                 return | ||||
|  | ||||
|             if self.needs_write or self.needs_write_urgent: | ||||
| @@ -977,6 +998,35 @@ class ChangeDetectionStore: | ||||
|         if self.data['settings']['application'].get('extract_title_as_title'): | ||||
|             self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title') | ||||
|  | ||||
|     def update_21(self): | ||||
|         if self.data['settings']['application'].get('timezone'): | ||||
|             self.data['settings']['application']['scheduler_timezone_default'] = self.data['settings']['application'].get('timezone') | ||||
|             del self.data['settings']['application']['timezone'] | ||||
|  | ||||
|  | ||||
|     # Some notification formats got the wrong name type | ||||
|     def update_22(self): | ||||
|         from .notification import valid_notification_formats | ||||
|  | ||||
|         sys_n_format = self.data['settings']['application'].get('notification_format') | ||||
|         key_exists_as_value = next((k for k, v in valid_notification_formats.items() if v == sys_n_format), None) | ||||
|         if key_exists_as_value: # key of "Plain text" | ||||
|             logger.success(f"['settings']['application']['notification_format'] '{sys_n_format}' -> '{key_exists_as_value}'") | ||||
|             self.data['settings']['application']['notification_format'] = key_exists_as_value | ||||
|  | ||||
|         for uuid, watch in self.data['watching'].items(): | ||||
|             n_format = self.data['watching'][uuid].get('notification_format') | ||||
|             key_exists_as_value = next((k for k, v in valid_notification_formats.items() if v == n_format), None) | ||||
|             if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text" | ||||
|                 logger.success(f"['watching'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'") | ||||
|                 self.data['watching'][uuid]['notification_format'] = key_exists_as_value # should be 'text' or whatever | ||||
|  | ||||
|         for uuid, tag in self.data['settings']['application']['tags'].items(): | ||||
|             n_format = self.data['settings']['application']['tags'][uuid].get('notification_format') | ||||
|             key_exists_as_value = next((k for k, v in valid_notification_formats.items() if v == n_format), None) | ||||
|             if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text" | ||||
|                 logger.success(f"['settings']['application']['tags'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'") | ||||
|                 self.data['settings']['application']['tags'][uuid]['notification_format'] = key_exists_as_value # should be 'text' or whatever | ||||
|  | ||||
|     def add_notification_url(self, notification_url): | ||||
|          | ||||
|   | ||||
| @@ -33,7 +33,7 @@ | ||||
|                                 <div id="notification-test-log" style="display: none;"><span class="pure-form-message-inline">Processing..</span></div> | ||||
|                             </div> | ||||
|                         </div> | ||||
|                         <div id="notification-customisation" class="pure-control-group"> | ||||
|                         <div class="pure-control-group grey-form-border"> | ||||
|                             <div class="pure-control-group"> | ||||
|                                 {{ render_field(form.notification_title, class="m-d notification-title", placeholder=settings_application['notification_title']) }} | ||||
|                                 <span class="pure-form-message-inline">Title for all notifications</span> | ||||
| @@ -134,6 +134,12 @@ | ||||
|                                     <p> | ||||
|                                         URL encoding, use <strong>|urlencode</strong>, for example - <code>gets://hook-website.com/test.php?title={{ '{{ watch_title|urlencode }}' }}</code> | ||||
|                                     </p> | ||||
|                                     <p> | ||||
|                                         Regular-expression replace, use <strong>|regex_replace</strong>, for example -   <code>{{ "{{ \"hello world 123\" | regex_replace('[0-9]+', 'no-more-numbers') }}" }}</code> | ||||
|                                     </p> | ||||
|                                     <p> | ||||
|                                         For a complete reference of all Jinja2 built-in filters, users can refer to the <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters">https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters</a> | ||||
|                                     </p> | ||||
|                                 </div> | ||||
|                             </div> | ||||
|                             <div class="pure-control-group"> | ||||
|   | ||||
| @@ -14,13 +14,31 @@ | ||||
|                 {% if field.errors is mapping and 'form' in field.errors %} | ||||
|                     {#  and subfield form errors, such as used in RequiredFormField() for TimeBetweenCheckForm sub form #} | ||||
|                     {% set errors = field.errors['form'] %} | ||||
|                     {% for error in errors %} | ||||
|                         <li>{{ error }}</li> | ||||
|                     {% endfor %} | ||||
|                 {% elif field.type == 'FieldList' %} | ||||
|                     {# Handle FieldList of FormFields - errors is a list of dicts, one per entry #} | ||||
|                     {% for idx, entry_errors in field.errors|enumerate %} | ||||
|                         {% if entry_errors is mapping and entry_errors %} | ||||
|                             {# Only show entries that have actual errors #} | ||||
|                             <li><strong>Entry {{ idx + 1 }}:</strong> | ||||
|                                 <ul> | ||||
|                                     {% for field_name, messages in entry_errors.items() %} | ||||
|                                         {% for message in messages %} | ||||
|                                             <li>{{ field_name }}: {{ message }}</li> | ||||
|                                         {% endfor %} | ||||
|                                     {% endfor %} | ||||
|                                 </ul> | ||||
|                             </li> | ||||
|                         {% endif %} | ||||
|                     {% endfor %} | ||||
|                 {% else %} | ||||
|                     {#  regular list of errors with this field #} | ||||
|                     {% set errors = field.errors %} | ||||
|                     {% for error in field.errors %} | ||||
|                         <li>{{ error }}</li> | ||||
|                     {% endfor %} | ||||
|                 {% endif %} | ||||
|                 {% for error in errors %} | ||||
|                     <li>{{ error }}</li> | ||||
|                 {% endfor %} | ||||
|             </ul> | ||||
|         {% endif %} | ||||
|     </div> | ||||
| @@ -93,6 +111,39 @@ | ||||
|   {{ field(**kwargs)|safe }} | ||||
| {% endmacro %} | ||||
|  | ||||
| {% macro render_fieldlist_with_inline_errors(fieldlist) %} | ||||
|   {# Specialized macro for FieldList(FormField(...)) that renders errors inline with each field #} | ||||
|   <div {% if fieldlist.errors %} class="error" {% endif %}>{{ fieldlist.label }}</div> | ||||
|   <div {% if fieldlist.errors %} class="error" {% endif %}> | ||||
|     <ul id="{{ fieldlist.id }}"> | ||||
|       {% for entry in fieldlist %} | ||||
|         <li {% if entry.errors %} class="error" {% endif %}> | ||||
|           <label for="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}>{{ fieldlist.label.text }}-{{ loop.index0 }}</label> | ||||
|           <table id="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}> | ||||
|             <tbody> | ||||
|               {% for subfield in entry %} | ||||
|                 <tr {% if subfield.errors %} class="error" {% endif %}> | ||||
|                   <th {% if subfield.errors %} class="error" {% endif %}><label for="{{ subfield.id }}" {% if subfield.errors %} class="error" {% endif %}>{{ subfield.label.text }}</label></th> | ||||
|                   <td {% if subfield.errors %} class="error" {% endif %}> | ||||
|                     {{ subfield(**kwargs)|safe }} | ||||
|                     {% if subfield.errors %} | ||||
|                       <ul class="errors"> | ||||
|                         {% for error in subfield.errors %} | ||||
|                           <li class="error">{{ error }}</li> | ||||
|                         {% endfor %} | ||||
|                       </ul> | ||||
|                     {% endif %} | ||||
|                   </td> | ||||
|                 </tr> | ||||
|               {% endfor %} | ||||
|             </tbody> | ||||
|           </table> | ||||
|         </li> | ||||
|       {% endfor %} | ||||
|     </ul> | ||||
|   </div> | ||||
| {% endmacro %} | ||||
|  | ||||
| {% macro render_conditions_fieldlist_of_formfields_as_table(fieldlist, table_id="rulesTable") %} | ||||
|   <div class="fieldlist_formfields" id="{{ table_id }}"> | ||||
|     <div class="fieldlist-header"> | ||||
| @@ -215,9 +266,7 @@ | ||||
|             <li id="timezone-info"> | ||||
|                 {{ render_field(form.time_schedule_limit.timezone, placeholder=timezone_default_config) }} <span id="local-time-in-tz"></span> | ||||
|                 <datalist id="timezones" style="display: none;"> | ||||
|                     {% for timezone in available_timezones %} | ||||
|                         <option value="{{ timezone }}">{{ timezone }}</option> | ||||
|                     {% endfor %} | ||||
|                     {%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%} | ||||
|                 </datalist> | ||||
|             </li> | ||||
|         </ul> | ||||
|   | ||||
| @@ -53,7 +53,7 @@ | ||||
|           <a class="pure-menu-heading" href="{{url_for('watchlist.index')}}"> | ||||
|             <strong>Change</strong>Detection.io</a> | ||||
|         {% endif %} | ||||
|         {% if current_diff_url %} | ||||
|         {% if current_diff_url and is_safe_valid_url(current_diff_url) %} | ||||
|           <a class="current-diff-url" href="{{ current_diff_url }}"> | ||||
|             <span style="max-width: 30%; overflow: hidden">{{ current_diff_url }}</span></a> | ||||
|         {% else %} | ||||
|   | ||||
| @@ -26,7 +26,10 @@ | ||||
|                             <li>Changing this will affect the comparison checksum which may trigger an alert</li> | ||||
|                         </ul> | ||||
|                 </span> | ||||
|  | ||||
|                 <br><br> | ||||
|                     <div class="pure-control-group"> | ||||
|                       {{ render_ternary_field(form.strip_ignored_lines) }} | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|  | ||||
|                 <fieldset> | ||||
|   | ||||
| @@ -4,12 +4,14 @@ import time | ||||
| from threading import Thread | ||||
|  | ||||
| import pytest | ||||
| import arrow | ||||
| from changedetectionio import changedetection_app | ||||
| from changedetectionio import store | ||||
| import os | ||||
| import sys | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.flask_app import init_app_secret | ||||
| from changedetectionio.tests.util import live_server_setup, new_live_server_setup | ||||
|  | ||||
| # https://github.com/pallets/flask/blob/1.1.2/examples/tutorial/tests/test_auth.py | ||||
| @@ -29,16 +31,39 @@ def reportlog(pytestconfig): | ||||
|     logger.remove(handler_id) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def environment(mocker): | ||||
|     """Mock arrow.now() to return a fixed datetime for testing jinja2 time extension.""" | ||||
|     # Fixed datetime: Wed, 09 Dec 2015 23:33:01 UTC | ||||
|     # This is calculated to match the test expectations when offsets are applied | ||||
|     fixed_datetime = arrow.Arrow(2015, 12, 9, 23, 33, 1, tzinfo='UTC') | ||||
|     # Patch arrow.now in the TimeExtension module where it's actually used | ||||
|     mocker.patch('changedetectionio.jinja2_custom.extensions.TimeExtension.arrow.now', return_value=fixed_datetime) | ||||
|     return fixed_datetime | ||||
|  | ||||
|  | ||||
| def format_memory_human(bytes_value): | ||||
|     """Format memory in human-readable units (KB, MB, GB)""" | ||||
|     if bytes_value < 1024: | ||||
|         return f"{bytes_value} B" | ||||
|     elif bytes_value < 1024 ** 2: | ||||
|         return f"{bytes_value / 1024:.2f} KB" | ||||
|     elif bytes_value < 1024 ** 3: | ||||
|         return f"{bytes_value / (1024 ** 2):.2f} MB" | ||||
|     else: | ||||
|         return f"{bytes_value / (1024 ** 3):.2f} GB" | ||||
|  | ||||
| def track_memory(memory_usage, ): | ||||
|     process = psutil.Process(os.getpid()) | ||||
|     while not memory_usage["stop"]: | ||||
|         current_rss = process.memory_info().rss | ||||
|         memory_usage["peak"] = max(memory_usage["peak"], current_rss) | ||||
|         memory_usage["current"] = current_rss  # Keep updating current | ||||
|         time.sleep(0.01)  # Adjust the sleep time as needed | ||||
|  | ||||
| @pytest.fixture(scope='function') | ||||
| def measure_memory_usage(request): | ||||
|     memory_usage = {"peak": 0, "stop": False} | ||||
|     memory_usage = {"peak": 0, "current": 0, "stop": False} | ||||
|     tracker_thread = Thread(target=track_memory, args=(memory_usage,)) | ||||
|     tracker_thread.start() | ||||
|  | ||||
| @@ -47,22 +72,22 @@ def measure_memory_usage(request): | ||||
|     memory_usage["stop"] = True | ||||
|     tracker_thread.join() | ||||
|  | ||||
|     # Note: ru_maxrss is in kilobytes on Unix-based systems | ||||
|     max_memory_used = memory_usage["peak"] / 1024  # Convert to MB | ||||
|     s = f"Peak memory used by the test {request.node.fspath} - '{request.node.name}': {max_memory_used:.2f} MB" | ||||
|     # Note: psutil returns RSS memory in bytes | ||||
|     peak_human = format_memory_human(memory_usage["peak"]) | ||||
|  | ||||
|     s = f"{time.time()} {request.node.fspath} - '{request.node.name}' - Peak memory: {peak_human}" | ||||
|     logger.debug(s) | ||||
|  | ||||
|     with open("test-memory.log", 'a') as f: | ||||
|         f.write(f"{s}\n") | ||||
|  | ||||
|     # Assert that the memory usage is less than 200MB | ||||
| #    assert max_memory_used < 150, f"Memory usage exceeded 200MB: {max_memory_used:.2f} MB" | ||||
| #    assert peak_memory_kb < 150 * 1024, f"Memory usage exceeded 150MB: {peak_human}" | ||||
|  | ||||
|  | ||||
| def cleanup(datastore_path): | ||||
|     import glob | ||||
|     # Unlink test output files | ||||
|  | ||||
|     for g in ["*.txt", "*.json", "*.pdf"]: | ||||
|         files = glob.glob(os.path.join(datastore_path, g)) | ||||
|         for f in files: | ||||
| @@ -72,34 +97,121 @@ def cleanup(datastore_path): | ||||
|             if os.path.isfile(f): | ||||
|                 os.unlink(f) | ||||
|  | ||||
| @pytest.fixture(scope='function', autouse=True) | ||||
| def prepare_test_function(live_server): | ||||
| def pytest_addoption(parser): | ||||
|     """Add custom command-line options for pytest. | ||||
|  | ||||
|     Provides --datastore-path option for specifying custom datastore location. | ||||
|     Note: Cannot use -d short option as it's reserved by pytest for debug mode. | ||||
|     """ | ||||
|     parser.addoption( | ||||
|         "--datastore-path", | ||||
|         action="store", | ||||
|         default=None, | ||||
|         help="Custom datastore path for tests" | ||||
|     ) | ||||
|  | ||||
| @pytest.fixture(scope='session') | ||||
| def datastore_path(tmp_path_factory, request): | ||||
|     """Provide datastore path unique to this worker. | ||||
|  | ||||
|     Supports custom path via --datastore-path/-d flag (mirrors main app). | ||||
|  | ||||
|     CRITICAL for xdist isolation: | ||||
|     - Each WORKER gets its own directory | ||||
|     - Tests on same worker run SEQUENTIALLY and cleanup between tests | ||||
|     - No subdirectories needed since tests don't overlap on same worker | ||||
|     - Example: /tmp/test-datastore-gw0/ for worker gw0 | ||||
|     """ | ||||
|     # Check for custom path first (mirrors main app's -d flag) | ||||
|     custom_path = request.config.getoption("--datastore-path") | ||||
|     if custom_path: | ||||
|         # Ensure the directory exists | ||||
|         os.makedirs(custom_path, exist_ok=True) | ||||
|         logger.info(f"Using custom datastore path: {custom_path}") | ||||
|         return custom_path | ||||
|  | ||||
|     # Otherwise use default tmp_path_factory logic | ||||
|     worker_id = getattr(request.config, 'workerinput', {}).get('workerid', 'master') | ||||
|     if worker_id == 'master': | ||||
|         path = tmp_path_factory.mktemp("test-datastore") | ||||
|     else: | ||||
|         path = tmp_path_factory.mktemp(f"test-datastore-{worker_id}") | ||||
|     return str(path) | ||||
|  | ||||
|  | ||||
| @pytest.fixture(scope='function', autouse=True) | ||||
| def prepare_test_function(live_server, datastore_path): | ||||
|     """Prepare each test with complete isolation. | ||||
|  | ||||
|     CRITICAL for xdist per-test isolation: | ||||
|     - Reuses the SAME datastore instance (so blueprint references stay valid) | ||||
|     - Clears all watches and state for a clean slate | ||||
|     - First watch will get uuid="first" | ||||
|     """ | ||||
|     routes = [rule.rule for rule in live_server.app.url_map.iter_rules()] | ||||
|     if '/test-random-content-endpoint' not in routes: | ||||
|         logger.debug("Setting up test URL routes") | ||||
|         new_live_server_setup(live_server) | ||||
|  | ||||
|     # CRITICAL: Point app to THIS test's unique datastore directory | ||||
|     live_server.app.config['TEST_DATASTORE_PATH'] = datastore_path | ||||
|  | ||||
|     # CRITICAL: Get datastore and stop it from writing stale data | ||||
|     datastore = live_server.app.config.get('DATASTORE') | ||||
|  | ||||
|     # Prevent background thread from writing during cleanup/reload | ||||
|     datastore.needs_write = False | ||||
|     datastore.needs_write_urgent = False | ||||
|  | ||||
|     # CRITICAL: Clean up any files from previous tests | ||||
|     # This ensures a completely clean directory | ||||
|     cleanup(datastore_path) | ||||
|  | ||||
|     # CRITICAL: Reload the EXISTING datastore instead of creating a new one | ||||
|     # This keeps blueprint references valid (they capture datastore at construction) | ||||
|     # reload_state() completely resets the datastore to a clean state | ||||
|  | ||||
|     # Reload state with clean data (no default watches) | ||||
|     datastore.reload_state( | ||||
|         datastore_path=datastore_path, | ||||
|         include_default_watches=False, | ||||
|         version_tag=datastore.data.get('version_tag', '0.0.0') | ||||
|     ) | ||||
|     live_server.app.secret_key = init_app_secret(datastore_path) | ||||
|     logger.debug(f"prepare_test_function: Reloaded datastore at {hex(id(datastore))}") | ||||
|     logger.debug(f"prepare_test_function: Path {datastore.datastore_path}") | ||||
|  | ||||
|     yield | ||||
|     # Then cleanup/shutdown | ||||
|     live_server.app.config['DATASTORE'].data['watching']={} | ||||
|     time.sleep(0.3) | ||||
|     live_server.app.config['DATASTORE'].data['watching']={} | ||||
|  | ||||
|     # Cleanup: Clear watches again after test | ||||
|     try: | ||||
|         datastore.data['watching'] = {} | ||||
|         datastore.needs_write = True | ||||
|     except Exception as e: | ||||
|         logger.warning(f"Error during datastore cleanup: {e}") | ||||
|  | ||||
|  | ||||
| # So the app can also know which test name it was | ||||
| @pytest.fixture(autouse=True) | ||||
| def set_test_name(request): | ||||
|   """Automatically set TEST_NAME env var for every test""" | ||||
|   test_name = request.node.name | ||||
|   os.environ['PYTEST_CURRENT_TEST'] = test_name | ||||
|   yield | ||||
|   # Cleanup if needed | ||||
|  | ||||
|  | ||||
| @pytest.fixture(scope='session') | ||||
| def app(request): | ||||
|     """Create application for the tests.""" | ||||
|     datastore_path = "./test-datastore" | ||||
| def app(request, datastore_path): | ||||
|     """Create application once per worker (session). | ||||
|  | ||||
|     Note: Actual per-test isolation is handled by: | ||||
|     - prepare_test_function() recreates datastore and cleans directory | ||||
|     - All tests on same worker use same directory (cleaned between tests) | ||||
|     """ | ||||
|     # So they don't delay in fetching | ||||
|     os.environ["MINIMUM_SECONDS_RECHECK_TIME"] = "0" | ||||
|     try: | ||||
|         os.mkdir(datastore_path) | ||||
|     except FileExistsError: | ||||
|         pass | ||||
|  | ||||
|     logger.debug(f"Testing with datastore_path={datastore_path}") | ||||
|     cleanup(datastore_path) | ||||
|  | ||||
|     app_config = {'datastore_path': datastore_path, 'disable_checkver' : True} | ||||
| @@ -122,6 +234,8 @@ def app(request): | ||||
|     # Disable CSRF while running tests | ||||
|     app.config['WTF_CSRF_ENABLED'] = False | ||||
|     app.config['STOP_THREADS'] = True | ||||
|     # Store datastore_path so Flask routes can access it | ||||
|     app.config['TEST_DATASTORE_PATH'] = datastore_path | ||||
|  | ||||
|     def teardown(): | ||||
|         # Stop all threads and services | ||||
|   | ||||
| @@ -29,13 +29,8 @@ def do_test(client, live_server, make_test_use_extra_browser=False): | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     if make_test_use_extra_browser: | ||||
| @@ -78,13 +73,13 @@ def do_test(client, live_server, make_test_use_extra_browser=False): | ||||
|  | ||||
|  | ||||
| # Requires playwright to be installed | ||||
| def test_request_via_custom_browser_url(client, live_server, measure_memory_usage): | ||||
| def test_request_via_custom_browser_url(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     # We do this so we can grep the logs of the custom container and see if the request actually went through that container | ||||
|     do_test(client, live_server, make_test_use_extra_browser=True) | ||||
|  | ||||
|  | ||||
| def test_request_not_via_custom_browser_url(client, live_server, measure_memory_usage): | ||||
| def test_request_not_via_custom_browser_url(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     # We do this so we can grep the logs of the custom container and see if the request actually went through that container | ||||
|     do_test(client, live_server, make_test_use_extra_browser=False) | ||||
|   | ||||
| @@ -8,7 +8,7 @@ import logging | ||||
|  | ||||
|  | ||||
| # Requires playwright to be installed | ||||
| def test_fetch_webdriver_content(client, live_server, measure_memory_usage): | ||||
| def test_fetch_webdriver_content(client, live_server, measure_memory_usage, datastore_path): | ||||
|     #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|     ##################### | ||||
|   | ||||
| @@ -3,7 +3,7 @@ from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
|  | ||||
|  | ||||
| def test_execute_custom_js(client, live_server, measure_memory_usage): | ||||
| def test_execute_custom_js(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def test_preferred_proxy(client, live_server, measure_memory_usage): | ||||
| def test_preferred_proxy(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     url = "http://chosen.changedetection.io" | ||||
|  | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
|  | ||||
|  | ||||
| def test_noproxy_option(client, live_server, measure_memory_usage): | ||||
| def test_noproxy_option(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     # Run by run_proxy_tests.sh | ||||
|     # Call this URL then scan the containers that it never went through them | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
|  | ||||
| # just make a request, we will grep in the docker logs to see it actually got called | ||||
| def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage): | ||||
| def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|   | ||||
| @@ -12,7 +12,7 @@ from ... import strtobool | ||||
| # FAST_PUPPETEER_CHROME_FETCHER=True PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 pytest tests/proxy_list/test_proxy_noconnect.py | ||||
| # WEBDRIVER_URL=http://127.0.0.1:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py | ||||
|  | ||||
| def test_proxy_noconnect_custom(client, live_server, measure_memory_usage): | ||||
| def test_proxy_noconnect_custom(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|     # Goto settings, add our custom one | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from ..util import live_server_setup, wait_for_all_checks | ||||
| import os | ||||
|  | ||||
| # just make a request, we will grep in the docker logs to see it actually got called | ||||
| def test_select_custom(client, live_server, measure_memory_usage): | ||||
| def test_select_custom(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|     # Goto settings, add our custom one | ||||
| @@ -49,3 +49,39 @@ def test_select_custom(client, live_server, measure_memory_usage): | ||||
|     # | ||||
|     # Now we should see the request in the container logs for "squid-squid-custom" because it will be the only default | ||||
|  | ||||
|  | ||||
| def test_custom_proxy_validation(client, live_server, measure_memory_usage, datastore_path): | ||||
|     #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|     # Goto settings, add our custom one | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-ignore_whitespace": "y", | ||||
|             "application-fetch_backend": 'html_requests', | ||||
|             "requests-extra_proxies-0-proxy_name": "custom-test-proxy", | ||||
|             "requests-extra_proxies-0-proxy_url": "xxxxhtt/333??p://test:awesome@squid-custom:3128", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." not in res.data | ||||
|     assert b'Proxy URLs must start with' in res.data | ||||
|  | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-ignore_whitespace": "y", | ||||
|             "application-fetch_backend": 'html_requests', | ||||
|             "requests-extra_proxies-0-proxy_name": "custom-test-proxy", | ||||
|             "requests-extra_proxies-0-proxy_url": "https://", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." not in res.data | ||||
|     assert b"Invalid URL." in res.data | ||||
|      | ||||
| @@ -2,10 +2,10 @@ | ||||
| import json | ||||
| import os | ||||
| from flask import url_for | ||||
| from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
| from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, delete_all_watches | ||||
|  | ||||
|  | ||||
| def set_response(): | ||||
| def set_response(datastore_path): | ||||
|     import time | ||||
|     data = """<html> | ||||
|        <body> | ||||
| @@ -15,13 +15,13 @@ def set_response(): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(data) | ||||
|     time.sleep(1) | ||||
|  | ||||
| def test_socks5(client, live_server, measure_memory_usage): | ||||
| def test_socks5(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     set_response() | ||||
|     set_response(datastore_path) | ||||
|  | ||||
|     # Setup a proxy | ||||
|     res = client.post( | ||||
| @@ -98,6 +98,5 @@ def test_socks5(client, live_server, measure_memory_usage): | ||||
|     ) | ||||
|     assert b"OK" in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,7 @@ from flask import url_for | ||||
| from changedetectionio.tests.util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def set_response(): | ||||
| def set_response(datastore_path): | ||||
|     import time | ||||
|     data = """<html> | ||||
|        <body> | ||||
| @@ -14,15 +14,15 @@ def set_response(): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(data) | ||||
|     time.sleep(1) | ||||
|  | ||||
| # should be proxies.json mounted from run_proxy_tests.sh already | ||||
| # -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json | ||||
| def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage): | ||||
| def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     set_response() | ||||
|     set_response(datastore_path) | ||||
|     # Because the socks server should connect back to us | ||||
|     test_url = url_for('test_endpoint', _external=True) + f"?socks-test-tag={os.getenv('SOCKSTEST', '')}" | ||||
|     test_url = test_url.replace('localhost.localdomain', 'cdio') | ||||
|   | ||||
| @@ -11,7 +11,7 @@ from changedetectionio.notification import ( | ||||
| ) | ||||
|  | ||||
|  | ||||
| def set_original_response(): | ||||
| def set_original_response(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|        <section id=header style="padding: 50px; height: 350px">This is the header which should be ignored always - <span>add to cart</span></section> | ||||
| @@ -26,13 +26,13 @@ def set_original_response(): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
|  | ||||
|  | ||||
| def set_back_in_stock_response(): | ||||
| def set_back_in_stock_response(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
| @@ -45,14 +45,14 @@ def set_back_in_stock_response(): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
| # Add a site in paused mode, add an invalid filter, we should still have visual selector data ready | ||||
| def test_restock_detection(client, live_server, measure_memory_usage): | ||||
| def test_restock_detection(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|     #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     ##################### | ||||
| @@ -88,24 +88,25 @@ def test_restock_detection(client, live_server, measure_memory_usage): | ||||
|     assert b'not-in-stock' in res.data # should be out of stock | ||||
|  | ||||
|     # Is it correctly shown as in stock | ||||
|     set_back_in_stock_response() | ||||
|     set_back_in_stock_response(datastore_path) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'not-in-stock' not in res.data | ||||
|  | ||||
|     # We should have a notification | ||||
|     wait_for_notification_endpoint_output() | ||||
|     assert os.path.isfile("test-datastore/notification.txt"), "Notification received" | ||||
|     os.unlink("test-datastore/notification.txt") | ||||
|     notification_file = os.path.join(datastore_path, "notification.txt") | ||||
|     wait_for_notification_endpoint_output(datastore_path=datastore_path) | ||||
|     assert os.path.isfile(notification_file), "Notification received" | ||||
|     os.unlink(notification_file) | ||||
|  | ||||
|     # Default behaviour is to only fire notification when it goes OUT OF STOCK -> IN STOCK | ||||
|     # So here there should be no file, because we go IN STOCK -> OUT OF STOCK | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     time.sleep(5) | ||||
|     assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default" | ||||
|     assert not os.path.isfile(notification_file), "No notification should have fired when it went OUT OF STOCK by default" | ||||
|  | ||||
|     # BUT we should see that it correctly shows "not in stock" | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|   | ||||
| @@ -1,51 +1,110 @@ | ||||
| #!/usr/bin/env python3 | ||||
| import asyncio | ||||
| import threading | ||||
| import time | ||||
| from aiosmtpd.controller import Controller | ||||
| from aiosmtpd.smtp import SMTP | ||||
| from flask import Flask, Response | ||||
| from email import message_from_bytes | ||||
| from email.policy import default | ||||
|  | ||||
| # Accept a SMTP message and offer a way to retrieve the last message via TCP Socket | ||||
| # Accept a SMTP message and offer a way to retrieve the last message via HTTP | ||||
|  | ||||
| last_received_message = b"Nothing" | ||||
| last_received_message = b"Nothing received yet." | ||||
| active_smtp_connections = 0 | ||||
| smtp_lock = threading.Lock() | ||||
|  | ||||
|  | ||||
| class CustomSMTPHandler: | ||||
|     async def handle_DATA(self, server, session, envelope): | ||||
|         global last_received_message | ||||
|         last_received_message = envelope.content | ||||
|         print('Receiving message from:', session.peer) | ||||
|         print('Message addressed from:', envelope.mail_from) | ||||
|         print('Message addressed to  :', envelope.rcpt_tos) | ||||
|         print('Message length        :', len(envelope.content)) | ||||
|         print(envelope.content.decode('utf8')) | ||||
|         return '250 Message accepted for delivery' | ||||
|         global last_received_message, active_smtp_connections | ||||
|  | ||||
|         with smtp_lock: | ||||
|             active_smtp_connections += 1 | ||||
|  | ||||
|         try: | ||||
|             last_received_message = envelope.content | ||||
|             print('Receiving message from:', session.peer) | ||||
|             print('Message addressed from:', envelope.mail_from) | ||||
|             print('Message addressed to  :', envelope.rcpt_tos) | ||||
|             print('Message length        :', len(envelope.content)) | ||||
|             print('*******************************') | ||||
|             print(envelope.content.decode('utf8')) | ||||
|             print('*******************************') | ||||
|  | ||||
|             # Parse the email message | ||||
|             msg = message_from_bytes(envelope.content, policy=default) | ||||
|             with open('/tmp/last.eml', 'wb') as f: | ||||
|                 f.write(envelope.content) | ||||
|  | ||||
|             # Write parts to files based on content type | ||||
|             if msg.is_multipart(): | ||||
|                 for part in msg.walk(): | ||||
|                     content_type = part.get_content_type() | ||||
|                     payload = part.get_payload(decode=True) | ||||
|  | ||||
|                     if payload: | ||||
|                         if content_type == 'text/plain': | ||||
|                             with open('/tmp/last.txt', 'wb') as f: | ||||
|                                 f.write(payload) | ||||
|                             print(f'Written text/plain part to /tmp/last.txt') | ||||
|                         elif content_type == 'text/html': | ||||
|                             with open('/tmp/last.html', 'wb') as f: | ||||
|                                 f.write(payload) | ||||
|                             print(f'Written text/html part to /tmp/last.html') | ||||
|             else: | ||||
|                 # Single part message | ||||
|                 content_type = msg.get_content_type() | ||||
|                 payload = msg.get_payload(decode=True) | ||||
|  | ||||
|                 if payload: | ||||
|                     if content_type == 'text/plain' or content_type.startswith('text/'): | ||||
|                         with open('/tmp/last.txt', 'wb') as f: | ||||
|                             f.write(payload) | ||||
|                         print(f'Written single part message to /tmp/last.txt') | ||||
|  | ||||
|             return '250 Message accepted for delivery' | ||||
|         finally: | ||||
|             with smtp_lock: | ||||
|                 active_smtp_connections -= 1 | ||||
|  | ||||
|  | ||||
| class EchoServerProtocol(asyncio.Protocol): | ||||
|     def connection_made(self, transport): | ||||
|         global last_received_message | ||||
|         self.transport = transport | ||||
|         peername = transport.get_extra_info('peername') | ||||
|         print('Incoming connection from {}'.format(peername)) | ||||
|         self.transport.write(last_received_message) | ||||
|  | ||||
|         last_received_message = b'' | ||||
|         self.transport.close() | ||||
| # Simple Flask HTTP server to echo back the last SMTP message | ||||
| app = Flask(__name__) | ||||
|  | ||||
|  | ||||
| async def main(): | ||||
| @app.route('/') | ||||
| def echo_last_message(): | ||||
|     global last_received_message, active_smtp_connections | ||||
|  | ||||
|     # Wait for any in-progress SMTP connections to complete | ||||
|     max_wait = 5  # Maximum 5 seconds | ||||
|     wait_interval = 0.05  # Check every 50ms | ||||
|     elapsed = 0 | ||||
|  | ||||
|     while elapsed < max_wait: | ||||
|         with smtp_lock: | ||||
|             if active_smtp_connections == 0: | ||||
|                 break | ||||
|         time.sleep(wait_interval) | ||||
|         elapsed += wait_interval | ||||
|  | ||||
|     return Response(last_received_message, mimetype='text/plain') | ||||
|  | ||||
|  | ||||
| def run_flask(): | ||||
|     app.run(host='0.0.0.0', port=11080, debug=False, use_reloader=False) | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     # Start the SMTP server | ||||
|     controller = Controller(CustomSMTPHandler(), hostname='0.0.0.0', port=11025) | ||||
|     controller.start() | ||||
|  | ||||
|     # Start the TCP Echo server | ||||
|     loop = asyncio.get_running_loop() | ||||
|     server = await loop.create_server( | ||||
|         lambda: EchoServerProtocol(), | ||||
|         '0.0.0.0', 11080 | ||||
|     ) | ||||
|     async with server: | ||||
|         await server.serve_forever() | ||||
|     # Start the HTTP server in a separate thread | ||||
|     flask_thread = threading.Thread(target=run_flask, daemon=True) | ||||
|     flask_thread.start() | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     asyncio.run(main()) | ||||
|     # Keep the main thread alive | ||||
|     try: | ||||
|         flask_thread.join() | ||||
|     except KeyboardInterrupt: | ||||
|         print("Shutting down...") | ||||
|   | ||||
| @@ -1,18 +1,22 @@ | ||||
| import json | ||||
| import os | ||||
| import time | ||||
| import re | ||||
| from flask import url_for | ||||
| from email import message_from_string | ||||
| from email.policy import default as email_policy | ||||
|  | ||||
| from changedetectionio.diff import HTML_REMOVED_STYLE, HTML_ADDED_STYLE, HTML_CHANGED_STYLE | ||||
| from changedetectionio.notification_service import NotificationContextData, CUSTOM_LINEBREAK_PLACEHOLDER | ||||
| from changedetectionio.tests.util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup, \ | ||||
|     wait_for_all_checks, \ | ||||
|     set_longer_modified_response | ||||
| from changedetectionio.tests.util import extract_UUID_from_client | ||||
|     set_longer_modified_response, delete_all_watches | ||||
|  | ||||
| import logging | ||||
| import base64 | ||||
|  | ||||
|  | ||||
| # NOTE - RELIES ON mailserver as hostname running, see github build recipes | ||||
| smtp_test_server = 'mailserver' | ||||
|  | ||||
| ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys()) | ||||
|  | ||||
| from changedetectionio.notification import ( | ||||
|     default_notification_body, | ||||
|     default_notification_format, | ||||
| @@ -23,24 +27,23 @@ from changedetectionio.notification import ( | ||||
|  | ||||
|  | ||||
| def get_last_message_from_smtp_server(): | ||||
|     import socket | ||||
|     port = 11080  # socket server port number | ||||
|  | ||||
|     client_socket = socket.socket()  # instantiate | ||||
|     client_socket.connect((smtp_test_server, port))  # connect to the server | ||||
|  | ||||
|     data = client_socket.recv(50024).decode()  # receive response | ||||
|     import requests | ||||
|     time.sleep(1) # wait for any smtp connects to die off | ||||
|     port = 11080  # HTTP server port number | ||||
|     # Make HTTP GET request to Flask server | ||||
|     response = requests.get(f'http://{smtp_test_server}:{port}/') | ||||
|     data = response.text | ||||
|     logging.info("get_last_message_from_smtp_server..") | ||||
|     logging.info(data) | ||||
|     client_socket.close()  # close the connection | ||||
|     return data | ||||
|  | ||||
|  | ||||
| # Requires running the test SMTP server | ||||
|  | ||||
| def test_check_notification_email_formats_default_HTML(client, live_server, measure_memory_usage): | ||||
| def test_check_notification_email_formats_default_HTML(client, live_server, measure_memory_usage, datastore_path): | ||||
|     ##  live_server_setup(live_server) # Setup on conftest per function | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|  | ||||
| @@ -50,8 +53,8 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": "fallback-body<br> " + default_notification_body, | ||||
|               "application-notification_format": 'HTML', | ||||
|               "application-notification_body": "some text\nfallback-body<br> " + default_notification_body, | ||||
|               "application-notification_format": 'html', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
| @@ -69,7 +72,7 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     set_longer_modified_response() | ||||
|     set_longer_modified_response(datastore_path=datastore_path) | ||||
|     time.sleep(2) | ||||
|  | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
| @@ -77,25 +80,242 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas | ||||
|  | ||||
|     time.sleep(3) | ||||
|  | ||||
|     msg = get_last_message_from_smtp_server() | ||||
|     assert len(msg) >= 1 | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     # The email should have two bodies, and the text/html part should be <br> | ||||
|     assert 'Content-Type: text/plain' in msg | ||||
|     assert '(added) So let\'s see what happens.\r\n' in msg  # The plaintext part with \r\n | ||||
|     assert 'Content-Type: text/html' in msg | ||||
|     assert '(added) So let\'s see what happens.<br>' in msg  # the html part | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative with text/plain and text/html) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain (the auto-generated plaintext version) | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     assert '(added) So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|     assert 'fallback-body\r\n' in text_content  # The plaintext part | ||||
|     assert CUSTOM_LINEBREAK_PLACEHOLDER not in text_content | ||||
|  | ||||
|     # Second part should be text/html | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|     assert 'some text<br>' in html_content  # We converted \n from the notification body | ||||
|     assert 'fallback-body<br>' in html_content  # kept the original <br> | ||||
|     assert '(added) So let\'s see what happens.<br>' in html_content  # the html part | ||||
|     assert CUSTOM_LINEBREAK_PLACEHOLDER not in html_content | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_check_notification_email_formats_default_Text_override_HTML(client, live_server, measure_memory_usage): | ||||
|     ##  live_server_setup(live_server) # Setup on conftest per function | ||||
| def test_check_notification_plaintext_format(client, live_server, measure_memory_usage, datastore_path): | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": "some text\n" + default_notification_body, | ||||
|               "application-notification_format": 'text', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add a watch and trigger a HTTP POST | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(2) | ||||
|  | ||||
|     set_longer_modified_response(datastore_path=datastore_path) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|  | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should be plain text only (not multipart) | ||||
|     assert not msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'text/plain' | ||||
|  | ||||
|     # Get the plain text content | ||||
|     text_content = msg.get_content() | ||||
|     assert '(added) So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|  | ||||
|     # Should NOT contain HTML | ||||
|     assert '<br>' not in text_content  # We should not have HTML in plain text | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
|  | ||||
| def test_check_notification_html_color_format(client, live_server, measure_memory_usage, datastore_path): | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": f"some text\n{default_notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", | ||||
|               "application-notification_format": 'htmlcolor', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add a watch and trigger a HTTP POST | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_views.form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": 'nice one'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     set_longer_modified_response(datastore_path=datastore_path) | ||||
|     time.sleep(2) | ||||
|  | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|  | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative with text/plain and text/html) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain (the auto-generated plaintext version) | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     assert 'So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|     assert '(added)' not in text_content # Because apprise only dumb converts the html to text | ||||
|  | ||||
|     # Second part should be text/html with color styling | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|     assert HTML_CHANGED_STYLE or HTML_REMOVED_STYLE in html_content | ||||
|     assert HTML_ADDED_STYLE in html_content | ||||
|     assert '<' not in html_content | ||||
|  | ||||
|     assert 'some text<br>' in html_content | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_check_notification_markdown_format(client, live_server, measure_memory_usage, datastore_path): | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": "*header*\n\nsome text\n" + default_notification_body, | ||||
|               "application-notification_format": 'markdown', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add a watch and trigger a HTTP POST | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_views.form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": 'nice one'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     set_longer_modified_response(datastore_path=datastore_path) | ||||
|     time.sleep(2) | ||||
|  | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|  | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative with text/plain and text/html) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain (the auto-generated plaintext version) | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     # We wont see anything in the "FALLBACK" text but that's OK (no added/strikethrough etc) | ||||
|     assert 'So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|  | ||||
|  | ||||
|     # Second part should be text/html and roughly converted from markdown to HTML | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|     assert '<p><em>header</em></p>' in html_content | ||||
|     assert '<strong>So let\'s see what happens.</strong><br>' in html_content # Additions are <strong> in markdown | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| # Custom notification body with HTML, that is either sent as HTML or rendered to plaintext and sent | ||||
| def test_check_notification_email_formats_default_Text_override_HTML(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|     # HTML problems? see this | ||||
|     # https://github.com/caronc/apprise/issues/633 | ||||
|  | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|     notification_body = f"""<!DOCTYPE html> | ||||
| <html lang="en"> | ||||
| @@ -116,7 +336,377 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": notification_body, | ||||
|               "application-notification_format": 'Text', | ||||
|               "application-notification_format": 'text', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add a watch and trigger a HTTP POST | ||||
|     test_url = url_for('test_endpoint',content_type="text/html", _external=True) | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_views.form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": 'nice one'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     #################################### FIRST SITUATION, PLAIN TEXT NOTIFICATION IS WANTED BUT WE HAVE HTML IN OUR TEMPLATE AND CONTENT ########## | ||||
|     wait_for_all_checks(client) | ||||
|     set_longer_modified_response(datastore_path=datastore_path) | ||||
|     time.sleep(2) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|     #    with open('/tmp/m.txt', 'w') as f: | ||||
|     #        f.write(msg_raw) | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should not have two bodies, should be TEXT only | ||||
|     assert not msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'text/plain' | ||||
|  | ||||
|     # Get the plain text content | ||||
|     text_content = msg.get_content() | ||||
|     assert '(added) So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|     assert '<!DOCTYPE html>' in text_content # even tho they added html, they selected plaintext so it should have not got converted | ||||
|  | ||||
|  | ||||
|     #################################### SECOND SITUATION, HTML IS CORRECTLY PASSED THROUGH TO THE EMAIL #################### | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|     # Now override as HTML format | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             "notification_format": 'html', | ||||
|             'fetch_backend': "html_requests", | ||||
|             "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     assert '(removed) So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|  | ||||
|     # Second part should be text/html | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|     assert '(removed) So let\'s see what happens.' in html_content  # the html part | ||||
|     assert '<!DOCTYPE html' not in html_content | ||||
|     assert '<!DOCTYPE html' in html_content # Our original template is working correctly | ||||
|  | ||||
|     # https://github.com/dgtlmoon/changedetection.io/issues/2103 | ||||
|     assert '<h1>Test</h1>' in html_content | ||||
|     assert '<' not in html_content | ||||
|  | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_check_plaintext_document_plaintext_notification_smtp(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """When following a plaintext document, notification in Plain Text format is sent correctly""" | ||||
|     import os | ||||
|  | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("Some nice plain text\nwhich we add some extra data\nover here\n") | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|     notification_body = f"""{default_notification_body}""" | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", | ||||
|               "application-notification_format": 'text', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Change the content | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("Some nice plain text\nwhich we add some extra data\nAnd let's talk about <title> tags\nover here\n") | ||||
|  | ||||
|  | ||||
|     time.sleep(1) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(get_last_message_from_smtp_server(), policy=email_policy) | ||||
|  | ||||
|     assert not msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'text/plain' | ||||
|     body = msg.get_content() | ||||
|     # nothing is escaped, raw html stuff in text/plain | ||||
|     assert 'talk about <title> tags' in body | ||||
|     assert '(added)' in body | ||||
|     assert '<br' not in body | ||||
|     assert '<' not in body | ||||
|     assert '<pre' not in body | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_check_plaintext_document_html_notifications(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """When following a plaintext document, notification in Plain Text format is sent correctly""" | ||||
|     import os | ||||
|  | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("    Some nice plain text\nwhich we add some extra data\nover here\n") | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|     notification_body = f"""{default_notification_body}""" | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", | ||||
|               "application-notification_format": 'html', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Change the content | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("    Some nice plain text\nwhich we add some extra data\nAnd let's talk about <title> tags\nover here\n") | ||||
|  | ||||
|  | ||||
|     time.sleep(2) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(get_last_message_from_smtp_server(), policy=email_policy) | ||||
|  | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|  | ||||
|  | ||||
|     assert 'And let\'s talk about <title> tags\r\n' in text_content | ||||
|     assert '<br' not in text_content | ||||
|     assert '<span' not in text_content | ||||
|  | ||||
|  | ||||
|     assert 'talk about <title>' not in html_content  # the html part, should have got marked up to < etc | ||||
|     assert 'talk about <title>' in html_content | ||||
|     # Should be the HTML, but not HTML Color | ||||
|     assert 'background-color' not in html_content | ||||
|     assert '<br>(added) And let's talk about <title> tags<br>' in html_content | ||||
|     assert '<br' not in html_content | ||||
|     assert '<pre role="article"' in html_content # Should have got wrapped nicely in email_helpers.py | ||||
|  | ||||
|     # And now for the whitespace retention | ||||
|     assert '    Some nice plain text' in html_content | ||||
|     assert '(added) And let' in html_content # just to show a single whitespace didnt get touched | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_check_plaintext_document_html_color_notifications(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """When following a plaintext document, notification in Plain Text format is sent correctly""" | ||||
|     import os | ||||
|  | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("Some nice plain text\nwhich we add some extra data\nover here\n") | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|     notification_body = f"""{default_notification_body}""" | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", | ||||
|               "application-notification_format": 'htmlcolor', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Change the content | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("Some nice plain text\nwhich we add some extra data\nAnd let's talk about <title> tags\nover here\n") | ||||
|  | ||||
|     time.sleep(1) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(get_last_message_from_smtp_server(), policy=email_policy) | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|  | ||||
|  | ||||
|     assert 'And let\'s talk about <title> tags\r\n' in text_content | ||||
|     assert '<br' not in text_content | ||||
|     assert '<span' not in text_content | ||||
|  | ||||
|     assert 'talk about <title>' not in html_content  # the html part, should have got marked up to < etc | ||||
|     assert 'talk about <title>' in html_content | ||||
|     # Should be the HTML, but not HTML Color | ||||
|     assert 'background-color' in html_content | ||||
|     assert '(added) And let' not in html_content | ||||
|     assert '<br' not in html_content | ||||
|     assert '<br>' in html_content | ||||
|     assert '<pre role="article"' in html_content # Should have got wrapped nicely in email_helpers.py | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_check_html_document_plaintext_notification(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """When following a HTML document, notification in Plain Text format is sent correctly""" | ||||
|     import os | ||||
|  | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("<html><body>some stuff<br>and more stuff<br>and even more stuff<br></body></html>") | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|     notification_body = f"""{default_notification_body}""" | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", | ||||
|               "application-notification_format": 'text', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', content_type="text/html", _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("<html><body>sxome stuff<br>and more stuff<br>lets slip this in<br>and this in<br>and even more stuff<br><tag></body></html>") | ||||
|  | ||||
|     time.sleep(0.1) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(get_last_message_from_smtp_server(), policy=email_policy) | ||||
|  | ||||
|     assert not msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'text/plain' | ||||
|     body = msg.get_content() | ||||
|  | ||||
|     assert '<tag>' in body # Should have got converted from original HTML to plaintext | ||||
|     assert '(changed) some stuff\r\n' in body | ||||
|     assert '(into) sxome stuff\r\n' in body | ||||
|     assert '(added) lets slip this in\r\n' in body | ||||
|     assert '(added) and this in\r\n' in body | ||||
|     assert ' ' not in body | ||||
|  | ||||
|  | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_check_html_notification_with_apprise_format_is_html(client, live_server, measure_memory_usage, datastore_path): | ||||
|     ##  live_server_setup(live_server) # Setup on conftest per function | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com&format=html' | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": "some text\nfallback-body<br> " + default_notification_body, | ||||
|               "application-notification_format": 'html', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
| @@ -134,50 +724,42 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     set_longer_modified_response() | ||||
|     set_longer_modified_response(datastore_path=datastore_path) | ||||
|     time.sleep(2) | ||||
|  | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|     msg = get_last_message_from_smtp_server() | ||||
|     assert len(msg) >= 1 | ||||
|     #    with open('/tmp/m.txt', 'w') as f: | ||||
|     #        f.write(msg) | ||||
|  | ||||
|     # The email should not have two bodies, should be TEXT only | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     assert 'Content-Type: text/plain' in msg | ||||
|     assert '(added) So let\'s see what happens.\r\n' in msg  # The plaintext part with \r\n | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     set_original_response() | ||||
|     # Now override as HTML format | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             "notification_format": 'HTML', | ||||
|             'fetch_backend': "html_requests", | ||||
|             "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     # The email should have two bodies (multipart/alternative with text/plain and text/html) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     time.sleep(3) | ||||
|     msg = get_last_message_from_smtp_server() | ||||
|     assert len(msg) >= 1 | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # The email should have two bodies, and the text/html part should be <br> | ||||
|     assert 'Content-Type: text/plain' in msg | ||||
|     assert '(removed) So let\'s see what happens.\r\n' in msg  # The plaintext part with \n | ||||
|     assert 'Content-Type: text/html' in msg | ||||
|     assert '(removed) So let\'s see what happens.<br>' in msg  # the html part | ||||
|     # First part should be text/plain (the auto-generated plaintext version) | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     assert '(added) So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|     assert 'fallback-body\r\n' in text_content  # The plaintext part | ||||
|     assert CUSTOM_LINEBREAK_PLACEHOLDER not in text_content | ||||
|  | ||||
|     # https://github.com/dgtlmoon/changedetection.io/issues/2103 | ||||
|     assert '<h1>Test</h1>' in msg | ||||
|     assert '<' not in msg | ||||
|     assert 'Content-Type: text/html' in msg | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     # Second part should be text/html | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|     assert 'some text<br>' in html_content  # We converted \n from the notification body | ||||
|     assert 'fallback-body<br>' in html_content  # kept the original <br> | ||||
|     assert '(added) So let\'s see what happens.<br>' in html_content  # the html part | ||||
|     assert CUSTOM_LINEBREAK_PLACEHOLDER not in html_content | ||||
|     delete_all_watches(client) | ||||
| @@ -2,7 +2,7 @@ from .util import live_server_setup, wait_for_all_checks | ||||
| from flask import url_for | ||||
| import time | ||||
|  | ||||
| def test_check_access_control(app, client, live_server): | ||||
| def test_check_access_control(app, client, live_server, measure_memory_usage, datastore_path): | ||||
|     # Still doesnt work, but this is closer. | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|   | ||||
| @@ -1,12 +1,16 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| import os.path | ||||
| import os | ||||
|  | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output | ||||
| from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output, delete_all_watches | ||||
| import time | ||||
|  | ||||
| def set_original(excluding=None, add_line=None): | ||||
| from ..diff import ADDED_PLACEMARKER_OPEN | ||||
|  | ||||
|  | ||||
| def set_original(datastore_path, excluding=None, add_line=None): | ||||
|     test_return_data = """<html> | ||||
|      <body> | ||||
|      <p>Some initial text</p> | ||||
| @@ -32,24 +36,20 @@ def set_original(excluding=None, add_line=None): | ||||
|  | ||||
|         test_return_data = output | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
| # def test_setup(client, live_server, measure_memory_usage): | ||||
| # def test_setup(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
| def test_check_removed_line_contains_trigger(client, live_server, measure_memory_usage): | ||||
| def test_check_removed_line_contains_trigger(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     set_original() | ||||
|     set_original(datastore_path=datastore_path) | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -65,9 +65,10 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory | ||||
|               "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Updated watch." in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     set_original(excluding='Something irrelevant') | ||||
|     set_original(excluding='Something irrelevant', datastore_path=datastore_path) | ||||
|  | ||||
|     # A line thats not the trigger should not trigger anything | ||||
|     res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
| @@ -75,16 +76,16 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory | ||||
|     wait_for_all_checks(client) | ||||
|     time.sleep(0.5) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     # The trigger line is REMOVED,  this should trigger | ||||
|     set_original(excluding='The golden line') | ||||
|     set_original(excluding='The golden line', datastore_path=datastore_path) | ||||
|  | ||||
|     # Check in the processor here what's going on, its triggering empty-reply and no change. | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|     time.sleep(1) | ||||
|  | ||||
| @@ -93,28 +94,26 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory | ||||
|     time.sleep(0.2) | ||||
|  | ||||
|     time.sleep(1) | ||||
|     set_original(excluding=None) | ||||
|     set_original(excluding=None, datastore_path=datastore_path) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     time.sleep(1) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     # Remove it again, and we should get a trigger | ||||
|     set_original(excluding='The golden line') | ||||
|     set_original(excluding='The golden line', datastore_path=datastore_path) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage): | ||||
| def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage, datastore_path): | ||||
|      | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
| @@ -127,6 +126,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|               "application-notification_body": 'triggered text was -{{triggered_text}}- ### 网站监测 内容更新了 ####', | ||||
|               # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation | ||||
|               "application-notification_urls": test_notification_url, | ||||
|               "application-notification_format": 'text', | ||||
|               "application-minutes_between_check": 180, | ||||
|               "application-fetch_backend": "html_requests" | ||||
|               }, | ||||
| @@ -134,15 +134,11 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|     ) | ||||
|     assert b'Settings updated' in res.data | ||||
|  | ||||
|     set_original() | ||||
|     set_original(datastore_path=datastore_path) | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -161,7 +157,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     set_original(excluding='Something irrelevant') | ||||
|     set_original(excluding='Something irrelevant', datastore_path=datastore_path) | ||||
|  | ||||
|     # A line thats not the trigger should not trigger anything | ||||
|     res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
| @@ -169,23 +165,23 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     # The trigger line is ADDED,  this should trigger | ||||
|     set_original(add_line='<p>Oh yes please</p>') | ||||
|     set_original(add_line='<p>Oh yes please</p>', datastore_path=datastore_path) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|  | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|     # Takes a moment for apprise to fire | ||||
|     wait_for_notification_endpoint_output() | ||||
|     assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file" | ||||
|     with open("test-datastore/notification.txt", 'rb') as f: | ||||
|     wait_for_notification_endpoint_output(datastore_path=datastore_path) | ||||
|     assert os.path.isfile(os.path.join(datastore_path, "notification.txt")), "Notification fired because I can see the output file" | ||||
|     with open(os.path.join(datastore_path, "notification.txt"), 'rb') as f: | ||||
|         response = f.read() | ||||
|         assert ADDED_PLACEMARKER_OPEN.encode('utf-8') not in response #  _apply_diff_filtering shouldnt add something here | ||||
|         assert b'-Oh yes please' in response | ||||
|         assert '网站监测 内容更新了'.encode('utf-8') in response | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|   | ||||
| @@ -2,13 +2,14 @@ | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, delete_all_watches | ||||
| import os | ||||
|  | ||||
| import json | ||||
| import uuid | ||||
|  | ||||
|  | ||||
| def set_original_response(): | ||||
| def set_original_response(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
| @@ -21,12 +22,12 @@ def set_original_response(): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def set_modified_response(): | ||||
| def set_modified_response(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
| @@ -39,7 +40,7 @@ def set_modified_response(): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|     return None | ||||
| @@ -52,17 +53,17 @@ def is_valid_uuid(val): | ||||
|         return False | ||||
|  | ||||
|  | ||||
| # def test_setup(client, live_server, measure_memory_usage): | ||||
| # def test_setup(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|  | ||||
| def test_api_simple(client, live_server, measure_memory_usage): | ||||
| def test_api_simple(client, live_server, measure_memory_usage, datastore_path): | ||||
|      | ||||
|  | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|     # Create a watch | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|     # Validate bad URL | ||||
|     test_url = url_for('test_endpoint', _external=True ) | ||||
| @@ -111,7 +112,7 @@ def test_api_simple(client, live_server, measure_memory_usage): | ||||
|     time.sleep(1) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     set_modified_response() | ||||
|     set_modified_response(datastore_path=datastore_path) | ||||
|     # Trigger recheck of all ?recheck_all=1 | ||||
|     client.get( | ||||
|         url_for("createwatch", recheck_all='1'), | ||||
| @@ -244,7 +245,7 @@ def test_api_simple(client, live_server, measure_memory_usage): | ||||
|     ) | ||||
|     assert len(res.json) == 0, "Watch list should be empty" | ||||
|  | ||||
| def test_access_denied(client, live_server, measure_memory_usage): | ||||
| def test_access_denied(client, live_server, measure_memory_usage, datastore_path): | ||||
|     # `config_api_token_enabled` Should be On by default | ||||
|     res = client.get( | ||||
|         url_for("createwatch") | ||||
| @@ -276,8 +277,7 @@ def test_access_denied(client, live_server, measure_memory_usage): | ||||
|     assert res.status_code == 200 | ||||
|  | ||||
|     # Cleanup everything | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
| @@ -290,11 +290,11 @@ def test_access_denied(client, live_server, measure_memory_usage): | ||||
|     ) | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
| def test_api_watch_PUT_update(client, live_server, measure_memory_usage): | ||||
| def test_api_watch_PUT_update(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|     # Create a watch | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
|     # Create new | ||||
| @@ -371,7 +371,7 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     ###################################################### | ||||
|  | ||||
|     # HTTP PUT try a field that doenst exist | ||||
|     # HTTP PUT try a field that doesn't exist | ||||
|  | ||||
|     # HTTP PUT an update | ||||
|     res = client.put( | ||||
| @@ -384,19 +384,30 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage): | ||||
|     # Message will come from `flask_expects_json` | ||||
|     assert b'Additional properties are not allowed' in res.data | ||||
|  | ||||
|  | ||||
|     # Try a XSS URL | ||||
|     res = client.put( | ||||
|         url_for("watch", uuid=watch_uuid), | ||||
|         headers={'x-api-key': api_key, 'content-type': 'application/json'}, | ||||
|         data=json.dumps({ | ||||
|             'url': 'javascript:alert(document.domain)' | ||||
|         }), | ||||
|     ) | ||||
|     assert res.status_code == 400 | ||||
|  | ||||
|     # Cleanup everything | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_api_import(client, live_server, measure_memory_usage): | ||||
| def test_api_import(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("import") + "?tag=import-test", | ||||
|         data='https://website1.com\r\nhttps://website2.com', | ||||
|         headers={'x-api-key': api_key, 'content-type': 'text/plain'}, | ||||
|         # We removed  'content-type': 'text/plain', the Import API should assume this if none is set #3547 #3542 | ||||
|         headers={'x-api-key': api_key}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
| @@ -410,7 +421,7 @@ def test_api_import(client, live_server, measure_memory_usage): | ||||
|     res = client.get(url_for('tags.tags_overview_page')) | ||||
|     assert b'import-test' in res.data | ||||
|  | ||||
| def test_api_conflict_UI_password(client, live_server, measure_memory_usage): | ||||
| def test_api_conflict_UI_password(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|      | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
| @@ -428,7 +439,7 @@ def test_api_conflict_UI_password(client, live_server, measure_memory_usage): | ||||
|     assert b"Password protection enabled." in res.data | ||||
|  | ||||
|     # Create a watch | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
|     # Create new | ||||
|   | ||||
| @@ -4,7 +4,7 @@ from flask import url_for | ||||
| from .util import live_server_setup | ||||
| import json | ||||
|  | ||||
| def test_api_notifications_crud(client, live_server): | ||||
| def test_api_notifications_crud(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|   | ||||
| @@ -12,7 +12,7 @@ from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage): | ||||
| def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """Test that creating a watch with invalid content-type triggers OpenAPI validation error.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
| @@ -29,7 +29,7 @@ def test_openapi_validation_invalid_content_type_on_create_watch(client, live_se | ||||
|     assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage): | ||||
| def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """Test that creating a watch without required URL field triggers OpenAPI validation error.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
| @@ -46,7 +46,7 @@ def test_openapi_validation_missing_required_field_create_watch(client, live_ser | ||||
|     assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage): | ||||
| def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """Test that including invalid fields triggers OpenAPI validation error.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
| @@ -83,7 +83,7 @@ def test_openapi_validation_invalid_field_in_request_body(client, live_server, m | ||||
|     assert b"Additional properties are not allowed" in res.data, "Should contain validation error about additional properties" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage): | ||||
| def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """Test that import endpoint with wrong content-type triggers OpenAPI validation error.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
| @@ -100,7 +100,7 @@ def test_openapi_validation_import_wrong_content_type(client, live_server, measu | ||||
|     assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage): | ||||
| def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """Test that import endpoint with correct content-type succeeds (positive test).""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
| @@ -117,7 +117,7 @@ def test_openapi_validation_import_correct_content_type_succeeds(client, live_se | ||||
|     assert len(res.json) == 2, "Should import 2 URLs" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_get_requests_bypass_validation(client, live_server, measure_memory_usage): | ||||
| def test_openapi_validation_get_requests_bypass_validation(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """Test that GET requests bypass OpenAPI validation entirely.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
| @@ -141,7 +141,7 @@ def test_openapi_validation_get_requests_bypass_validation(client, live_server, | ||||
|     assert isinstance(res.json, dict), "Should return JSON dictionary for watch list" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_create_tag_missing_required_title(client, live_server, measure_memory_usage): | ||||
| def test_openapi_validation_create_tag_missing_required_title(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """Test that creating a tag without required title triggers OpenAPI validation error.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
| @@ -158,7 +158,7 @@ def test_openapi_validation_create_tag_missing_required_title(client, live_serve | ||||
|     assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage): | ||||
| def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """Test that watch updates allow partial updates without requiring all fields (positive test).""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|   | ||||
| @@ -6,7 +6,7 @@ import time | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def test_api_search(client, live_server): | ||||
| def test_api_search(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|   | ||||
| @@ -5,13 +5,14 @@ from .util import live_server_setup, wait_for_all_checks, set_original_response | ||||
| import json | ||||
| import time | ||||
|  | ||||
| def test_api_tags_listing(client, live_server, measure_memory_usage): | ||||
| def test_api_tags_listing(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|     tag_title = 'Test Tag' | ||||
|  | ||||
|  | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("tags"), | ||||
|   | ||||
| @@ -5,19 +5,15 @@ from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
| # test pages with http://username@password:foobar.com/ work | ||||
| def test_basic_auth(client, live_server, measure_memory_usage): | ||||
| def test_basic_auth(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|  | ||||
|     # This page will echo back any auth info | ||||
|     test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@") | ||||
|     time.sleep(1) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     time.sleep(1) | ||||
|     # Check form validation | ||||
|   | ||||
| @@ -3,9 +3,10 @@ | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, extract_UUID_from_client, wait_for_all_checks | ||||
| import os | ||||
|  | ||||
|  | ||||
| def set_response_with_ldjson(): | ||||
| def set_response_with_ldjson(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
| @@ -55,11 +56,11 @@ def set_response_with_ldjson(): | ||||
|      </html> | ||||
| """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
| def set_response_without_ldjson(): | ||||
| def set_response_without_ldjson(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
| @@ -72,26 +73,22 @@ def set_response_without_ldjson(): | ||||
|      </html> | ||||
| """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
| # def test_setup(client, live_server, measure_memory_usage): | ||||
| # def test_setup(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
| # actually only really used by the distll.io importer, but could be handy too | ||||
| def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage): | ||||
| def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage, datastore_path): | ||||
|      | ||||
|     set_response_with_ldjson() | ||||
|     set_response_with_ldjson(datastore_path=datastore_path) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Should get a notice that it's available | ||||
| @@ -125,16 +122,12 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage | ||||
|  | ||||
|     ########################################################################################## | ||||
|     # And we shouldnt see the offer | ||||
|     set_response_without_ldjson() | ||||
|     set_response_without_ldjson(datastore_path=datastore_path) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'ldjson-price-track-offer' not in res.data | ||||
| @@ -146,12 +139,8 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage | ||||
| def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data): | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     for k,v in client.application.config.get('DATASTORE').data['watching'].items(): | ||||
| @@ -163,7 +152,7 @@ def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_ | ||||
|     client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|  | ||||
|  | ||||
| def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usage): | ||||
| def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usage, datastore_path): | ||||
|      | ||||
|     test_return_data = """ | ||||
|             <html> | ||||
| @@ -193,7 +182,7 @@ def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usa | ||||
|             <div class="yes">Some extra stuff</div> | ||||
|             </body></html> | ||||
|      """ | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|     _test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=True) | ||||
| @@ -227,7 +216,7 @@ def test_bad_ldjson_is_correctly_ignored(client, live_server, measure_memory_usa | ||||
|     #         <div class="yes">Some extra stuff</div> | ||||
|     #         </body></html> | ||||
|     #  """ | ||||
|     # with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     # with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|     #     f.write(test_return_data) | ||||
|     # | ||||
|     # _test_runner_check_bad_format_ignored(live_server=live_server, client=client, has_ldjson_price_data=False) | ||||
|   | ||||
| @@ -1,9 +1,10 @@ | ||||
| #!/usr/bin/env python3 | ||||
| import os | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \ | ||||
|     extract_UUID_from_client | ||||
|     extract_UUID_from_client, delete_all_watches | ||||
|  | ||||
| sleep_time_for_fetch_thread = 3 | ||||
|  | ||||
| @@ -16,8 +17,8 @@ def test_inscriptus(): | ||||
|     assert stripped_text_from_html == 'test!\nok man' | ||||
|  | ||||
|  | ||||
| def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage): | ||||
|     set_original_response() | ||||
| def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage, datastore_path): | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|     # Add our URL to the import page | ||||
| @@ -38,9 +39,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|         # Give the thread time to pick it up | ||||
|         wait_for_all_checks(client) | ||||
|  | ||||
|         # It should report nothing found (no new 'unviewed' class) | ||||
|         # It should report nothing found (no new 'has-unread-changes' class) | ||||
|         res = client.get(url_for("watchlist.index")) | ||||
|         assert b'unviewed' not in res.data | ||||
|         assert b'has-unread-changes' not in res.data | ||||
|         assert b'test-endpoint' in res.data | ||||
|  | ||||
|         # Default no password set, this stuff should be always available. | ||||
| @@ -60,7 +61,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     assert b'foobar-detection' not in res.data | ||||
|  | ||||
|     # Make a change | ||||
|     set_modified_response() | ||||
|     set_modified_response(datastore_path=datastore_path) | ||||
|  | ||||
|     # Force recheck | ||||
|     res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
| @@ -74,9 +75,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     res = client.get(url_for("ui.ui_edit.watch_get_latest_html", uuid=uuid)) | ||||
|     assert b'which has this one new line' in res.data | ||||
|  | ||||
|     # Now something should be ready, indicated by having a 'unviewed' class | ||||
|     # Now something should be ready, indicated by having a 'has-unread-changes' class | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|     # #75, and it should be in the RSS feed | ||||
|     rss_token = extract_rss_token_from_UI(client) | ||||
| @@ -90,7 +91,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|  | ||||
|     assert expected_url.encode('utf-8') in res.data | ||||
| # | ||||
|     # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times | ||||
|     # Following the 'diff' link, it should no longer display as 'has-unread-changes' even after we recheck it a few times | ||||
|     res = client.get(url_for("ui.ui_views.diff_history_page", uuid=uuid)) | ||||
|     assert b'selected=""' in res.data, "Confirm diff history page loaded" | ||||
|  | ||||
| @@ -111,17 +112,17 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|         # Give the thread time to pick it up | ||||
|         wait_for_all_checks(client) | ||||
|  | ||||
|         # It should report nothing found (no new 'unviewed' class) | ||||
|         # It should report nothing found (no new 'has-unread-changes' class) | ||||
|         res = client.get(url_for("watchlist.index")) | ||||
|  | ||||
|  | ||||
|         assert b'unviewed' not in res.data | ||||
|         assert b'class="has-unviewed' not in res.data | ||||
|         assert b'has-unread-changes' not in res.data | ||||
|         assert b'class="has-unread-changes' not in res.data | ||||
|         assert b'head title' in res.data  # Should be ON by default | ||||
|         assert b'test-endpoint' in res.data | ||||
|  | ||||
|     # Recheck it but only with a title change, content wasnt changed | ||||
|     set_original_response(extra_title=" and more") | ||||
|     set_original_response(datastore_path=datastore_path, extra_title=" and more") | ||||
|  | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
| @@ -140,8 +141,8 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'class="has-unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|     assert b'class="has-unread-changes' in res.data | ||||
|     assert b'head title' not in res.data  # should now be off | ||||
|  | ||||
|  | ||||
| @@ -151,8 +152,8 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     # hit the mark all viewed link | ||||
|     res = client.get(url_for("ui.mark_all_viewed"), follow_redirects=True) | ||||
|  | ||||
|     assert b'class="has-unviewed' not in res.data | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'class="has-unread-changes' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     # #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again | ||||
|     client.get(url_for("ui.clear_watch_history", uuid=uuid)) | ||||
| @@ -163,5 +164,219 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|  | ||||
|     # | ||||
|     # Cleanup everything | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| # Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that | ||||
| def test_requests_timeout(client, live_server, measure_memory_usage, datastore_path): | ||||
|     delay = 2 | ||||
|     test_url = url_for('test_endpoint', delay=delay, _external=True) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-ui-use_page_title_in_list": "", | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               "requests-timeout": delay - 1, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # requests takes >2 sec but we timeout at 1 second | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'Read timed out. (read timeout=1)' in res.data | ||||
|  | ||||
|     ##### Now set a longer timeout | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-ui-use_page_title_in_list": "", | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               "requests-timeout": delay + 1, # timeout should be a second more than the reply time | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'Read timed out' not in res.data | ||||
|  | ||||
| def test_non_text_mime_or_downloads(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """ | ||||
|  | ||||
|     https://github.com/dgtlmoon/changedetection.io/issues/3434 | ||||
|     I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8, | ||||
|     but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog), | ||||
|     changedetection somehow ignores all line breaks and treats the document file as if everything is on one line. | ||||
|  | ||||
|     WHAT THIS DOES - makes the system rely on 'magic' to determine what is it | ||||
|  | ||||
|     :param client: | ||||
|     :param live_server: | ||||
|     :param measure_memory_usage: | ||||
|     :return: | ||||
|     """ | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("""some random text that should be split by line | ||||
| and not parsed with html_to_text | ||||
| this way we know that it correctly parsed as plain text | ||||
| \r\n | ||||
| ok\r\n | ||||
| got it\r\n | ||||
| """) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', content_type="application/octet-stream", _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     ### check the front end | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_views.preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"some random text that should be split by line\n" in res.data | ||||
|     #### | ||||
|  | ||||
|     # Check the snapshot by API that it has linefeeds too | ||||
|     watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|     res = client.get( | ||||
|         url_for("watchhistory", uuid=watch_uuid), | ||||
|         headers={'x-api-key': api_key}, | ||||
|     ) | ||||
|  | ||||
|     # Fetch a snapshot by timestamp, check the right one was found | ||||
|     res = client.get( | ||||
|         url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]), | ||||
|         headers={'x-api-key': api_key}, | ||||
|     ) | ||||
|     assert b"some random text that should be split by line\n" in res.data | ||||
|  | ||||
|  | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_standard_text_plain(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """ | ||||
|  | ||||
|     https://github.com/dgtlmoon/changedetection.io/issues/3434 | ||||
|     I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8, | ||||
|     but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog), | ||||
|     changedetection somehow ignores all line breaks and treats the document file as if everything is on one line. | ||||
|  | ||||
|     The real bug here can be that it will try to process plain-text as HTML, losing <etc> | ||||
|  | ||||
|     :param client: | ||||
|     :param live_server: | ||||
|     :param measure_memory_usage: | ||||
|     :return: | ||||
|     """ | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("""some random text that should be split by line | ||||
| and not parsed with html_to_text | ||||
| <title>Even this title should stay because we are just plain text</title> | ||||
| this way we know that it correctly parsed as plain text | ||||
| \r\n | ||||
| ok\r\n | ||||
| got it\r\n | ||||
| """) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     ### check the front end | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_views.preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"some random text that should be split by line\n" in res.data | ||||
|     #### | ||||
|  | ||||
|     # Check the snapshot by API that it has linefeeds too | ||||
|     watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|     res = client.get( | ||||
|         url_for("watchhistory", uuid=watch_uuid), | ||||
|         headers={'x-api-key': api_key}, | ||||
|     ) | ||||
|  | ||||
|     # Fetch a snapshot by timestamp, check the right one was found | ||||
|     res = client.get( | ||||
|         url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]), | ||||
|         headers={'x-api-key': api_key}, | ||||
|     ) | ||||
|     assert b"some random text that should be split by line\n" in res.data | ||||
|     assert b"<title>Even this title should stay because we are just plain text</title>" in res.data | ||||
|  | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| # Server says its plaintext, we should always treat it as plaintext | ||||
| def test_plaintext_even_if_xml_content(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("""<?xml version="1.0" encoding="utf-8"?> | ||||
| <resources xmlns:tools="http://schemas.android.com/tools"> | ||||
|     <!--Activity and fragment titles--> | ||||
|     <string name="feed_update_receiver_name">Abonnementen bijwerken</string> | ||||
| </resources> | ||||
| """) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_views.preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b'<string name="feed_update_receiver_name"' in res.data | ||||
|  | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| # Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that | ||||
| def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|  | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("""<?xml version="1.0" encoding="utf-8"?> | ||||
| <resources xmlns:tools="http://schemas.android.com/tools"> | ||||
|     <!--Activity and fragment titles--> | ||||
|     <string name="feed_update_receiver_name">Abonnementen bijwerken</string> | ||||
|     <foobar>ok man</foobar> | ||||
| </resources> | ||||
| """) | ||||
|  | ||||
|     test_url=url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"include_filters": ['//string']}) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_views.preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b'<string name="feed_update_receiver_name"' in res.data | ||||
|     assert b'<foobar' not in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|   | ||||
| @@ -8,13 +8,11 @@ import re | ||||
| import time | ||||
|  | ||||
|  | ||||
| def test_backup(client, live_server, measure_memory_usage): | ||||
| def test_backup(client, live_server, measure_memory_usage, datastore_path): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
| @@ -31,7 +29,7 @@ def test_backup(client, live_server, measure_memory_usage): | ||||
|         url_for("backups.request_backup"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     time.sleep(2) | ||||
|     time.sleep(4) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("backups.index"), | ||||
|   | ||||
| @@ -10,11 +10,12 @@ from .util import ( | ||||
| ) | ||||
| from loguru import logger | ||||
|  | ||||
| def run_socketio_watch_update_test(client, live_server, password_mode=""): | ||||
| def run_socketio_watch_update_test(client, live_server, password_mode="", datastore_path=""): | ||||
|     """Test that the socketio emits a watch update event when content changes""" | ||||
|  | ||||
|     # Set up the test server | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|  | ||||
|     # Get the SocketIO instance from the app | ||||
|     from changedetectionio.flask_app import app | ||||
| @@ -47,7 +48,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""): | ||||
|     socketio_test_client.get_received() | ||||
|  | ||||
|     # Make a change to trigger an update | ||||
|     set_modified_response() | ||||
|     set_modified_response(datastore_path=datastore_path) | ||||
|  | ||||
|     # Force recheck | ||||
|     res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
| @@ -58,6 +59,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""): | ||||
|  | ||||
|     has_watch_update = False | ||||
|     has_unviewed_update = False | ||||
|     got_general_stats_update = False | ||||
|  | ||||
|     for i in range(10): | ||||
|         # Get received events | ||||
| @@ -65,15 +67,11 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""): | ||||
|  | ||||
|         if received: | ||||
|             logger.info(f"Received {len(received)} events after {i+1} seconds") | ||||
|  | ||||
|             # Check for watch_update events with unviewed=True | ||||
|             for event in received: | ||||
|                 if event['name'] == 'watch_update': | ||||
|                     has_watch_update = True | ||||
|                     if event['args'][0]['watch'].get('unviewed', False): | ||||
|                         has_unviewed_update = True | ||||
|                         logger.info("Found unviewed update event!") | ||||
|                         break | ||||
|                 if event['name'] == 'general_stats_update': | ||||
|                     got_general_stats_update = True | ||||
|  | ||||
|         if has_unviewed_update: | ||||
|             break | ||||
| @@ -92,7 +90,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""): | ||||
|     assert has_watch_update, "No watch_update events received" | ||||
|  | ||||
|     # Verify we received an unviewed event | ||||
|     assert has_unviewed_update, "No watch_update event with unviewed=True received" | ||||
|     assert got_general_stats_update, "Got general stats update event" | ||||
|  | ||||
|     # Alternatively, check directly if the watch in the datastore is marked as unviewed | ||||
|     from changedetectionio.flask_app import app | ||||
| @@ -108,11 +106,11 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""): | ||||
|     # Clean up | ||||
|     client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|  | ||||
| def test_everything(live_server, client): | ||||
| def test_everything(live_server, client, measure_memory_usage, datastore_path): | ||||
|  | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|     run_socketio_watch_update_test(password_mode="", live_server=live_server, client=client) | ||||
|     run_socketio_watch_update_test(password_mode="", live_server=live_server, client=client, datastore_path=datastore_path) | ||||
|  | ||||
|     ############################ Password required auth check ############################## | ||||
|  | ||||
| @@ -127,7 +125,7 @@ def test_everything(live_server, client): | ||||
|  | ||||
|     assert b"Password protection enabled." in res.data | ||||
|  | ||||
|     run_socketio_watch_update_test(password_mode="not logged in, should exit on connect", live_server=live_server, client=client) | ||||
|     run_socketio_watch_update_test(password_mode="not logged in, should exit on connect", live_server=live_server, client=client, datastore_path=datastore_path) | ||||
|     res = client.post( | ||||
|         url_for("login"), | ||||
|         data={"password": "foobar"}, | ||||
| @@ -136,4 +134,4 @@ def test_everything(live_server, client): | ||||
|  | ||||
|     # Yes we are correctly logged in | ||||
|     assert b"LOG OUT" in res.data | ||||
|     run_socketio_watch_update_test(password_mode="should be like normal", live_server=live_server, client=client) | ||||
|     run_socketio_watch_update_test(password_mode="should be like normal", live_server=live_server, client=client, datastore_path=datastore_path) | ||||
|   | ||||
| @@ -2,10 +2,11 @@ | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, delete_all_watches | ||||
| from changedetectionio import html_tools | ||||
| import os | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
| def set_original_ignore_response(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
| @@ -17,11 +18,11 @@ def set_original_ignore_response(): | ||||
|  | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def set_modified_original_ignore_response(): | ||||
| def set_modified_original_ignore_response(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some NEW nice initial text<br> | ||||
| @@ -36,12 +37,12 @@ def set_modified_original_ignore_response(): | ||||
|  | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| # Is the same but includes ZZZZZ, 'ZZZZZ' is the last line in ignore_text | ||||
| def set_modified_response_minus_block_text(): | ||||
| def set_modified_response_minus_block_text(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some NEW nice initial text<br> | ||||
| @@ -56,26 +57,22 @@ def set_modified_response_minus_block_text(): | ||||
|  | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def test_check_block_changedetection_text_NOT_present(client, live_server, measure_memory_usage): | ||||
| def test_check_block_changedetection_text_NOT_present(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     # Use a mix of case in ZzZ to prove it works case-insensitive. | ||||
|     ignore_text = "out of stoCk\r\nfoobar" | ||||
|     set_original_ignore_response() | ||||
|     set_original_ignore_response(datastore_path=datastore_path) | ||||
|  | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -107,42 +104,42 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     # It should report nothing found (no new 'has-unread-changes' class) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
|  | ||||
|     # The page changed, BUT the text is still there, just the rest of it changes, we should not see a change | ||||
|     set_modified_original_ignore_response() | ||||
|     set_modified_original_ignore_response(datastore_path=datastore_path) | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     # It should report nothing found (no new 'has-unread-changes' class) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
|  | ||||
|     # 2548 | ||||
|     # Going back to the ORIGINAL should NOT trigger a change | ||||
|     set_original_ignore_response() | ||||
|     set_original_ignore_response(datastore_path=datastore_path) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|  | ||||
|     # Now we set a change where the text is gone AND its different content, it should now trigger | ||||
|     set_modified_response_minus_block_text() | ||||
|     set_modified_response_minus_block_text(datastore_path=datastore_path) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|   | ||||
| @@ -3,23 +3,20 @@ | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| import os | ||||
|  | ||||
|  | ||||
| def test_clone_functionality(client, live_server, measure_memory_usage): | ||||
| def test_clone_functionality(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("<html><body>Some content</body></html>") | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # So that we can be sure the same history doesnt carry over | ||||
|   | ||||
| @@ -1,13 +1,14 @@ | ||||
| #!/usr/bin/env python3 | ||||
| import json | ||||
| import time | ||||
| import os | ||||
|  | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, delete_all_watches | ||||
| from ..model import CONDITIONS_MATCH_LOGIC_DEFAULT | ||||
|  | ||||
|  | ||||
| def set_original_response(number="50"): | ||||
| def set_original_response(datastore_path, number="50"): | ||||
|     test_return_data = f"""<html> | ||||
|        <body> | ||||
|      <h1>Test Page for Conditions</h1> | ||||
| @@ -17,10 +18,10 @@ def set_original_response(number="50"): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
| def set_number_in_range_response(number="75"): | ||||
| def set_number_in_range_response(datastore_path, number="75"): | ||||
|     test_return_data = f"""<html> | ||||
|        <body> | ||||
|      <h1>Test Page for Conditions</h1> | ||||
| @@ -30,10 +31,10 @@ def set_number_in_range_response(number="75"): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
| def set_number_out_of_range_response(number="150"): | ||||
| def set_number_out_of_range_response(datastore_path, number="150"): | ||||
|     test_return_data = f"""<html> | ||||
|        <body> | ||||
|      <h1>Test Page for Conditions</h1> | ||||
| @@ -43,29 +44,25 @@ def set_number_out_of_range_response(number="150"): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| # def test_setup(client, live_server): | ||||
| # def test_setup(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """Test that both text and number conditions work together with AND logic.""" | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
| def test_conditions_with_text_and_number(client, live_server): | ||||
| def test_conditions_with_text_and_number(client, live_server, measure_memory_usage, datastore_path): | ||||
|     """Test that both text and number conditions work together with AND logic.""" | ||||
|      | ||||
|     set_original_response("50") | ||||
|     set_original_response(datastore_path=datastore_path, number="50") | ||||
|      | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Configure the watch with two conditions connected with AND: | ||||
| @@ -118,14 +115,14 @@ def test_conditions_with_text_and_number(client, live_server): | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Case 1 | ||||
|     set_number_in_range_response("70.5") | ||||
|     set_number_in_range_response(datastore_path=datastore_path, number="70.5") | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(2) | ||||
|     # 75 is > 20 and < 100 and contains "5" | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|  | ||||
|     # Case 2: Change with one condition violated | ||||
| @@ -133,7 +130,7 @@ def test_conditions_with_text_and_number(client, live_server): | ||||
|     client.get(url_for("ui.mark_all_viewed"), follow_redirects=True) | ||||
|     time.sleep(0.2) | ||||
|  | ||||
|     set_number_out_of_range_response("150.5") | ||||
|     set_number_out_of_range_response(datastore_path=datastore_path, number="150.5") | ||||
|  | ||||
|  | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
| @@ -141,25 +138,20 @@ def test_conditions_with_text_and_number(client, live_server): | ||||
|  | ||||
|     # Should NOT be marked as having changes since not all conditions are met | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| # The 'validate' button next to each rule row | ||||
| def test_condition_validate_rule_row(client, live_server): | ||||
| def test_condition_validate_rule_row(client, live_server, measure_memory_usage, datastore_path): | ||||
|  | ||||
|     set_original_response("50") | ||||
|     set_original_response(datastore_path=datastore_path, number="50") | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) | ||||
| @@ -212,7 +204,7 @@ def test_condition_validate_rule_row(client, live_server): | ||||
|  | ||||
|  | ||||
| # If there was only a change in the whitespacing, then we shouldnt have a change detected | ||||
| def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage): | ||||
| def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage, datastore_path): | ||||
|      | ||||
|  | ||||
|     test_return_data = """<html> | ||||
| @@ -225,17 +217,13 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -255,10 +243,10 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|     ) | ||||
|  | ||||
| # If there was only a change in the whitespacing, then we shouldnt have a change detected | ||||
| def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|      | ||||
| def test_lev_conditions_plugin(client, live_server, measure_memory_usage, datastore_path): | ||||
|     # This should break.. | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("""<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
| @@ -299,7 +287,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     # Check the content saved initially, even tho a condition was set - this is the first snapshot so shouldnt be affected by conditions | ||||
|     res = client.get( | ||||
| @@ -310,7 +298,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|  | ||||
|  | ||||
|     ############### Now change it a LITTLE bit... | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("""<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
| @@ -326,7 +314,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data #because this will be like 0.90 not 0.8 threshold | ||||
|     assert b'has-unread-changes' not in res.data #because this will be like 0.90 not 0.8 threshold | ||||
|  | ||||
|     ############### Now change it a MORE THAN 50% | ||||
|     test_return_data = """<html> | ||||
| @@ -339,13 +327,13 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     assert b'Queued 1 watch for rechecking.' in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("ui.form_delete", uuid="all"), | ||||
|   | ||||
| @@ -3,12 +3,13 @@ | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| import os | ||||
|  | ||||
| from ..html_tools import * | ||||
|  | ||||
|  | ||||
|  | ||||
| def set_original_response(): | ||||
| def set_original_response(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
| @@ -21,11 +22,11 @@ def set_original_response(): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
| def set_modified_response(): | ||||
| def set_modified_response(datastore_path): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
| @@ -38,7 +39,7 @@ def set_modified_response(): | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|     return None | ||||
| @@ -69,24 +70,20 @@ def test_include_filters_output(): | ||||
|  | ||||
|  | ||||
| # Tests the whole stack works with the CSS Filter | ||||
| def test_check_markup_include_filters_restriction(client, live_server, measure_memory_usage): | ||||
| def test_check_markup_include_filters_restriction(client, live_server, measure_memory_usage, datastore_path): | ||||
|     sleep_time_for_fetch_thread = 3 | ||||
|  | ||||
|     include_filters = "#sametext" | ||||
|  | ||||
|     set_original_response() | ||||
|     set_original_response(datastore_path=datastore_path) | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
| @@ -109,25 +106,25 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     #  Make a change | ||||
|     set_modified_response() | ||||
|     set_modified_response(datastore_path=datastore_path) | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # It should have 'unviewed' still | ||||
|     # It should have 'has-unread-changes' still | ||||
|     # Because it should be looking at only that 'sametext' id | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|  | ||||
| # Tests the whole stack works with the CSS Filter | ||||
| def test_check_multiple_filters(client, live_server, measure_memory_usage): | ||||
| def test_check_multiple_filters(client, live_server, measure_memory_usage, datastore_path): | ||||
|      | ||||
|     include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]" | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("""<html><body> | ||||
|      <div id="blob-a">Blob A</div> | ||||
|      <div id="blob-b">Blob B</div> | ||||
| @@ -138,12 +135,8 @@ def test_check_multiple_filters(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Goto the edit page, add our ignore text | ||||
| @@ -176,12 +169,12 @@ def test_check_multiple_filters(client, live_server, measure_memory_usage): | ||||
| # The filter exists, but did not contain anything useful | ||||
| # Mainly used when the filter contains just an IMG, this can happen when someone selects an image in the visual-selector | ||||
| # Tests fetcher can throw a "ReplyWithContentButNoText" exception after applying filter and extracting text | ||||
| def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usage): | ||||
| def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usage, datastore_path): | ||||
|      | ||||
|  | ||||
|     include_filters = "#blob-a" | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("""<html><body> | ||||
|          <div id="blob-a"> | ||||
|            <img src="something.jpg"> | ||||
| @@ -193,12 +186,8 @@ def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usa | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Goto the edit page, add our ignore text | ||||
| @@ -228,7 +217,7 @@ def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usa | ||||
|  | ||||
|     ### Just an empty selector, no image | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|     with open(os.path.join(datastore_path, "endpoint-content.txt"), "w") as f: | ||||
|         f.write("""<html><body> | ||||
|          <div id="blob-a"> | ||||
|            <!-- doo doo --> | ||||
|   | ||||
							
								
								
									
										19
									
								
								changedetectionio/tests/test_datastore_isolation.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								changedetectionio/tests/test_datastore_isolation.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | ||||
| #!/usr/bin/env python3 | ||||
| """Test to verify client and live_server share the same datastore""" | ||||
|  | ||||
| def test_client_and_live_server_share_datastore(client, live_server): | ||||
|     """Verify that client and live_server use the same app and datastore.""" | ||||
|  | ||||
|     # They should be the SAME object | ||||
|     assert client.application is live_server.app, "client.application and live_server.app should be the SAME object!" | ||||
|  | ||||
|     # They should share the same datastore | ||||
|     client_datastore = client.application.config.get('DATASTORE') | ||||
|     server_datastore = live_server.app.config.get('DATASTORE') | ||||
|  | ||||
|     assert client_datastore is server_datastore, \ | ||||
|         f"Datastores are DIFFERENT objects! client={hex(id(client_datastore))} server={hex(id(server_datastore))}" | ||||
|  | ||||
|     print(f"✓ client.application and live_server.app are the SAME object") | ||||
|     print(f"✓ Both use the same DATASTORE at {hex(id(client_datastore))}") | ||||
|     print(f"✓ Datastore path: {client_datastore.datastore_path}") | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user