mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 06:37:41 +00:00 
			
		
		
		
	Compare commits
	
		
			108 Commits
		
	
	
		
			3423-opena
			...
			url-valida
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | d07cb2a7a8 | ||
|   | f9cd4da2bb | ||
|   | e09cea60ef | ||
|   | 4e699cc13b | ||
|   | 964302cf3c | ||
|   | 00b31cabd5 | ||
|   | f304ae19db | ||
|   | ac13d8cbde | ||
|   | a037cf7b9a | ||
|   | 8a7ea79fb3 | ||
|   | ec0d7cff21 | ||
|   | 2116b2cb93 | ||
|   | d48c82052a | ||
|   | 552e98519b | ||
|   | 8f580ac96b | ||
|   | a8cadc3d16 | ||
|   | c9290d73e0 | ||
|   | 2db5e906e9 | ||
|   | 0751bd371a | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 3ffa0805e9 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 3335270692 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | a7573b10ec | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | df945ad743 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 4536e95205 | ||
|   | 1479d7bd46 | ||
|   | 9ba2094f75 | ||
|   | 8aa012ba8e | ||
|   | 8bc6b10db1 | ||
|   | 76d799c95b | ||
|   | 7c8bdfcc9f | ||
|   | 01a938d7ce | ||
|   | e44853c439 | ||
|   | 3830bec891 | ||
|   | 88ab663330 | ||
|   | 68335b95c3 | ||
|   | 7bbfa0ef32 | ||
|   | e233d52931 | ||
|   | 181d32e82a | ||
|   | a51614f83d | ||
|   | 07f98d6bd3 | ||
|   | f71550da4d | ||
|   | 8c3d0d7e31 | ||
|   | 46658a85d6 | ||
|   | d699652955 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 9e88db5d9b | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 5d9c102aff | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | cb1c36d97d | ||
|   | cc29ba5ea9 | ||
|   | 6f371b1bc6 | ||
|   | 785dabd071 | ||
|   | 09914d54a0 | ||
|   | 58b5586674 | ||
|   | cb02ccc8b4 | ||
|   | ec692ed727 | ||
|   | 2fb2ea573e | ||
|   | ada2dc6112 | ||
|   | ad9024a4f0 | ||
|   | 047c10e23c | ||
|   | 4f83164544 | ||
|   | 6f926ed595 | ||
|   | 249dc55212 | ||
|   | 46252bc6f3 | ||
|   | 64350a2e78 | ||
|   | 2902c63a3b | ||
|   | 55b8588f1f | ||
|   | 02ecc4ae9a | ||
|   | 3ee50b7832 | ||
|   | 66ddd87ee4 | ||
|   | 233189e4f7 | ||
|   | b237fd7201 | ||
|   | 3c81efe2f4 | ||
|   | 0fcfb94690 | ||
|   | bb6d4c2756 | ||
|   | b59ce190ac | ||
|   | 80be1a30f2 | ||
|   | 93b4f79006 | ||
|   | 3009e46617 | ||
|   | 8f040a1a84 | ||
|   | 4dbab8d77a | ||
|   | cde42c8a49 | ||
|   | 3b9d19df43 | ||
|   | 6ad4acc9fc | ||
|   | 3e59521f48 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 0970c087c8 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 676c550e6e | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 78fa47f6f8 | ||
|   | 4aa5bb6da3 | ||
|   | f7dfc9bbb8 | ||
|   | 584b6e378d | ||
|   | 754febfd33 | ||
|   | 0c9c475f32 | ||
|   | e4baca1127 | ||
|   | bb61a35a54 | ||
|   | 4b9ae5a97c | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | c8caa0662d | ||
|   | f4e8d1963f | ||
|   | 45d5e961dc | ||
|   | 45f2863966 | ||
|   | 01c1ac4c0c | ||
|   | b2f9aec383 | ||
|   | a95aa67aef | ||
|   | cbeefeccbb | ||
|   | 2b72d38235 | ||
|   | 8fe7aec3c6 | ||
|   | 6e1f5a8503 | ||
|   | b74b76c9f9 | ||
|   | a27265450c | ||
|   | cc5455c3dc | 
| @@ -33,7 +33,6 @@ venv/ | ||||
| # Test and development files | ||||
| test-datastore/ | ||||
| tests/ | ||||
| docs/ | ||||
| *.md | ||||
| !README.md | ||||
|  | ||||
|   | ||||
							
								
								
									
										8
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							| @@ -4,11 +4,11 @@ updates: | ||||
|     directory: / | ||||
|     schedule: | ||||
|       interval: "weekly" | ||||
|     "caronc/apprise": | ||||
|       versioning-strategy: "increase" | ||||
|       schedule: | ||||
|         interval: "daily" | ||||
|     groups: | ||||
|       all: | ||||
|         patterns: | ||||
|         - "*" | ||||
|   - package-ecosystem: pip | ||||
|     directory: / | ||||
|     schedule: | ||||
|       interval: "weekly" | ||||
|   | ||||
							
								
								
									
										6
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							| @@ -34,7 +34,7 @@ jobs: | ||||
|  | ||||
|     # Initializes the CodeQL tools for scanning. | ||||
|     - name: Initialize CodeQL | ||||
|       uses: github/codeql-action/init@v3 | ||||
|       uses: github/codeql-action/init@v4 | ||||
|       with: | ||||
|         languages: ${{ matrix.language }} | ||||
|         # If you wish to specify custom queries, you can do so here or in a config file. | ||||
| @@ -45,7 +45,7 @@ jobs: | ||||
|     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java). | ||||
|     # If this step fails, then you should remove it and run the build manually (see below) | ||||
|     - name: Autobuild | ||||
|       uses: github/codeql-action/autobuild@v3 | ||||
|       uses: github/codeql-action/autobuild@v4 | ||||
|  | ||||
|     # ℹ️ Command-line programs to run using the OS shell. | ||||
|     # 📚 https://git.io/JvXDl | ||||
| @@ -59,4 +59,4 @@ jobs: | ||||
|     #   make release | ||||
|  | ||||
|     - name: Perform CodeQL Analysis | ||||
|       uses: github/codeql-action/analyze@v3 | ||||
|       uses: github/codeql-action/analyze@v4 | ||||
|   | ||||
							
								
								
									
										4
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							| @@ -95,7 +95,7 @@ jobs: | ||||
|           push: true | ||||
|           tags: | | ||||
|             ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8 | ||||
|           cache-from: type=gha | ||||
|           cache-to: type=gha,mode=max | ||||
|  | ||||
| @@ -133,7 +133,7 @@ jobs: | ||||
|           file: ./Dockerfile | ||||
|           push: true | ||||
|           tags: ${{ steps.meta.outputs.tags }} | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8 | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8 | ||||
|           cache-from: type=gha | ||||
|           cache-to: type=gha,mode=max | ||||
| # Looks like this was disabled | ||||
|   | ||||
							
								
								
									
										33
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										33
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							| @@ -21,20 +21,20 @@ jobs: | ||||
|     - name: Build a binary wheel and a source tarball | ||||
|       run: python3 -m build | ||||
|     - name: Store the distribution packages | ||||
|       uses: actions/upload-artifact@v4 | ||||
|       uses: actions/upload-artifact@v5 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|  | ||||
|  | ||||
|   test-pypi-package: | ||||
|     name: Test the built 📦 package works basically. | ||||
|     name: Test the built package works basically. | ||||
|     runs-on: ubuntu-latest | ||||
|     needs: | ||||
|     - build | ||||
|     steps: | ||||
|     - name: Download all the dists | ||||
|       uses: actions/download-artifact@v5 | ||||
|       uses: actions/download-artifact@v6 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
| @@ -42,18 +42,39 @@ jobs: | ||||
|       uses: actions/setup-python@v6 | ||||
|       with: | ||||
|         python-version: '3.11' | ||||
|  | ||||
|     - name: Test that the basic pip built package runs without error | ||||
|       run: | | ||||
|         set -ex | ||||
|         ls -alR  | ||||
|          | ||||
|         # Find and install the first .whl file | ||||
|         find dist -type f -name "*.whl" -exec pip3 install {} \; -quit | ||||
|         # Install the first wheel found in dist/ | ||||
|         WHEEL=$(find dist -type f -name "*.whl" -print -quit) | ||||
|         echo Installing $WHEEL | ||||
|         python3 -m pip install --upgrade pip | ||||
|         python3 -m pip install "$WHEEL" | ||||
|         changedetection.io -d /tmp -p 10000 & | ||||
|          | ||||
|         sleep 3 | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null | ||||
|          | ||||
|         # --- API test --- | ||||
|         # This also means that the docs/api-spec.yml was shipped and could be read | ||||
|         test -f /tmp/url-watches.json | ||||
|         API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json) | ||||
|         echo Test API KEY is $API_KEY | ||||
|         curl -X POST "http://127.0.0.1:10000/api/v1/watch" \ | ||||
|           -H "x-api-key: ${API_KEY}" \ | ||||
|           -H "Content-Type: application/json" \ | ||||
|           --show-error --fail \ | ||||
|           --retry 6 --retry-delay 1 --retry-connrefused \ | ||||
|           -d '{ | ||||
|             "url": "https://example.com", | ||||
|             "title": "Example Site Monitor", | ||||
|             "time_between_check": { "hours": 1 } | ||||
|           }' | ||||
|            | ||||
|         killall changedetection.io | ||||
|  | ||||
|  | ||||
| @@ -72,7 +93,7 @@ jobs: | ||||
|  | ||||
|     steps: | ||||
|     - name: Download all the dists | ||||
|       uses: actions/download-artifact@v5 | ||||
|       uses: actions/download-artifact@v6 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|   | ||||
							
								
								
									
										4
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							| @@ -38,8 +38,6 @@ jobs: | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm/v8 | ||||
|             dockerfile: ./Dockerfile | ||||
|           - platform: linux/arm64/v8 | ||||
|             dockerfile: ./Dockerfile | ||||
|           # Alpine Dockerfile platforms (musl via alpine check) | ||||
|           - platform: linux/amd64 | ||||
|             dockerfile: ./.github/test/Dockerfile-alpine | ||||
| @@ -76,5 +74,5 @@ jobs: | ||||
|             file: ${{ matrix.dockerfile }} | ||||
|             platforms: ${{ matrix.platform }} | ||||
|             cache-from: type=gha | ||||
|             cache-to: type=gha,mode=max | ||||
|             cache-to: type=gha,mode=min | ||||
|  | ||||
|   | ||||
| @@ -54,7 +54,10 @@ jobs: | ||||
|  | ||||
|       - name: Spin up ancillary SMTP+Echo message test server | ||||
|         run: | | ||||
|           # Debug SMTP server/echo message back server | ||||
|           # Debug SMTP server/echo message back server, telnet 11080 to it should immediately bounce back the most recent message that tried to send (then you can see if cdio tried to send, the format, etc) | ||||
|           # 11025 is the SMTP port for testing | ||||
|           # apprise example would be 'mailto://changedetection@localhost:11025/?to=fff@home.com  (it will also echo to STDOUT) | ||||
|           # telnet localhost 11080 | ||||
|           docker run --network changedet-network -d -p 11025:11025 -p 11080:11080  --hostname mailserver test-changedetectionio  bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py' | ||||
|           docker ps | ||||
|  | ||||
| @@ -253,9 +256,33 @@ jobs: | ||||
|           docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt | ||||
|           docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt | ||||
|  | ||||
|       - name: Extract and display memory test report | ||||
|         if: always() | ||||
|         run: | | ||||
|           # Extract test-memory.log from the container | ||||
|           echo "Extracting test-memory.log from container..." | ||||
|           docker cp test-cdio-basic-tests:/app/changedetectionio/test-memory.log output-logs/test-memory-${{ env.PYTHON_VERSION }}.log || echo "test-memory.log not found in container" | ||||
|  | ||||
|           # Display the memory log contents for immediate visibility in workflow output | ||||
|           echo "=== Top 10 Highest Peak Memory Tests ===" | ||||
|           if [ -f output-logs/test-memory-${{ env.PYTHON_VERSION }}.log ]; then | ||||
|             # Sort by peak memory value (extract number before MB and sort numerically, reverse order) | ||||
|             grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log | \ | ||||
|               sed 's/.*Peak memory: //' | \ | ||||
|               paste -d'|' - <(grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log) | \ | ||||
|               sort -t'|' -k1 -nr | \ | ||||
|               cut -d'|' -f2 | \ | ||||
|               head -10 | ||||
|             echo "" | ||||
|             echo "=== Full Memory Test Report ===" | ||||
|             cat output-logs/test-memory-${{ env.PYTHON_VERSION }}.log | ||||
|           else | ||||
|             echo "No memory log available" | ||||
|           fi | ||||
|  | ||||
|       - name: Store everything including test-datastore | ||||
|         if: always() | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         uses: actions/upload-artifact@v5 | ||||
|         with: | ||||
|           name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }} | ||||
|           path: . | ||||
|   | ||||
| @@ -84,6 +84,11 @@ EXPOSE 5000 | ||||
|  | ||||
| # The actual flask app module | ||||
| COPY changedetectionio /app/changedetectionio | ||||
|  | ||||
| # Also for OpenAPI validation wrapper - needs the YML | ||||
| RUN [ ! -d "/app/docs" ] && mkdir /app/docs | ||||
| COPY docs/api-spec.yaml /app/docs/api-spec.yaml | ||||
|  | ||||
| # Starting wrapper | ||||
| COPY changedetection.py /app/changedetection.py | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| recursive-include changedetectionio/api * | ||||
| include docs/api-spec.yaml | ||||
| recursive-include changedetectionio/blueprint * | ||||
| recursive-include changedetectionio/conditions * | ||||
| recursive-include changedetectionio/content_fetchers * | ||||
| recursive-include changedetectionio/jinja2_custom * | ||||
| recursive-include changedetectionio/model * | ||||
| recursive-include changedetectionio/notification * | ||||
| recursive-include changedetectionio/processors * | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| # Read more https://github.com/dgtlmoon/changedetection.io/wiki | ||||
|  | ||||
| __version__ = '0.50.12' | ||||
| __version__ = '0.50.33' | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from json.decoder import JSONDecodeError | ||||
|   | ||||
| @@ -1,9 +1,22 @@ | ||||
| import os | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request | ||||
| import validators | ||||
| from functools import wraps | ||||
| from . import auth, validate_openapi_request | ||||
| from ..validate_url import is_safe_valid_url | ||||
|  | ||||
|  | ||||
| def default_content_type(content_type='text/plain'): | ||||
|     """Decorator to set a default Content-Type header if none is provided.""" | ||||
|     def decorator(f): | ||||
|         @wraps(f) | ||||
|         def wrapper(*args, **kwargs): | ||||
|             if not request.content_type: | ||||
|                 # Set default content type in the request environment | ||||
|                 request.environ['CONTENT_TYPE'] = content_type | ||||
|             return f(*args, **kwargs) | ||||
|         return wrapper | ||||
|     return decorator | ||||
|  | ||||
|  | ||||
| class Import(Resource): | ||||
| @@ -12,6 +25,7 @@ class Import(Resource): | ||||
|         self.datastore = kwargs['datastore'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     @default_content_type('text/plain') #3547 #3542 | ||||
|     @validate_openapi_request('importWatches') | ||||
|     def post(self): | ||||
|         """Import a list of watched URLs.""" | ||||
| @@ -35,14 +49,13 @@ class Import(Resource): | ||||
|  | ||||
|         urls = request.get_data().decode('utf8').splitlines() | ||||
|         added = [] | ||||
|         allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
|         for url in urls: | ||||
|             url = url.strip() | ||||
|             if not len(url): | ||||
|                 continue | ||||
|  | ||||
|             # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|             if not validators.url(url, simple_host=allow_simplehost): | ||||
|             if not is_safe_valid_url(url): | ||||
|                 return f"Invalid or unsupported URL - {url}", 400 | ||||
|  | ||||
|             if dedupe and self.datastore.url_exists(url): | ||||
|   | ||||
| @@ -1,12 +1,12 @@ | ||||
| import os | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from changedetectionio.validate_url import is_safe_valid_url | ||||
|  | ||||
| from flask_expects_json import expects_json | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio import worker_handler | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request, make_response, send_from_directory | ||||
| import validators | ||||
| from . import auth | ||||
| import copy | ||||
|  | ||||
| @@ -14,6 +14,39 @@ import copy | ||||
| from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request | ||||
|  | ||||
|  | ||||
| def validate_time_between_check_required(json_data): | ||||
|     """ | ||||
|     Validate that at least one time interval is specified when not using default settings. | ||||
|     Returns None if valid, or error message string if invalid. | ||||
|     Defaults to using global settings if time_between_check_use_default is not provided. | ||||
|     """ | ||||
|     # Default to using global settings if not specified | ||||
|     use_default = json_data.get('time_between_check_use_default', True) | ||||
|  | ||||
|     # If using default settings, no validation needed | ||||
|     if use_default: | ||||
|         return None | ||||
|  | ||||
|     # If not using defaults, check if time_between_check exists and has at least one non-zero value | ||||
|     time_check = json_data.get('time_between_check') | ||||
|     if not time_check: | ||||
|         # No time_between_check provided and not using defaults - this is an error | ||||
|         return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings." | ||||
|  | ||||
|     # time_between_check exists, check if it has at least one non-zero value | ||||
|     if any([ | ||||
|         (time_check.get('weeks') or 0) > 0, | ||||
|         (time_check.get('days') or 0) > 0, | ||||
|         (time_check.get('hours') or 0) > 0, | ||||
|         (time_check.get('minutes') or 0) > 0, | ||||
|         (time_check.get('seconds') or 0) > 0 | ||||
|     ]): | ||||
|         return None | ||||
|  | ||||
|     # time_between_check exists but all values are 0 or empty - this is an error | ||||
|     return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings." | ||||
|  | ||||
|  | ||||
| class Watch(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
|         # datastore is a black box dependency | ||||
| @@ -55,6 +88,8 @@ class Watch(Resource): | ||||
|         # attr .last_changed will check for the last written text snapshot on change | ||||
|         watch['last_changed'] = watch.last_changed | ||||
|         watch['viewed'] = watch.viewed | ||||
|         watch['link'] = watch.link, | ||||
|  | ||||
|         return watch | ||||
|  | ||||
|     @auth.check_token | ||||
| @@ -81,6 +116,15 @@ class Watch(Resource): | ||||
|             if not request.json.get('proxy') in plist: | ||||
|                 return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400 | ||||
|  | ||||
|         # Validate time_between_check when not using defaults | ||||
|         validation_error = validate_time_between_check_required(request.json) | ||||
|         if validation_error: | ||||
|             return validation_error, 400 | ||||
|  | ||||
|         # XSS etc protection | ||||
|         if request.json.get('url') and not is_safe_valid_url(request.json.get('url')): | ||||
|             return "Invalid URL", 400 | ||||
|  | ||||
|         watch.update(request.json) | ||||
|  | ||||
|         return "OK", 200 | ||||
| @@ -186,9 +230,7 @@ class CreateWatch(Resource): | ||||
|         json_data = request.get_json() | ||||
|         url = json_data['url'].strip() | ||||
|  | ||||
|         # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|         allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
|         if not validators.url(url, simple_host=allow_simplehost): | ||||
|         if not is_safe_valid_url(url): | ||||
|             return "Invalid or unsupported URL", 400 | ||||
|  | ||||
|         if json_data.get('proxy'): | ||||
| @@ -196,6 +238,11 @@ class CreateWatch(Resource): | ||||
|             if not json_data.get('proxy') in plist: | ||||
|                 return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400 | ||||
|  | ||||
|         # Validate time_between_check when not using defaults | ||||
|         validation_error = validate_time_between_check_required(json_data) | ||||
|         if validation_error: | ||||
|             return validation_error, 400 | ||||
|  | ||||
|         extras = copy.deepcopy(json_data) | ||||
|  | ||||
|         # Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API) | ||||
| @@ -230,6 +277,8 @@ class CreateWatch(Resource): | ||||
|                 'last_changed': watch.last_changed, | ||||
|                 'last_checked': watch['last_checked'], | ||||
|                 'last_error': watch['last_error'], | ||||
|                 'link': watch.link, | ||||
|                 'page_title': watch['page_title'], | ||||
|                 'title': watch['title'], | ||||
|                 'url': watch['url'], | ||||
|                 'viewed': watch.viewed | ||||
|   | ||||
| @@ -1,9 +1,7 @@ | ||||
| import copy | ||||
| import yaml | ||||
| import functools | ||||
| from flask import request, abort | ||||
| from openapi_core import OpenAPI | ||||
| from openapi_core.contrib.flask import FlaskOpenAPIRequest | ||||
| from loguru import logger | ||||
| from . import api_schema | ||||
| from ..model import watch_base | ||||
|  | ||||
| @@ -31,17 +29,21 @@ schema_create_notification_urls['required'] = ['notification_urls'] | ||||
| schema_delete_notification_urls = copy.deepcopy(schema_notification_urls) | ||||
| schema_delete_notification_urls['required'] = ['notification_urls'] | ||||
|  | ||||
| # Load OpenAPI spec for validation | ||||
| _openapi_spec = None | ||||
|  | ||||
| @functools.cache | ||||
| def get_openapi_spec(): | ||||
|     global _openapi_spec | ||||
|     if _openapi_spec is None: | ||||
|         import os | ||||
|         spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml') | ||||
|         with open(spec_path, 'r') as f: | ||||
|             spec_dict = yaml.safe_load(f) | ||||
|         _openapi_spec = OpenAPI.from_dict(spec_dict) | ||||
|     """Lazy load OpenAPI spec and dependencies only when validation is needed.""" | ||||
|     import os | ||||
|     import yaml  # Lazy import - only loaded when API validation is actually used | ||||
|     from openapi_core import OpenAPI  # Lazy import - saves ~10.7 MB on startup | ||||
|  | ||||
|     spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml') | ||||
|     if not os.path.exists(spec_path): | ||||
|         # Possibly for pip3 packages | ||||
|         spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml') | ||||
|  | ||||
|     with open(spec_path, 'r') as f: | ||||
|         spec_dict = yaml.safe_load(f) | ||||
|     _openapi_spec = OpenAPI.from_dict(spec_dict) | ||||
|     return _openapi_spec | ||||
|  | ||||
| def validate_openapi_request(operation_id): | ||||
| @@ -50,16 +52,28 @@ def validate_openapi_request(operation_id): | ||||
|         @functools.wraps(f) | ||||
|         def wrapper(*args, **kwargs): | ||||
|             try: | ||||
|                 spec = get_openapi_spec() | ||||
|                 openapi_request = FlaskOpenAPIRequest(request) | ||||
|                 result = spec.unmarshal_request(openapi_request) | ||||
|                 if result.errors: | ||||
|                     abort(400, message=f"OpenAPI validation failed: {result.errors}") | ||||
|                 return f(*args, **kwargs) | ||||
|                 # Skip OpenAPI validation for GET requests since they don't have request bodies | ||||
|                 if request.method.upper() != 'GET': | ||||
|                     # Lazy import - only loaded when actually validating a request | ||||
|                     from openapi_core.contrib.flask import FlaskOpenAPIRequest | ||||
|  | ||||
|                     spec = get_openapi_spec() | ||||
|                     openapi_request = FlaskOpenAPIRequest(request) | ||||
|                     result = spec.unmarshal_request(openapi_request) | ||||
|                     if result.errors: | ||||
|                         from werkzeug.exceptions import BadRequest | ||||
|                         error_details = [] | ||||
|                         for error in result.errors: | ||||
|                             error_details.append(str(error)) | ||||
|                         raise BadRequest(f"OpenAPI validation failed: {error_details}") | ||||
|             except BadRequest: | ||||
|                 # Re-raise BadRequest exceptions (validation failures) | ||||
|                 raise | ||||
|             except Exception as e: | ||||
|                 # If OpenAPI validation fails, log but don't break existing functionality | ||||
|                 print(f"OpenAPI validation warning for {operation_id}: {e}") | ||||
|                 return f(*args, **kwargs) | ||||
|                 # If OpenAPI spec loading fails, log but don't break existing functionality | ||||
|                 logger.critical(f"OpenAPI validation warning for {operation_id}: {e}") | ||||
|                 abort(500) | ||||
|             return f(*args, **kwargs) | ||||
|         return wrapper | ||||
|     return decorator | ||||
|  | ||||
| @@ -69,3 +83,4 @@ from .Tags import Tags, Tag | ||||
| from .Import import Import | ||||
| from .SystemInfo import SystemInfo | ||||
| from .Notifications import Notifications | ||||
|  | ||||
|   | ||||
| @@ -119,6 +119,12 @@ def build_watch_json_schema(d): | ||||
|  | ||||
|     schema['properties']['time_between_check'] = build_time_between_check_json_schema() | ||||
|  | ||||
|     schema['properties']['time_between_check_use_default'] = { | ||||
|         "type": "boolean", | ||||
|         "default": True, | ||||
|         "description": "Whether to use global settings for time between checks - defaults to true if not set" | ||||
|     } | ||||
|  | ||||
|     schema['properties']['browser_steps'] = { | ||||
|         "anyOf": [ | ||||
|             { | ||||
|   | ||||
| @@ -334,6 +334,10 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                             if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change): | ||||
|                                 watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time)) | ||||
|  | ||||
|                             # Explicitly delete large content variables to free memory IMMEDIATELY after saving | ||||
|                             # These are no longer needed after being saved to history | ||||
|                             del contents | ||||
|  | ||||
|                             # Send notifications on second+ check | ||||
|                             if watch.history_n >= 2: | ||||
|                                 logger.info(f"Change detected in UUID {uuid} - {watch['url']}") | ||||
| @@ -372,6 +376,12 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                 datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3), | ||||
|                                                                'check_count': count}) | ||||
|  | ||||
|                 # NOW clear fetcher content - after all processing is complete | ||||
|                 # This is the last point where we need the fetcher data | ||||
|                 if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher: | ||||
|                     update_handler.fetcher.clear_content() | ||||
|                     logger.debug(f"Cleared fetcher content for UUID {uuid}") | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}") | ||||
|             logger.error(f"Worker {worker_id} traceback:", exc_info=True) | ||||
| @@ -392,7 +402,28 @@ async def async_update_worker(worker_id, q, notification_q, app, datastore): | ||||
|                         #logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}") | ||||
|                         watch_check_update.send(watch_uuid=watch['uuid']) | ||||
|  | ||||
|                     update_handler = None | ||||
|                     # Explicitly clean up update_handler and all its references | ||||
|                     if update_handler: | ||||
|                         # Clear fetcher content using the proper method | ||||
|                         if hasattr(update_handler, 'fetcher') and update_handler.fetcher: | ||||
|                             update_handler.fetcher.clear_content() | ||||
|  | ||||
|                         # Clear processor references | ||||
|                         if hasattr(update_handler, 'content_processor'): | ||||
|                             update_handler.content_processor = None | ||||
|  | ||||
|                         update_handler = None | ||||
|  | ||||
|                     # Clear local contents variable if it still exists | ||||
|                     if 'contents' in locals(): | ||||
|                         del contents | ||||
|  | ||||
|                     # Note: We don't set watch = None here because: | ||||
|                     # 1. watch is just a local reference to datastore.data['watching'][uuid] | ||||
|                     # 2. Setting it to None doesn't affect the datastore | ||||
|                     # 3. GC can't collect the object anyway (still referenced by datastore) | ||||
|                     # 4. It would just cause confusion | ||||
|  | ||||
|                     logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s") | ||||
|                 except Exception as cleanup_error: | ||||
|                     logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}") | ||||
|   | ||||
| @@ -6,7 +6,7 @@ from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT | ||||
| from changedetectionio.content_fetchers.base import manage_user_agent | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -33,7 +33,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     def long_task(uuid, preferred_proxy): | ||||
|         import time | ||||
|         from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|         status = {'status': '', 'length': 0, 'text': ''} | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
|  | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from changedetectionio.jinja2_custom import render as jinja_render | ||||
| from changedetectionio.notification.handler import apply_service_tweaks | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from feedgen.feed import FeedGenerator | ||||
| from flask import Blueprint, make_response, request, url_for, redirect | ||||
| @@ -120,9 +121,13 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                     html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]), | ||||
|                                                  newest_version_file_contents=watch.get_history_snapshot(dates[-1]), | ||||
|                                                  include_equal=False, | ||||
|                                                  line_feed_sep="<br>", | ||||
|                                                  html_colour=html_colour_enable | ||||
|                                                  line_feed_sep="<br>" | ||||
|                                                  ) | ||||
|  | ||||
|  | ||||
|                     requested_output_format = 'htmlcolor' if html_colour_enable else 'html' | ||||
|                     html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format) | ||||
|  | ||||
|                 except FileNotFoundError as e: | ||||
|                     html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found." | ||||
|  | ||||
|   | ||||
| @@ -119,7 +119,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                                 hide_remove_pass=os.getenv("SALTED_PASS", False), | ||||
|                                 min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)), | ||||
|                                 settings_application=datastore.data['settings']['application'], | ||||
|                                 timezone_default_config=datastore.data['settings']['application'].get('timezone'), | ||||
|                                 timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'), | ||||
|                                 utc_time=utc_time, | ||||
|                                 ) | ||||
|  | ||||
|   | ||||
| @@ -1,7 +1,7 @@ | ||||
| {% extends 'base.html' %} | ||||
|  | ||||
| {% block content %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script> | ||||
|     const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}"; | ||||
| @@ -72,25 +72,23 @@ | ||||
|                         <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page) | ||||
|                         </span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.application.form.rss_content_format) }} | ||||
|                         <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }} | ||||
|                         <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span> | ||||
|                     </div> | ||||
|                 {% if form.requests.proxy %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                         {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                         Choose a default proxy for all watches | ||||
|                         </span> | ||||
|                     <div class="grey-form-border"> | ||||
|                         <div class="pure-control-group"> | ||||
|                             {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }} | ||||
|                         </div> | ||||
|                         <div class="pure-control-group"> | ||||
|                             {{ render_field(form.application.form.rss_content_format) }} | ||||
|                             <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span> | ||||
|                         </div> | ||||
|                         <div class="pure-control-group"> | ||||
|                             {{ render_checkbox_field(form.application.form.rss_reader_mode) }} | ||||
|                             <span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span> | ||||
|                         </div> | ||||
|                     </div> | ||||
|                 {% endif %} | ||||
|                 </fieldset> | ||||
|             </div> | ||||
|  | ||||
| @@ -133,6 +131,10 @@ | ||||
|                     <span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br> | ||||
|                     Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_field(form.requests.form.timeout) }} | ||||
|                     <span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group inline-radio"> | ||||
|                     {{ render_field(form.requests.form.default_ua) }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
| @@ -191,6 +193,12 @@ nav | ||||
|                         </ul> | ||||
|                      </span> | ||||
|                     </fieldset> | ||||
|                     <fieldset class="pure-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.strip_ignored_lines) }} | ||||
|                         <span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br> | ||||
|                         <i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc. | ||||
|                         </span> | ||||
|                     </fieldset> | ||||
|            </div> | ||||
|  | ||||
|             <div class="tab-pane-inner" id="api"> | ||||
| @@ -230,11 +238,9 @@ nav | ||||
|                     <p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p> | ||||
|                     <p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p> | ||||
|                     <p> | ||||
|                        {{ render_field(form.application.form.timezone) }} | ||||
|                        {{ render_field(form.application.form.scheduler_timezone_default) }} | ||||
|                         <datalist id="timezones" style="display: none;"> | ||||
|                             {% for tz_name in available_timezones %} | ||||
|                                 <option value="{{ tz_name }}">{{ tz_name }}</option> | ||||
|                             {% endfor %} | ||||
|                             {%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%} | ||||
|                         </datalist> | ||||
|                     </p> | ||||
|                 </div> | ||||
| @@ -308,17 +314,27 @@ nav | ||||
|                <p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites. | ||||
|  | ||||
|                 <div class="pure-control-group" id="extra-proxies-setting"> | ||||
|                 {{ render_field(form.requests.form.extra_proxies) }} | ||||
|                 {{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }} | ||||
|                 <span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br> | ||||
|                 <span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span> | ||||
|                 {% if form.requests.proxy %} | ||||
|                 <div> | ||||
|                 <br> | ||||
|                     <div class="inline-radio"> | ||||
|                         {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }} | ||||
|                         <span class="pure-form-message-inline">Choose a default proxy for all watches</span> | ||||
|                     </div> | ||||
|                 </div> | ||||
|                 {% endif %} | ||||
|                 </div> | ||||
|                 <div class="pure-control-group" id="extra-browsers-setting"> | ||||
|                     <p> | ||||
|                     <span class="pure-form-message-inline"><i>Extra Browsers</i> can be attached to further defeat CAPTCHA's on websites that are particularly hard to scrape.</span><br> | ||||
|                     <span class="pure-form-message-inline">Simply paste the connection address into the box, <a href="https://changedetection.io/tutorial/using-bright-datas-scraping-browser-pass-captchas-and-other-protection-when-monitoring">More instructions and examples here</a> </span> | ||||
|                     </p> | ||||
|                     {{ render_field(form.requests.form.extra_browsers) }} | ||||
|                     {{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }} | ||||
|                 </div> | ||||
|              | ||||
|             </div> | ||||
|             <div id="actions"> | ||||
|                 <div class="pure-control-group"> | ||||
|   | ||||
| @@ -76,14 +76,14 @@ def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWat | ||||
|  | ||||
|     elif (op == 'notification-default'): | ||||
|         from changedetectionio.notification import ( | ||||
|             default_notification_format_for_watch | ||||
|             USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|         ) | ||||
|         for uuid in uuids: | ||||
|             if datastore.data['watching'].get(uuid): | ||||
|                 datastore.data['watching'][uuid]['notification_title'] = None | ||||
|                 datastore.data['watching'][uuid]['notification_body'] = None | ||||
|                 datastore.data['watching'][uuid]['notification_urls'] = [] | ||||
|                 datastore.data['watching'][uuid]['notification_format'] = default_notification_format_for_watch | ||||
|                 datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|         if emit_flash: | ||||
|             flash(f"{len(uuids)} watches set to use default notification settings") | ||||
|  | ||||
|   | ||||
| @@ -187,7 +187,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|  | ||||
|             tz_name = time_schedule_limit.get('timezone') | ||||
|             if not tz_name: | ||||
|                 tz_name = datastore.data['settings']['application'].get('timezone', 'UTC') | ||||
|                 tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip()) | ||||
|  | ||||
|             if time_schedule_limit and time_schedule_limit.get('enabled'): | ||||
|                 try: | ||||
| @@ -257,7 +257,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'), | ||||
|                 'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch), | ||||
|                 'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid), | ||||
|                 'timezone_default_config': datastore.data['settings']['application'].get('timezone'), | ||||
|                 'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'), | ||||
|                 'using_global_webdriver_wait': not default['webdriver_delay'], | ||||
|                 'uuid': uuid, | ||||
|                 'watch': watch, | ||||
|   | ||||
| @@ -2,6 +2,7 @@ from flask import Blueprint, request, make_response | ||||
| import random | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.notification_service import NotificationContextData | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio.auth_decorator import login_optionally_required | ||||
|  | ||||
| @@ -19,6 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|         import apprise | ||||
|         from changedetectionio.notification.handler import process_notification | ||||
|         from changedetectionio.notification.apprise_plugin.assets import apprise_asset | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|         from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler | ||||
|  | ||||
| @@ -61,16 +63,20 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|             return 'Error: No Notification URLs set/found' | ||||
|  | ||||
|         for n_url in notification_urls: | ||||
|             # We are ONLY validating the apprise:// part here, convert all tags to something so as not to break apprise URLs | ||||
|             generic_notification_context_data = NotificationContextData() | ||||
|             generic_notification_context_data.set_random_for_validation() | ||||
|             n_url = jinja_render(template_str=n_url, **generic_notification_context_data).strip() | ||||
|             if len(n_url.strip()): | ||||
|                 if not apobj.add(n_url): | ||||
|                     return f'Error:  {n_url} is not a valid AppRise URL.' | ||||
|  | ||||
|         try: | ||||
|             # use the same as when it is triggered, but then override it with the form test values | ||||
|             n_object = { | ||||
|             n_object = NotificationContextData({ | ||||
|                 'watch_url': request.form.get('window_url', "https://changedetection.io"), | ||||
|                 'notification_urls': notification_urls | ||||
|             } | ||||
|             }) | ||||
|  | ||||
|             # Only use if present, if not set in n_object it should use the default system value | ||||
|             if 'notification_format' in request.form and request.form['notification_format'].strip(): | ||||
|   | ||||
| @@ -87,7 +87,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             form=form, | ||||
|             guid=datastore.data['app_guid'], | ||||
|             has_proxies=datastore.proxy_list, | ||||
|             has_unviewed=datastore.has_unviewed, | ||||
|             hosted_sticky=os.getenv("SALTED_PASS", False) == False, | ||||
|             now_time_server=round(time.time()), | ||||
|             pagination=pagination, | ||||
| @@ -97,6 +96,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'), | ||||
|             system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'), | ||||
|             tags=sorted_tags, | ||||
|             unread_changes_count=datastore.unread_changes_count, | ||||
|             watches=sorted_watches | ||||
|         ) | ||||
|  | ||||
|   | ||||
| @@ -82,8 +82,11 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|         {%- set cols_required = cols_required + 1 -%} | ||||
|     {%- endif -%} | ||||
|     {%- set ui_settings = datastore.data['settings']['application']['ui'] -%} | ||||
|  | ||||
|     <div id="watch-table-wrapper"> | ||||
|     {%- set wrapper_classes = [ | ||||
|         'has-unread-changes' if unread_changes_count else '', | ||||
|         'has-error' if errored_count else '', | ||||
|     ] -%} | ||||
|     <div id="watch-table-wrapper" class="{{ wrapper_classes | reject('equalto', '') | join(' ') }}"> | ||||
|         {%- set table_classes = [ | ||||
|             'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled', | ||||
|         ] -%} | ||||
| @@ -158,9 +161,9 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|                         <div> | ||||
|                         <span class="watch-title"> | ||||
|                             {% if system_use_url_watchlist or watch.get('use_page_title_in_list') %} | ||||
|                                 {{watch.label}} | ||||
|                                 {{ watch.label }} | ||||
|                             {% else %} | ||||
|                                 {{watch.url}} | ||||
|                                 {{ watch.get('title') or watch.link }} | ||||
|                             {% endif %} | ||||
|                            <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a> | ||||
|                         </span> | ||||
| @@ -241,10 +244,10 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|             </tbody> | ||||
|         </table> | ||||
|         <ul id="post-list-buttons"> | ||||
|             <li id="post-list-with-errors" class="{%- if errored_count -%}has-error{%- endif -%}" style="display: none;" > | ||||
|             <li id="post-list-with-errors" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a> | ||||
|             </li> | ||||
|             <li id="post-list-mark-views" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" > | ||||
|             <li id="post-list-mark-views" style="display: none;" > | ||||
|                 <a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a> | ||||
|             </li> | ||||
|         {%-  if active_tag_uuid -%} | ||||
| @@ -252,8 +255,8 @@ document.addEventListener('DOMContentLoaded', function() { | ||||
|                 <a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a> | ||||
|             </li> | ||||
|         {%-  endif -%} | ||||
|             <li id="post-list-unread" class="{%- if has_unviewed -%}has-unviewed{%- endif -%}" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread</a> | ||||
|             <li id="post-list-unread" style="display: none;" > | ||||
|                 <a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a> | ||||
|             </li> | ||||
|             <li> | ||||
|                <a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck | ||||
|   | ||||
| @@ -64,6 +64,18 @@ class Fetcher(): | ||||
|     # Time ONTOP of the system defined env minimum time | ||||
|     render_extract_delay = 0 | ||||
|  | ||||
|     def clear_content(self): | ||||
|         """ | ||||
|         Explicitly clear all content from memory to free up heap space. | ||||
|         Call this after content has been saved to disk. | ||||
|         """ | ||||
|         self.content = None | ||||
|         if hasattr(self, 'raw_content'): | ||||
|             self.raw_content = None | ||||
|         self.screenshot = None | ||||
|         self.xpath_data = None | ||||
|         # Keep headers and status_code as they're small | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_error(self): | ||||
|         return self.error | ||||
| @@ -128,7 +140,7 @@ class Fetcher(): | ||||
|     async def iterate_browser_steps(self, start_url=None): | ||||
|         from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|         from playwright._impl._errors import TimeoutError, Error | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|         step_n = 0 | ||||
|  | ||||
|         if self.browser_steps is not None and len(self.browser_steps): | ||||
|   | ||||
| @@ -51,6 +51,7 @@ class fetcher(Fetcher): | ||||
|  | ||||
|         session = requests.Session() | ||||
|  | ||||
|  | ||||
|         if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'): | ||||
|             from requests_file import FileAdapter | ||||
|             session.mount('file://', FileAdapter()) | ||||
|   | ||||
| @@ -1,8 +1,32 @@ | ||||
| import difflib | ||||
| from typing import List, Iterator, Union | ||||
|  | ||||
| REMOVED_STYLE = "background-color: #fadad7; color: #b30000;" | ||||
| ADDED_STYLE = "background-color: #eaf2c2; color: #406619;" | ||||
| # https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050 | ||||
| #HTML_ADDED_STYLE = "background-color: #d2f7c2; color: #255d00;" | ||||
| #HTML_CHANGED_INTO_STYLE = "background-color: #dafbe1; color: #116329;" | ||||
| #HTML_CHANGED_STYLE = "background-color: #ffd6cc; color: #7a2000;" | ||||
| #HTML_REMOVED_STYLE = "background-color: #ffebe9; color: #82071e;" | ||||
|  | ||||
| # @todo - In the future we can make this configurable | ||||
| HTML_ADDED_STYLE = "background-color: #eaf2c2; color: #406619" | ||||
| HTML_REMOVED_STYLE = "background-color: #fadad7; color: #b30000" | ||||
| HTML_CHANGED_STYLE = HTML_REMOVED_STYLE | ||||
| HTML_CHANGED_INTO_STYLE = HTML_ADDED_STYLE | ||||
|  | ||||
|  | ||||
| # These get set to html or telegram type or discord compatible or whatever in handler.py | ||||
| # Something that cant get escaped to HTML by accident | ||||
| REMOVED_PLACEMARKER_OPEN = '@removed_PLACEMARKER_OPEN' | ||||
| REMOVED_PLACEMARKER_CLOSED = '@removed_PLACEMARKER_CLOSED' | ||||
|  | ||||
| ADDED_PLACEMARKER_OPEN = '@added_PLACEMARKER_OPEN' | ||||
| ADDED_PLACEMARKER_CLOSED = '@added_PLACEMARKER_CLOSED' | ||||
|  | ||||
| CHANGED_PLACEMARKER_OPEN = '@changed_PLACEMARKER_OPEN' | ||||
| CHANGED_PLACEMARKER_CLOSED = '@changed_PLACEMARKER_CLOSED' | ||||
|  | ||||
| CHANGED_INTO_PLACEMARKER_OPEN = '@changed_into_PLACEMARKER_OPEN' | ||||
| CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED' | ||||
|  | ||||
| def same_slicer(lst: List[str], start: int, end: int) -> List[str]: | ||||
|     """Return a slice of the list, or a single element if start == end.""" | ||||
| @@ -15,8 +39,7 @@ def customSequenceMatcher( | ||||
|     include_removed: bool = True, | ||||
|     include_added: bool = True, | ||||
|     include_replaced: bool = True, | ||||
|     include_change_type_prefix: bool = True, | ||||
|     html_colour: bool = False | ||||
|     include_change_type_prefix: bool = True | ||||
| ) -> Iterator[List[str]]: | ||||
|     """ | ||||
|     Compare two sequences and yield differences based on specified parameters. | ||||
| @@ -29,8 +52,6 @@ def customSequenceMatcher( | ||||
|         include_added (bool): Include added parts | ||||
|         include_replaced (bool): Include replaced parts | ||||
|         include_change_type_prefix (bool): Add prefixes to indicate change types | ||||
|         html_colour (bool): Use HTML background colors for differences | ||||
|  | ||||
|     Yields: | ||||
|         List[str]: Differences between sequences | ||||
|     """ | ||||
| @@ -42,22 +63,22 @@ def customSequenceMatcher( | ||||
|         if include_equal and tag == 'equal': | ||||
|             yield before[alo:ahi] | ||||
|         elif include_removed and tag == 'delete': | ||||
|             if html_colour: | ||||
|                 yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] | ||||
|             if include_change_type_prefix: | ||||
|                 yield [f'{REMOVED_PLACEMARKER_OPEN}{line}{REMOVED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)] | ||||
|             else: | ||||
|                 yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi) | ||||
|                 yield same_slicer(before, alo, ahi) | ||||
|         elif include_replaced and tag == 'replace': | ||||
|             if html_colour: | ||||
|                 yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] + \ | ||||
|                       [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)] | ||||
|             if include_change_type_prefix: | ||||
|                 yield [f'{CHANGED_PLACEMARKER_OPEN}{line}{CHANGED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)] + \ | ||||
|                       [f'{CHANGED_INTO_PLACEMARKER_OPEN}{line}{CHANGED_INTO_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)] | ||||
|             else: | ||||
|                 yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \ | ||||
|                       [f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi) | ||||
|                 yield same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi) | ||||
|         elif include_added and tag == 'insert': | ||||
|             if html_colour: | ||||
|                 yield [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)] | ||||
|             if include_change_type_prefix: | ||||
|                 yield [f'{ADDED_PLACEMARKER_OPEN}{line}{ADDED_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)] | ||||
|             else: | ||||
|                 yield [f"(added) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi) | ||||
|                 yield same_slicer(after, blo, bhi) | ||||
|  | ||||
|  | ||||
| def render_diff( | ||||
|     previous_version_file_contents: str, | ||||
| @@ -68,8 +89,7 @@ def render_diff( | ||||
|     include_replaced: bool = True, | ||||
|     line_feed_sep: str = "\n", | ||||
|     include_change_type_prefix: bool = True, | ||||
|     patch_format: bool = False, | ||||
|     html_colour: bool = False | ||||
|     patch_format: bool = False | ||||
| ) -> str: | ||||
|     """ | ||||
|     Render the difference between two file contents. | ||||
| @@ -84,8 +104,6 @@ def render_diff( | ||||
|         line_feed_sep (str): Separator for lines in output | ||||
|         include_change_type_prefix (bool): Add prefixes to indicate change types | ||||
|         patch_format (bool): Use patch format for output | ||||
|         html_colour (bool): Use HTML background colors for differences | ||||
|  | ||||
|     Returns: | ||||
|         str: Rendered difference | ||||
|     """ | ||||
| @@ -103,8 +121,7 @@ def render_diff( | ||||
|         include_removed=include_removed, | ||||
|         include_added=include_added, | ||||
|         include_replaced=include_replaced, | ||||
|         include_change_type_prefix=include_change_type_prefix, | ||||
|         html_colour=html_colour | ||||
|         include_change_type_prefix=include_change_type_prefix | ||||
|     ) | ||||
|  | ||||
|     def flatten(lst: List[Union[str, List[str]]]) -> str: | ||||
|   | ||||
| @@ -133,6 +133,11 @@ def get_socketio_path(): | ||||
|     # Socket.IO will be available at {prefix}/socket.io/ | ||||
|     return prefix | ||||
|  | ||||
| @app.template_global('is_safe_valid_url') | ||||
| def _is_safe_valid_url(test_url): | ||||
|     from .validate_url import is_safe_valid_url | ||||
|     return is_safe_valid_url(test_url) | ||||
|  | ||||
|  | ||||
| @app.template_filter('format_number_locale') | ||||
| def _jinja2_filter_format_number_locale(value: float) -> str: | ||||
| @@ -382,7 +387,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # We would sometimes get login loop errors on sites hosted in sub-paths | ||||
|  | ||||
|             # note for the future: | ||||
|             #            if not is_safe_url(next): | ||||
|             #            if not is_safe_valid_url(next): | ||||
|             #                return flask.abort(400) | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
| @@ -795,7 +800,7 @@ def ticker_thread_check_time_launch_checks(): | ||||
|             else: | ||||
|                 time_schedule_limit = watch.get('time_schedule_limit') | ||||
|                 logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)") | ||||
|             tz_name = datastore.data['settings']['application'].get('timezone', 'UTC') | ||||
|             tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip()) | ||||
|  | ||||
|             if time_schedule_limit and time_schedule_limit.get('enabled'): | ||||
|                 try: | ||||
|   | ||||
| @@ -5,6 +5,7 @@ from wtforms.widgets.core import TimeInput | ||||
|  | ||||
| from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES | ||||
| from changedetectionio.conditions.form import ConditionFormRow | ||||
| from changedetectionio.notification_service import NotificationContextData | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from wtforms import ( | ||||
| @@ -23,14 +24,12 @@ from wtforms import ( | ||||
| ) | ||||
| from flask_wtf.file import FileField, FileAllowed | ||||
| from wtforms.fields import FieldList | ||||
| from wtforms.utils import unset_value | ||||
|  | ||||
| from wtforms.validators import ValidationError | ||||
|  | ||||
| from validators.url import url as url_validator | ||||
|  | ||||
| from changedetectionio.widgets import TernaryNoneBooleanField | ||||
|  | ||||
|  | ||||
| # default | ||||
| # each select <option data-enabled="enabled-0-0" | ||||
| from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config | ||||
| @@ -56,6 +55,8 @@ valid_method = { | ||||
|  | ||||
| default_method = 'GET' | ||||
| allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False')) | ||||
| REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.' | ||||
| REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.' | ||||
|  | ||||
| class StringListField(StringField): | ||||
|     widget = widgets.TextArea() | ||||
| @@ -212,6 +213,35 @@ class ScheduleLimitForm(Form): | ||||
|         self.sunday.form.enabled.label.text = "Sunday" | ||||
|  | ||||
|  | ||||
| def validate_time_between_check_has_values(form): | ||||
|     """ | ||||
|     Custom validation function for TimeBetweenCheckForm. | ||||
|     Returns True if at least one time interval field has a value > 0. | ||||
|     """ | ||||
|     res = any([ | ||||
|         form.weeks.data and int(form.weeks.data) > 0, | ||||
|         form.days.data and int(form.days.data) > 0, | ||||
|         form.hours.data and int(form.hours.data) > 0, | ||||
|         form.minutes.data and int(form.minutes.data) > 0, | ||||
|         form.seconds.data and int(form.seconds.data) > 0 | ||||
|     ]) | ||||
|  | ||||
|     return res | ||||
|  | ||||
|  | ||||
| class RequiredTimeInterval(object): | ||||
|     """ | ||||
|     WTForms validator that ensures at least one time interval field has a value > 0. | ||||
|     Use this with FormField(TimeBetweenCheckForm, validators=[RequiredTimeInterval()]). | ||||
|     """ | ||||
|     def __init__(self, message=None): | ||||
|         self.message = message or 'At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.' | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         if not validate_time_between_check_has_values(field.form): | ||||
|             raise ValidationError(self.message) | ||||
|  | ||||
|  | ||||
| class TimeBetweenCheckForm(Form): | ||||
|     weeks = IntegerField('Weeks', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
|     days = IntegerField('Days', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
| @@ -220,6 +250,123 @@ class TimeBetweenCheckForm(Form): | ||||
|     seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
|     # @todo add total seconds minimum validatior = minimum_seconds_recheck_time | ||||
|  | ||||
|     def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs): | ||||
|         super().__init__(formdata, obj, prefix, data, meta, **kwargs) | ||||
|         self.require_at_least_one = kwargs.get('require_at_least_one', False) | ||||
|         self.require_at_least_one_message = kwargs.get('require_at_least_one_message', REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT) | ||||
|  | ||||
|     def validate(self, **kwargs): | ||||
|         """Custom validation that can optionally require at least one time interval.""" | ||||
|         # Run normal field validation first | ||||
|         if not super().validate(**kwargs): | ||||
|             return False | ||||
|  | ||||
|         # Apply optional "at least one" validation | ||||
|         if self.require_at_least_one: | ||||
|             if not validate_time_between_check_has_values(self): | ||||
|                 # Add error to the form's general errors (not field-specific) | ||||
|                 if not hasattr(self, '_formdata_errors'): | ||||
|                     self._formdata_errors = [] | ||||
|                 self._formdata_errors.append(self.require_at_least_one_message) | ||||
|                 return False | ||||
|  | ||||
|         return True | ||||
|  | ||||
|  | ||||
| class EnhancedFormField(FormField): | ||||
|     """ | ||||
|     An enhanced FormField that supports conditional validation with top-level error messages. | ||||
|     Adds a 'top_errors' property for validation errors at the FormField level. | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, form_class, label=None, validators=None, separator="-", | ||||
|                  conditional_field=None, conditional_message=None, conditional_test_function=None, **kwargs): | ||||
|         """ | ||||
|         Initialize EnhancedFormField with optional conditional validation. | ||||
|  | ||||
|         :param conditional_field: Name of the field this FormField depends on (e.g. 'time_between_check_use_default') | ||||
|         :param conditional_message: Error message to show when validation fails | ||||
|         :param conditional_test_function: Custom function to test if FormField has valid values. | ||||
|                                         Should take self.form as parameter and return True if valid. | ||||
|         """ | ||||
|         super().__init__(form_class, label, validators, separator, **kwargs) | ||||
|         self.top_errors = [] | ||||
|         self.conditional_field = conditional_field | ||||
|         self.conditional_message = conditional_message or "At least one field must have a value when not using defaults." | ||||
|         self.conditional_test_function = conditional_test_function | ||||
|  | ||||
|     def validate(self, form, extra_validators=()): | ||||
|         """ | ||||
|         Custom validation that supports conditional logic and stores top-level errors. | ||||
|         """ | ||||
|         self.top_errors = [] | ||||
|  | ||||
|         # First run the normal FormField validation | ||||
|         base_valid = super().validate(form, extra_validators) | ||||
|  | ||||
|         # Apply conditional validation if configured | ||||
|         if self.conditional_field and hasattr(form, self.conditional_field): | ||||
|             conditional_field_obj = getattr(form, self.conditional_field) | ||||
|  | ||||
|             # If the conditional field is False/unchecked, check if this FormField has any values | ||||
|             if not conditional_field_obj.data: | ||||
|                 # Use custom test function if provided, otherwise use generic fallback | ||||
|                 if self.conditional_test_function: | ||||
|                     has_any_value = self.conditional_test_function(self.form) | ||||
|                 else: | ||||
|                     # Generic fallback - check if any field has truthy data | ||||
|                     has_any_value = any(field.data for field in self.form if hasattr(field, 'data') and field.data) | ||||
|  | ||||
|                 if not has_any_value: | ||||
|                     self.top_errors.append(self.conditional_message) | ||||
|                     base_valid = False | ||||
|  | ||||
|         return base_valid | ||||
|  | ||||
|  | ||||
| class RequiredFormField(FormField): | ||||
|     """ | ||||
|     A FormField that passes require_at_least_one=True to TimeBetweenCheckForm. | ||||
|     Use this when you want the sub-form to always require at least one value. | ||||
|     """ | ||||
|  | ||||
|     def __init__(self, form_class, label=None, validators=None, separator="-", **kwargs): | ||||
|         super().__init__(form_class, label, validators, separator, **kwargs) | ||||
|  | ||||
|     def process(self, formdata, data=unset_value, extra_filters=None): | ||||
|         if extra_filters: | ||||
|             raise TypeError( | ||||
|                 "FormField cannot take filters, as the encapsulated" | ||||
|                 "data is not mutable." | ||||
|             ) | ||||
|  | ||||
|         if data is unset_value: | ||||
|             try: | ||||
|                 data = self.default() | ||||
|             except TypeError: | ||||
|                 data = self.default | ||||
|             self._obj = data | ||||
|  | ||||
|         self.object_data = data | ||||
|  | ||||
|         prefix = self.name + self.separator | ||||
|         # Pass require_at_least_one=True to the sub-form | ||||
|         if isinstance(data, dict): | ||||
|             self.form = self.form_class(formdata=formdata, prefix=prefix, require_at_least_one=True, **data) | ||||
|         else: | ||||
|             self.form = self.form_class(formdata=formdata, obj=data, prefix=prefix, require_at_least_one=True) | ||||
|  | ||||
|     @property | ||||
|     def errors(self): | ||||
|         """Include sub-form validation errors""" | ||||
|         form_errors = self.form.errors | ||||
|         # Add any general form errors to a special 'form' key | ||||
|         if hasattr(self.form, '_formdata_errors') and self.form._formdata_errors: | ||||
|             form_errors = dict(form_errors)  # Make a copy | ||||
|             form_errors['form'] = self.form._formdata_errors | ||||
|         return form_errors | ||||
|  | ||||
|  | ||||
| # Separated by  key:value | ||||
| class StringDictKeyValue(StringField): | ||||
|     widget = widgets.TextArea() | ||||
| @@ -320,11 +467,16 @@ class ValidateAppRiseServers(object): | ||||
|         import apprise | ||||
|         from .notification.apprise_plugin.assets import apprise_asset | ||||
|         from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler  # noqa: F401 | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|  | ||||
|         apobj = apprise.Apprise(asset=apprise_asset) | ||||
|  | ||||
|         for server_url in field.data: | ||||
|             url = server_url.strip() | ||||
|             generic_notification_context_data = NotificationContextData() | ||||
|             # Make sure something is atleast in all those regular token fields | ||||
|             generic_notification_context_data.set_random_for_validation() | ||||
|  | ||||
|             url = jinja_render(template_str=server_url.strip(), **generic_notification_context_data).strip() | ||||
|             if url.startswith("#"): | ||||
|                 continue | ||||
|  | ||||
| @@ -338,9 +490,8 @@ class ValidateJinja2Template(object): | ||||
|     """ | ||||
|     def __call__(self, form, field): | ||||
|         from changedetectionio import notification | ||||
|  | ||||
|         from changedetectionio.jinja2_custom import create_jinja_env | ||||
|         from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError | ||||
|         from jinja2.sandbox import ImmutableSandboxedEnvironment | ||||
|         from jinja2.meta import find_undeclared_variables | ||||
|         import jinja2.exceptions | ||||
|  | ||||
| @@ -348,9 +499,11 @@ class ValidateJinja2Template(object): | ||||
|         joined_data = ' '.join(map(str, field.data)) if isinstance(field.data, list) else f"{field.data}" | ||||
|  | ||||
|         try: | ||||
|             jinja2_env = ImmutableSandboxedEnvironment(loader=BaseLoader) | ||||
|             jinja2_env.globals.update(notification.valid_tokens) | ||||
|             # Extra validation tokens provided on the form_class(... extra_tokens={}) setup | ||||
|             # Use the shared helper to create a properly configured environment | ||||
|             jinja2_env = create_jinja_env(loader=BaseLoader) | ||||
|  | ||||
|             # Add notification tokens for validation | ||||
|             jinja2_env.globals.update(NotificationContextData()) | ||||
|             if hasattr(field, 'extra_notification_tokens'): | ||||
|                 jinja2_env.globals.update(field.extra_notification_tokens) | ||||
|  | ||||
| @@ -362,6 +515,7 @@ class ValidateJinja2Template(object): | ||||
|         except jinja2.exceptions.SecurityError as e: | ||||
|             raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e | ||||
|  | ||||
|         # Check for undeclared variables | ||||
|         ast = jinja2_env.parse(joined_data) | ||||
|         undefined = ", ".join(find_undeclared_variables(ast)) | ||||
|         if undefined: | ||||
| @@ -384,19 +538,10 @@ class validateURL(object): | ||||
|  | ||||
|  | ||||
| def validate_url(test_url): | ||||
|     # If hosts that only contain alphanumerics are allowed ("localhost" for example) | ||||
|     try: | ||||
|         url_validator(test_url, simple_host=allow_simplehost) | ||||
|     except validators.ValidationError: | ||||
|         #@todo check for xss | ||||
|         message = f"'{test_url}' is not a valid URL." | ||||
|     from changedetectionio.validate_url import is_safe_valid_url | ||||
|     if not is_safe_valid_url(test_url): | ||||
|         # This should be wtforms.validators. | ||||
|         raise ValidationError(message) | ||||
|  | ||||
|     from .model.Watch import is_safe_url | ||||
|     if not is_safe_url(test_url): | ||||
|         # This should be wtforms.validators. | ||||
|         raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format') | ||||
|         raise ValidationError('Watch protocol is not permitted or invalid URL format') | ||||
|  | ||||
|  | ||||
| class ValidateSinglePythonRegexString(object): | ||||
| @@ -529,6 +674,51 @@ class ValidateCSSJSONXPATHInput(object): | ||||
|                 except: | ||||
|                     raise ValidationError("A system-error occurred when validating your jq expression") | ||||
|  | ||||
| class ValidateSimpleURL: | ||||
|     """Validate that the value can be parsed by urllib.parse.urlparse() and has a scheme/netloc.""" | ||||
|     def __init__(self, message=None): | ||||
|         self.message = message or "Invalid URL." | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         data = (field.data or "").strip() | ||||
|         if not data: | ||||
|             return  # empty is OK — pair with validators.Optional() | ||||
|         from urllib.parse import urlparse | ||||
|  | ||||
|         parsed = urlparse(data) | ||||
|         if not parsed.scheme or not parsed.netloc: | ||||
|             raise ValidationError(self.message) | ||||
|  | ||||
| class ValidateStartsWithRegex(object): | ||||
|     def __init__(self, regex, *, flags=0, message=None, allow_empty=True, split_lines=True): | ||||
|         # compile with given flags (we’ll pass re.IGNORECASE below) | ||||
|         self.pattern = re.compile(regex, flags) if isinstance(regex, str) else regex | ||||
|         self.message = message | ||||
|         self.allow_empty = allow_empty | ||||
|         self.split_lines = split_lines | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         data = field.data | ||||
|         if not data: | ||||
|             return | ||||
|  | ||||
|         # normalize into list of lines | ||||
|         if isinstance(data, str) and self.split_lines: | ||||
|             lines = data.splitlines() | ||||
|         elif isinstance(data, (list, tuple)): | ||||
|             lines = data | ||||
|         else: | ||||
|             lines = [data] | ||||
|  | ||||
|         for line in lines: | ||||
|             stripped = line.strip() | ||||
|             if not stripped: | ||||
|                 if self.allow_empty: | ||||
|                     continue | ||||
|                 raise ValidationError(self.message or "Empty value not allowed.") | ||||
|             if not self.pattern.match(stripped): | ||||
|                 raise ValidationError(self.message or "Invalid value.") | ||||
|  | ||||
| class quickWatchForm(Form): | ||||
|     from . import processors | ||||
|  | ||||
| @@ -539,7 +729,6 @@ class quickWatchForm(Form): | ||||
|     edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|  | ||||
|  | ||||
| # Common to a single watch and the global settings | ||||
| class commonSettingsForm(Form): | ||||
|     from . import processors | ||||
| @@ -552,13 +741,21 @@ class commonSettingsForm(Form): | ||||
|  | ||||
|     fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()]) | ||||
|     notification_format = SelectField('Notification format', choices=valid_notification_formats.keys()) | ||||
|     notification_format = SelectField('Notification format', choices=list(valid_notification_formats.items())) | ||||
|     notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()]) | ||||
|     notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()]) | ||||
|     processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff") | ||||
|     timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()]) | ||||
|     scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()]) | ||||
|     webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")]) | ||||
|  | ||||
| # Not true anymore but keep the validate_ hook for future use, we convert color tags | ||||
| #    def validate_notification_urls(self, field): | ||||
| #        """Validate that HTML Color format is not used with Telegram""" | ||||
| #        if self.notification_format.data == 'HTML Color' and field.data: | ||||
| #            for url in field.data: | ||||
| #                if url and ('tgram://' in url or 'discord://' in url or 'discord.com/api/webhooks' in url): | ||||
| #                    raise ValidationError('HTML Color format is not supported by Telegram and Discord. Please choose another Notification Format (Plain Text, HTML, or Markdown to HTML).') | ||||
|  | ||||
|  | ||||
| class importForm(Form): | ||||
|     from . import processors | ||||
| @@ -583,11 +780,16 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     url = fields.URLField('URL', validators=[validateURL()]) | ||||
|     tags = StringTagUUID('Group tag', [validators.Optional()], default='') | ||||
|  | ||||
|     time_between_check = FormField(TimeBetweenCheckForm) | ||||
|     time_between_check = EnhancedFormField( | ||||
|         TimeBetweenCheckForm, | ||||
|         conditional_field='time_between_check_use_default', | ||||
|         conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT, | ||||
|         conditional_test_function=validate_time_between_check_has_values | ||||
|     ) | ||||
|  | ||||
|     time_schedule_limit = FormField(ScheduleLimitForm) | ||||
|  | ||||
|     time_between_check_use_default = BooleanField('Use global settings for time between check', default=False) | ||||
|     time_between_check_use_default = BooleanField('Use global settings for time between check and scheduler.', default=False) | ||||
|  | ||||
|     include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='') | ||||
|  | ||||
| @@ -605,6 +807,7 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False) | ||||
|     remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False) | ||||
|     sort_text_alphabetically =  BooleanField('Sort text alphabetically', default=False) | ||||
|     strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None) | ||||
|     trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False) | ||||
|  | ||||
|     filter_text_added = BooleanField('Added lines', default=True) | ||||
| @@ -640,7 +843,7 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|         if not super().validate(): | ||||
|             return False | ||||
|  | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|         result = True | ||||
|  | ||||
|         # Fail form validation when a body is set for a GET | ||||
| @@ -703,23 +906,36 @@ class processor_text_json_diff_form(commonSettingsForm): | ||||
|     ): | ||||
|         super().__init__(formdata, obj, prefix, data, meta, **kwargs) | ||||
|         if kwargs and kwargs.get('default_system_settings'): | ||||
|             default_tz = kwargs.get('default_system_settings').get('application', {}).get('timezone') | ||||
|             default_tz = kwargs.get('default_system_settings').get('application', {}).get('scheduler_timezone_default') | ||||
|             if default_tz: | ||||
|                 self.time_schedule_limit.form.timezone.render_kw['placeholder'] = default_tz | ||||
|  | ||||
|  | ||||
|  | ||||
| class SingleExtraProxy(Form): | ||||
|  | ||||
|     # maybe better to set some <script>var.. | ||||
|     proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"}) | ||||
|     proxy_url = StringField('Proxy URL', [validators.Optional()], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50}) | ||||
|     # @todo do the validation here instead | ||||
|     proxy_url = StringField('Proxy URL', [ | ||||
|         validators.Optional(), | ||||
|         ValidateStartsWithRegex( | ||||
|             regex=r'^(https?|socks5)://',  # ✅ main pattern | ||||
|             flags=re.IGNORECASE,  # ✅ makes it case-insensitive | ||||
|             message='Proxy URLs must start with http://, https:// or socks5://', | ||||
|         ), | ||||
|         ValidateSimpleURL() | ||||
|     ], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50}) | ||||
|  | ||||
| class SingleExtraBrowser(Form): | ||||
|     browser_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"}) | ||||
|     browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50}) | ||||
|     # @todo do the validation here instead | ||||
|     browser_connection_url = StringField('Browser connection URL', [ | ||||
|         validators.Optional(), | ||||
|         ValidateStartsWithRegex( | ||||
|             regex=r'^(wss?|ws)://', | ||||
|             flags=re.IGNORECASE, | ||||
|             message='Browser URLs must start with wss:// or ws://' | ||||
|         ), | ||||
|         ValidateSimpleURL() | ||||
|     ], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50}) | ||||
|  | ||||
| class DefaultUAInputForm(Form): | ||||
|     html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"}) | ||||
| @@ -728,9 +944,9 @@ class DefaultUAInputForm(Form): | ||||
|  | ||||
| # datastore.data['settings']['requests'].. | ||||
| class globalSettingsRequestForm(Form): | ||||
|     time_between_check = FormField(TimeBetweenCheckForm) | ||||
|     time_between_check = RequiredFormField(TimeBetweenCheckForm) | ||||
|     time_schedule_limit = FormField(ScheduleLimitForm) | ||||
|     proxy = RadioField('Proxy') | ||||
|     proxy = RadioField('Default proxy') | ||||
|     jitter_seconds = IntegerField('Random jitter seconds ± check', | ||||
|                                   render_kw={"style": "width: 5em;"}, | ||||
|                                   validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
| @@ -739,7 +955,12 @@ class globalSettingsRequestForm(Form): | ||||
|                           render_kw={"style": "width: 5em;"}, | ||||
|                           validators=[validators.NumberRange(min=1, max=50, | ||||
|                                                              message="Should be between 1 and 50")]) | ||||
|      | ||||
|  | ||||
|     timeout = IntegerField('Requests timeout in seconds', | ||||
|                            render_kw={"style": "width: 5em;"}, | ||||
|                            validators=[validators.NumberRange(min=1, max=999, | ||||
|                                                               message="Should be between 1 and 999")]) | ||||
|  | ||||
|     extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5) | ||||
|     extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5) | ||||
|  | ||||
| @@ -782,8 +1003,13 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|     removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) | ||||
|     shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()]) | ||||
|     strip_ignored_lines = BooleanField('Strip ignored lines') | ||||
|     rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True, | ||||
|                                       validators=[validators.Optional()]) | ||||
|  | ||||
|     rss_reader_mode = BooleanField('RSS reader mode ', default=False, | ||||
|                                       validators=[validators.Optional()]) | ||||
|  | ||||
|     filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification', | ||||
|                                                                   render_kw={"style": "width: 5em;"}, | ||||
|                                                                   validators=[validators.NumberRange(min=0, | ||||
|   | ||||
| @@ -1,5 +1,6 @@ | ||||
| from functools import lru_cache | ||||
|  | ||||
| from loguru import logger | ||||
| from lxml import etree | ||||
| from typing import List | ||||
| import html | ||||
| import json | ||||
| @@ -14,7 +15,6 @@ TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S) | ||||
| META_CS  = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I) | ||||
| META_CT  = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I) | ||||
|  | ||||
|  | ||||
| # 'price' , 'lowPrice', 'highPrice' are usually under here | ||||
| # All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here | ||||
| LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"] | ||||
| @@ -23,9 +23,9 @@ class JSONNotFound(ValueError): | ||||
|     def __init__(self, msg): | ||||
|         ValueError.__init__(self, msg) | ||||
|  | ||||
|  | ||||
| # Doesn't look like python supports forward slash auto enclosure in re.findall | ||||
| # So convert it to inline flag "(?i)foobar" type configuration | ||||
| @lru_cache(maxsize=100) | ||||
| def perl_style_slash_enclosed_regex_to_options(regex): | ||||
|  | ||||
|     res = re.search(PERL_STYLE_REGEX, regex, re.IGNORECASE) | ||||
| @@ -58,13 +58,17 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting | ||||
|  | ||||
|     return html_block | ||||
|  | ||||
| def subtractive_css_selector(css_selector, html_content): | ||||
| def subtractive_css_selector(css_selector, content): | ||||
|     from bs4 import BeautifulSoup | ||||
|     soup = BeautifulSoup(html_content, "html.parser") | ||||
|     soup = BeautifulSoup(content, "html.parser") | ||||
|  | ||||
|     # So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM | ||||
|     elements_to_remove = soup.select(css_selector) | ||||
|  | ||||
|     if not elements_to_remove: | ||||
|         # Better to return the original that rebuild with BeautifulSoup | ||||
|         return content | ||||
|  | ||||
|     # Then, remove them in a separate loop | ||||
|     for item in elements_to_remove: | ||||
|         item.decompose() | ||||
| @@ -72,6 +76,7 @@ def subtractive_css_selector(css_selector, html_content): | ||||
|     return str(soup) | ||||
|  | ||||
| def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str: | ||||
|     from lxml import etree | ||||
|     # Parse the HTML content using lxml | ||||
|     html_tree = etree.HTML(html_content) | ||||
|  | ||||
| @@ -83,6 +88,10 @@ def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str: | ||||
|         # Collect elements for each selector | ||||
|         elements_to_remove.extend(html_tree.xpath(selector)) | ||||
|  | ||||
|     # If no elements were found, return the original HTML content | ||||
|     if not elements_to_remove: | ||||
|         return html_content | ||||
|  | ||||
|     # Then, remove them in a separate loop | ||||
|     for element in elements_to_remove: | ||||
|         if element.getparent() is not None:  # Ensure the element has a parent before removing | ||||
| @@ -100,7 +109,7 @@ def element_removal(selectors: List[str], html_content): | ||||
|     xpath_selectors = [] | ||||
|  | ||||
|     for selector in selectors: | ||||
|         if selector.startswith(('xpath:', 'xpath1:', '//')): | ||||
|         if selector.strip().startswith(('xpath:', 'xpath1:', '//')): | ||||
|             # Handle XPath selectors separately | ||||
|             xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:') | ||||
|             xpath_selectors.append(xpath_selector) | ||||
| @@ -177,8 +186,21 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False | ||||
|     tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser) | ||||
|     html_block = "" | ||||
|  | ||||
|     r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser) | ||||
|     #@note: //title/text() wont work where <title>CDATA.. | ||||
|     # Build namespace map for XPath queries | ||||
|     namespaces = {'re': 'http://exslt.org/regular-expressions'} | ||||
|  | ||||
|     # Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML) | ||||
|     # XPath spec: unprefixed element names have no namespace, not the default namespace | ||||
|     # Solution: Register the default namespace with empty string prefix in elementpath | ||||
|     # This is primarily for RSS/Atom feeds but works for any XML with default namespace | ||||
|     if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap: | ||||
|         # Register the default namespace with empty string prefix for elementpath | ||||
|         # This allows //title to match elements in the default namespace | ||||
|         namespaces[''] = tree.nsmap[None] | ||||
|  | ||||
|     r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser) | ||||
|     #@note: //title/text() now works with default namespaces (fixed by registering '' prefix) | ||||
|     #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first) | ||||
|  | ||||
|     if type(r) != list: | ||||
|         r = [r] | ||||
| @@ -213,8 +235,19 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals | ||||
|     tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser) | ||||
|     html_block = "" | ||||
|  | ||||
|     r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}) | ||||
|     #@note: //title/text() wont work where <title>CDATA.. | ||||
|     # Build namespace map for XPath queries | ||||
|     namespaces = {'re': 'http://exslt.org/regular-expressions'} | ||||
|  | ||||
|     # NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace | ||||
|     # For documents with default namespace (RSS/Atom feeds), users must use: | ||||
|     #   - local-name(): //*[local-name()='title']/text() | ||||
|     #   - Or use xpath_filter (not xpath1_filter) which supports default namespaces | ||||
|     # XPath spec: unprefixed element names have no namespace, not the default namespace | ||||
|  | ||||
|     r = tree.xpath(xpath_filter.strip(), namespaces=namespaces) | ||||
|     #@note: xpath1 (lxml) does NOT automatically handle default namespaces | ||||
|     #@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support | ||||
|     #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first) | ||||
|  | ||||
|     for element in r: | ||||
|         # When there's more than 1 match, then add the suffix to separate each line | ||||
| @@ -295,70 +328,92 @@ def _get_stripped_text_from_json_match(match): | ||||
|  | ||||
|     return stripped_text_from_html | ||||
|  | ||||
def extract_json_blob_from_html(content, ensure_is_ldjson_info_type, json_filter):
    """
    Scan an HTML document for embedded JSON and return the first blob where json_filter matches.

    Searches every <script> tag (or only <script type="application/ld+json"> tags when
    ensure_is_ldjson_info_type is set) and, as a last resort, the whole <body>.

    Args:
        content: Raw HTML document text.
        ensure_is_ldjson_info_type: Optional ld+json "@type" value (e.g. 'product'); when set,
            a blob only wins if its @type matches (string or, non-standard, a list of types)
            AND the filter found something.
        json_filter: Filter expression passed through to _parse_json.

    Returns:
        The filtered text from the first matching JSON blob, or '' when the filter matched nothing.

    Raises:
        JSONNotFound: When the document contains no parsable JSON at all.
    """
    from bs4 import BeautifulSoup
    stripped_text_from_html = ''

    # Foreach <script json></script> blob.. just return the first that matches json_filter
    # As a last resort, try to parse the whole <body>
    soup = BeautifulSoup(content, 'html.parser')

    if ensure_is_ldjson_info_type:
        bs_result = soup.find_all('script', {"type": "application/ld+json"})
    else:
        bs_result = soup.find_all('script')
    bs_result += soup.find_all('body')

    bs_jsons = []

    for result in bs_result:
        # result.text is how bs4 magically strips JSON from the body
        content_start = result.text.lstrip("\ufeff").strip()[:100] if result.text else ''
        # Skip empty tags, and things that dont even look like JSON.
        # Checking content_start (not result.text) for emptiness also avoids an
        # IndexError on whitespace-only tags, which are truthy but strip down to ''.
        if not content_start or content_start[0] not in ('{', '['):
            continue
        try:
            json_data = json.loads(result.text)
            bs_jsons.append(json_data)
        except json.JSONDecodeError:
            # Skip objects which cannot be parsed
            continue

    if not bs_jsons:
        raise JSONNotFound("No parsable JSON found in this document")

    for json_data in bs_jsons:
        stripped_text_from_html = _parse_json(json_data, json_filter)

        if ensure_is_ldjson_info_type:
            # Could sometimes be list, string or something else random
            if isinstance(json_data, dict):
                # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
                # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
                # @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
                # LD_JSON auto-extract also requires some content PLUS the ldjson to be present
                # 1833 - could be either str or dict, should not be anything else

                t = json_data.get('@type')
                if t and stripped_text_from_html:

                    if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
                        break
                    # The non-standard part, some have a list
                    elif isinstance(t, list):
                        if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
                            break

        elif stripped_text_from_html:
            break

    return stripped_text_from_html
|  | ||||
| # content - json | ||||
| # json_filter - ie json:$..price | ||||
| # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector) | ||||
| def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None): | ||||
|     from bs4 import BeautifulSoup | ||||
|  | ||||
|     stripped_text_from_html = False | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w | ||||
|     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags | ||||
|     try: | ||||
|         # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work | ||||
|         stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter) | ||||
|     except json.JSONDecodeError as e: | ||||
|         logger.warning(str(e)) | ||||
|  | ||||
|         # Foreach <script json></script> blob.. just return the first that matches json_filter | ||||
|         # As a last resort, try to parse the whole <body> | ||||
|         soup = BeautifulSoup(content, 'html.parser') | ||||
|     # Looks like clean JSON, dont bother extracting from HTML | ||||
|  | ||||
|         if ensure_is_ldjson_info_type: | ||||
|             bs_result = soup.find_all('script', {"type": "application/ld+json"}) | ||||
|         else: | ||||
|             bs_result = soup.find_all('script') | ||||
|         bs_result += soup.find_all('body') | ||||
|     content_start = content.lstrip("\ufeff").strip()[:100] | ||||
|  | ||||
|         bs_jsons = [] | ||||
|         for result in bs_result: | ||||
|             # Skip empty tags, and things that dont even look like JSON | ||||
|             if not result.text or '{' not in result.text: | ||||
|                 continue | ||||
|             try: | ||||
|                 json_data = json.loads(result.text) | ||||
|                 bs_jsons.append(json_data) | ||||
|             except json.JSONDecodeError: | ||||
|                 # Skip objects which cannot be parsed | ||||
|                 continue | ||||
|  | ||||
|         if not bs_jsons: | ||||
|             raise JSONNotFound("No parsable JSON found in this document") | ||||
|          | ||||
|         for json_data in bs_jsons: | ||||
|             stripped_text_from_html = _parse_json(json_data, json_filter) | ||||
|  | ||||
|             if ensure_is_ldjson_info_type: | ||||
|                 # Could sometimes be list, string or something else random | ||||
|                 if isinstance(json_data, dict): | ||||
|                     # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search | ||||
|                     # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part) | ||||
|                     # @type could also be a list although non-standard ("@type": ["Product", "SubType"],) | ||||
|                     # LD_JSON auto-extract also requires some content PLUS the ldjson to be present | ||||
|                     # 1833 - could be either str or dict, should not be anything else | ||||
|  | ||||
|                     t = json_data.get('@type') | ||||
|                     if t and stripped_text_from_html: | ||||
|  | ||||
|                         if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower(): | ||||
|                             break | ||||
|                         # The non-standard part, some have a list | ||||
|                         elif isinstance(t, list): | ||||
|                             if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]: | ||||
|                                 break | ||||
|  | ||||
|             elif stripped_text_from_html: | ||||
|                 break | ||||
|     if content_start[0] == '{' or content_start[0] == '[': | ||||
|         try: | ||||
|             # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work | ||||
|             stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter) | ||||
|         except json.JSONDecodeError as e: | ||||
|             logger.warning(f"Error processing JSON {content[:20]}...{str(e)})") | ||||
|     else: | ||||
|         # Probably something else, go fish inside for it | ||||
|         try: | ||||
|             stripped_text_from_html = extract_json_blob_from_html(content=content, | ||||
|                                                                   ensure_is_ldjson_info_type=ensure_is_ldjson_info_type, | ||||
|                                                                   json_filter=json_filter                                                                  ) | ||||
|         except json.JSONDecodeError as e: | ||||
|             logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})") | ||||
|  | ||||
|     if not stripped_text_from_html: | ||||
|         # Re 265 - Just return an empty string when filter not found | ||||
| @@ -378,6 +433,9 @@ def strip_ignore_text(content, wordlist, mode="content"): | ||||
|     ignored_lines = [] | ||||
|  | ||||
|     for k in wordlist: | ||||
|         # Skip empty strings to avoid matching everything | ||||
|         if not k or not k.strip(): | ||||
|             continue | ||||
|         # Is it a regex? | ||||
|         res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE) | ||||
|         if res: | ||||
|   | ||||
							
								
								
									
										22
									
								
								changedetectionio/jinja2_custom/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								changedetectionio/jinja2_custom/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,22 @@ | ||||
| """ | ||||
| Jinja2 custom extensions and safe rendering utilities. | ||||
| """ | ||||
| from .extensions.TimeExtension import TimeExtension | ||||
| from .safe_jinja import ( | ||||
|     render, | ||||
|     render_fully_escaped, | ||||
|     create_jinja_env, | ||||
|     JINJA2_MAX_RETURN_PAYLOAD_SIZE, | ||||
|     DEFAULT_JINJA2_EXTENSIONS, | ||||
| ) | ||||
| from .plugins.regex import regex_replace | ||||
|  | ||||
| __all__ = [ | ||||
|     'TimeExtension', | ||||
|     'render', | ||||
|     'render_fully_escaped', | ||||
|     'create_jinja_env', | ||||
|     'JINJA2_MAX_RETURN_PAYLOAD_SIZE', | ||||
|     'DEFAULT_JINJA2_EXTENSIONS', | ||||
|     'regex_replace', | ||||
| ] | ||||
							
								
								
									
										221
									
								
								changedetectionio/jinja2_custom/extensions/TimeExtension.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										221
									
								
								changedetectionio/jinja2_custom/extensions/TimeExtension.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,221 @@ | ||||
| """ | ||||
| Jinja2 TimeExtension - Custom date/time handling for templates. | ||||
|  | ||||
| This extension provides the {% now %} tag for Jinja2 templates, offering timezone-aware | ||||
| date/time formatting with support for time offsets. | ||||
|  | ||||
| Why This Extension Exists: | ||||
|     The Arrow library has a now() function (arrow.now()), but Jinja2 templates cannot | ||||
|     directly call Python functions - they need extensions or filters to expose functionality. | ||||
|  | ||||
|     This TimeExtension serves as a Jinja2-to-Arrow bridge that: | ||||
|  | ||||
|     1. Makes Arrow accessible in templates - Jinja2 requires registering functions/tags | ||||
|        through extensions. You cannot use arrow.now() directly in a template. | ||||
|  | ||||
|     2. Provides template-friendly syntax - Instead of complex Python code, you get clean tags: | ||||
|        {% now 'UTC' %} | ||||
|        {% now 'UTC' + 'hours=2' %} | ||||
|        {% now 'Europe/London', '%Y-%m-%d' %} | ||||
|  | ||||
|     3. Adds convenience features on top of Arrow: | ||||
|        - Default timezone from environment variable (TZ) or config | ||||
|        - Default datetime format configuration | ||||
|        - Offset syntax parsing: 'hours=2,minutes=30' → shift(hours=2, minutes=30) | ||||
|        - Empty string timezone support to use configured defaults | ||||
|  | ||||
|     4. Maintains security - Works within Jinja2's sandboxed environment so users | ||||
|        cannot access arbitrary Python code or objects. | ||||
|  | ||||
|     Essentially, this is a Jinja2 wrapper around arrow.now() and arrow.shift() that | ||||
|     provides user-friendly template syntax while maintaining security. | ||||
|  | ||||
| Basic Usage: | ||||
|     {% now 'UTC' %} | ||||
|     # Output: Wed, 09 Dec 2015 23:33:01 | ||||
|  | ||||
| Custom Format: | ||||
|     {% now 'UTC', '%Y-%m-%d %H:%M:%S' %} | ||||
|     # Output: 2015-12-09 23:33:01 | ||||
|  | ||||
| Timezone Support: | ||||
|     {% now 'America/New_York' %} | ||||
|     {% now 'Europe/London' %} | ||||
|     {% now '' %}  # Uses default timezone from environment.default_timezone | ||||
|  | ||||
| Time Offsets (Addition): | ||||
|     {% now 'UTC' + 'hours=2' %} | ||||
|     {% now 'UTC' + 'hours=2,minutes=30' %} | ||||
|     {% now 'UTC' + 'days=1,hours=2,minutes=15,seconds=10' %} | ||||
|  | ||||
| Time Offsets (Subtraction): | ||||
|     {% now 'UTC' - 'minutes=11' %} | ||||
|     {% now 'UTC' - 'days=2,minutes=33,seconds=1' %} | ||||
|  | ||||
| Time Offsets with Custom Format: | ||||
|     {% now 'UTC' + 'hours=2', '%Y-%m-%d %H:%M:%S' %} | ||||
|     # Output: 2015-12-10 01:33:01 | ||||
|  | ||||
| Weekday Support (for finding next/previous weekday): | ||||
|     {% now 'UTC' + 'weekday=0' %}  # Next Monday (0=Monday, 6=Sunday) | ||||
|     {% now 'UTC' + 'weekday=4' %}  # Next Friday | ||||
|  | ||||
| Configuration: | ||||
|     - Default timezone: Set via TZ environment variable or override environment.default_timezone | ||||
|     - Default format: '%a, %d %b %Y %H:%M:%S' (can be overridden via environment.datetime_format) | ||||
|  | ||||
| Environment Customization: | ||||
|     from changedetectionio.jinja2_custom import create_jinja_env | ||||
|  | ||||
|     jinja2_env = create_jinja_env() | ||||
|     jinja2_env.default_timezone = 'America/New_York'  # Override default timezone | ||||
|     jinja2_env.datetime_format = '%Y-%m-%d %H:%M'      # Override default format | ||||
|  | ||||
| Supported Offset Parameters: | ||||
|     - years, months, weeks, days | ||||
|     - hours, minutes, seconds, microseconds | ||||
|     - weekday (0=Monday through 6=Sunday, must be integer) | ||||
|  | ||||
| Note: | ||||
|     This extension uses the Arrow library for timezone-aware datetime handling. | ||||
|     All timezone names should be valid IANA timezone identifiers (e.g., 'America/New_York'). | ||||
| """ | ||||
| import arrow | ||||
|  | ||||
| from jinja2 import nodes | ||||
| from jinja2.ext import Extension | ||||
| import os | ||||
|  | ||||
class TimeExtension(Extension):
    """
    Jinja2 extension implementing the {% now %} tag for timezone-aware date/time output.

    On construction it extends the environment with two overridable attributes:
    - datetime_format: default strftime format ('%a, %d %b %Y %H:%M:%S')
    - default_timezone: default timezone (TZ environment variable, falling back to 'UTC')

    Supported template syntax:
        {% now 'UTC' %}
        {% now 'UTC', '%Y-%m-%d' %}
        {% now 'UTC' + 'hours=2,minutes=30' %}
        {% now 'UTC' - 'days=1', '%Y-%m-%d %H:%M:%S' %}
    """

    tags = {'now'}

    def __init__(self, environment):
        """Register the default datetime format and timezone on the environment."""
        super().__init__(environment)

        environment.extend(
            datetime_format='%a, %d %b %Y %H:%M:%S',
            default_timezone=os.getenv('TZ', 'UTC').strip()
        )

    def _datetime(self, timezone, operator, offset, datetime_format):
        """
        Return the current datetime with a time offset applied.

        Args:
            timezone: IANA timezone name, or '' / falsy to use environment.default_timezone
            operator: '+' to shift forwards, '-' to shift backwards
            offset: comma-separated shift parameters, e.g. 'hours=2,minutes=30'
            datetime_format: strftime format, or None for environment.datetime_format

        Returns:
            The shifted datetime rendered via strftime.
        """
        # Fall back to the configured default timezone when none was given
        tz = timezone if timezone else self.environment.default_timezone

        moment = arrow.now(tz)

        # Turn 'hours=2,minutes=30' plus the operator sign into arrow.shift() kwargs
        shift_kwargs = {}
        for piece in offset.split(','):
            key, amount = piece.split('=')
            shift_kwargs[key.strip()] = float(operator + amount.strip())

        # arrow requires weekday to be an int, not a float
        if 'weekday' in shift_kwargs:
            shift_kwargs['weekday'] = int(shift_kwargs['weekday'])

        moment = moment.shift(**shift_kwargs)

        fmt = self.environment.datetime_format if datetime_format is None else datetime_format
        return moment.strftime(fmt)

    def _now(self, timezone, datetime_format):
        """
        Return the current datetime without any offset.

        Args:
            timezone: IANA timezone name, or '' / falsy to use environment.default_timezone
            datetime_format: strftime format, or None for environment.datetime_format

        Returns:
            The current datetime rendered via strftime.
        """
        tz = timezone if timezone else self.environment.default_timezone
        fmt = self.environment.datetime_format if datetime_format is None else datetime_format
        return arrow.now(tz).strftime(fmt)

    def parse(self, parser):
        """
        Parse a {% now %} tag into an AST output node.

        Dispatches to _datetime() when the expression contains a '+' or '-' offset
        (nodes.Add / nodes.Sub), otherwise to _now().

        Args:
            parser: the Jinja2 parser instance

        Returns:
            nodes.Output wrapping the formatted datetime string.
        """
        lineno = next(parser.stream).lineno

        expr = parser.parse_expression()

        # An optional trailing ', format' expression overrides the default format
        if parser.stream.skip_if('comma'):
            fmt_node = parser.parse_expression()
        else:
            fmt_node = nodes.Const(None)

        if isinstance(expr, nodes.Add):
            call = self.call_method(
                '_datetime',
                [expr.left, nodes.Const('+'), expr.right, fmt_node],
                lineno=lineno,
            )
        elif isinstance(expr, nodes.Sub):
            call = self.call_method(
                '_datetime',
                [expr.left, nodes.Const('-'), expr.right, fmt_node],
                lineno=lineno,
            )
        else:
            call = self.call_method(
                '_now',
                [expr, fmt_node],
                lineno=lineno,
            )
        return nodes.Output([call], lineno=lineno)
							
								
								
									
										6
									
								
								changedetectionio/jinja2_custom/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								changedetectionio/jinja2_custom/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,6 @@ | ||||
| """ | ||||
| Jinja2 custom filter plugins for changedetection.io | ||||
| """ | ||||
| from .regex import regex_replace | ||||
|  | ||||
| __all__ = ['regex_replace'] | ||||
							
								
								
									
										98
									
								
								changedetectionio/jinja2_custom/plugins/regex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								changedetectionio/jinja2_custom/plugins/regex.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| """ | ||||
| Regex filter plugin for Jinja2 templates. | ||||
|  | ||||
| Provides regex_replace filter for pattern-based string replacements in templates. | ||||
| """ | ||||
| import re | ||||
| import signal | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
def regex_replace(value: str, pattern: str, replacement: str = '', count: int = 0) -> str:
    """
    Replace occurrences of a regex pattern in a string.

    Security: Protected against ReDoS (Regular Expression Denial of Service) attacks:
    - Limits input value size to prevent excessive processing
    - Uses timeout mechanism to prevent runaway regex operations (main thread only)
    - Validates pattern complexity to prevent catastrophic backtracking

    Args:
        value: The input string to perform replacements on
        pattern: The regex pattern to search for
        replacement: The replacement string (default: '')
        count: Maximum number of replacements (0 = replace all, default: 0)

    Returns:
        String with replacements applied, or original value on error

    Example:
        {{ "hello world" | regex_replace("world", "universe") }}
        {{ diff | regex_replace("<td>([^<]+)</td><td>([^<]+)</td>", "Label1: \\1\\nLabel2: \\2") }}

    Security limits:
        - Maximum input size: 10MB
        - Maximum pattern length: 500 characters
        - Operation timeout: 10 seconds (only enforceable on Unix main thread)
        - Dangerous nested quantifier patterns are rejected
    """
    import threading

    # Security limits
    MAX_INPUT_SIZE = 1024 * 1024 * 10  # 10MB max input size
    MAX_PATTERN_LENGTH = 500  # Maximum regex pattern length
    REGEX_TIMEOUT_SECONDS = 10  # Maximum time for regex operation

    # Validate input sizes
    value_str = str(value)
    if len(value_str) > MAX_INPUT_SIZE:
        logger.warning(f"regex_replace: Input too large ({len(value_str)} bytes), truncating")
        value_str = value_str[:MAX_INPUT_SIZE]

    if len(pattern) > MAX_PATTERN_LENGTH:
        logger.warning(f"regex_replace: Pattern too long ({len(pattern)} chars), rejecting")
        return value_str

    # Check for potentially dangerous patterns (basic checks)
    # Nested quantifiers like (a+)+ can cause catastrophic backtracking
    dangerous_patterns = [
        r'\([^)]*\+[^)]*\)\+',  # (x+)+
        r'\([^)]*\*[^)]*\)\+',  # (x*)+
        r'\([^)]*\+[^)]*\)\*',  # (x+)*
        r'\([^)]*\*[^)]*\)\*',  # (x*)*
    ]

    for dangerous in dangerous_patterns:
        if re.search(dangerous, pattern):
            logger.warning(f"regex_replace: Potentially dangerous pattern detected: {pattern}")
            return value_str

    def timeout_handler(signum, frame):
        raise TimeoutError("Regex operation timed out")

    # signal.signal() may only be called from the main thread of the main interpreter;
    # calling it from a worker thread raises ValueError. Previously that ValueError was
    # swallowed by the broad `except Exception` below and the function silently returned
    # the ORIGINAL string with no replacement performed. Only arm the alarm when it is
    # actually usable; otherwise run the substitution without a timeout.
    use_alarm = hasattr(signal, 'SIGALRM') and threading.current_thread() is threading.main_thread()

    try:
        # Set up timeout for regex operation (Unix-like systems, main thread only)
        # This prevents ReDoS attacks
        old_handler = None
        if use_alarm:
            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
            signal.alarm(REGEX_TIMEOUT_SECONDS)

        try:
            result = re.sub(pattern, replacement, value_str, count=count)
        finally:
            # Cancel the alarm and restore the previous handler
            if use_alarm:
                signal.alarm(0)
                if old_handler is not None:
                    signal.signal(signal.SIGALRM, old_handler)

        return result

    except TimeoutError:
        logger.error(f"regex_replace: Regex operation timed out - possible ReDoS attack. Pattern: {pattern}")
        return value_str
    except re.error as e:
        logger.warning(f"regex_replace: Invalid regex pattern: {e}")
        return value_str
    except Exception as e:
        logger.error(f"regex_replace: Unexpected error: {e}")
        return value_str
							
								
								
									
										58
									
								
								changedetectionio/jinja2_custom/safe_jinja.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								changedetectionio/jinja2_custom/safe_jinja.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,58 @@ | ||||
| """ | ||||
| Safe Jinja2 render with max payload sizes | ||||
|  | ||||
| See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations | ||||
| """ | ||||
|  | ||||
| import jinja2.sandbox | ||||
| import typing as t | ||||
| import os | ||||
| from .extensions.TimeExtension import TimeExtension | ||||
| from .plugins import regex_replace | ||||
|  | ||||
| JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10)) | ||||
|  | ||||
| # Default extensions - can be overridden in create_jinja_env() | ||||
| DEFAULT_JINJA2_EXTENSIONS = [TimeExtension] | ||||
|  | ||||
def create_jinja_env(extensions=None, **kwargs) -> jinja2.sandbox.ImmutableSandboxedEnvironment:
    """
    Build a sandboxed Jinja2 environment with the project's custom extensions,
    default timezone and custom filters installed.

    Args:
        extensions: Extension classes to load; None selects DEFAULT_JINJA2_EXTENSIONS.
        **kwargs: Forwarded verbatim to ImmutableSandboxedEnvironment.

    Returns:
        A configured ImmutableSandboxedEnvironment instance.
    """
    chosen_extensions = DEFAULT_JINJA2_EXTENSIONS if extensions is None else extensions

    env = jinja2.sandbox.ImmutableSandboxedEnvironment(
        extensions=chosen_extensions,
        **kwargs
    )

    # Default timezone comes from the TZ environment variable (falls back to UTC)
    env.default_timezone = os.getenv('TZ', 'UTC').strip()

    # Expose custom filters to templates
    env.filters['regex_replace'] = regex_replace

    return env
|  | ||||
|  | ||||
# This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available.
# (Which also limits available functions that could be called)
def render(template_str, **args: t.Any) -> str:
    """Render `template_str` in the sandboxed environment, capping output size."""
    rendered = create_jinja_env().from_string(template_str).render(args)
    return rendered[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|  | ||||
def render_fully_escaped(content):
    """Escape every HTML special character in `content` via the sandboxed renderer."""
    sandbox_env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
    return sandbox_env.from_string("{{ some_html|e }}").render(some_html=content)
|  | ||||
| @@ -55,10 +55,12 @@ class model(dict): | ||||
|                     'rss_access_token': None, | ||||
|                     'rss_content_format': RSS_FORMAT_TYPES[0][0], | ||||
|                     'rss_hide_muted_watches': True, | ||||
|                     'rss_reader_mode': False, | ||||
|                     'scheduler_timezone_default': None,  # Default IANA timezone name | ||||
|                     'schema_version' : 0, | ||||
|                     'shared_diff_access': False, | ||||
|                     'strip_ignored_lines': False, | ||||
|                     'tags': {}, #@todo use Tag.model initialisers | ||||
|                     'timezone': None, # Default IANA timezone name | ||||
|                     'webdriver_delay': None , # Extra delay in seconds before extracting text | ||||
|                     'ui': { | ||||
|                         'use_page_title_in_list': True, | ||||
|   | ||||
| @@ -1,42 +1,24 @@ | ||||
| from blinker import signal | ||||
| from changedetectionio.validate_url import is_safe_valid_url | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from changedetectionio.safe_jinja import render as jinja_render | ||||
| from changedetectionio.jinja2_custom import render as jinja_render | ||||
| from . import watch_base | ||||
| import os | ||||
| import re | ||||
| from pathlib import Path | ||||
| from loguru import logger | ||||
|  | ||||
| from .. import safe_jinja | ||||
| from .. import jinja2_custom as safe_jinja | ||||
| from ..diff import ADDED_PLACEMARKER_OPEN | ||||
| from ..html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
|  | ||||
# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
FAVICON_RESAVE_THRESHOLD_SECONDS=86400


minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}


def is_safe_url(test_url):
    """Return True when the URL's scheme is on the allow-list (see issue #1358).

    The 'source:' prefix is a legitimate marker that tells us to return the
    page source, so every occurrence is removed (case-insensitively) before
    the scheme check - otherwise 'source:javascript:...' would slip through.
    The allow-list pattern can be overridden with the SAFE_PROTOCOL_REGEX
    environment variable.
    """
    scheme_check = re.sub(re.escape('source:'), '', test_url, flags=re.IGNORECASE)
    allowed = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE)
    return bool(allowed.match(scheme_check.strip()))
|  | ||||
|  | ||||
| class model(watch_base): | ||||
|     __newest_history_key = None | ||||
|     __history_n = 0 | ||||
| @@ -79,7 +61,7 @@ class model(watch_base): | ||||
|     def link(self): | ||||
|  | ||||
|         url = self.get('url', '') | ||||
|         if not is_safe_url(url): | ||||
|         if not is_safe_valid_url(url): | ||||
|             return 'DISABLED' | ||||
|  | ||||
|         ready_url = url | ||||
| @@ -89,9 +71,8 @@ class model(watch_base): | ||||
|                 ready_url = jinja_render(template_str=url) | ||||
|             except Exception as e: | ||||
|                 logger.critical(f"Invalid URL template for: '{url}' - {str(e)}") | ||||
|                 from flask import ( | ||||
|                     flash, Markup, url_for | ||||
|                 ) | ||||
|                 from flask import flash, url_for | ||||
|                 from markupsafe import Markup | ||||
|                 message = Markup('<a href="{}#general">The URL {} is invalid and cannot be used, click to edit</a>'.format( | ||||
|                     url_for('ui.ui_edit.edit_page', uuid=self.get('uuid')), self.get('url', ''))) | ||||
|                 flash(message, 'error') | ||||
| @@ -101,7 +82,7 @@ class model(watch_base): | ||||
|             ready_url=ready_url.replace('source:', '') | ||||
|  | ||||
|         # Also double check it after any Jinja2 formatting just incase | ||||
|         if not is_safe_url(ready_url): | ||||
|         if not is_safe_valid_url(ready_url): | ||||
|             return 'DISABLED' | ||||
|         return ready_url | ||||
|  | ||||
| @@ -170,7 +151,7 @@ class model(watch_base): | ||||
|     @property | ||||
|     def label(self): | ||||
|         # Used for sorting, display, etc | ||||
|         return self.get('title') or self.get('page_title') or self.get('url') | ||||
|         return self.get('title') or self.get('page_title') or self.link | ||||
|  | ||||
|     @property | ||||
|     def last_changed(self): | ||||
|   | ||||
| @@ -2,7 +2,7 @@ import os | ||||
| import uuid | ||||
|  | ||||
| from changedetectionio import strtobool | ||||
| default_notification_format_for_watch = 'System default' | ||||
| USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default' | ||||
| CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL' | ||||
|  | ||||
| class watch_base(dict): | ||||
| @@ -44,7 +44,7 @@ class watch_base(dict): | ||||
|             'method': 'GET', | ||||
|             'notification_alert_count': 0, | ||||
|             'notification_body': None, | ||||
|             'notification_format': default_notification_format_for_watch, | ||||
|             'notification_format': USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, | ||||
|             'notification_muted': False, | ||||
|             'notification_screenshot': False,  # Include the latest screenshot if available and supported by the apprise URL | ||||
|             'notification_title': None, | ||||
| @@ -58,6 +58,7 @@ class watch_base(dict): | ||||
|             'proxy': None,  # Preferred proxy connection | ||||
|             'remote_server_reply': None,  # From 'server' reply header | ||||
|             'sort_text_alphabetically': False, | ||||
|             'strip_ignored_lines': None, | ||||
|             'subtractive_selectors': [], | ||||
|             'tag': '',  # Old system of text name for a tag, to be removed | ||||
|             'tags': [],  # list of UUIDs to App.Tags | ||||
|   | ||||
| @@ -1,35 +1,16 @@ | ||||
| from changedetectionio.model import default_notification_format_for_watch | ||||
| from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|  | ||||
| ult_notification_format_for_watch = 'System default' | ||||
| default_notification_format = 'HTML Color' | ||||
| default_notification_format = 'htmlcolor' | ||||
| default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n' | ||||
| default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}' | ||||
|  | ||||
| # The values (markdown etc) are from apprise NotifyFormat, | ||||
| # But to avoid importing the whole heavy module just use the same strings here. | ||||
| valid_notification_formats = { | ||||
|     'Text': 'text', | ||||
|     'Markdown': 'markdown', | ||||
|     'HTML': 'html', | ||||
|     'HTML Color': 'htmlcolor', | ||||
|     'text': 'Plain Text', | ||||
|     'html': 'HTML', | ||||
|     'htmlcolor': 'HTML Color', | ||||
|     'markdown': 'Markdown to HTML', | ||||
|     # Used only for editing a watch (not for global) | ||||
|     default_notification_format_for_watch: default_notification_format_for_watch | ||||
| } | ||||
|  | ||||
|  | ||||
| valid_tokens = { | ||||
|     'base_url': '', | ||||
|     'current_snapshot': '', | ||||
|     'diff': '', | ||||
|     'diff_added': '', | ||||
|     'diff_full': '', | ||||
|     'diff_patch': '', | ||||
|     'diff_removed': '', | ||||
|     'diff_url': '', | ||||
|     'preview_url': '', | ||||
|     'triggered_text': '', | ||||
|     'watch_tag': '', | ||||
|     'watch_title': '', | ||||
|     'watch_url': '', | ||||
|     'watch_uuid': '', | ||||
|     USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
| } | ||||
|   | ||||
| @@ -1,10 +1,61 @@ | ||||
| """ | ||||
| Custom Apprise HTTP Handlers with format= Parameter Support | ||||
|  | ||||
| IMPORTANT: This module works around a limitation in Apprise's @notify decorator. | ||||
|  | ||||
| THE PROBLEM: | ||||
| ------------- | ||||
| When using Apprise's @notify decorator to create custom notification handlers, the | ||||
| decorator creates a CustomNotifyPlugin that uses parse_url(..., simple=True) to parse | ||||
| URLs. This simple parsing mode does NOT extract the format= query parameter from the URL | ||||
| and set it as a top-level parameter that NotifyBase.__init__ can use to set notify_format. | ||||
|  | ||||
| As a result: | ||||
| 1. URL: post://example.com/webhook?format=html | ||||
| 2. Apprise parses this and sees format=html in qsd (query string dictionary) | ||||
| 3. But it does NOT extract it and pass it to NotifyBase.__init__ | ||||
| 4. NotifyBase defaults to notify_format=TEXT | ||||
| 5. When you call apobj.notify(body="<html>...", body_format="html"): | ||||
|    - Apprise sees: input format = html, output format (notify_format) = text | ||||
|    - Apprise calls convert_between("html", "text", body) | ||||
|    - This strips all HTML tags, leaving only plain text | ||||
| 6. Your custom handler receives stripped plain text instead of HTML | ||||
|  | ||||
| THE SOLUTION: | ||||
| ------------- | ||||
| Instead of using the @notify decorator directly, we: | ||||
| 1. Manually register custom plugins using plugins.N_MGR.add() | ||||
| 2. Create a CustomHTTPHandler class that extends CustomNotifyPlugin | ||||
| 3. Override __init__ to extract format= from qsd and set it as kwargs['format'] | ||||
| 4. Call NotifyBase.__init__ which properly sets notify_format from kwargs['format'] | ||||
| 5. Set up _default_args like CustomNotifyPlugin does for compatibility | ||||
|  | ||||
| This ensures that when format=html is in the URL: | ||||
| - notify_format is set to HTML | ||||
| - Apprise sees: input format = html, output format = html | ||||
| - No conversion happens (convert_between returns content unchanged) | ||||
| - Your custom handler receives the original HTML intact | ||||
|  | ||||
| TESTING: | ||||
| -------- | ||||
| To verify this works: | ||||
| >>> apobj = apprise.Apprise() | ||||
| >>> apobj.add('post://localhost:5005/test?format=html') | ||||
| >>> for server in apobj: | ||||
| ...     print(server.notify_format)  # Should print: html (not text) | ||||
| >>> apobj.notify(body='<span>Test</span>', body_format='html') | ||||
| # Your handler should receive '<span>Test</span>' not 'Test' | ||||
| """ | ||||
|  | ||||
| import json | ||||
| import re | ||||
| from urllib.parse import unquote_plus | ||||
|  | ||||
| import requests | ||||
| from apprise.decorators import notify | ||||
| from apprise.utils.parse import parse_url as apprise_parse_url | ||||
| from apprise import plugins | ||||
| from apprise.decorators.base import CustomNotifyPlugin | ||||
| from apprise.utils.parse import parse_url as apprise_parse_url, url_assembly | ||||
| from apprise.utils.logic import dict_full_update | ||||
| from loguru import logger | ||||
| from requests.structures import CaseInsensitiveDict | ||||
|  | ||||
| @@ -12,13 +63,66 @@ SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"} | ||||
|  | ||||
|  | ||||
| def notify_supported_methods(func): | ||||
|     """Register custom HTTP method handlers that properly support format= parameter.""" | ||||
|     for method in SUPPORTED_HTTP_METHODS: | ||||
|         func = notify(on=method)(func) | ||||
|         # Add support for https, for each supported http method | ||||
|         func = notify(on=f"{method}s")(func) | ||||
|         _register_http_handler(method, func) | ||||
|         _register_http_handler(f"{method}s", func) | ||||
|     return func | ||||
|  | ||||
|  | ||||
def _register_http_handler(schema, send_func):
    """Register a custom HTTP handler that extracts format= from URL query parameters.

    Args:
        schema: URL scheme to register (e.g. "post" or "posts" for HTTPS).
        send_func: Callable performing the actual HTTP request; invoked by
            CustomHTTPHandler.send() as (body, title, notify_type, *args,
            meta=..., **kwargs).
    """

    # Parse base URL
    # verify_host=False because "schema://" has no host yet - the real host
    # comes later from the user's configured URL; simple=True yields a plain dict.
    base_url = f"{schema}://"
    base_args = apprise_parse_url(base_url, default_schema=schema, verify_host=False, simple=True)

    class CustomHTTPHandler(CustomNotifyPlugin):
        # Plugin metadata consumed by Apprise's plugin registry
        secure_protocol = schema
        service_name = f"Custom HTTP - {schema.upper()}"
        _base_args = base_args

        def __init__(self, **kwargs):
            # Extract format from qsd and set it as a top-level kwarg
            # This allows NotifyBase.__init__ to properly set notify_format
            # (CustomNotifyPlugin.__init__ would not honour it - see module docstring)
            if 'qsd' in kwargs and 'format' in kwargs['qsd']:
                kwargs['format'] = kwargs['qsd']['format']

            # Call NotifyBase.__init__ (skip CustomNotifyPlugin.__init__)
            super(CustomNotifyPlugin, self).__init__(**kwargs)

            # Set up _default_args like CustomNotifyPlugin does, so the rest of
            # the custom-plugin machinery keeps working unchanged
            self._default_args = {}
            kwargs.pop("secure", None)  # 'secure' is implied by the schema, don't store it
            dict_full_update(self._default_args, self._base_args)
            dict_full_update(self._default_args, kwargs)
            # Re-assemble the effective URL from the merged arguments
            self._default_args["url"] = url_assembly(**self._default_args)

        # Name-mangled to _CustomHTTPHandler__send; staticmethod so the plain
        # function is not re-bound as an instance method when accessed via self.
        __send = staticmethod(send_func)

        def send(self, body, title="", notify_type="info", *args, **kwargs):
            """Call the custom send function."""
            try:
                result = self.__send(
                    body, title, notify_type,
                    *args,
                    meta=self._default_args,
                    **kwargs
                )
                # A handler returning None is treated as success
                return True if result is None else bool(result)
            except Exception as e:
                self.logger.warning(f"Exception in custom HTTP handler: {e}")
                return False

    # Register the plugin
    plugins.N_MGR.add(
        plugin=CustomHTTPHandler,
        schemas=schema,
        send_func=send_func,
        url=base_url,
    )
|  | ||||
|  | ||||
| def _get_auth(parsed_url: dict) -> str | tuple[str, str]: | ||||
|     user: str | None = parsed_url.get("user") | ||||
|     password: str | None = parsed_url.get("password") | ||||
| @@ -70,9 +174,12 @@ def apprise_http_custom_handler( | ||||
|     title: str, | ||||
|     notify_type: str, | ||||
|     meta: dict, | ||||
|     body_format: str = None, | ||||
|     *args, | ||||
|     **kwargs, | ||||
| ) -> bool: | ||||
|  | ||||
|  | ||||
|     url: str = meta.get("url") | ||||
|     schema: str = meta.get("schema") | ||||
|     method: str = re.sub(r"s$", "", schema).upper() | ||||
| @@ -88,25 +195,16 @@ def apprise_http_custom_handler( | ||||
|  | ||||
|     url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url")) | ||||
|  | ||||
|     try: | ||||
|         response = requests.request( | ||||
|             method=method, | ||||
|             url=url, | ||||
|             auth=auth, | ||||
|             headers=headers, | ||||
|             params=params, | ||||
|             data=body.encode("utf-8") if isinstance(body, str) else body, | ||||
|         ) | ||||
|     response = requests.request( | ||||
|         method=method, | ||||
|         url=url, | ||||
|         auth=auth, | ||||
|         headers=headers, | ||||
|         params=params, | ||||
|         data=body.encode("utf-8") if isinstance(body, str) else body, | ||||
|     ) | ||||
|  | ||||
|         response.raise_for_status() | ||||
|     response.raise_for_status() | ||||
|  | ||||
|         logger.info(f"Successfully sent custom notification to {url}") | ||||
|         return True | ||||
|  | ||||
|     except requests.RequestException as e: | ||||
|         logger.error(f"Remote host error while sending custom notification to {url}: {e}") | ||||
|         return False | ||||
|  | ||||
|     except Exception as e: | ||||
|         logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}") | ||||
|         return False | ||||
|     logger.info(f"Successfully sent custom notification to {url}") | ||||
|     return True | ||||
|   | ||||
							
								
								
									
										286
									
								
								changedetectionio/notification/apprise_plugin/discord.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								changedetectionio/notification/apprise_plugin/discord.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,286 @@ | ||||
| """ | ||||
| Custom Discord plugin for changedetection.io | ||||
| Extends Apprise's Discord plugin to support custom colored embeds for removed/added content | ||||
| """ | ||||
| from apprise.plugins.discord import NotifyDiscord | ||||
| from apprise.decorators import notify | ||||
| from apprise.common import NotifyFormat | ||||
| from loguru import logger | ||||
|  | ||||
| # Import placeholders from changedetection's diff module | ||||
| from ...diff import ( | ||||
|     REMOVED_PLACEMARKER_OPEN, | ||||
|     REMOVED_PLACEMARKER_CLOSED, | ||||
|     ADDED_PLACEMARKER_OPEN, | ||||
|     ADDED_PLACEMARKER_CLOSED, | ||||
|     CHANGED_PLACEMARKER_OPEN, | ||||
|     CHANGED_PLACEMARKER_CLOSED, | ||||
|     CHANGED_INTO_PLACEMARKER_OPEN, | ||||
|     CHANGED_INTO_PLACEMARKER_CLOSED, | ||||
| ) | ||||
|  | ||||
# Discord embed sidebar colors for different change types
# (decimal RGB integers, as expected by the Discord embed API)
DISCORD_COLOR_UNCHANGED = 8421504   # Gray (#808080)
DISCORD_COLOR_REMOVED = 16711680    # Red (#FF0000)
DISCORD_COLOR_ADDED = 65280         # Green (#00FF00)
DISCORD_COLOR_CHANGED = 16753920    # Orange (#FFA500)
DISCORD_COLOR_CHANGED_INTO = 3447003  # Blue (#3498DB)
DISCORD_COLOR_WARNING = 16776960    # Yellow (#FFFF00)
|  | ||||
|  | ||||
class NotifyDiscordCustom(NotifyDiscord):
    """
    Custom Discord notification handler that supports multiple colored embeds
    for showing removed (red) and added (green) content separately.

    Falls back to the stock NotifyDiscord behaviour when the body carries no
    diff placeholders or the notify format is plain text.
    """

    # Maps chunk types produced by _parse_body_into_chunks to embed sidebar colors
    _CHUNK_COLORS = {
        "unchanged": DISCORD_COLOR_UNCHANGED,
        "removed": DISCORD_COLOR_REMOVED,
        "added": DISCORD_COLOR_ADDED,
        "changed": DISCORD_COLOR_CHANGED,
        "changed_into": DISCORD_COLOR_CHANGED_INTO,
    }

    # (open marker, close marker, chunk type) triples used while scanning the body
    _MARKER_PAIRS = (
        (REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, "removed"),
        (ADDED_PLACEMARKER_OPEN, ADDED_PLACEMARKER_CLOSED, "added"),
        (CHANGED_PLACEMARKER_OPEN, CHANGED_PLACEMARKER_CLOSED, "changed"),
        (CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, "changed_into"),
    )

    def send(self, body, title="", notify_type=None, attach=None, **kwargs):
        """
        Override send method to create custom embeds with red/green colors
        for removed/added content when placeholders are present.
        """

        # Check if body contains any of our diff placeholders
        has_diff_markers = any(
            open_marker in body
            for open_marker, _close, _kind in self._MARKER_PAIRS
        )

        # If we have diff placeholders and we're in markdown/html format, create custom embeds
        if has_diff_markers and self.notify_format in (NotifyFormat.MARKDOWN, NotifyFormat.HTML):
            return self._send_with_colored_embeds(body, title, notify_type, attach, **kwargs)

        # Otherwise, use the parent class's default behavior
        return super().send(body, title, notify_type, attach, **kwargs)

    def _send_with_colored_embeds(self, body, title, notify_type, attach, **kwargs):
        """
        Send Discord message with embeds in the original diff order.
        Preserves the sequence: unchanged -> removed -> added -> unchanged, etc.

        Respects Discord's embed limits (10 embeds per message, 6000 characters
        total, 4096 characters per description) and appends a yellow warning
        embed when content had to be truncated.
        """
        payload = {
            "tts": self.tts,
            "wait": self.tts is False,
        }

        if self.flags:
            payload["flags"] = self.flags

        # Acquire image_url
        image_url = self.image_url(notify_type)

        if self.avatar and (image_url or self.avatar_url):
            payload["avatar_url"] = self.avatar_url if self.avatar_url else image_url

        if self.user:
            payload["username"] = self.user

        # Associate our thread_id with our message
        params = {"thread_id": self.thread_id} if self.thread_id else None

        # Build embeds array preserving order
        embeds = []

        # Add title as plain bold text in message content (not an embed)
        if title:
            payload["content"] = f"**{title}**"

        # Parse the body into ordered chunks
        chunks = self._parse_body_into_chunks(body)

        # Discord limits:
        # - Max 10 embeds per message
        # - Max 6000 characters total across all embeds
        # - Max 4096 characters per embed description
        max_embeds = 10
        max_total_chars = 6000
        max_embed_description = 4096

        # Running character count across all embed descriptions
        total_chars = 0

        # Create embeds from chunks in order (no titles, just color coding)
        for chunk_type, content in chunks:
            if not content.strip():
                continue

            # Truncate individual embed description if needed
            if len(content) > max_embed_description:
                content = content[:max_embed_description - 3] + "..."

            # Keep one slot free so the warning notice can be the final (10th) embed
            if len(embeds) >= max_embeds - 1:
                embeds.append({
                    "description": "⚠️ Content truncated (Discord 10 embed limit reached) - Tip: Select 'Plain Text' or 'HTML' format for longer diffs",
                    "color": DISCORD_COLOR_WARNING,
                })
                break

            # Check if adding this embed would exceed total character limit
            if total_chars + len(content) > max_total_chars:
                remaining_chars = max_total_chars - total_chars
                if remaining_chars > 100:
                    # Add partial content if we have room, keeping the chunk's own color
                    # (previously 'changed'/'changed_into' chunks wrongly fell back to green)
                    embeds.append({
                        "description": content[:remaining_chars - 100] + "...",
                        "color": self._CHUNK_COLORS.get(chunk_type, DISCORD_COLOR_UNCHANGED),
                    })
                embeds.append({
                    "description": "⚠️ Content truncated (Discord 6000 char limit reached)\nTip: Select 'Plain Text' or 'HTML' format for longer diffs",
                    "color": DISCORD_COLOR_WARNING,
                })
                break

            color = self._CHUNK_COLORS.get(chunk_type)
            if color is not None:
                embeds.append({"description": content, "color": color})

            total_chars += len(content)

        if embeds:
            payload["embeds"] = embeds

        # Send the payload using parent's _send method
        if not self._send(payload, params=params):
            return False

        # Handle attachments if present
        if attach and self.attachment_support:
            payload.update({
                "tts": False,
                "wait": True,
            })
            payload.pop("embeds", None)
            payload.pop("content", None)
            payload.pop("allow_mentions", None)

            for attachment in attach:
                self.logger.info(f"Posting Discord Attachment {attachment.name}")
                if not self._send(payload, params=params, attach=attachment):
                    return False

        return True

    def _parse_body_into_chunks(self, body):
        """
        Parse the body into ordered chunks of (type, content) tuples.
        Types: "unchanged", "removed", "added", "changed", "changed_into"
        Preserves the original order of the diff.
        """
        chunks = []
        position = 0

        while position < len(body):
            # Locate every opening marker at/after `position`
            candidates = []
            for open_marker, close_marker, chunk_type in self._MARKER_PAIRS:
                found = body.find(open_marker, position)
                if found != -1:
                    candidates.append((found, open_marker, close_marker, chunk_type))

            if not candidates:
                # No more markers, rest is unchanged
                chunks.append(("unchanged", body[position:]))
                break

            # Earliest marker wins (ties resolved by _MARKER_PAIRS order)
            marker_pos, open_marker, close_marker, chunk_type = min(candidates, key=lambda c: c[0])

            # Add unchanged content before the marker
            if marker_pos > position:
                chunks.append(("unchanged", body[position:marker_pos]))

            content_start = marker_pos + len(open_marker)
            close_pos = body.find(close_marker, marker_pos)

            if close_pos == -1:
                # No closing marker, take rest as this type
                chunks.append((chunk_type, body[content_start:]))
                break

            # Extract content between markers
            chunks.append((chunk_type, body[content_start:close_pos]))
            position = close_pos + len(close_marker)

        return chunks
|  | ||||
|  | ||||
# Register the custom Discord handler with Apprise
# This will override the built-in discord:// handler
@notify(on="discord")
def discord_custom_wrapper(body, title, notify_type, meta, body_format=None, *args, **kwargs):
    """
    Wrapper function to make the custom Discord handler work with Apprise's decorator system.
    Note: This decorator approach may not work for overriding built-in plugins.
    The class-based approach above is the proper way to extend NotifyDiscord.
    """
    # NOTE(review): this stub sends nothing yet always reports success - confirm
    # it is never actually routed, or notifications would be silently dropped.
    logger.info("Custom Discord handler called")
    # This is here for potential future use with decorator-based registration
    return True
							
								
								
									
										42
									
								
								changedetectionio/notification/email_helpers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								changedetectionio/notification/email_helpers.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
def as_monospaced_html_email(content: str, title: str) -> str:
    """
    Build a minimal, email-client-safe HTML document around `content`,
    forcing monospace rendering in Gmail, Hotmail, Apple Mail, Outlook, etc.

    Args:
        content: The body text (plain text or HTML-like); `<br>` tags are the
            expected line-break mechanism.
        title: The title plaintext (HTML-escaped before insertion).
    Returns:
        A complete HTML document string suitable for sending as an email body.
    """
    import html

    # Strip every CR/LF in a single pass — line breaks must arrive as <br>
    # so the <pre> styling does not render doubled blank lines.
    body_text = content.translate(str.maketrans('', '', '\r\n'))

    # Titles are plaintext from the caller; escape for safe use in <title>.
    safe_title = html.escape(title) if title else ''

    # Email-safe wrapper: conditional MSO styles for Outlook, <pre> with
    # pre-wrap so long lines still wrap on narrow clients.
    return f"""<!DOCTYPE html>
<html lang="en">
<head>
  <meta charset="UTF-8">
  <meta name="x-apple-disable-message-reformatting">
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
  <!--[if mso]>
    <style>
      body, div, pre, td {{ font-family: "Courier New", Courier, monospace !important; }}
    </style>
  <![endif]-->
  <title>{safe_title}</title>
</head>
<body style="-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;">
  <pre role="article" aria-roledescription="email" lang="en"
       style="font-family: monospace, 'Courier New', Courier; font-size: 0.8em;
              white-space: pre-wrap; word-break: break-word;">{body_text}</pre>
</body>
</html>"""
| @@ -1,30 +1,274 @@ | ||||
|  | ||||
| import time | ||||
| import apprise | ||||
| from apprise import NotifyFormat | ||||
| from loguru import logger | ||||
| from urllib.parse import urlparse | ||||
| from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL | ||||
| from .apprise_plugin.custom_handlers import SUPPORTED_HTTP_METHODS | ||||
| from .email_helpers import as_monospaced_html_email | ||||
| from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, ADDED_PLACEMARKER_OPEN, HTML_ADDED_STYLE, \ | ||||
|     ADDED_PLACEMARKER_CLOSED, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, CHANGED_PLACEMARKER_OPEN, \ | ||||
|     CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE | ||||
| from ..notification_service import NotificationContextData, CUSTOM_LINEBREAK_PLACEHOLDER | ||||
|  | ||||
| def process_notification(n_object, datastore): | ||||
|     from changedetectionio.safe_jinja import render as jinja_render | ||||
|     from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats | ||||
|  | ||||
|  | ||||
def markup_text_links_to_html(body):
    """
    Turn any URLs found in plaintext into clickable <a> tags.
    Everything (URLs included) is run through markupsafe's escape,
    so the resulting HTML is XSS-safe.
    """
    from linkify_it import LinkifyIt
    from markupsafe import Markup, escape

    # Locate URLs in the ORIGINAL text — matching must happen before escaping
    # or the offsets would be wrong.
    found = LinkifyIt().match(body)

    if not found:
        # Nothing link-like; just escape the whole body.
        return Markup(escape(body))

    pieces = []
    cursor = 0

    for hit in found:
        # Escaped plain text preceding this URL
        if hit.index > cursor:
            pieces.append(escape(body[cursor:hit.index]))

        # The anchor itself — escape the URL in both href and display text
        link = hit.url
        pieces.append(Markup(f'<a href="{escape(link)}">{escape(link)}</a>'))

        cursor = hit.last_index

    # Escaped tail after the final URL
    if cursor < len(body):
        pieces.append(escape(body[cursor:]))

    # Stitch all fragments back together
    return str(Markup(''.join(str(piece) for piece in pieces)))
|  | ||||
def notification_format_align_with_apprise(n_format : str):
    """
    Correctly align changedetection's formats with apprise's formats.
    Probably these are the same - but good to be sure.
    These set the expected OUTPUT format type.
    :param n_format: changedetection format name, e.g. 'htmlcolor', 'text'.
    :return: the matching apprise NotifyFormat string value.
    """
    # Apprise only knows plain 'html' / 'markdown' / 'text', so collapse
    # variants such as 'htmlcolor'; anything unrecognised falls back to text.
    prefix_map = (
        ('html', NotifyFormat.HTML),
        ('markdown', NotifyFormat.MARKDOWN),
        ('text', NotifyFormat.TEXT),
    )
    for prefix, apprise_format in prefix_map:
        if n_format.startswith(prefix):
            return apprise_format.value

    return NotifyFormat.TEXT.value
|  | ||||
def apply_discord_markdown_to_body(n_body):
    """
    Rewrite diff placemarker tokens as Discord-flavoured markdown.

    Discord does not support <del> but it supports non-standard
    ~~strikethrough~~, so removed/changed text becomes ~~...~~ and
    added/changed-into text becomes **...**.

    :param n_body: Notification body containing diff placemarker tokens.
    :return: Body with placemarkers replaced by markdown markers.
    """
    import re
    # Define the mapping between your placeholders and markdown markers
    replacements = [
        (REMOVED_PLACEMARKER_OPEN, '~~', REMOVED_PLACEMARKER_CLOSED, '~~'),
        (ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
        (CHANGED_PLACEMARKER_OPEN, '~~', CHANGED_PLACEMARKER_CLOSED, '~~'),
        (CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
    ]
    # Any whitespace that sat just inside the markers is moved outside them,
    # because markdown markers must hug the text ("**foo**", never "** foo **")
    # or Discord will not render the styling.
    for open_tag, open_md, close_tag, close_md in replacements:
        # Match opening tag, capture leading whitespace, the content (lazily),
        # trailing whitespace, then the closing tag.
        # FIX: was r'(\s*)(.*?)?(\s*)' — the extra '?' after a lazy '.*?' is
        # redundant (a lazy star already matches the empty string) and would
        # leave group 2 unset (None) in some regex flavours.
        pattern = re.compile(
            re.escape(open_tag) + r'(\s*)(.*?)(\s*)' + re.escape(close_tag),
            flags=re.DOTALL
        )
        n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
    return n_body
|  | ||||
def apply_standard_markdown_to_body(n_body):
    """
    Rewrite diff placemarker tokens as standard markdown / simple HTML.

    Apprise does not support ~~strikethrough~~ but it will pass <del>
    through as HTML strikethrough, so removed/changed text becomes
    <del>...</del> and added/changed-into text becomes **...**.

    :param n_body: Notification body containing diff placemarker tokens.
    :return: Body with placemarkers replaced by markdown/HTML markers.
    """
    import re
    # Define the mapping between your placeholders and markdown markers
    replacements = [
        (REMOVED_PLACEMARKER_OPEN, '<del>', REMOVED_PLACEMARKER_CLOSED, '</del>'),
        (ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
        (CHANGED_PLACEMARKER_OPEN, '<del>', CHANGED_PLACEMARKER_CLOSED, '</del>'),
        (CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
    ]

    # Any whitespace that sat just inside the markers is moved outside them,
    # because '**' markers must hug the text or markdown rendering breaks.
    for open_tag, open_md, close_tag, close_md in replacements:
        # Match opening tag, capture leading whitespace, the content (lazily),
        # trailing whitespace, then the closing tag.
        # FIX: was r'(\s*)(.*?)?(\s*)' — the extra '?' after a lazy '.*?' is
        # redundant (a lazy star already matches the empty string) and would
        # leave group 2 unset (None) in some regex flavours.
        pattern = re.compile(
            re.escape(open_tag) + r'(\s*)(.*?)(\s*)' + re.escape(close_tag),
            flags=re.DOTALL
        )
        n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
    return n_body
|  | ||||
|  | ||||
def apply_service_tweaks(url, n_body, n_title, requested_output_format):
    """
    Apply per-service fixups to a notification before it is handed to apprise.

    Depending on the target service (detected from ``url``), rewrites the diff
    placemarker tokens in ``n_body`` into markup that service can render,
    appends a default avatar_url where appropriate, and truncates title/body to
    the service's payload limits.

    :param url: Apprise-style notification URL (may be rewritten here).
    :param n_body: Notification body, still containing diff placemarker tokens.
    :param n_title: Notification title (may be truncated here).
    :param requested_output_format: changedetection format name as requested by
        the user, e.g. 'text', 'html', 'htmlcolor', 'markdown'
        (NOT the apprise-aligned value).
    :return: Tuple of (url, n_body, n_title) after tweaks.
    """

    # Re 323 - Limit discord length to their 2000 char limit total or it wont send.
    # Because different notifications may require different pre-processing, run each sequentially :(
    # 2000 bytes minus -
    #     200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers
    #     Length of URL - In case they specify a longer custom avatar_url

    # Empty/whitespace-only body: nothing to tweak, return unchanged.
    if not n_body or not n_body.strip():
        return url, n_body, n_title

    # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload.
    # mail/post/get/delete/put schemes don't take an avatar_url, so skip those.
    parsed = urlparse(url)
    k = '?' if not parsed.query else '&'
    if url and not 'avatar_url' in url \
            and not url.startswith('mail') \
            and not url.startswith('post') \
            and not url.startswith('get') \
            and not url.startswith('delete') \
            and not url.startswith('put'):
        url += k + f"avatar_url={APPRISE_AVATAR_URL}"

    if url.startswith('tgram://'):
        # Telegram only supports a limited subset of HTML, remove the '<br>' we place in.
        # re https://github.com/dgtlmoon/changedetection.io/issues/555
        # @todo re-use an existing library we have already imported to strip all non-allowed tags
        n_body = n_body.replace('<br>', '\n')
        n_body = n_body.replace('</br>', '\n')
        n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n')

        # Use strikethrough for removed content, bold for added content
        n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '<s>')
        n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '</s>')
        n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '<b>')
        n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '</b>')
        # Handle changed/replaced lines (old → new)
        n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, '<s>')
        n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, '</s>')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>')

        # real limit is 4096, but minus some for extra metadata
        payload_max_size = 3600
        body_limit = max(0, payload_max_size - len(n_title))
        n_title = n_title[0:payload_max_size]
        n_body = n_body[0:body_limit]

    elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
          or url.startswith('https://discord.com/api'))\
            and 'html' in requested_output_format:
        # Discord doesn't support HTML, replace <br> with newlines
        n_body = n_body.strip().replace('<br>', '\n')
        n_body = n_body.replace('</br>', '\n')
        n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\n')

        # Don't replace placeholders or truncate here - let the custom Discord plugin handle it
        # The plugin will use embeds (6000 char limit across all embeds) if placeholders are present,
        # or plain content (2000 char limit) otherwise

        # Only do placeholder replacement if NOT using htmlcolor (which triggers embeds in custom plugin)
        if requested_output_format == 'html':
            # No diff placeholders, use Discord markdown for any other formatting
            # Use Discord markdown: strikethrough for removed, bold for added
            n_body = apply_discord_markdown_to_body(n_body=n_body)

            # Apply 2000 char limit for plain content
            payload_max_size = 1700
            body_limit = max(0, payload_max_size - len(n_title))
            n_title = n_title[0:payload_max_size]
            n_body = n_body[0:body_limit]
        # else: our custom Discord plugin will convert any placeholders left over into embeds with color bars

    # Is not discord/tgram and they want htmlcolor
    elif requested_output_format == 'htmlcolor':
        # https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
        # Inline styles + ARIA roles so the diff colouring survives email clients and screen readers.
        n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">')
        n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
        n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">')
        n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
        # Handle changed/replaced lines (old → new)
        n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">')
        n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
        # Mark each newline so the caller can later decide between <br> and \r\n
        n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n')
    elif requested_output_format == 'html':
        # Plain HTML (no colour): degrade placemarkers to text labels
        n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
        n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '')
        n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
        n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '')
        n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
        n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
        n_body = n_body.replace('\n', f'{CUSTOM_LINEBREAK_PLACEHOLDER}\n')
    elif requested_output_format == 'markdown':
        # Markdown to HTML - Apprise will convert this to HTML
        n_body = apply_standard_markdown_to_body(n_body=n_body)

    else: #plaintext etc default
        n_body = n_body.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
        n_body = n_body.replace(REMOVED_PLACEMARKER_CLOSED, '')
        n_body = n_body.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
        n_body = n_body.replace(ADDED_PLACEMARKER_CLOSED, '')
        n_body = n_body.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
        n_body = n_body.replace(CHANGED_PLACEMARKER_CLOSED, f'')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
        n_body = n_body.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')

    return url, n_body, n_title
|  | ||||
|  | ||||
| def process_notification(n_object: NotificationContextData, datastore): | ||||
|     from changedetectionio.jinja2_custom import render as jinja_render | ||||
|     from . import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, default_notification_format, valid_notification_formats | ||||
|     # be sure its registered | ||||
|     from .apprise_plugin.custom_handlers import apprise_http_custom_handler | ||||
|     # Register custom Discord plugin | ||||
|     from .apprise_plugin.discord import NotifyDiscordCustom | ||||
|  | ||||
|     if not isinstance(n_object, NotificationContextData): | ||||
|         raise TypeError(f"Expected NotificationContextData, got {type(n_object)}") | ||||
|  | ||||
|     now = time.time() | ||||
|     if n_object.get('notification_timestamp'): | ||||
|         logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s") | ||||
|  | ||||
|     # Insert variables into the notification content | ||||
|     notification_parameters = create_notification_parameters(n_object, datastore) | ||||
|  | ||||
|     n_format = valid_notification_formats.get( | ||||
|         n_object.get('notification_format', default_notification_format), | ||||
|         valid_notification_formats[default_notification_format], | ||||
|     ) | ||||
|     requested_output_format = n_object.get('notification_format', default_notification_format) | ||||
|     logger.debug(f"Requested notification output format: '{requested_output_format}'") | ||||
|  | ||||
|     # If we arrived with 'System default' then look it up | ||||
|     if n_format == default_notification_format_for_watch and datastore.data['settings']['application'].get('notification_format') != default_notification_format_for_watch: | ||||
|     if requested_output_format == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: | ||||
|         # Initially text or whatever | ||||
|         n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format]) | ||||
|         requested_output_format = datastore.data['settings']['application'].get('notification_format', default_notification_format) | ||||
|  | ||||
|     requested_output_format_original = requested_output_format | ||||
|  | ||||
|     # Now clean it up so it fits perfectly with apprise | ||||
|     requested_output_format = notification_format_align_with_apprise(n_format=requested_output_format) | ||||
|  | ||||
|     logger.trace(f"Complete notification body including Jinja and placeholders calculated in  {time.time() - now:.2f}s") | ||||
|  | ||||
| @@ -39,16 +283,23 @@ def process_notification(n_object, datastore): | ||||
|  | ||||
|     apobj = apprise.Apprise(debug=True, asset=apprise_asset) | ||||
|  | ||||
|     # Override Apprise's built-in Discord plugin with our custom one | ||||
|     # This allows us to use colored embeds for diff content | ||||
|     # First remove the built-in discord plugin, then add our custom one | ||||
|     apprise.plugins.N_MGR.remove('discord') | ||||
|     apprise.plugins.N_MGR.add(NotifyDiscordCustom, schemas='discord') | ||||
|  | ||||
|     if not n_object.get('notification_urls'): | ||||
|         return None | ||||
|  | ||||
|     with apprise.LogCapture(level=apprise.logging.DEBUG) as logs: | ||||
|     with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs): | ||||
|         for url in n_object['notification_urls']: | ||||
|  | ||||
|             # Get the notification body from datastore | ||||
|             n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters) | ||||
|             if n_object.get('notification_format', '').startswith('HTML'): | ||||
|                 n_body = n_body.replace("\n", '<br>') | ||||
|  | ||||
|             if n_object.get('markup_text_links_to_html_links'): | ||||
|                 n_body = markup_text_links_to_html(body=n_body) | ||||
|  | ||||
|             n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters) | ||||
|  | ||||
| @@ -64,74 +315,88 @@ def process_notification(n_object, datastore): | ||||
|             logger.info(f">> Process Notification: AppRise notifying {url}") | ||||
|             url = jinja_render(template_str=url, **notification_parameters) | ||||
|  | ||||
|             # Re 323 - Limit discord length to their 2000 char limit total or it wont send. | ||||
|             # Because different notifications may require different pre-processing, run each sequentially :( | ||||
|             # 2000 bytes minus - | ||||
|             #     200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers | ||||
|             #     Length of URL - Incase they specify a longer custom avatar_url | ||||
|             # If it's a plaintext document, and they want HTML type email/alerts, so it needs to be escaped | ||||
|             watch_mime_type = n_object.get('watch_mime_type') | ||||
|             if watch_mime_type and 'text/' in watch_mime_type.lower() and not 'html' in watch_mime_type.lower(): | ||||
|                 if 'html' in requested_output_format: | ||||
|                     from markupsafe import escape | ||||
|                     n_body = str(escape(n_body)) | ||||
|  | ||||
|             # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload | ||||
|             k = '?' if not '?' in url else '&' | ||||
|             if not 'avatar_url' in url \ | ||||
|                     and not url.startswith('mail') \ | ||||
|                     and not url.startswith('post') \ | ||||
|                     and not url.startswith('get') \ | ||||
|                     and not url.startswith('delete') \ | ||||
|                     and not url.startswith('put'): | ||||
|                 url += k + f"avatar_url={APPRISE_AVATAR_URL}" | ||||
|             if 'html' in requested_output_format: | ||||
|                 # Since the n_body is always some kind of text from the 'diff' engine, attempt to preserve whitespaces that get sent to the HTML output | ||||
|                 # But only where its more than 1 consecutive whitespace, otherwise "and this" becomes "and this" etc which is too much. | ||||
|                 n_body = n_body.replace('  ', '  ') | ||||
|  | ||||
|             if url.startswith('tgram://'): | ||||
|                 # Telegram only supports a limit subset of HTML, remove the '<br>' we place in. | ||||
|                 # re https://github.com/dgtlmoon/changedetection.io/issues/555 | ||||
|                 # @todo re-use an existing library we have already imported to strip all non-allowed tags | ||||
|                 n_body = n_body.replace('<br>', '\n') | ||||
|                 n_body = n_body.replace('</br>', '\n') | ||||
|                 # real limit is 4096, but minus some for extra metadata | ||||
|                 payload_max_size = 3600 | ||||
|                 body_limit = max(0, payload_max_size - len(n_title)) | ||||
|                 n_title = n_title[0:payload_max_size] | ||||
|                 n_body = n_body[0:body_limit] | ||||
|             (url, n_body, n_title) = apply_service_tweaks(url=url, n_body=n_body, n_title=n_title, requested_output_format=requested_output_format_original) | ||||
|  | ||||
|             elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith( | ||||
|                     'https://discord.com/api'): | ||||
|                 # real limit is 2000, but minus some for extra metadata | ||||
|                 payload_max_size = 1700 | ||||
|                 body_limit = max(0, payload_max_size - len(n_title)) | ||||
|                 n_title = n_title[0:payload_max_size] | ||||
|                 n_body = n_body[0:body_limit] | ||||
|             apprise_input_format = "NO-THANKS-WE-WILL-MANAGE-ALL-OF-THIS" | ||||
|  | ||||
|             elif url.startswith('mailto'): | ||||
|                 # Apprise will default to HTML, so we need to override it | ||||
|                 # So that whats' generated in n_body is in line with what is going to be sent. | ||||
|                 # https://github.com/caronc/apprise/issues/633#issuecomment-1191449321 | ||||
|                 if not 'format=' in url and (n_format == 'Text' or n_format == 'Markdown'): | ||||
|                     prefix = '?' if not '?' in url else '&' | ||||
|                     # Apprise format is lowercase text https://github.com/caronc/apprise/issues/633 | ||||
|                     n_format = n_format.lower() | ||||
|                     url = f"{url}{prefix}format={n_format}" | ||||
|                 # If n_format == HTML, then apprise email should default to text/html and we should be sending HTML only | ||||
|             if not 'format=' in url: | ||||
|                 parsed_url = urlparse(url) | ||||
|                 prefix_add_to_url = '?' if not parsed_url.query else '&' | ||||
|  | ||||
|             apobj.add(url) | ||||
|                 # THIS IS THE TRICK HOW TO DISABLE APPRISE DOING WEIRD AUTO-CONVERSION WITH BREAKING BR TAGS ETC | ||||
|                 if 'html' in requested_output_format: | ||||
|                     url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}" | ||||
|                     apprise_input_format = NotifyFormat.HTML.value | ||||
|                 elif 'text' in requested_output_format: | ||||
|                     url = f"{url}{prefix_add_to_url}format={NotifyFormat.TEXT.value}" | ||||
|                     apprise_input_format = NotifyFormat.TEXT.value | ||||
|  | ||||
|                 elif requested_output_format == NotifyFormat.MARKDOWN.value: | ||||
|                     # Convert markdown to HTML ourselves since not all plugins do this | ||||
|                     from apprise.conversion import markdown_to_html | ||||
|                     # Make sure there are paragraph breaks around horizontal rules | ||||
|                     n_body = n_body.replace('---', '\n\n---\n\n') | ||||
|                     n_body = markdown_to_html(n_body) | ||||
|                     url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}" | ||||
|                     requested_output_format = NotifyFormat.HTML.value | ||||
|                     apprise_input_format = NotifyFormat.HTML.value  # Changed from MARKDOWN to HTML | ||||
|  | ||||
|                 # Could have arrived at any stage, so we dont end up running .escape on it | ||||
|                 if 'html' in requested_output_format: | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n') | ||||
|                 else: | ||||
|                     # texty types | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n') | ||||
|  | ||||
|             else: | ||||
|                 # ?format was IN the apprise URL, they are kind of on their own here, we will try our best | ||||
|                 if 'format=html' in url: | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '<br>\r\n') | ||||
|                     # This will also prevent apprise from doing conversion | ||||
|                     apprise_input_format = NotifyFormat.HTML.value | ||||
|                     requested_output_format = NotifyFormat.HTML.value | ||||
|                 elif 'format=text' in url: | ||||
|                     n_body = n_body.replace(CUSTOM_LINEBREAK_PLACEHOLDER, '\r\n') | ||||
|                     apprise_input_format = NotifyFormat.TEXT.value | ||||
|                     requested_output_format = NotifyFormat.TEXT.value | ||||
|  | ||||
|             sent_objs.append({'title': n_title, | ||||
|                               'body': n_body, | ||||
|                               'url': url, | ||||
|                               'body_format': n_format}) | ||||
|                               'url': url}) | ||||
|             apobj.add(url) | ||||
|  | ||||
|             # Since the output is always based on the plaintext of the 'diff' engine, wrap it nicely. | ||||
|             # It should always be similar to the 'history' part of the UI. | ||||
|             if url.startswith('mail') and 'html' in requested_output_format: | ||||
|                 if not '<pre' in n_body and not '<body' in n_body: # No custom HTML-ish body was setup already | ||||
|                     n_body = as_monospaced_html_email(content=n_body, title=n_title) | ||||
|  | ||||
|         # Blast off the notifications that are set in .add() | ||||
|         apobj.notify( | ||||
|             title=n_title, | ||||
|             body=n_body, | ||||
|             body_format=n_format, | ||||
|             # `body_format` Tell apprise what format the INPUT is in, specify a wrong/bad type and it will force skip conversion in apprise | ||||
|             # &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between) | ||||
|             body_format=apprise_input_format, | ||||
|             # False is not an option for AppRise, must be type None | ||||
|             attach=n_object.get('screenshot', None) | ||||
|         ) | ||||
|  | ||||
|  | ||||
|         # Returns empty string if nothing found, multi-line string otherwise | ||||
|         log_value = logs.getvalue() | ||||
|  | ||||
|         if log_value and 'WARNING' in log_value or 'ERROR' in log_value: | ||||
|         if log_value and ('WARNING' in log_value or 'ERROR' in log_value): | ||||
|             logger.critical(log_value) | ||||
|             raise Exception(log_value) | ||||
|  | ||||
| @@ -141,17 +406,15 @@ def process_notification(n_object, datastore): | ||||
|  | ||||
| # Notification title + body content parameters get created here. | ||||
| # ( Where we prepare the tokens in the notification to be replaced with actual values ) | ||||
| def create_notification_parameters(n_object, datastore): | ||||
|     from copy import deepcopy | ||||
|     from . import valid_tokens | ||||
| def create_notification_parameters(n_object: NotificationContextData, datastore): | ||||
|     if not isinstance(n_object, NotificationContextData): | ||||
|         raise TypeError(f"Expected NotificationContextData, got {type(n_object)}") | ||||
|  | ||||
|     # in the case we send a test notification from the main settings, there is no UUID. | ||||
|     uuid = n_object['uuid'] if 'uuid' in n_object else '' | ||||
|  | ||||
|     if uuid: | ||||
|         watch_title = datastore.data['watching'][uuid].label | ||||
|     watch = datastore.data['watching'].get(n_object['uuid']) | ||||
|     if watch: | ||||
|         watch_title = datastore.data['watching'][n_object['uuid']].label | ||||
|         tag_list = [] | ||||
|         tags = datastore.get_all_tags_for_watch(uuid) | ||||
|         tags = datastore.get_all_tags_for_watch(n_object['uuid']) | ||||
|         if tags: | ||||
|             for tag_uuid, tag in tags.items(): | ||||
|                 tag_list.append(tag.get('title')) | ||||
| @@ -166,14 +429,10 @@ def create_notification_parameters(n_object, datastore): | ||||
|  | ||||
|     watch_url = n_object['watch_url'] | ||||
|  | ||||
|     diff_url = "{}/diff/{}".format(base_url, uuid) | ||||
|     preview_url = "{}/preview/{}".format(base_url, uuid) | ||||
|     diff_url = "{}/diff/{}".format(base_url, n_object['uuid']) | ||||
|     preview_url = "{}/preview/{}".format(base_url, n_object['uuid']) | ||||
|  | ||||
|     # Not sure deepcopy is needed here, but why not | ||||
|     tokens = deepcopy(valid_tokens) | ||||
|  | ||||
|     # Valid_tokens also used as a field validator | ||||
|     tokens.update( | ||||
|     n_object.update( | ||||
|         { | ||||
|             'base_url': base_url, | ||||
|             'diff_url': diff_url, | ||||
| @@ -181,13 +440,10 @@ def create_notification_parameters(n_object, datastore): | ||||
|             'watch_tag': watch_tag if watch_tag is not None else '', | ||||
|             'watch_title': watch_title if watch_title is not None else '', | ||||
|             'watch_url': watch_url, | ||||
|             'watch_uuid': uuid, | ||||
|             'watch_uuid': n_object['uuid'], | ||||
|         }) | ||||
|  | ||||
|     # n_object will contain diff, diff_added etc etc | ||||
|     tokens.update(n_object) | ||||
|     if watch: | ||||
|         n_object.update(datastore.data['watching'].get(n_object['uuid']).extra_notification_token_values()) | ||||
|  | ||||
|     if uuid: | ||||
|         tokens.update(datastore.data['watching'].get(uuid).extra_notification_token_values()) | ||||
|  | ||||
|     return tokens | ||||
|     return n_object | ||||
|   | ||||
| @@ -6,9 +6,70 @@ Extracted from update_worker.py to provide standalone notification functionality | ||||
| for both sync and async workers | ||||
| """ | ||||
|  | ||||
| import time | ||||
| from loguru import logger | ||||
| import time | ||||
|  | ||||
| from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
| from changedetectionio.notification import default_notification_format, valid_notification_formats | ||||
|  | ||||
| # This gets modified on notification time (handler.py) depending on the required notification output | ||||
| CUSTOM_LINEBREAK_PLACEHOLDER='@BR@' | ||||
|  | ||||
|  | ||||
| # What is passed around as notification context, also used as the complete list of valid {{ tokens }} | ||||
| class NotificationContextData(dict): | ||||
|     def __init__(self, initial_data=None, **kwargs): | ||||
|         super().__init__({ | ||||
|             'base_url': None, | ||||
|             'current_snapshot': None, | ||||
|             'diff': None, | ||||
|             'diff_added': None, | ||||
|             'diff_full': None, | ||||
|             'diff_patch': None, | ||||
|             'diff_removed': None, | ||||
|             'diff_url': None, | ||||
|             'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen | ||||
|             'notification_timestamp': time.time(), | ||||
|             'preview_url': None, | ||||
|             'screenshot': None, | ||||
|             'triggered_text': None, | ||||
|             'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX',  # Converted to 'watch_uuid' in create_notification_parameters | ||||
|             'watch_mime_type': None, | ||||
|             'watch_tag': None, | ||||
|             'watch_title': None, | ||||
|             'watch_url': 'https://WATCH-PLACE-HOLDER/', | ||||
|         }) | ||||
|  | ||||
|         # Apply any initial data passed in | ||||
|         self.update({'watch_uuid': self.get('uuid')}) | ||||
|         if initial_data: | ||||
|             self.update(initial_data) | ||||
|  | ||||
|         # Apply any keyword arguments | ||||
|         if kwargs: | ||||
|             self.update(kwargs) | ||||
|  | ||||
|         n_format = self.get('notification_format') | ||||
|         if n_format and not valid_notification_formats.get(n_format): | ||||
|             raise ValueError(f'Invalid notification format: "{n_format}"') | ||||
|  | ||||
|     def set_random_for_validation(self): | ||||
|         import random, string | ||||
|         """Randomly fills all dict keys with random strings (for validation/testing).  | ||||
|         So we can test the output in the notification body | ||||
|         """ | ||||
|         for key in self.keys(): | ||||
|             if key in ['uuid', 'time', 'watch_uuid']: | ||||
|                 continue | ||||
|             rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12)) | ||||
|             self[key] = rand_str | ||||
|  | ||||
|     def __setitem__(self, key, value): | ||||
|         if key == 'notification_format' and isinstance(value, str) and not value.startswith('RANDOM-PLACEHOLDER-'): | ||||
|             if not valid_notification_formats.get(value): | ||||
|                 raise ValueError(f'Invalid notification format: "{value}"') | ||||
|  | ||||
|         super().__setitem__(key, value) | ||||
|  | ||||
| class NotificationService: | ||||
|     """ | ||||
| @@ -20,12 +81,15 @@ class NotificationService: | ||||
|         self.datastore = datastore | ||||
|         self.notification_q = notification_q | ||||
|      | ||||
|     def queue_notification_for_watch(self, n_object, watch): | ||||
|     def queue_notification_for_watch(self, n_object: NotificationContextData, watch): | ||||
|         """ | ||||
|         Queue a notification for a watch with full diff rendering and template variables | ||||
|         """ | ||||
|         from changedetectionio import diff | ||||
|         from changedetectionio.notification import default_notification_format_for_watch | ||||
|         from changedetectionio.notification import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|  | ||||
|         if not isinstance(n_object, NotificationContextData): | ||||
|             raise TypeError(f"Expected NotificationContextData, got {type(n_object)}") | ||||
|  | ||||
|         dates = [] | ||||
|         trigger_text = '' | ||||
| @@ -44,29 +108,16 @@ class NotificationService: | ||||
|             snapshot_contents = "No snapshot/history available, the watch should fetch atleast once." | ||||
|  | ||||
|         # If we ended up here with "System default" | ||||
|         if n_object.get('notification_format') == default_notification_format_for_watch: | ||||
|         if n_object.get('notification_format') == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: | ||||
|             n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format') | ||||
|  | ||||
|         html_colour_enable = False | ||||
|         # HTML needs linebreak, but MarkDown and Text can use a linefeed | ||||
|         if n_object.get('notification_format') == 'HTML': | ||||
|             line_feed_sep = "<br>" | ||||
|             # Snapshot will be plaintext on the disk, convert to some kind of HTML | ||||
|             snapshot_contents = snapshot_contents.replace('\n', line_feed_sep) | ||||
|         elif n_object.get('notification_format') == 'HTML Color': | ||||
|             line_feed_sep = "<br>" | ||||
|             # Snapshot will be plaintext on the disk, convert to some kind of HTML | ||||
|             snapshot_contents = snapshot_contents.replace('\n', line_feed_sep) | ||||
|             html_colour_enable = True | ||||
|         else: | ||||
|             line_feed_sep = "\n" | ||||
|  | ||||
|         triggered_text = '' | ||||
|         if len(trigger_text): | ||||
|             from . import html_tools | ||||
|             triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text) | ||||
|             if triggered_text: | ||||
|                 triggered_text = line_feed_sep.join(triggered_text) | ||||
|                 triggered_text = CUSTOM_LINEBREAK_PLACEHOLDER.join(triggered_text) | ||||
|  | ||||
|         # Could be called as a 'test notification' with only 1 snapshot available | ||||
|         prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n" | ||||
| @@ -78,16 +129,17 @@ class NotificationService: | ||||
|  | ||||
|         n_object.update({ | ||||
|             'current_snapshot': snapshot_contents, | ||||
|             'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, html_colour=html_colour_enable), | ||||
|             'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=line_feed_sep), | ||||
|             'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep, html_colour=html_colour_enable), | ||||
|             'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True), | ||||
|             'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep), | ||||
|             'notification_timestamp': now, | ||||
|             'diff': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'diff_added': diff.render_diff(prev_snapshot, current_snapshot, include_removed=False, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER, patch_format=True), | ||||
|             'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=CUSTOM_LINEBREAK_PLACEHOLDER), | ||||
|             'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None, | ||||
|             'triggered_text': triggered_text, | ||||
|             'uuid': watch.get('uuid') if watch else None, | ||||
|             'watch_url': watch.get('url') if watch else None, | ||||
|             'watch_uuid': watch.get('uuid') if watch else None, | ||||
|             'watch_mime_type': watch.get('content-type') | ||||
|         }) | ||||
|  | ||||
|         if watch: | ||||
| @@ -103,7 +155,7 @@ class NotificationService: | ||||
|         Individual watch settings > Tag settings > Global settings | ||||
|         """ | ||||
|         from changedetectionio.notification import ( | ||||
|             default_notification_format_for_watch, | ||||
|             USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, | ||||
|             default_notification_body, | ||||
|             default_notification_title | ||||
|         ) | ||||
| @@ -111,7 +163,7 @@ class NotificationService: | ||||
|         # Would be better if this was some kind of Object where Watch can reference the parent datastore etc | ||||
|         v = watch.get(var_name) | ||||
|         if v and not watch.get('notification_muted'): | ||||
|             if var_name == 'notification_format' and v == default_notification_format_for_watch: | ||||
|             if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: | ||||
|                 return self.datastore.data['settings']['application'].get('notification_format') | ||||
|  | ||||
|             return v | ||||
| @@ -128,7 +180,7 @@ class NotificationService: | ||||
|  | ||||
|         # Otherwise could be defaults | ||||
|         if var_name == 'notification_format': | ||||
|             return default_notification_format_for_watch | ||||
|             return USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
|         if var_name == 'notification_body': | ||||
|             return default_notification_body | ||||
|         if var_name == 'notification_title': | ||||
| @@ -140,7 +192,7 @@ class NotificationService: | ||||
|         """ | ||||
|         Send notification when content changes are detected | ||||
|         """ | ||||
|         n_object = {} | ||||
|         n_object = NotificationContextData() | ||||
|         watch = self.datastore.data['watching'].get(watch_uuid) | ||||
|         if not watch: | ||||
|             return | ||||
| @@ -183,11 +235,25 @@ class NotificationService: | ||||
|         if not watch: | ||||
|             return | ||||
|  | ||||
|         n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|                     'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( | ||||
|                         ", ".join(watch['include_filters']), | ||||
|                         threshold), | ||||
|                     'notification_format': 'text'} | ||||
|         filter_list = ", ".join(watch['include_filters']) | ||||
|         # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed | ||||
|         body = f"""Hello, | ||||
|  | ||||
| Your configured CSS/xPath filters of '{filter_list}' for {{{{watch_url}}}} did not appear on the page after {threshold} attempts. | ||||
|  | ||||
| It's possible the page changed layout and the filter needs updating ( Try the 'Visual Selector' tab ) | ||||
|  | ||||
| Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}} | ||||
|  | ||||
| Thanks - Your omniscient changedetection.io installation. | ||||
| """ | ||||
|  | ||||
|         n_object = NotificationContextData({ | ||||
|             'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|             'notification_body': body, | ||||
|             'notification_format': self._check_cascading_vars('notification_format', watch), | ||||
|         }) | ||||
|         n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html') | ||||
|  | ||||
|         if len(watch['notification_urls']): | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
| @@ -215,12 +281,28 @@ class NotificationService: | ||||
|         if not watch: | ||||
|             return | ||||
|         threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts') | ||||
|         n_object = {'notification_title': "Changedetection.io - Alert - Browser step at position {} could not be run".format(step_n+1), | ||||
|                     'notification_body': "Your configured browser step at position {} for {{{{watch_url}}}} " | ||||
|                                          "did not appear on the page after {} attempts, did the page change layout? " | ||||
|                                          "Does it need a delay added?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\n" | ||||
|                                          "Thanks - Your omniscient changedetection.io installation :)\n".format(step_n+1, threshold), | ||||
|                     'notification_format': 'text'} | ||||
|  | ||||
|         step = step_n + 1 | ||||
|         # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed | ||||
|  | ||||
|         # {{{{ }}}} because this will be Jinja2 {{ }} tokens | ||||
|         body = f"""Hello, | ||||
|          | ||||
| Your configured browser step at position {step} for the web page watch {{{{watch_url}}}} did not appear on the page after {threshold} attempts, did the page change layout? | ||||
|  | ||||
| The element may have moved and needs editing, or does it need a delay added? | ||||
|  | ||||
| Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}} | ||||
|  | ||||
| Thanks - Your omniscient changedetection.io installation. | ||||
| """ | ||||
|  | ||||
|         n_object = NotificationContextData({ | ||||
|             'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run", | ||||
|             'notification_body': body, | ||||
|             'notification_format': self._check_cascading_vars('notification_format', watch), | ||||
|         }) | ||||
|         n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html') | ||||
|  | ||||
|         if len(watch['notification_urls']): | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
|   | ||||
| @@ -102,7 +102,7 @@ class difference_detection_processor(): | ||||
|             self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid')) | ||||
|  | ||||
|         # Tweak the base config with the per-watch ones | ||||
|         from changedetectionio.safe_jinja import render as jinja_render | ||||
|         from changedetectionio.jinja2_custom import render as jinja_render | ||||
|         request_headers = CaseInsensitiveDict() | ||||
|  | ||||
|         ua = self.datastore.data['settings']['requests'].get('default_ua') | ||||
|   | ||||
							
								
								
									
										133
									
								
								changedetectionio/processors/magic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								changedetectionio/processors/magic.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,133 @@ | ||||
| """ | ||||
| Content Type Detection and Stream Classification | ||||
|  | ||||
| This module provides intelligent content-type detection for changedetection.io. | ||||
| It addresses the common problem where HTTP Content-Type headers are missing, incorrect, | ||||
| or too generic, which would otherwise cause the wrong processor to be used. | ||||
|  | ||||
| The guess_stream_type class combines: | ||||
| 1. HTTP Content-Type headers (when available and reliable) | ||||
| 2. Python-magic library for MIME detection (analyzing actual file content) | ||||
| 3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.) | ||||
|  | ||||
| This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF, | ||||
| plain text, CSV, YAML, and XML formats - even when servers provide misleading headers. | ||||
|  | ||||
| Used by: processors/text_json_diff/processor.py and other content processors | ||||
| """ | ||||
|  | ||||
| # When to apply the 'cdata to real HTML' hack | ||||
| RSS_XML_CONTENT_TYPES = [ | ||||
|     "application/rss+xml", | ||||
|     "application/rdf+xml", | ||||
|     "application/atom+xml", | ||||
|     "text/rss+xml",  # rare, non-standard | ||||
|     "application/x-rss+xml",  # legacy (older feed software) | ||||
|     "application/x-atom+xml",  # legacy (older Atom) | ||||
| ] | ||||
|  | ||||
| # JSON Content-types | ||||
| JSON_CONTENT_TYPES = [ | ||||
|     "application/activity+json", | ||||
|     "application/feed+json", | ||||
|     "application/json", | ||||
|     "application/ld+json", | ||||
|     "application/vnd.api+json", | ||||
| ] | ||||
|  | ||||
|  | ||||
| # Generic XML Content-types (non-RSS/Atom) | ||||
| XML_CONTENT_TYPES = [ | ||||
|     "text/xml", | ||||
|     "application/xml", | ||||
| ] | ||||
|  | ||||
| HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div'] | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| class guess_stream_type(): | ||||
|     is_pdf = False | ||||
|     is_json = False | ||||
|     is_html = False | ||||
|     is_plaintext = False | ||||
|     is_rss = False | ||||
|     is_csv = False | ||||
|     is_xml = False  # Generic XML, not RSS/Atom | ||||
|     is_yaml = False | ||||
|  | ||||
|     def __init__(self, http_content_header, content): | ||||
|         import re | ||||
|         magic_content_header = http_content_header | ||||
|         test_content = content[:200].lower().strip() | ||||
|  | ||||
|         # Remove whitespace between < and tag name for robust detection (handles '< html', '<\nhtml', etc.) | ||||
|         test_content_normalized = re.sub(r'<\s+', '<', test_content) | ||||
|  | ||||
|         # Use puremagic for lightweight MIME detection (saves ~14MB vs python-magic) | ||||
|         magic_result = None | ||||
|         try: | ||||
|             import puremagic | ||||
|  | ||||
|             # puremagic needs bytes, so encode if we have a string | ||||
|             content_bytes = content[:200].encode('utf-8') if isinstance(content, str) else content[:200] | ||||
|  | ||||
|             # puremagic returns a list of PureMagic objects with confidence scores | ||||
|             detections = puremagic.magic_string(content_bytes) | ||||
|             if detections: | ||||
|                 # Get the highest confidence detection | ||||
|                 mime = detections[0].mime_type | ||||
|                 logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'") | ||||
|                 if mime and "/" in mime: | ||||
|                     magic_result = mime | ||||
|                     # Ignore generic/fallback mime types | ||||
|                     if mime in ['application/octet-stream', 'application/x-empty', 'binary']: | ||||
|                         logger.debug(f"Ignoring generic mime type '{mime}' from puremagic library") | ||||
|                     # Trust puremagic for non-text types immediately | ||||
|                     elif mime not in ['text/html', 'text/plain']: | ||||
|                         magic_content_header = mime | ||||
|  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection") | ||||
|  | ||||
|         # Content-based detection (most reliable for text formats) | ||||
|         # Check for HTML patterns first - if found, override magic's text/plain | ||||
|         has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS) | ||||
|  | ||||
|         # Always trust headers first | ||||
|         if 'text/plain' in http_content_header: | ||||
|             self.is_plaintext = True | ||||
|         if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES): | ||||
|             self.is_rss = True | ||||
|         elif any(s in http_content_header for s in JSON_CONTENT_TYPES): | ||||
|             self.is_json = True | ||||
|         elif 'pdf' in magic_content_header: | ||||
|             self.is_pdf = True | ||||
|         elif has_html_patterns or http_content_header == 'text/html': | ||||
|             self.is_html = True | ||||
|         elif any(s in magic_content_header for s in JSON_CONTENT_TYPES): | ||||
|             self.is_json = True | ||||
|         # magic will call a rss document 'xml' | ||||
|         # Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss | ||||
|         # This also triggers the automatic CDATA text parser so the RSS goes back a nice content list | ||||
|         elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized: | ||||
|             self.is_rss = True | ||||
|         elif any(s in http_content_header for s in XML_CONTENT_TYPES): | ||||
|             # Only mark as generic XML if not already detected as RSS | ||||
|             if not self.is_rss: | ||||
|                 self.is_xml = True | ||||
|         elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES): | ||||
|             # Generic XML that's not RSS/Atom (RSS/Atom checked above) | ||||
|             self.is_xml = True | ||||
|         elif '%pdf-1' in test_content: | ||||
|             self.is_pdf = True | ||||
|         elif http_content_header.startswith('text/'): | ||||
|             self.is_plaintext = True | ||||
|         # Only trust magic for 'text' if no other patterns matched | ||||
|         elif 'text' in magic_content_header: | ||||
|             self.is_plaintext = True | ||||
|         # If magic says text/plain and we found no HTML patterns, trust it | ||||
|         elif magic_result == 'text/plain': | ||||
|             self.is_plaintext = True | ||||
|             logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)") | ||||
|  | ||||
| @@ -7,18 +7,24 @@ import re | ||||
| import urllib3 | ||||
|  | ||||
| from changedetectionio.conditions import execute_ruleset_against_all_plugins | ||||
| from changedetectionio.diff import ADDED_PLACEMARKER_OPEN | ||||
| from changedetectionio.processors import difference_detection_processor | ||||
| from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE | ||||
| from changedetectionio import html_tools, content_fetchers | ||||
| from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.processors.magic import guess_stream_type | ||||
|  | ||||
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | ||||
|  | ||||
| name = 'Webpage Text/HTML, JSON and PDF changes' | ||||
| description = 'Detects all text changes where possible' | ||||
|  | ||||
| json_filter_prefixes = ['json:', 'jq:', 'jqraw:'] | ||||
| JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:'] | ||||
|  | ||||
| # Assume it's this type if the server says nothing on content-type | ||||
| DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER = 'text/html' | ||||
|  | ||||
| class FilterNotFoundInResponse(ValueError): | ||||
|     def __init__(self, msg, screenshot=None, xpath_data=None): | ||||
| @@ -32,353 +38,560 @@ class PDFToHTMLToolNotFound(ValueError): | ||||
|         ValueError.__init__(self, msg) | ||||
|  | ||||
|  | ||||
| class FilterConfig: | ||||
|     """Consolidates all filter and rule configurations from watch, tags, and global settings.""" | ||||
|  | ||||
|     def __init__(self, watch, datastore): | ||||
|         self.watch = watch | ||||
|         self.datastore = datastore | ||||
|         self.watch_uuid = watch.get('uuid') | ||||
|         # Cache computed properties to avoid repeated list operations | ||||
|         self._include_filters_cache = None | ||||
|         self._subtractive_selectors_cache = None | ||||
|  | ||||
|     def _get_merged_rules(self, attr, include_global=False): | ||||
|         """Merge rules from watch, tags, and optionally global settings.""" | ||||
|         watch_rules = self.watch.get(attr, []) | ||||
|         tag_rules = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr=attr) | ||||
|         rules = list(dict.fromkeys(watch_rules + tag_rules)) | ||||
|  | ||||
|         if include_global: | ||||
|             global_rules = self.datastore.data['settings']['application'].get(f'global_{attr}', []) | ||||
|             rules = list(dict.fromkeys(rules + global_rules)) | ||||
|  | ||||
|         return rules | ||||
|  | ||||
|     @property | ||||
|     def include_filters(self): | ||||
|         if self._include_filters_cache is None: | ||||
|             filters = self._get_merged_rules('include_filters') | ||||
|             # Inject LD+JSON price tracker rule if enabled | ||||
|             if self.watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT: | ||||
|                 filters += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS | ||||
|             self._include_filters_cache = filters | ||||
|         return self._include_filters_cache | ||||
|  | ||||
|     @property | ||||
|     def subtractive_selectors(self): | ||||
|         if self._subtractive_selectors_cache is None: | ||||
|             watch_selectors = self.watch.get("subtractive_selectors", []) | ||||
|             tag_selectors = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr='subtractive_selectors') | ||||
|             global_selectors = self.datastore.data["settings"]["application"].get("global_subtractive_selectors", []) | ||||
|             self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors] | ||||
|         return self._subtractive_selectors_cache | ||||
|  | ||||
|     @property | ||||
|     def extract_text(self): | ||||
|         return self._get_merged_rules('extract_text') | ||||
|  | ||||
|     @property | ||||
|     def ignore_text(self): | ||||
|         return self._get_merged_rules('ignore_text', include_global=True) | ||||
|  | ||||
|     @property | ||||
|     def trigger_text(self): | ||||
|         return self._get_merged_rules('trigger_text') | ||||
|  | ||||
|     @property | ||||
|     def text_should_not_be_present(self): | ||||
|         return self._get_merged_rules('text_should_not_be_present') | ||||
|  | ||||
|     @property | ||||
|     def has_include_filters(self): | ||||
|         return bool(self.include_filters) and bool(self.include_filters[0].strip()) | ||||
|  | ||||
|     @property | ||||
|     def has_include_json_filters(self): | ||||
|         return any(f.strip().startswith(prefix) for f in self.include_filters for prefix in JSON_FILTER_PREFIXES) | ||||
|  | ||||
|     @property | ||||
|     def has_subtractive_selectors(self): | ||||
|         return bool(self.subtractive_selectors) and bool(self.subtractive_selectors[0].strip()) | ||||
|  | ||||
|  | ||||
| class ContentTransformer: | ||||
|     """Handles text transformations like trimming, sorting, and deduplication.""" | ||||
|  | ||||
|     @staticmethod | ||||
|     def trim_whitespace(text): | ||||
|         """Remove leading/trailing whitespace from each line.""" | ||||
|         # Use generator expression to avoid building intermediate list | ||||
|         return '\n'.join(line.strip() for line in text.replace("\n\n", "\n").splitlines()) | ||||
|  | ||||
|     @staticmethod | ||||
|     def remove_duplicate_lines(text): | ||||
|         """Remove duplicate lines while preserving order.""" | ||||
|         return '\n'.join(dict.fromkeys(line for line in text.replace("\n\n", "\n").splitlines())) | ||||
|  | ||||
|     @staticmethod | ||||
|     def sort_alphabetically(text): | ||||
|         """Sort lines alphabetically (case-insensitive).""" | ||||
|         # Remove double line feeds before sorting | ||||
|         text = text.replace("\n\n", "\n") | ||||
|         return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower())) | ||||
|  | ||||
|     @staticmethod | ||||
|     def extract_by_regex(text, regex_patterns): | ||||
|         """Extract text matching regex patterns.""" | ||||
|         # Use list of strings instead of concatenating lists repeatedly (avoids O(n²) behavior) | ||||
|         regex_matched_output = [] | ||||
|  | ||||
|         for s_re in regex_patterns: | ||||
|             # Check if it's perl-style regex /.../ | ||||
|             if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE): | ||||
|                 regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re) | ||||
|                 result = re.findall(regex, text) | ||||
|  | ||||
|                 for match in result: | ||||
|                     if type(match) is tuple: | ||||
|                         regex_matched_output.extend(match) | ||||
|                         regex_matched_output.append('\n') | ||||
|                     else: | ||||
|                         regex_matched_output.append(match) | ||||
|                         regex_matched_output.append('\n') | ||||
|             else: | ||||
|                 # Plain text search (case-insensitive) | ||||
|                 r = re.compile(re.escape(s_re), re.IGNORECASE) | ||||
|                 res = r.findall(text) | ||||
|                 if res: | ||||
|                     for match in res: | ||||
|                         regex_matched_output.append(match) | ||||
|                         regex_matched_output.append('\n') | ||||
|  | ||||
|         return ''.join(regex_matched_output) if regex_matched_output else '' | ||||
|  | ||||
|  | ||||
class RuleEngine:
    """Evaluates blocking rules (triggers, conditions, text_should_not_be_present)."""

    @staticmethod
    def evaluate_trigger_text(content, trigger_patterns):
        """
        Check whether trigger text is present. When trigger_text is configured,
        the change is blocked UNLESS one of the triggers is found in the content.
        Returns True if blocked, False if allowed.
        """
        # No triggers configured -> nothing can block
        if not trigger_patterns:
            return False

        # Trigger(s) configured: assume blocked until a matching line turns up
        matched_lines = html_tools.strip_ignore_text(
            content=str(content),
            wordlist=trigger_patterns,
            mode="line numbers"
        )
        # An empty match list means no trigger fired, so we stay blocked
        return not matched_lines

    @staticmethod
    def evaluate_text_should_not_be_present(content, patterns):
        """
        Check whether forbidden text is present; its presence blocks the change.
        Returns True if blocked, False if allowed.
        """
        # Nothing forbidden configured -> never blocks
        if not patterns:
            return False

        matched_lines = html_tools.strip_ignore_text(
            content=str(content),
            wordlist=patterns,
            mode="line numbers"
        )
        # Any hit on the forbidden list blocks the change
        return bool(matched_lines)

    @staticmethod
    def evaluate_conditions(watch, datastore, content):
        """
        Evaluate the watch's custom conditions ruleset.
        Returns True if blocked, False if allowed.
        """
        # Without both a ruleset and its match logic there is nothing to evaluate
        if not watch.get('conditions') or not watch.get('conditions_match_logic'):
            return False

        outcome = execute_ruleset_against_all_plugins(
            current_watch_uuid=watch.get('uuid'),
            application_datastruct=datastore.data,
            ephemeral_data={'text': content}
        )

        # A falsy 'result' means the conditions were not met -> block
        return not outcome.get('result')
|  | ||||
|  | ||||
class ContentProcessor:
    """Handles content preprocessing, filtering, and extraction."""

    def __init__(self, fetcher, watch, filter_config, datastore):
        self.fetcher = fetcher
        self.watch = watch
        self.filter_config = filter_config
        self.datastore = datastore

    def preprocess_rss(self, content):
        """
        Convert CDATA/comments in RSS to usable text.

        Supports two RSS processing modes:
        - 'default': Inline CDATA replacement (original behavior)
        - 'formatted': Format RSS items with title, link, guid, pubDate, and description (CDATA unmarked)
        """
        from changedetectionio import rss_tools
        rss_mode = self.datastore.data["settings"]["application"].get("rss_reader_mode")
        if rss_mode:
            # Format RSS items nicely with CDATA content unmarked and converted to text
            return rss_tools.format_rss_items(content)
        else:
            # Default: Original inline CDATA replacement
            return cdata_in_document_to_text(html_content=content)

    def preprocess_pdf(self, raw_content):
        """Convert PDF bytes to HTML via the external `pdftohtml` tool.

        The tool name is overridable with the PDF_TO_HTML_TOOL env var.
        Raises PDFToHTMLToolNotFound when the tool is not on the system PATH.
        A checksum/size footer is appended so changes to the binary (e.g. an
        embedded image) are detected even when the extracted text is identical.
        """
        from shutil import which
        tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
        if not which(tool):
            raise PDFToHTMLToolNotFound(
                f"Command-line `{tool}` tool was not found in system PATH, was it installed?"
            )

        import subprocess
        # Use run() with `input=` rather than manually writing all of stdin and
        # then reading all of stdout on a Popen: that sequential pattern can
        # deadlock when the child fills its stdout pipe buffer before stdin is
        # fully consumed. run() pumps both pipes concurrently, and the timeout
        # now bounds the whole conversion instead of only the final wait().
        result = subprocess.run(
            [tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
            input=raw_content,
            stdout=subprocess.PIPE,
            timeout=60
        )
        html_content = result.stdout.decode('utf-8')

        # Add metadata for change detection
        metadata = (
            f"<p>Added by changedetection.io: Document checksum - "
            f"{hashlib.md5(raw_content).hexdigest().upper()} "
            f"Original file size - {len(raw_content)} bytes</p>"
        )
        return html_content.replace('</body>', metadata + '</body>')

    def preprocess_json(self, raw_content):
        """Format and sort JSON content for stable comparisons."""
        # Then we re-format it, else it does have filters (later on) which will reformat it anyway
        content = html_tools.extract_json_as_string(content=raw_content, json_filter="json:$")

        # Sort JSON to avoid false alerts from reordering
        try:
            content = json.dumps(json.loads(content), sort_keys=True, indent=4)
        except Exception:
            # Might be malformed JSON, continue anyway
            pass

        return content

    def apply_include_filters(self, content, stream_content_type):
        """Apply CSS, XPath, or JSON filters to extract specific content.

        Raises FilterNotFoundInResponse when the combined filters match nothing.
        """
        filtered_content = ""

        for filter_rule in self.filter_config.include_filters:
            # XPath filters
            if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
                filtered_content += html_tools.xpath_filter(
                    xpath_filter=filter_rule.replace('xpath:', ''),
                    html_content=content,
                    append_pretty_line_formatting=not self.watch.is_source_type_url,
                    is_rss=stream_content_type.is_rss
                )

            # XPath1 filters (first match only)
            elif filter_rule.startswith('xpath1:'):
                filtered_content += html_tools.xpath1_filter(
                    xpath_filter=filter_rule.replace('xpath1:', ''),
                    html_content=content,
                    append_pretty_line_formatting=not self.watch.is_source_type_url,
                    is_rss=stream_content_type.is_rss
                )

            # JSON filters
            elif any(filter_rule.startswith(prefix) for prefix in JSON_FILTER_PREFIXES):
                filtered_content += html_tools.extract_json_as_string(
                    content=content,
                    json_filter=filter_rule
                )

            # CSS selectors, default fallback
            else:
                filtered_content += html_tools.include_filters(
                    include_filters=filter_rule,
                    html_content=content,
                    append_pretty_line_formatting=not self.watch.is_source_type_url
                )

        # Raise error if filter returned nothing
        if not filtered_content.strip():
            raise FilterNotFoundInResponse(
                msg=self.filter_config.include_filters,
                screenshot=self.fetcher.screenshot,
                xpath_data=self.fetcher.xpath_data
            )

        return filtered_content

    def apply_subtractive_selectors(self, content):
        """Remove elements matching subtractive selectors."""
        return html_tools.element_removal(self.filter_config.subtractive_selectors, content)

    def extract_text_from_html(self, html_content, stream_content_type):
        """Convert HTML to plain text, honouring the anchor-content setting."""
        do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
        return html_tools.html_to_text(
            html_content=html_content,
            render_anchor_tag_content=do_anchor,
            is_rss=stream_content_type.is_rss
        )
|  | ||||
|  | ||||
class ChecksumCalculator:
    """Calculates checksums with various options."""

    @staticmethod
    def calculate(text, ignore_whitespace=False):
        """Return the MD5 hex digest of *text*, optionally stripping all whitespace first."""
        payload = text.translate(TRANSLATE_WHITESPACE_TABLE) if ignore_whitespace else text
        return hashlib.md5(payload.encode('utf-8')).hexdigest()
|  | ||||
|  | ||||
| # Some common stuff here that can be moved to a base class | ||||
| # (set_proxy_from_list) | ||||
| class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|     def run_changedetection(self, watch): | ||||
|         changed_detected = False | ||||
|         html_content = "" | ||||
|         screenshot = False  # as bytes | ||||
|         stripped_text_from_html = "" | ||||
|  | ||||
|         if not watch: | ||||
|             raise Exception("Watch no longer exists.") | ||||
|  | ||||
|         # Initialize components | ||||
|         filter_config = FilterConfig(watch, self.datastore) | ||||
|         content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore) | ||||
|         transformer = ContentTransformer() | ||||
|         rule_engine = RuleEngine() | ||||
|  | ||||
|         # Get content type and stream info | ||||
|         ctype_header = self.fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower() | ||||
|         stream_content_type = guess_stream_type(http_content_header=ctype_header, content=self.fetcher.content) | ||||
|  | ||||
|         # Unset any existing notification error | ||||
|         update_obj = {'last_notification_error': False, 'last_error': False} | ||||
|  | ||||
|         url = watch.link | ||||
|  | ||||
|         self.screenshot = self.fetcher.screenshot | ||||
|         self.xpath_data = self.fetcher.xpath_data | ||||
|  | ||||
|         # Track the content type | ||||
|         update_obj['content_type'] = self.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|  | ||||
|         # Watches added automatically in the queue manager will skip if its the same checksum as the previous run | ||||
|         # Saves a lot of CPU | ||||
|         # Track the content type and checksum before filters | ||||
|         update_obj['content_type'] = ctype_header | ||||
|         update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest() | ||||
|  | ||||
|         # Fetching complete, now filters | ||||
|         # === CONTENT PREPROCESSING === | ||||
|         # Avoid creating unnecessary intermediate string copies by reassigning only when needed | ||||
|         content = self.fetcher.content | ||||
|  | ||||
|         # @note: I feel like the following should be in a more obvious chain system | ||||
|         #  - Check filter text | ||||
|         #  - Is the checksum different? | ||||
|         #  - Do we convert to JSON? | ||||
|         # https://stackoverflow.com/questions/41817578/basic-method-chaining ? | ||||
|         # return content().textfilter().jsonextract().checksumcompare() ? | ||||
|         # RSS preprocessing | ||||
|         if stream_content_type.is_rss: | ||||
|             content = content_processor.preprocess_rss(content) | ||||
|             if self.datastore.data["settings"]["application"].get("rss_reader_mode"): | ||||
|                 # Now just becomes regular HTML that can have xpath/CSS applied (first of the set etc) | ||||
|                 stream_content_type.is_rss = False | ||||
|                 stream_content_type.is_html = True | ||||
|                 self.fetcher.content = content | ||||
|  | ||||
|         is_json = 'application/json' in self.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|         is_html = not is_json | ||||
|         is_rss = False | ||||
|         # PDF preprocessing | ||||
|         if watch.is_pdf or stream_content_type.is_pdf: | ||||
|             content = content_processor.preprocess_pdf(raw_content=self.fetcher.raw_content) | ||||
|             stream_content_type.is_html = True | ||||
|  | ||||
|         ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower() | ||||
|         # Go into RSS preprocess for converting CDATA/comment to usable text | ||||
|         if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']): | ||||
|             if '<rss' in self.fetcher.content[:100].lower(): | ||||
|                 self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content) | ||||
|                 is_rss = True | ||||
|         # JSON - Always reformat it nicely for consistency. | ||||
|  | ||||
|         # source: support, basically treat it as plaintext | ||||
|         if stream_content_type.is_json: | ||||
|             if not filter_config.has_include_json_filters: | ||||
|                 content = content_processor.preprocess_json(raw_content=content) | ||||
|         #else, otherwise it gets sorted/formatted in the filter stage anyway | ||||
|  | ||||
|         # HTML obfuscation workarounds | ||||
|         if stream_content_type.is_html: | ||||
|             content = html_tools.workarounds_for_obfuscations(content) | ||||
|  | ||||
|         # Check for LD+JSON price data (for HTML content) | ||||
|         if stream_content_type.is_html: | ||||
|             update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(content) | ||||
|  | ||||
|         # === FILTER APPLICATION === | ||||
|         # Start with content reference, avoid copy until modification | ||||
|         html_content = content | ||||
|  | ||||
|         # Apply include filters (CSS, XPath, JSON) | ||||
|         # Except for plaintext (incase they tried to confuse the system, it will HTML escape | ||||
|         #if not stream_content_type.is_plaintext: | ||||
|         if filter_config.has_include_filters: | ||||
|             html_content = content_processor.apply_include_filters(content, stream_content_type) | ||||
|  | ||||
|         # Apply subtractive selectors | ||||
|         if filter_config.has_subtractive_selectors: | ||||
|             html_content = content_processor.apply_subtractive_selectors(html_content) | ||||
|  | ||||
|         # === TEXT EXTRACTION === | ||||
|         if watch.is_source_type_url: | ||||
|             is_html = False | ||||
|             is_json = False | ||||
|  | ||||
|         inline_pdf = self.fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in self.fetcher.content[:10] | ||||
|         if watch.is_pdf or 'application/pdf' in self.fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf: | ||||
|             from shutil import which | ||||
|             tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml") | ||||
|             if not which(tool): | ||||
|                 raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool)) | ||||
|  | ||||
|             import subprocess | ||||
|             proc = subprocess.Popen( | ||||
|                 [tool, '-stdout', '-', '-s', 'out.pdf', '-i'], | ||||
|                 stdout=subprocess.PIPE, | ||||
|                 stdin=subprocess.PIPE) | ||||
|             proc.stdin.write(self.fetcher.raw_content) | ||||
|             proc.stdin.close() | ||||
|             self.fetcher.content = proc.stdout.read().decode('utf-8') | ||||
|             proc.wait(timeout=60) | ||||
|  | ||||
|             # Add a little metadata so we know if the file changes (like if an image changes, but the text is the same | ||||
|             # @todo may cause problems with non-UTF8? | ||||
|             metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format( | ||||
|                 hashlib.md5(self.fetcher.raw_content).hexdigest().upper(), | ||||
|                 len(self.fetcher.content)) | ||||
|  | ||||
|             self.fetcher.content = self.fetcher.content.replace('</body>', metadata + '</body>') | ||||
|  | ||||
|         # Better would be if Watch.model could access the global data also | ||||
|         # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__ | ||||
|         # https://realpython.com/inherit-python-dict/ instead of doing it procedurely | ||||
|         include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters') | ||||
|  | ||||
|         # 1845 - remove duplicated filters in both group and watch include filter | ||||
|         include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags)) | ||||
|  | ||||
|         subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'), | ||||
|                                  *watch.get("subtractive_selectors", []), | ||||
|                                  *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", []) | ||||
|                                  ] | ||||
|  | ||||
|         # Inject a virtual LD+JSON price tracker rule | ||||
|         if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT: | ||||
|             include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS | ||||
|  | ||||
|         has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip()) | ||||
|         has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip()) | ||||
|  | ||||
|         if is_json and not has_filter_rule: | ||||
|             include_filters_rule.append("json:$") | ||||
|             has_filter_rule = True | ||||
|  | ||||
|         if is_json: | ||||
|             # Sort the JSON so we dont get false alerts when the content is just re-ordered | ||||
|             try: | ||||
|                 self.fetcher.content = json.dumps(json.loads(self.fetcher.content), sort_keys=True) | ||||
|             except Exception as e: | ||||
|                 # Might have just been a snippet, or otherwise bad JSON, continue | ||||
|                 pass | ||||
|  | ||||
|         if has_filter_rule: | ||||
|             for filter in include_filters_rule: | ||||
|                 if any(prefix in filter for prefix in json_filter_prefixes): | ||||
|                     stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter) | ||||
|                     is_html = False | ||||
|  | ||||
|         if is_html or watch.is_source_type_url: | ||||
|  | ||||
|             # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text | ||||
|             self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content) | ||||
|             html_content = self.fetcher.content | ||||
|  | ||||
|             # If not JSON,  and if it's not text/plain.. | ||||
|             if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower(): | ||||
|                 # Don't run get_text or xpath/css filters on plaintext | ||||
|                 stripped_text_from_html = html_content | ||||
|             # For source URLs, keep raw content | ||||
|             stripped_text = html_content | ||||
|         elif stream_content_type.is_plaintext: | ||||
|             # For plaintext, keep as-is without HTML-to-text conversion | ||||
|             stripped_text = html_content | ||||
|         else: | ||||
|             # Extract text from HTML/RSS content (not generic XML) | ||||
|             if stream_content_type.is_html or stream_content_type.is_rss: | ||||
|                 stripped_text = content_processor.extract_text_from_html(html_content, stream_content_type) | ||||
|             else: | ||||
|                 # Does it have some ld+json price data? used for easier monitoring | ||||
|                 update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(self.fetcher.content) | ||||
|  | ||||
|                 # Then we assume HTML | ||||
|                 if has_filter_rule: | ||||
|                     html_content = "" | ||||
|  | ||||
|                     for filter_rule in include_filters_rule: | ||||
|                         # For HTML/XML we offer xpath as an option, just start a regular xPath "/.." | ||||
|                         if filter_rule[0] == '/' or filter_rule.startswith('xpath:'): | ||||
|                             html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''), | ||||
|                                                                     html_content=self.fetcher.content, | ||||
|                                                                     append_pretty_line_formatting=not watch.is_source_type_url, | ||||
|                                                                     is_rss=is_rss) | ||||
|  | ||||
|                         elif filter_rule.startswith('xpath1:'): | ||||
|                             html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''), | ||||
|                                                                      html_content=self.fetcher.content, | ||||
|                                                                      append_pretty_line_formatting=not watch.is_source_type_url, | ||||
|                                                                      is_rss=is_rss) | ||||
|                         else: | ||||
|                             html_content += html_tools.include_filters(include_filters=filter_rule, | ||||
|                                                                        html_content=self.fetcher.content, | ||||
|                                                                        append_pretty_line_formatting=not watch.is_source_type_url) | ||||
|  | ||||
|                     if not html_content.strip(): | ||||
|                         raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data) | ||||
|  | ||||
|                 if has_subtractive_selectors: | ||||
|                     html_content = html_tools.element_removal(subtractive_selectors, html_content) | ||||
|  | ||||
|                 if watch.is_source_type_url: | ||||
|                     stripped_text_from_html = html_content | ||||
|                 else: | ||||
|                     # extract text | ||||
|                     do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False) | ||||
|                     stripped_text_from_html = html_tools.html_to_text(html_content=html_content, | ||||
|                                                                       render_anchor_tag_content=do_anchor, | ||||
|                                                                       is_rss=is_rss)  # 1874 activate the <title workaround hack | ||||
|                 stripped_text = html_content | ||||
|  | ||||
|         # === TEXT TRANSFORMATIONS === | ||||
|         if watch.get('trim_text_whitespace'): | ||||
|             stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()) | ||||
|             stripped_text = transformer.trim_whitespace(stripped_text) | ||||
|  | ||||
|         # Re #340 - return the content before the 'ignore text' was applied | ||||
|         # Also used to calculate/show what was removed | ||||
|         text_content_before_ignored_filter = stripped_text_from_html | ||||
|  | ||||
|         # @todo whitespace coming from missing rtrim()? | ||||
|         # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about. | ||||
|         # Rewrite's the processing text based on only what diff result they want to see | ||||
|         # Save text before ignore filters (for diff calculation) | ||||
|         text_content_before_ignored_filter = stripped_text | ||||
|  | ||||
|         # === DIFF FILTERING === | ||||
|         # If user wants specific diff types (added/removed/replaced only) | ||||
|         if watch.has_special_diff_filter_options_set() and len(watch.history.keys()): | ||||
|             # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences | ||||
|             from changedetectionio import diff | ||||
|             # needs to not include (added) etc or it may get used twice | ||||
|             # Replace the processed text with the preferred result | ||||
|             rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(), | ||||
|                                              newest_version_file_contents=stripped_text_from_html, | ||||
|                                              include_equal=False,  # not the same lines | ||||
|                                              include_added=watch.get('filter_text_added', True), | ||||
|                                              include_removed=watch.get('filter_text_removed', True), | ||||
|                                              include_replaced=watch.get('filter_text_replaced', True), | ||||
|                                              line_feed_sep="\n", | ||||
|                                              include_change_type_prefix=False) | ||||
|             stripped_text = self._apply_diff_filtering(watch, stripped_text, text_content_before_ignored_filter) | ||||
|             if stripped_text is None: | ||||
|                 # No differences found, but content exists | ||||
|                 c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True) | ||||
|                 return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8') | ||||
|  | ||||
|             watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8')) | ||||
|  | ||||
|             if not rendered_diff and stripped_text_from_html: | ||||
|                 # We had some content, but no differences were found | ||||
|                 # Store our new file as the MD5 so it will trigger in the future | ||||
|                 c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest() | ||||
|                 return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8') | ||||
|             else: | ||||
|                 stripped_text_from_html = rendered_diff | ||||
|  | ||||
|         # Treat pages with no renderable text content as a change? No by default | ||||
|         # === EMPTY PAGE CHECK === | ||||
|         empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|         if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: | ||||
|             raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url, | ||||
|                                                             status_code=self.fetcher.get_last_status_code(), | ||||
|                                                             screenshot=self.fetcher.screenshot, | ||||
|                                                             has_filters=has_filter_rule, | ||||
|                                                             html_content=html_content, | ||||
|                                                             xpath_data=self.fetcher.xpath_data | ||||
|                                                             ) | ||||
|  | ||||
|         # We rely on the actual text in the html output.. many sites have random script vars etc, | ||||
|         # in the future we'll implement other mechanisms. | ||||
|         if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0: | ||||
|             raise content_fetchers.exceptions.ReplyWithContentButNoText( | ||||
|                 url=url, | ||||
|                 status_code=self.fetcher.get_last_status_code(), | ||||
|                 screenshot=self.fetcher.screenshot, | ||||
|                 has_filters=filter_config.has_include_filters, | ||||
|                 html_content=html_content, | ||||
|                 xpath_data=self.fetcher.xpath_data | ||||
|             ) | ||||
|  | ||||
|         update_obj["last_check_status"] = self.fetcher.get_last_status_code() | ||||
|  | ||||
|         # 615 Extract text by regex | ||||
|         extract_text = list(dict.fromkeys(watch.get('extract_text', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text'))) | ||||
|         if len(extract_text) > 0: | ||||
|             regex_matched_output = [] | ||||
|             for s_re in extract_text: | ||||
|                 # incase they specified something in '/.../x' | ||||
|                 if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE): | ||||
|                     regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re) | ||||
|                     result = re.findall(regex, stripped_text_from_html) | ||||
|  | ||||
|                     for l in result: | ||||
|                         if type(l) is tuple: | ||||
|                             # @todo - some formatter option default (between groups) | ||||
|                             regex_matched_output += list(l) + ['\n'] | ||||
|                         else: | ||||
|                             # @todo - some formatter option default (between each ungrouped result) | ||||
|                             regex_matched_output += [l] + ['\n'] | ||||
|                 else: | ||||
|                     # Doesnt look like regex, just hunt for plaintext and return that which matches | ||||
|                     # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes | ||||
|                     r = re.compile(re.escape(s_re), re.IGNORECASE) | ||||
|                     res = r.findall(stripped_text_from_html) | ||||
|                     if res: | ||||
|                         for match in res: | ||||
|                             regex_matched_output += [match] + ['\n'] | ||||
|  | ||||
|             ########################################################## | ||||
|             stripped_text_from_html = '' | ||||
|  | ||||
|             if regex_matched_output: | ||||
|                 # @todo some formatter for presentation? | ||||
|                 stripped_text_from_html = ''.join(regex_matched_output) | ||||
|         # === REGEX EXTRACTION === | ||||
|         if filter_config.extract_text: | ||||
|             extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text) | ||||
|             stripped_text = extracted | ||||
|  | ||||
|         # === MORE TEXT TRANSFORMATIONS === | ||||
|         if watch.get('remove_duplicate_lines'): | ||||
|             stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())) | ||||
|  | ||||
|             stripped_text = transformer.remove_duplicate_lines(stripped_text) | ||||
|  | ||||
|         if watch.get('sort_text_alphabetically'): | ||||
|             # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap | ||||
|             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here. | ||||
|             stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n") | ||||
|             stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower())) | ||||
|             stripped_text = transformer.sort_alphabetically(stripped_text) | ||||
|  | ||||
| ### CALCULATE MD5 | ||||
|         # If there's text to ignore | ||||
|         text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', []) | ||||
|         text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text') | ||||
|         # === CHECKSUM CALCULATION === | ||||
|         text_for_checksuming = stripped_text | ||||
|  | ||||
|         text_for_checksuming = stripped_text_from_html | ||||
|         if text_to_ignore: | ||||
|             text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore) | ||||
|         # Apply ignore_text for checksum calculation | ||||
|         if filter_config.ignore_text: | ||||
|             text_for_checksuming = html_tools.strip_ignore_text(stripped_text, filter_config.ignore_text) | ||||
|  | ||||
|         # Re #133 - if we should strip whitespaces from triggering the change detected comparison | ||||
|         if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False): | ||||
|             fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest() | ||||
|         else: | ||||
|             fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest() | ||||
|             # Optionally remove ignored lines from output | ||||
|             strip_ignored_lines = watch.get('strip_ignored_lines') | ||||
|             if strip_ignored_lines is None: | ||||
|                 strip_ignored_lines = self.datastore.data['settings']['application'].get('strip_ignored_lines') | ||||
|             if strip_ignored_lines: | ||||
|                 stripped_text = text_for_checksuming | ||||
|  | ||||
|         ############ Blocking rules, after checksum ################# | ||||
|         # Calculate checksum | ||||
|         ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace', False) | ||||
|         fetched_md5 = ChecksumCalculator.calculate(text_for_checksuming, ignore_whitespace=ignore_whitespace) | ||||
|  | ||||
|         # === BLOCKING RULES EVALUATION === | ||||
|         blocked = False | ||||
|         trigger_text = list(dict.fromkeys(watch.get('trigger_text', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text'))) | ||||
|         if len(trigger_text): | ||||
|             # Assume blocked | ||||
|  | ||||
|         # Check trigger_text | ||||
|         if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text): | ||||
|             blocked = True | ||||
|             # Filter and trigger works the same, so reuse it | ||||
|             # It should return the line numbers that match | ||||
|             # Unblock flow if the trigger was found (some text remained after stripped what didnt match) | ||||
|             result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), | ||||
|                                                   wordlist=trigger_text, | ||||
|                                                   mode="line numbers") | ||||
|             # Unblock if the trigger was found | ||||
|             if result: | ||||
|                 blocked = False | ||||
|  | ||||
|         text_should_not_be_present = list(dict.fromkeys(watch.get('text_should_not_be_present', []) + self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present'))) | ||||
|         if len(text_should_not_be_present): | ||||
|             # If anything matched, then we should block a change from happening | ||||
|             result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), | ||||
|                                                   wordlist=text_should_not_be_present, | ||||
|                                                   mode="line numbers") | ||||
|             if result: | ||||
|                 blocked = True | ||||
|         # Check text_should_not_be_present | ||||
|         if rule_engine.evaluate_text_should_not_be_present(stripped_text, filter_config.text_should_not_be_present): | ||||
|             blocked = True | ||||
|  | ||||
|         # And check if 'conditions' will let this pass through | ||||
|         if watch.get('conditions') and watch.get('conditions_match_logic'): | ||||
|             conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'), | ||||
|                                                                     application_datastruct=self.datastore.data, | ||||
|                                                                     ephemeral_data={ | ||||
|                                                                         'text': stripped_text_from_html | ||||
|                                                                     } | ||||
|                                                                     ) | ||||
|         # Check custom conditions | ||||
|         if rule_engine.evaluate_conditions(watch, self.datastore, stripped_text): | ||||
|             blocked = True | ||||
|  | ||||
|             if not conditions_result.get('result'): | ||||
|                 # Conditions say "Condition not met" so we block it. | ||||
|                 blocked = True | ||||
|  | ||||
|         # Looks like something changed, but did it match all the rules? | ||||
|         # === CHANGE DETECTION === | ||||
|         if blocked: | ||||
|             changed_detected = False | ||||
|         else: | ||||
|             # The main thing that all this at the moment comes down to :) | ||||
|             # Compare checksums | ||||
|             if watch.get('previous_md5') != fetched_md5: | ||||
|                 changed_detected = True | ||||
|  | ||||
|             # Always record the new checksum | ||||
|             update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|             # On the first run of a site, watch['previous_md5'] will be None, set it the current one. | ||||
|             # On first run, initialize previous_md5 | ||||
|             if not watch.get('previous_md5'): | ||||
|                 watch['previous_md5'] = fetched_md5 | ||||
|  | ||||
|         logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") | ||||
|  | ||||
|         if changed_detected: | ||||
|             if watch.get('check_unique_lines', False): | ||||
|                 ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace') | ||||
|         # === UNIQUE LINES CHECK === | ||||
|         if changed_detected and watch.get('check_unique_lines', False): | ||||
|             has_unique_lines = watch.lines_contain_something_unique_compared_to_history( | ||||
|                 lines=stripped_text.splitlines(), | ||||
|                 ignore_whitespace=ignore_whitespace | ||||
|             ) | ||||
|  | ||||
|                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history( | ||||
|                     lines=stripped_text_from_html.splitlines(), | ||||
|                     ignore_whitespace=ignore_whitespace | ||||
|                 ) | ||||
|             if not has_unique_lines: | ||||
|                 logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False") | ||||
|                 changed_detected = False | ||||
|             else: | ||||
|                 logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content") | ||||
|  | ||||
|                 # One or more lines? unsure? | ||||
|                 if not has_unique_lines: | ||||
|                     logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False") | ||||
|                     changed_detected = False | ||||
|                 else: | ||||
|                     logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content") | ||||
|         # Note: Explicit cleanup is only needed here because text_json_diff handles | ||||
|         # large strings (100KB-300KB for RSS/HTML). The other processors work with | ||||
|         # small strings and don't need this. | ||||
|         # | ||||
|         # Python would clean these up automatically, but explicit `del` frees memory | ||||
|         # immediately rather than waiting for function return, reducing peak memory usage. | ||||
|         del content | ||||
|         if 'html_content' in locals() and html_content is not stripped_text: | ||||
|             del html_content | ||||
|         if 'text_content_before_ignored_filter' in locals() and text_content_before_ignored_filter is not stripped_text: | ||||
|             del text_content_before_ignored_filter | ||||
|         if 'text_for_checksuming' in locals() and text_for_checksuming is not stripped_text: | ||||
|             del text_for_checksuming | ||||
|  | ||||
|         return changed_detected, update_obj, stripped_text | ||||
|  | ||||
|         # stripped_text_from_html - Everything after filters and NO 'ignored' content | ||||
|         return changed_detected, update_obj, stripped_text_from_html | ||||
|     def _apply_diff_filtering(self, watch, stripped_text, text_before_filter): | ||||
|         """Apply user's diff filtering preferences (show only added/removed/replaced lines).""" | ||||
|         from changedetectionio import diff | ||||
|  | ||||
|         rendered_diff = diff.render_diff( | ||||
|             previous_version_file_contents=watch.get_last_fetched_text_before_filters(), | ||||
|             newest_version_file_contents=stripped_text, | ||||
|             include_equal=False, | ||||
|             include_added=watch.get('filter_text_added', True), | ||||
|             include_removed=watch.get('filter_text_removed', True), | ||||
|             include_replaced=watch.get('filter_text_replaced', True), | ||||
|             line_feed_sep="\n", | ||||
|             include_change_type_prefix=False | ||||
|         ) | ||||
|  | ||||
|         watch.save_last_text_fetched_before_filters(text_before_filter.encode('utf-8')) | ||||
|  | ||||
|         if not rendered_diff and stripped_text: | ||||
|             # No differences found | ||||
|             return None | ||||
|  | ||||
|         return rendered_diff | ||||
|   | ||||
| @@ -243,14 +243,15 @@ def handle_watch_update(socketio, **kwargs): | ||||
|  | ||||
|         general_stats = { | ||||
|             'count_errors': errored_count, | ||||
|             'has_unviewed': datastore.has_unviewed | ||||
|             'unread_changes_count': datastore.unread_changes_count | ||||
|         } | ||||
|  | ||||
|         # Debug what's being emitted | ||||
|         # logger.debug(f"Emitting 'watch_update' event for {watch.get('uuid')}, data: {watch_data}") | ||||
|  | ||||
|         # Emit to all clients (no 'broadcast' parameter needed - it's the default behavior) | ||||
|         socketio.emit("watch_update", {'watch': watch_data, 'general_stats': general_stats}) | ||||
|         socketio.emit("watch_update", {'watch': watch_data}) | ||||
|         socketio.emit("general_stats_update", general_stats) | ||||
|  | ||||
|         # Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues | ||||
|         logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}") | ||||
|   | ||||
							
								
								
									
										130
									
								
								changedetectionio/rss_tools.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										130
									
								
								changedetectionio/rss_tools.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,130 @@ | ||||
| """ | ||||
| RSS/Atom feed processing tools for changedetection.io | ||||
| """ | ||||
|  | ||||
| from loguru import logger | ||||
| import re | ||||
|  | ||||
|  | ||||
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
    """
    Process CDATA sections in HTML/XML content - inline replacement.

    Each ``<![CDATA[...]]>`` section has its inner markup converted to plain
    text (via html_to_text), XML-escaped, and substituted back in place.

    Args:
        html_content: The HTML/XML content to process
        render_anchor_tag_content: Whether to render anchor tag content

    Returns:
        Processed HTML/XML content with CDATA sections replaced inline
    """
    from xml.sax.saxutils import escape as xml_escape
    from .html_tools import html_to_text

    # Raw string: '\[', '\]' etc. are invalid escape sequences in a plain
    # string literal and trigger SyntaxWarning on modern Python.
    # The tempered dot (?<!\]\]>) stops the match at the first ']]>' terminator.
    cdata_pattern = re.compile(r'<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>')

    def repl(m):
        inner = m.group(1)
        return xml_escape(html_to_text(html_content=inner, render_anchor_tag_content=render_anchor_tag_content)).strip()

    return cdata_pattern.sub(repl, html_content)
|  | ||||
|  | ||||
def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
    """
    Format RSS/Atom feed items in a readable text format using feedparser.

    Converts RSS <item> or Atom <entry> elements to formatted text with:
    - <title> → <h1>Title</h1>
    - <link> → Link: [url]
    - <guid> → Guid: [id]
    - <pubDate> → PubDate: [date]
    - <description> or <content> → Raw HTML content (CDATA and entities automatically handled)

    Args:
        rss_content: The RSS/Atom feed content
        render_anchor_tag_content: Whether to render anchor tag content in descriptions (unused, kept for compatibility)

    Returns:
        Formatted HTML content ready for html_to_text conversion; on any
        parsing/formatting error the original content is returned unchanged.
    """
    try:
        import feedparser
        from xml.sax.saxutils import escape as xml_escape

        # feedparser copes with all RSS/Atom variants, CDATA and entity unescaping.
        parsed = feedparser.parse(rss_content)

        # feed.version looks like 'rss20', 'atom10', ... - used to decide labels below.
        feed_is_atom = parsed.version and 'atom' in parsed.version

        rendered_entries = []
        for entry in parsed.entries:
            pieces = []

            # Title (already CDATA/entity-unescaped by feedparser)
            if getattr(entry, 'title', None):
                pieces.append(f'<h1>{xml_escape(entry.title)}</h1>')

            # Link
            if getattr(entry, 'link', None):
                pieces.append(f'Link: {xml_escape(entry.link)}<br>')

            # GUID/ID
            if getattr(entry, 'id', None):
                pieces.append(f'Guid: {xml_escape(entry.id)}<br>')

            # Date - feedparser normalizes all date field names to 'published'
            if getattr(entry, 'published', None):
                pieces.append(f'PubDate: {xml_escape(entry.published)}<br>')

            # Body: Atom <content> has no label; RSS <description> / Atom <summary>
            # both arrive as entry.summary, and only Atom gets a "Summary:" label.
            body = None
            prefix_with_summary = False
            if getattr(entry, 'content', None):
                body = entry.content[0].value if entry.content[0].value else None
            elif hasattr(entry, 'summary'):
                body = entry.summary if entry.summary else None
                if feed_is_atom:
                    prefix_with_summary = True

            if body:
                pieces.append(f'Summary:<br>{body}' if prefix_with_summary else body)
            else:
                # No usable body at all
                pieces.append('<none>')

            if pieces:
                rendered_entries.append('\n'.join(pieces))

        # Wrap each entry in a div carrying positional classes (first, last, item-N).
        entry_count = len(rendered_entries)
        wrapped = []
        for position, entry_html in enumerate(rendered_entries):
            css_classes = ['rss-item']
            if position == 0:
                css_classes.append('first')
            if position == entry_count - 1:
                css_classes.append('last')
            css_classes.append(f'item-{position + 1}')
            wrapped.append(f'<div class="{" ".join(css_classes)}">{entry_html}</div>')

        return '<html><body>\n' + "\n<br><br>".join(wrapped) + '\n</body></html>'

    except Exception as e:
        logger.warning(f"Error formatting RSS items: {str(e)}")
        # Fall back to original content
        return rss_content
| @@ -15,7 +15,7 @@ find tests/test_*py -type f|while read test_name | ||||
| do | ||||
|   echo "TEST RUNNING $test_name" | ||||
|   # REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser | ||||
|   REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name | ||||
|   REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv -s --maxfail=1 --tb=long $test_name | ||||
| done | ||||
|  | ||||
| echo "RUNNING WITH BASE_URL SET" | ||||
| @@ -23,20 +23,20 @@ echo "RUNNING WITH BASE_URL SET" | ||||
| # Now re-run some tests with BASE_URL enabled | ||||
| # Re #65 - Ability to include a link back to the installation, in the notification. | ||||
| export BASE_URL="https://really-unique-domain.io" | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv -s --maxfail=1 tests/test_notification.py | ||||
|  | ||||
|  | ||||
| # Re-run with HIDE_REFERER set - could affect login | ||||
| export HIDE_REFERER=True | ||||
| pytest tests/test_access_control.py | ||||
| pytest -vv -s --maxfail=1 tests/test_access_control.py | ||||
|  | ||||
| # Re-run a few tests that will trigger brotli based storage | ||||
| export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5 | ||||
| pytest tests/test_access_control.py | ||||
| pytest -vv -s --maxfail=1 tests/test_access_control.py | ||||
| REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py | ||||
| pytest tests/test_backend.py | ||||
| pytest tests/test_rss.py | ||||
| pytest tests/test_unique_lines.py | ||||
| pytest -vv -s --maxfail=1 tests/test_backend.py | ||||
| pytest -vv -s --maxfail=1 tests/test_rss.py | ||||
| pytest -vv -s --maxfail=1 tests/test_unique_lines.py | ||||
|  | ||||
| # Try high concurrency | ||||
| FETCH_WORKERS=130 pytest  tests/test_history_consistency.py -v -l | ||||
|   | ||||
| @@ -9,7 +9,7 @@ set -x | ||||
| # SOCKS5 related - start simple Socks5 proxy server | ||||
| # SOCKSTEST=xyz should show in the logs of this service to confirm it fetched | ||||
| docker run --network changedet-network -d --hostname socks5proxy --rm  --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy | ||||
| docker run --network changedet-network -d --hostname socks5proxy-noauth --rm  -p 1081:1080 --name socks5proxy-noauth  serjs/go-socks5-proxy | ||||
| docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth -e REQUIRE_AUTH=false serjs/go-socks5-proxy | ||||
|  | ||||
| echo "---------------------------------- SOCKS5 -------------------" | ||||
| # SOCKS5 related - test from proxies.json | ||||
|   | ||||
| @@ -1,24 +0,0 @@ | ||||
| """ | ||||
| Safe Jinja2 render with max payload sizes | ||||
|  | ||||
| See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations | ||||
| """ | ||||
|  | ||||
| import jinja2.sandbox | ||||
| import typing as t | ||||
| import os | ||||
|  | ||||
| JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10)) | ||||
|  | ||||
| # This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available. | ||||
| # (Which also limits available functions that could be called) | ||||
def render(template_str, **args: t.Any) -> str:
    """Render *template_str* inside an immutable Jinja2 sandbox.

    Keyword arguments become the template context.  The rendered output is
    truncated to JINJA2_MAX_RETURN_PAYLOAD_SIZE bytes of text.
    """
    sandbox_env = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension'])
    rendered = sandbox_env.from_string(template_str).render(args)
    return rendered[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
|  | ||||
def render_fully_escaped(content):
    """Return *content* with every HTML-special character escaped.

    Uses a sandboxed, autoescaping environment and the |e filter so the
    result is safe to embed in HTML output.
    """
    sandbox = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
    escaper = sandbox.from_string("{{ some_html|e }}")
    return escaper.render(some_html=content)
|  | ||||
| @@ -29,7 +29,7 @@ $(document).ready(function () { | ||||
|         $(this).text(new Date($(this).data("utc")).toLocaleString()); | ||||
|     }) | ||||
|  | ||||
|     const timezoneInput = $('#application-timezone'); | ||||
|     const timezoneInput = $('#application-scheduler_timezone_default'); | ||||
|     if(timezoneInput.length) { | ||||
|         const timezone = Intl.DateTimeFormat().resolvedOptions().timeZone; | ||||
|         if (!timezoneInput.val().trim()) { | ||||
|   | ||||
| @@ -2,6 +2,13 @@ | ||||
|  | ||||
| $(document).ready(function () { | ||||
|  | ||||
|     function reapplyTableStripes() { | ||||
|         $('.watch-table tbody tr').each(function(index) { | ||||
|             $(this).removeClass('pure-table-odd pure-table-even'); | ||||
|             $(this).addClass(index % 2 === 0 ? 'pure-table-odd' : 'pure-table-even'); | ||||
|         }); | ||||
|     } | ||||
|  | ||||
|     function bindSocketHandlerButtonsEvents(socket) { | ||||
|         $('.ajax-op').on('click.socketHandlerNamespace', function (e) { | ||||
|             e.preventDefault(); | ||||
| @@ -101,6 +108,7 @@ $(document).ready(function () { | ||||
|             socket.on('watch_deleted', function (data) { | ||||
|                 $('tr[data-watch-uuid="' + data.uuid + '"] td').fadeOut(500, function () { | ||||
|                     $(this).closest('tr').remove(); | ||||
|                     reapplyTableStripes(); | ||||
|                 }); | ||||
|             }); | ||||
|  | ||||
| @@ -117,15 +125,16 @@ $(document).ready(function () { | ||||
|                 } | ||||
|             }) | ||||
|  | ||||
|             socket.on('general_stats_update', function (general_stats) { | ||||
|                 // Tabs at bottom of list | ||||
|                 $('#watch-table-wrapper').toggleClass("has-unread-changes", general_stats.unread_changes_count !==0) | ||||
|                 $('#watch-table-wrapper').toggleClass("has-error", general_stats.count_errors !== 0) | ||||
|                 $('#post-list-with-errors a').text(`With errors (${ new Intl.NumberFormat(navigator.language).format(general_stats.count_errors) })`); | ||||
|                 $('#unread-tab-counter').text(new Intl.NumberFormat(navigator.language).format(general_stats.unread_changes_count)); | ||||
|             }); | ||||
|  | ||||
|             socket.on('watch_update', function (data) { | ||||
|                 const watch = data.watch; | ||||
|                 const general_stats = data.general_stats; | ||||
|  | ||||
|                 // Log the entire watch object for debugging | ||||
|                 console.log('!!! WATCH UPDATE EVENT RECEIVED !!!'); | ||||
|                 console.log(`${watch.event_timestamp} - Watch update ${watch.uuid} - Checking now - ${watch.checking_now} - UUID in URL ${window.location.href.includes(watch.uuid)}`); | ||||
|                 console.log('Watch data:', watch); | ||||
|                 console.log('General stats:', general_stats); | ||||
|  | ||||
|                 // Updating watch table rows | ||||
|                 const $watchRow = $('tr[data-watch-uuid="' + watch.uuid + '"]'); | ||||
| @@ -150,13 +159,6 @@ $(document).ready(function () { | ||||
|  | ||||
|                     console.log('Updated UI for watch:', watch.uuid); | ||||
|                 } | ||||
|  | ||||
|                 // Tabs at bottom of list | ||||
|                 $('#post-list-mark-views').toggleClass("has-unviewed", general_stats.has_unviewed); | ||||
|                 $('#post-list-unread').toggleClass("has-unviewed", general_stats.has_unviewed); | ||||
|                 $('#post-list-with-errors').toggleClass("has-error", general_stats.count_errors !== 0) | ||||
|                 $('#post-list-with-errors a').text(`With errors (${ general_stats.count_errors })`); | ||||
|  | ||||
|                 $('body').toggleClass('checking-now', watch.checking_now && window.location.href.includes(watch.uuid)); | ||||
|             }); | ||||
|  | ||||
|   | ||||
| @@ -17,18 +17,6 @@ body.checking-now { | ||||
|   position: fixed; | ||||
| } | ||||
|  | ||||
| #post-list-buttons { | ||||
|   #post-list-with-errors.has-error { | ||||
|     display: inline-block !important; | ||||
|   } | ||||
|   #post-list-mark-views.has-unviewed { | ||||
|     display: inline-block !important; | ||||
|   } | ||||
|   #post-list-unread.has-unviewed { | ||||
|     display: inline-block !important; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -127,5 +127,44 @@ | ||||
|       display: inline-block !important; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|  | ||||
| } | ||||
|  | ||||
| #watch-table-wrapper { | ||||
|   /* general styling */ | ||||
|   #post-list-buttons { | ||||
|     text-align: right; | ||||
|     padding: 0px; | ||||
|     margin: 0px; | ||||
|  | ||||
|     li { | ||||
|       display: inline-block; | ||||
|     } | ||||
|  | ||||
|     a { | ||||
|       border-top-left-radius: initial; | ||||
|       border-top-right-radius: initial; | ||||
|       border-bottom-left-radius: 5px; | ||||
|       border-bottom-right-radius: 5px; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   /* post list dynamically on/off stuff */ | ||||
|  | ||||
|   &.has-error { | ||||
|     #post-list-buttons { | ||||
|       #post-list-with-errors { | ||||
|         display: inline-block !important; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   &.has-unread-changes { | ||||
|     #post-list-buttons { | ||||
|       #post-list-unread, #post-list-mark-views, #post-list-unread { | ||||
|         display: inline-block !important; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|   | ||||
| @@ -34,7 +34,6 @@ | ||||
|       transition: all 0.2s ease; | ||||
|       cursor: pointer; | ||||
|       display: block; | ||||
|       min-width: 60px; | ||||
|       text-align: center; | ||||
|     } | ||||
|  | ||||
|   | ||||
| @@ -203,24 +203,6 @@ code { | ||||
| } | ||||
|  | ||||
|  | ||||
| #post-list-buttons { | ||||
|   text-align: right; | ||||
|   padding: 0px; | ||||
|   margin: 0px; | ||||
|  | ||||
|   li { | ||||
|     display: inline-block; | ||||
|   } | ||||
|  | ||||
|   a { | ||||
|     border-top-left-radius: initial; | ||||
|     border-top-right-radius: initial; | ||||
|     border-bottom-left-radius: 5px; | ||||
|     border-bottom-right-radius: 5px; | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| body:after { | ||||
|   content: ""; | ||||
|   background: linear-gradient(130deg, var(--color-background-gradient-first), var(--color-background-gradient-second) 41.07%, var(--color-background-gradient-third) 84.05%); | ||||
| @@ -362,7 +344,7 @@ label { | ||||
|  }   | ||||
| } | ||||
|  | ||||
| #notification-customisation { | ||||
| .grey-form-border { | ||||
|   border: 1px solid var(--color-border-notification); | ||||
|   padding: 0.5rem; | ||||
|   border-radius: 5px; | ||||
|   | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -1,11 +1,13 @@ | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from changedetectionio.validate_url import is_safe_valid_url | ||||
|  | ||||
| from flask import ( | ||||
|     flash | ||||
| ) | ||||
|  | ||||
| from .html_tools import TRANSLATE_WHITESPACE_TABLE | ||||
| from . model import App, Watch | ||||
| from .model import App, Watch, USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH | ||||
| from copy import deepcopy, copy | ||||
| from os import path, unlink | ||||
| from threading import Lock | ||||
| @@ -202,14 +204,13 @@ class ChangeDetectionStore: | ||||
|         return seconds | ||||
|  | ||||
|     @property | ||||
|     def has_unviewed(self): | ||||
|         if not self.__data.get('watching'): | ||||
|             return None | ||||
|  | ||||
|     def unread_changes_count(self): | ||||
|         unread_changes_count = 0 | ||||
|         for uuid, watch in self.__data['watching'].items(): | ||||
|             if watch.history_n >= 2 and watch.viewed == False: | ||||
|                 return True | ||||
|         return False | ||||
|                 unread_changes_count += 1 | ||||
|  | ||||
|         return unread_changes_count | ||||
|  | ||||
|     @property | ||||
|     def data(self): | ||||
| @@ -229,26 +230,36 @@ class ChangeDetectionStore: | ||||
|         d['settings']['application']['active_base_url'] = active_base_url.strip('" ') | ||||
|         return d | ||||
|  | ||||
|     from pathlib import Path | ||||
|  | ||||
|     def delete_path(self, path: Path): | ||||
|         import shutil | ||||
|         """Delete a file or directory tree, including the path itself.""" | ||||
|         if not path.exists(): | ||||
|             return | ||||
|         if path.is_file() or path.is_symlink(): | ||||
|             path.unlink(missing_ok=True)  # deletes a file or symlink | ||||
|         else: | ||||
|             shutil.rmtree(path, ignore_errors=True)  # deletes dir *and* its contents | ||||
|  | ||||
|     # Delete a single watch by UUID | ||||
|     def delete(self, uuid): | ||||
|         import pathlib | ||||
|         import shutil | ||||
|  | ||||
|         with self.lock: | ||||
|             if uuid == 'all': | ||||
|                 self.__data['watching'] = {} | ||||
|                 time.sleep(1) # Mainly used for testing to allow all items to flush before running next test | ||||
|  | ||||
|                 # GitHub #30 also delete history records | ||||
|                 for uuid in self.data['watching']: | ||||
|                     path = pathlib.Path(os.path.join(self.datastore_path, uuid)) | ||||
|                     if os.path.exists(path): | ||||
|                         shutil.rmtree(path) | ||||
|                         self.delete(uuid) | ||||
|  | ||||
|             else: | ||||
|                 path = pathlib.Path(os.path.join(self.datastore_path, uuid)) | ||||
|                 if os.path.exists(path): | ||||
|                     shutil.rmtree(path) | ||||
|                     self.delete_path(path) | ||||
|  | ||||
|                 del self.data['watching'][uuid] | ||||
|  | ||||
|         self.needs_write_urgent = True | ||||
| @@ -331,9 +342,10 @@ class ChangeDetectionStore: | ||||
|                 logger.error(f"Error fetching metadata for shared watch link {url} {str(e)}") | ||||
|                 flash("Error fetching metadata for {}".format(url), 'error') | ||||
|                 return False | ||||
|         from .model.Watch import is_safe_url | ||||
|         if not is_safe_url(url): | ||||
|             flash('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX', 'error') | ||||
|  | ||||
|         if not is_safe_valid_url(url): | ||||
|             flash('Watch protocol is not permitted or invalid URL format', 'error') | ||||
|  | ||||
|             return None | ||||
|  | ||||
|         if tag and type(tag) == str: | ||||
| @@ -977,6 +989,35 @@ class ChangeDetectionStore: | ||||
|         if self.data['settings']['application'].get('extract_title_as_title'): | ||||
|             self.data['settings']['application']['ui']['use_page_title_in_list'] = self.data['settings']['application'].get('extract_title_as_title') | ||||
|  | ||||
|     def update_21(self): | ||||
|         if self.data['settings']['application'].get('timezone'): | ||||
|             self.data['settings']['application']['scheduler_timezone_default'] = self.data['settings']['application'].get('timezone') | ||||
|             del self.data['settings']['application']['timezone'] | ||||
|  | ||||
|  | ||||
|     # Some notification formats got the wrong name type | ||||
|     def update_22(self): | ||||
|         from .notification import valid_notification_formats | ||||
|  | ||||
|         sys_n_format = self.data['settings']['application'].get('notification_format') | ||||
|         key_exists_as_value = next((k for k, v in valid_notification_formats.items() if v == sys_n_format), None) | ||||
|         if key_exists_as_value: # key of "Plain text" | ||||
|             logger.success(f"['settings']['application']['notification_format'] '{sys_n_format}' -> '{key_exists_as_value}'") | ||||
|             self.data['settings']['application']['notification_format'] = key_exists_as_value | ||||
|  | ||||
|         for uuid, watch in self.data['watching'].items(): | ||||
|             n_format = self.data['watching'][uuid].get('notification_format') | ||||
|             key_exists_as_value = next((k for k, v in valid_notification_formats.items() if v == n_format), None) | ||||
|             if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text" | ||||
|                 logger.success(f"['watching'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'") | ||||
|                 self.data['watching'][uuid]['notification_format'] = key_exists_as_value # should be 'text' or whatever | ||||
|  | ||||
|         for uuid, tag in self.data['settings']['application']['tags'].items(): | ||||
|             n_format = self.data['settings']['application']['tags'][uuid].get('notification_format') | ||||
|             key_exists_as_value = next((k for k, v in valid_notification_formats.items() if v == n_format), None) | ||||
|             if key_exists_as_value and key_exists_as_value != USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:  # key of "Plain text" | ||||
|                 logger.success(f"['settings']['application']['tags'][{uuid}]['notification_format'] '{n_format}' -> '{key_exists_as_value}'") | ||||
|                 self.data['settings']['application']['tags'][uuid]['notification_format'] = key_exists_as_value # should be 'text' or whatever | ||||
|  | ||||
|     def add_notification_url(self, notification_url): | ||||
|          | ||||
|   | ||||
| @@ -33,7 +33,7 @@ | ||||
|                                 <div id="notification-test-log" style="display: none;"><span class="pure-form-message-inline">Processing..</span></div> | ||||
|                             </div> | ||||
|                         </div> | ||||
|                         <div id="notification-customisation" class="pure-control-group"> | ||||
|                         <div class="pure-control-group grey-form-border"> | ||||
|                             <div class="pure-control-group"> | ||||
|                                 {{ render_field(form.notification_title, class="m-d notification-title", placeholder=settings_application['notification_title']) }} | ||||
|                                 <span class="pure-form-message-inline">Title for all notifications</span> | ||||
| @@ -134,6 +134,12 @@ | ||||
|                                     <p> | ||||
|                                         URL encoding, use <strong>|urlencode</strong>, for example - <code>gets://hook-website.com/test.php?title={{ '{{ watch_title|urlencode }}' }}</code> | ||||
|                                     </p> | ||||
|                                     <p> | ||||
|                                         Regular-expression replace, use <strong>|regex_replace</strong>, for example -   <code>{{ "{{ \"hello world 123\" | regex_replace('[0-9]+', 'no-more-numbers') }}" }}</code> | ||||
|                                     </p> | ||||
|                                     <p> | ||||
|                                         For a complete reference of all Jinja2 built-in filters, users can refer to the <a href="https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters">https://jinja.palletsprojects.com/en/3.1.x/templates/#builtin-filters</a> | ||||
|                                     </p> | ||||
|                                 </div> | ||||
|                             </div> | ||||
|                             <div class="pure-control-group"> | ||||
|   | ||||
| @@ -1,14 +1,47 @@ | ||||
| {% macro render_field(field) %} | ||||
|   <div {% if field.errors %} class="error" {% endif %}>{{ field.label }}</div> | ||||
|   <div {% if field.errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }} | ||||
|   {% if field.errors %} | ||||
|     <ul class=errors> | ||||
|     {% for error in field.errors %} | ||||
|       <li>{{ error }}</li> | ||||
|     {% endfor %} | ||||
|     </ul> | ||||
|   {% endif %} | ||||
|   </div> | ||||
|     <div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field.label }}</div> | ||||
|     <div {% if field.errors or field.top_errors %} class="error" {% endif %}>{{ field(**kwargs)|safe }} | ||||
|         {% if field.top_errors %} | ||||
|             top | ||||
|             <ul class="errors top-errors"> | ||||
|                 {% for error in field.top_errors %} | ||||
|                     <li>{{ error }}</li> | ||||
|                 {% endfor %} | ||||
|             </ul> | ||||
|         {% endif %} | ||||
|         {% if field.errors %} | ||||
|             <ul class=errors> | ||||
|                 {% if field.errors is mapping and 'form' in field.errors %} | ||||
|                     {#  and subfield form errors, such as used in RequiredFormField() for TimeBetweenCheckForm sub form #} | ||||
|                     {% set errors = field.errors['form'] %} | ||||
|                     {% for error in errors %} | ||||
|                         <li>{{ error }}</li> | ||||
|                     {% endfor %} | ||||
|                 {% elif field.type == 'FieldList' %} | ||||
|                     {# Handle FieldList of FormFields - errors is a list of dicts, one per entry #} | ||||
|                     {% for idx, entry_errors in field.errors|enumerate %} | ||||
|                         {% if entry_errors is mapping and entry_errors %} | ||||
|                             {# Only show entries that have actual errors #} | ||||
|                             <li><strong>Entry {{ idx + 1 }}:</strong> | ||||
|                                 <ul> | ||||
|                                     {% for field_name, messages in entry_errors.items() %} | ||||
|                                         {% for message in messages %} | ||||
|                                             <li>{{ field_name }}: {{ message }}</li> | ||||
|                                         {% endfor %} | ||||
|                                     {% endfor %} | ||||
|                                 </ul> | ||||
|                             </li> | ||||
|                         {% endif %} | ||||
|                     {% endfor %} | ||||
|                 {% else %} | ||||
|                     {#  regular list of errors with this field #} | ||||
|                     {% for error in field.errors %} | ||||
|                         <li>{{ error }}</li> | ||||
|                     {% endfor %} | ||||
|                 {% endif %} | ||||
|             </ul> | ||||
|         {% endif %} | ||||
|     </div> | ||||
| {% endmacro %} | ||||
|  | ||||
| {% macro render_checkbox_field(field) %} | ||||
| @@ -78,6 +111,39 @@ | ||||
|   {{ field(**kwargs)|safe }} | ||||
| {% endmacro %} | ||||
|  | ||||
| {% macro render_fieldlist_with_inline_errors(fieldlist) %} | ||||
|   {# Specialized macro for FieldList(FormField(...)) that renders errors inline with each field #} | ||||
|   <div {% if fieldlist.errors %} class="error" {% endif %}>{{ fieldlist.label }}</div> | ||||
|   <div {% if fieldlist.errors %} class="error" {% endif %}> | ||||
|     <ul id="{{ fieldlist.id }}"> | ||||
|       {% for entry in fieldlist %} | ||||
|         <li {% if entry.errors %} class="error" {% endif %}> | ||||
|           <label for="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}>{{ fieldlist.label.text }}-{{ loop.index0 }}</label> | ||||
|           <table id="{{ entry.id }}" {% if entry.errors %} class="error" {% endif %}> | ||||
|             <tbody> | ||||
|               {% for subfield in entry %} | ||||
|                 <tr {% if subfield.errors %} class="error" {% endif %}> | ||||
|                   <th {% if subfield.errors %} class="error" {% endif %}><label for="{{ subfield.id }}" {% if subfield.errors %} class="error" {% endif %}>{{ subfield.label.text }}</label></th> | ||||
|                   <td {% if subfield.errors %} class="error" {% endif %}> | ||||
|                     {{ subfield(**kwargs)|safe }} | ||||
|                     {% if subfield.errors %} | ||||
|                       <ul class="errors"> | ||||
|                         {% for error in subfield.errors %} | ||||
|                           <li class="error">{{ error }}</li> | ||||
|                         {% endfor %} | ||||
|                       </ul> | ||||
|                     {% endif %} | ||||
|                   </td> | ||||
|                 </tr> | ||||
|               {% endfor %} | ||||
|             </tbody> | ||||
|           </table> | ||||
|         </li> | ||||
|       {% endfor %} | ||||
|     </ul> | ||||
|   </div> | ||||
| {% endmacro %} | ||||
|  | ||||
| {% macro render_conditions_fieldlist_of_formfields_as_table(fieldlist, table_id="rulesTable") %} | ||||
|   <div class="fieldlist_formfields" id="{{ table_id }}"> | ||||
|     <div class="fieldlist-header"> | ||||
| @@ -200,9 +266,7 @@ | ||||
|             <li id="timezone-info"> | ||||
|                 {{ render_field(form.time_schedule_limit.timezone, placeholder=timezone_default_config) }} <span id="local-time-in-tz"></span> | ||||
|                 <datalist id="timezones" style="display: none;"> | ||||
|                     {% for timezone in available_timezones %} | ||||
|                         <option value="{{ timezone }}">{{ timezone }}</option> | ||||
|                     {% endfor %} | ||||
|                     {%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%} | ||||
|                 </datalist> | ||||
|             </li> | ||||
|         </ul> | ||||
|   | ||||
| @@ -53,7 +53,7 @@ | ||||
|           <a class="pure-menu-heading" href="{{url_for('watchlist.index')}}"> | ||||
|             <strong>Change</strong>Detection.io</a> | ||||
|         {% endif %} | ||||
|         {% if current_diff_url %} | ||||
|         {% if current_diff_url and is_safe_valid_url(current_diff_url) %} | ||||
|           <a class="current-diff-url" href="{{ current_diff_url }}"> | ||||
|             <span style="max-width: 30%; overflow: hidden">{{ current_diff_url }}</span></a> | ||||
|         {% else %} | ||||
|   | ||||
| @@ -26,7 +26,10 @@ | ||||
|                             <li>Changing this will affect the comparison checksum which may trigger an alert</li> | ||||
|                         </ul> | ||||
|                 </span> | ||||
|  | ||||
|                 <br><br> | ||||
|                     <div class="pure-control-group"> | ||||
|                       {{ render_ternary_field(form.strip_ignored_lines) }} | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|  | ||||
|                 <fieldset> | ||||
|   | ||||
| @@ -4,6 +4,7 @@ import time | ||||
| from threading import Thread | ||||
|  | ||||
| import pytest | ||||
| import arrow | ||||
| from changedetectionio import changedetection_app | ||||
| from changedetectionio import store | ||||
| import os | ||||
| @@ -29,16 +30,39 @@ def reportlog(pytestconfig): | ||||
|     logger.remove(handler_id) | ||||
|  | ||||
|  | ||||
| @pytest.fixture | ||||
| def environment(mocker): | ||||
|     """Mock arrow.now() to return a fixed datetime for testing jinja2 time extension.""" | ||||
|     # Fixed datetime: Wed, 09 Dec 2015 23:33:01 UTC | ||||
|     # This is calculated to match the test expectations when offsets are applied | ||||
|     fixed_datetime = arrow.Arrow(2015, 12, 9, 23, 33, 1, tzinfo='UTC') | ||||
|     # Patch arrow.now in the TimeExtension module where it's actually used | ||||
|     mocker.patch('changedetectionio.jinja2_custom.extensions.TimeExtension.arrow.now', return_value=fixed_datetime) | ||||
|     return fixed_datetime | ||||
|  | ||||
|  | ||||
| def format_memory_human(bytes_value): | ||||
|     """Format memory in human-readable units (KB, MB, GB)""" | ||||
|     if bytes_value < 1024: | ||||
|         return f"{bytes_value} B" | ||||
|     elif bytes_value < 1024 ** 2: | ||||
|         return f"{bytes_value / 1024:.2f} KB" | ||||
|     elif bytes_value < 1024 ** 3: | ||||
|         return f"{bytes_value / (1024 ** 2):.2f} MB" | ||||
|     else: | ||||
|         return f"{bytes_value / (1024 ** 3):.2f} GB" | ||||
|  | ||||
| def track_memory(memory_usage, ): | ||||
|     process = psutil.Process(os.getpid()) | ||||
|     while not memory_usage["stop"]: | ||||
|         current_rss = process.memory_info().rss | ||||
|         memory_usage["peak"] = max(memory_usage["peak"], current_rss) | ||||
|         memory_usage["current"] = current_rss  # Keep updating current | ||||
|         time.sleep(0.01)  # Adjust the sleep time as needed | ||||
|  | ||||
| @pytest.fixture(scope='function') | ||||
| def measure_memory_usage(request): | ||||
|     memory_usage = {"peak": 0, "stop": False} | ||||
|     memory_usage = {"peak": 0, "current": 0, "stop": False} | ||||
|     tracker_thread = Thread(target=track_memory, args=(memory_usage,)) | ||||
|     tracker_thread.start() | ||||
|  | ||||
| @@ -47,16 +71,17 @@ def measure_memory_usage(request): | ||||
|     memory_usage["stop"] = True | ||||
|     tracker_thread.join() | ||||
|  | ||||
|     # Note: ru_maxrss is in kilobytes on Unix-based systems | ||||
|     max_memory_used = memory_usage["peak"] / 1024  # Convert to MB | ||||
|     s = f"Peak memory used by the test {request.node.fspath} - '{request.node.name}': {max_memory_used:.2f} MB" | ||||
|     # Note: psutil returns RSS memory in bytes | ||||
|     peak_human = format_memory_human(memory_usage["peak"]) | ||||
|  | ||||
|     s = f"{time.time()} {request.node.fspath} - '{request.node.name}' - Peak memory: {peak_human}" | ||||
|     logger.debug(s) | ||||
|  | ||||
|     with open("test-memory.log", 'a') as f: | ||||
|         f.write(f"{s}\n") | ||||
|  | ||||
|     # Assert that the memory usage is less than 200MB | ||||
| #    assert max_memory_used < 150, f"Memory usage exceeded 200MB: {max_memory_used:.2f} MB" | ||||
| #    assert peak_memory_kb < 150 * 1024, f"Memory usage exceeded 150MB: {peak_human}" | ||||
|  | ||||
|  | ||||
| def cleanup(datastore_path): | ||||
|   | ||||
| @@ -29,13 +29,8 @@ def do_test(client, live_server, make_test_use_extra_browser=False): | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     if make_test_use_extra_browser: | ||||
| @@ -55,7 +50,8 @@ def do_test(client, live_server, make_test_use_extra_browser=False): | ||||
|                   "tags": "", | ||||
|                   "headers": "", | ||||
|                   'fetch_backend': f"extra_browser_{custom_browser_name}", | ||||
|                   'webdriver_js_execute_code': '' | ||||
|                   'webdriver_js_execute_code': '', | ||||
|                   "time_between_check_use_default": "y" | ||||
|             }, | ||||
|             follow_redirects=True | ||||
|         ) | ||||
|   | ||||
| @@ -28,6 +28,7 @@ def test_execute_custom_js(client, live_server, measure_memory_usage): | ||||
|             'fetch_backend': "html_webdriver", | ||||
|             'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();', | ||||
|             'headers': "testheader: yes\buser-agent: MyCustomAgent", | ||||
|             "time_between_check_use_default": "y", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|   | ||||
| @@ -27,6 +27,7 @@ def test_preferred_proxy(client, live_server, measure_memory_usage): | ||||
|                 "proxy": "proxy-two", | ||||
|                 "tags": "", | ||||
|                 "url": url, | ||||
|                 "time_between_check_use_default": "y", | ||||
|               }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|   | ||||
| @@ -62,6 +62,7 @@ def test_noproxy_option(client, live_server, measure_memory_usage): | ||||
|                 "proxy": "no-proxy", | ||||
|                 "tags": "", | ||||
|                 "url": url, | ||||
|                 "time_between_check_use_default": "y", | ||||
|               }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|   | ||||
| @@ -44,6 +44,7 @@ def test_proxy_noconnect_custom(client, live_server, measure_memory_usage): | ||||
|         "url": test_url, | ||||
|         "fetch_backend": "html_webdriver" if os.getenv('PLAYWRIGHT_DRIVER_URL') or os.getenv("WEBDRIVER_URL") else "html_requests", | ||||
|         "proxy": "ui-0custom-test-proxy", | ||||
|         "time_between_check_use_default": "y", | ||||
|     } | ||||
|  | ||||
|     res = client.post( | ||||
|   | ||||
| @@ -49,3 +49,39 @@ def test_select_custom(client, live_server, measure_memory_usage): | ||||
|     # | ||||
|     # Now we should see the request in the container logs for "squid-squid-custom" because it will be the only default | ||||
|  | ||||
|  | ||||
| def test_custom_proxy_validation(client, live_server, measure_memory_usage): | ||||
|     #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|     # Goto settings, add our custom one | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-ignore_whitespace": "y", | ||||
|             "application-fetch_backend": 'html_requests', | ||||
|             "requests-extra_proxies-0-proxy_name": "custom-test-proxy", | ||||
|             "requests-extra_proxies-0-proxy_url": "xxxxhtt/333??p://test:awesome@squid-custom:3128", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." not in res.data | ||||
|     assert b'Proxy URLs must start with' in res.data | ||||
|  | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-ignore_whitespace": "y", | ||||
|             "application-fetch_backend": 'html_requests', | ||||
|             "requests-extra_proxies-0-proxy_name": "custom-test-proxy", | ||||
|             "requests-extra_proxies-0-proxy_url": "https://", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." not in res.data | ||||
|     assert b"Invalid URL." in res.data | ||||
|      | ||||
| @@ -2,7 +2,7 @@ | ||||
| import json | ||||
| import os | ||||
| from flask import url_for | ||||
| from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
| from changedetectionio.tests.util import live_server_setup, wait_for_all_checks, extract_UUID_from_client, delete_all_watches | ||||
|  | ||||
|  | ||||
| def set_response(): | ||||
| @@ -66,6 +66,7 @@ def test_socks5(client, live_server, measure_memory_usage): | ||||
|             "proxy": "ui-0socks5proxy", | ||||
|             "tags": "", | ||||
|             "url": test_url, | ||||
|             "time_between_check_use_default": "y", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -97,6 +98,5 @@ def test_socks5(client, live_server, measure_memory_usage): | ||||
|     ) | ||||
|     assert b"OK" in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|   | ||||
| @@ -53,6 +53,7 @@ def test_socks5_from_proxiesjson_file(client, live_server, measure_memory_usage) | ||||
|             "proxy": "socks5proxy", | ||||
|             "tags": "", | ||||
|             "url": test_url, | ||||
|             "time_between_check_use_default": "y", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|   | ||||
| @@ -1,51 +1,110 @@ | ||||
| #!/usr/bin/env python3 | ||||
| import asyncio | ||||
| import threading | ||||
| import time | ||||
| from aiosmtpd.controller import Controller | ||||
| from aiosmtpd.smtp import SMTP | ||||
| from flask import Flask, Response | ||||
| from email import message_from_bytes | ||||
| from email.policy import default | ||||
|  | ||||
| # Accept a SMTP message and offer a way to retrieve the last message via TCP Socket | ||||
| # Accept a SMTP message and offer a way to retrieve the last message via HTTP | ||||
|  | ||||
| last_received_message = b"Nothing" | ||||
| last_received_message = b"Nothing received yet." | ||||
| active_smtp_connections = 0 | ||||
| smtp_lock = threading.Lock() | ||||
|  | ||||
|  | ||||
| class CustomSMTPHandler: | ||||
|     async def handle_DATA(self, server, session, envelope): | ||||
|         global last_received_message | ||||
|         last_received_message = envelope.content | ||||
|         print('Receiving message from:', session.peer) | ||||
|         print('Message addressed from:', envelope.mail_from) | ||||
|         print('Message addressed to  :', envelope.rcpt_tos) | ||||
|         print('Message length        :', len(envelope.content)) | ||||
|         print(envelope.content.decode('utf8')) | ||||
|         return '250 Message accepted for delivery' | ||||
|         global last_received_message, active_smtp_connections | ||||
|  | ||||
|         with smtp_lock: | ||||
|             active_smtp_connections += 1 | ||||
|  | ||||
|         try: | ||||
|             last_received_message = envelope.content | ||||
|             print('Receiving message from:', session.peer) | ||||
|             print('Message addressed from:', envelope.mail_from) | ||||
|             print('Message addressed to  :', envelope.rcpt_tos) | ||||
|             print('Message length        :', len(envelope.content)) | ||||
|             print('*******************************') | ||||
|             print(envelope.content.decode('utf8')) | ||||
|             print('*******************************') | ||||
|  | ||||
|             # Parse the email message | ||||
|             msg = message_from_bytes(envelope.content, policy=default) | ||||
|             with open('/tmp/last.eml', 'wb') as f: | ||||
|                 f.write(envelope.content) | ||||
|  | ||||
|             # Write parts to files based on content type | ||||
|             if msg.is_multipart(): | ||||
|                 for part in msg.walk(): | ||||
|                     content_type = part.get_content_type() | ||||
|                     payload = part.get_payload(decode=True) | ||||
|  | ||||
|                     if payload: | ||||
|                         if content_type == 'text/plain': | ||||
|                             with open('/tmp/last.txt', 'wb') as f: | ||||
|                                 f.write(payload) | ||||
|                             print(f'Written text/plain part to /tmp/last.txt') | ||||
|                         elif content_type == 'text/html': | ||||
|                             with open('/tmp/last.html', 'wb') as f: | ||||
|                                 f.write(payload) | ||||
|                             print(f'Written text/html part to /tmp/last.html') | ||||
|             else: | ||||
|                 # Single part message | ||||
|                 content_type = msg.get_content_type() | ||||
|                 payload = msg.get_payload(decode=True) | ||||
|  | ||||
|                 if payload: | ||||
|                     if content_type == 'text/plain' or content_type.startswith('text/'): | ||||
|                         with open('/tmp/last.txt', 'wb') as f: | ||||
|                             f.write(payload) | ||||
|                         print(f'Written single part message to /tmp/last.txt') | ||||
|  | ||||
|             return '250 Message accepted for delivery' | ||||
|         finally: | ||||
|             with smtp_lock: | ||||
|                 active_smtp_connections -= 1 | ||||
|  | ||||
|  | ||||
| class EchoServerProtocol(asyncio.Protocol): | ||||
|     def connection_made(self, transport): | ||||
|         global last_received_message | ||||
|         self.transport = transport | ||||
|         peername = transport.get_extra_info('peername') | ||||
|         print('Incoming connection from {}'.format(peername)) | ||||
|         self.transport.write(last_received_message) | ||||
|  | ||||
|         last_received_message = b'' | ||||
|         self.transport.close() | ||||
| # Simple Flask HTTP server to echo back the last SMTP message | ||||
| app = Flask(__name__) | ||||
|  | ||||
|  | ||||
| async def main(): | ||||
| @app.route('/') | ||||
| def echo_last_message(): | ||||
|     global last_received_message, active_smtp_connections | ||||
|  | ||||
|     # Wait for any in-progress SMTP connections to complete | ||||
|     max_wait = 5  # Maximum 5 seconds | ||||
|     wait_interval = 0.05  # Check every 50ms | ||||
|     elapsed = 0 | ||||
|  | ||||
|     while elapsed < max_wait: | ||||
|         with smtp_lock: | ||||
|             if active_smtp_connections == 0: | ||||
|                 break | ||||
|         time.sleep(wait_interval) | ||||
|         elapsed += wait_interval | ||||
|  | ||||
|     return Response(last_received_message, mimetype='text/plain') | ||||
|  | ||||
|  | ||||
| def run_flask(): | ||||
|     app.run(host='0.0.0.0', port=11080, debug=False, use_reloader=False) | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     # Start the SMTP server | ||||
|     controller = Controller(CustomSMTPHandler(), hostname='0.0.0.0', port=11025) | ||||
|     controller.start() | ||||
|  | ||||
|     # Start the TCP Echo server | ||||
|     loop = asyncio.get_running_loop() | ||||
|     server = await loop.create_server( | ||||
|         lambda: EchoServerProtocol(), | ||||
|         '0.0.0.0', 11080 | ||||
|     ) | ||||
|     async with server: | ||||
|         await server.serve_forever() | ||||
|     # Start the HTTP server in a separate thread | ||||
|     flask_thread = threading.Thread(target=run_flask, daemon=True) | ||||
|     flask_thread.start() | ||||
|  | ||||
|  | ||||
| if __name__ == "__main__": | ||||
|     asyncio.run(main()) | ||||
|     # Keep the main thread alive | ||||
|     try: | ||||
|         flask_thread.join() | ||||
|     except KeyboardInterrupt: | ||||
|         print("Shutting down...") | ||||
|   | ||||
| @@ -1,18 +1,22 @@ | ||||
| import json | ||||
| import os | ||||
| import time | ||||
| import re | ||||
| from flask import url_for | ||||
| from email import message_from_string | ||||
| from email.policy import default as email_policy | ||||
|  | ||||
| from changedetectionio.diff import HTML_REMOVED_STYLE, HTML_ADDED_STYLE, HTML_CHANGED_STYLE | ||||
| from changedetectionio.notification_service import NotificationContextData, CUSTOM_LINEBREAK_PLACEHOLDER | ||||
| from changedetectionio.tests.util import set_original_response, set_modified_response, set_more_modified_response, live_server_setup, \ | ||||
|     wait_for_all_checks, \ | ||||
|     set_longer_modified_response | ||||
| from changedetectionio.tests.util import extract_UUID_from_client | ||||
|     set_longer_modified_response, delete_all_watches | ||||
|  | ||||
| import logging | ||||
| import base64 | ||||
|  | ||||
|  | ||||
| # NOTE - RELIES ON mailserver as hostname running, see github build recipes | ||||
| smtp_test_server = 'mailserver' | ||||
|  | ||||
| ALL_MARKUP_TOKENS = ''.join(f"TOKEN: '{t}'\n{{{{{t}}}}}\n" for t in NotificationContextData().keys()) | ||||
|  | ||||
| from changedetectionio.notification import ( | ||||
|     default_notification_body, | ||||
|     default_notification_format, | ||||
| @@ -23,16 +27,14 @@ from changedetectionio.notification import ( | ||||
|  | ||||
|  | ||||
| def get_last_message_from_smtp_server(): | ||||
|     import socket | ||||
|     port = 11080  # socket server port number | ||||
|  | ||||
|     client_socket = socket.socket()  # instantiate | ||||
|     client_socket.connect((smtp_test_server, port))  # connect to the server | ||||
|  | ||||
|     data = client_socket.recv(50024).decode()  # receive response | ||||
|     import requests | ||||
|     time.sleep(1) # wait for any smtp connects to die off | ||||
|     port = 11080  # HTTP server port number | ||||
|     # Make HTTP GET request to Flask server | ||||
|     response = requests.get(f'http://{smtp_test_server}:{port}/') | ||||
|     data = response.text | ||||
|     logging.info("get_last_message_from_smtp_server..") | ||||
|     logging.info(data) | ||||
|     client_socket.close()  # close the connection | ||||
|     return data | ||||
|  | ||||
|  | ||||
| @@ -50,8 +52,8 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": "fallback-body<br> " + default_notification_body, | ||||
|               "application-notification_format": 'HTML', | ||||
|               "application-notification_body": "some text\nfallback-body<br> " + default_notification_body, | ||||
|               "application-notification_format": 'html', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
| @@ -77,20 +79,233 @@ def test_check_notification_email_formats_default_HTML(client, live_server, meas | ||||
|  | ||||
|     time.sleep(3) | ||||
|  | ||||
|     msg = get_last_message_from_smtp_server() | ||||
|     assert len(msg) >= 1 | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     # The email should have two bodies, and the text/html part should be <br> | ||||
|     assert 'Content-Type: text/plain' in msg | ||||
|     assert '(added) So let\'s see what happens.\r\n' in msg  # The plaintext part with \r\n | ||||
|     assert 'Content-Type: text/html' in msg | ||||
|     assert '(added) So let\'s see what happens.<br>' in msg  # the html part | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative with text/plain and text/html) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain (the auto-generated plaintext version) | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     assert '(added) So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|     assert 'fallback-body\r\n' in text_content  # The plaintext part | ||||
|     assert CUSTOM_LINEBREAK_PLACEHOLDER not in text_content | ||||
|  | ||||
|     # Second part should be text/html | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|     assert 'some text<br>' in html_content  # We converted \n from the notification body | ||||
|     assert 'fallback-body<br>' in html_content  # kept the original <br> | ||||
|     assert '(added) So let\'s see what happens.<br>' in html_content  # the html part | ||||
|     assert CUSTOM_LINEBREAK_PLACEHOLDER not in html_content | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_check_notification_plaintext_format(client, live_server, measure_memory_usage): | ||||
|     set_original_response() | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": "some text\n" + default_notification_body, | ||||
|               "application-notification_format": 'text', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add a watch and trigger a HTTP POST | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(2) | ||||
|  | ||||
|     set_longer_modified_response() | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|  | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should be plain text only (not multipart) | ||||
|     assert not msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'text/plain' | ||||
|  | ||||
|     # Get the plain text content | ||||
|     text_content = msg.get_content() | ||||
|     assert '(added) So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|  | ||||
|     # Should NOT contain HTML | ||||
|     assert '<br>' not in text_content  # We should not have HTML in plain text | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
|  | ||||
| def test_check_notification_html_color_format(client, live_server, measure_memory_usage): | ||||
|     set_original_response() | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": f"some text\n{default_notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", | ||||
|               "application-notification_format": 'htmlcolor', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add a watch and trigger a HTTP POST | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_views.form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": 'nice one'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     set_longer_modified_response() | ||||
|     time.sleep(2) | ||||
|  | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|  | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative with text/plain and text/html) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain (the auto-generated plaintext version) | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     assert 'So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|     assert '(added)' not in text_content # Because apprise only dumb converts the html to text | ||||
|  | ||||
|     # Second part should be text/html with color styling | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|     assert HTML_CHANGED_STYLE or HTML_REMOVED_STYLE in html_content | ||||
|     assert HTML_ADDED_STYLE in html_content | ||||
|     assert '<' not in html_content | ||||
|  | ||||
|     assert 'some text<br>' in html_content | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_check_notification_markdown_format(client, live_server, measure_memory_usage): | ||||
|     set_original_response() | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": "*header*\n\nsome text\n" + default_notification_body, | ||||
|               "application-notification_format": 'markdown', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add a watch and trigger a HTTP POST | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_views.form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": 'nice one'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     set_longer_modified_response() | ||||
|     time.sleep(2) | ||||
|  | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|  | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative with text/plain and text/html) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain (the auto-generated plaintext version) | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     # We wont see anything in the "FALLBACK" text but that's OK (no added/strikethrough etc) | ||||
|     assert 'So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|  | ||||
|  | ||||
|     # Second part should be text/html and roughly converted from markdown to HTML | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|     assert '<p><em>header</em></p>' in html_content | ||||
|     assert '<strong>So let\'s see what happens.</strong><br>' in html_content # Additions are <strong> in markdown | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| # Custom notification body with HTML, that is either sent as HTML or rendered to plaintext and sent | ||||
| def test_check_notification_email_formats_default_Text_override_HTML(client, live_server, measure_memory_usage): | ||||
|     ##  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|     # HTML problems? see this | ||||
|     # https://github.com/caronc/apprise/issues/633 | ||||
| @@ -116,7 +331,371 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": notification_body, | ||||
|               "application-notification_format": 'Text', | ||||
|               "application-notification_format": 'text', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add a watch and trigger a HTTP POST | ||||
|     test_url = url_for('test_endpoint',content_type="text/html", _external=True) | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_views.form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": 'nice one'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     #################################### FIRST SITUATION, PLAIN TEXT NOTIFICATION IS WANTED BUT WE HAVE HTML IN OUR TEMPLATE AND CONTENT ########## | ||||
|     wait_for_all_checks(client) | ||||
|     set_longer_modified_response() | ||||
|     time.sleep(2) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|     #    with open('/tmp/m.txt', 'w') as f: | ||||
|     #        f.write(msg_raw) | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should not have two bodies, should be TEXT only | ||||
|     assert not msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'text/plain' | ||||
|  | ||||
|     # Get the plain text content | ||||
|     text_content = msg.get_content() | ||||
|     assert '(added) So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|     assert '<!DOCTYPE html>' in text_content # even tho they added html, they selected plaintext so it should have not got converted | ||||
|  | ||||
|  | ||||
|     #################################### SECOND SITUATION, HTML IS CORRECTLY PASSED THROUGH TO THE EMAIL #################### | ||||
|     set_original_response() | ||||
|     # Now override as HTML format | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             "notification_format": 'html', | ||||
|             'fetch_backend': "html_requests", | ||||
|             "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     assert '(removed) So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|  | ||||
|     # Second part should be text/html | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|     assert '(removed) So let\'s see what happens.' in html_content  # the html part | ||||
|     assert '<!DOCTYPE html' not in html_content | ||||
|     assert '<!DOCTYPE html' in html_content # Our original template is working correctly | ||||
|  | ||||
|     # https://github.com/dgtlmoon/changedetection.io/issues/2103 | ||||
|     assert '<h1>Test</h1>' in html_content | ||||
|     assert '<' not in html_content | ||||
|  | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_check_plaintext_document_plaintext_notification_smtp(client, live_server, measure_memory_usage): | ||||
|     """When following a plaintext document, notification in Plain Text format is sent correctly""" | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("Some nice plain text\nwhich we add some extra data\nover here\n") | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|     notification_body = f"""{default_notification_body}""" | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", | ||||
|               "application-notification_format": 'text', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Change the content | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("Some nice plain text\nwhich we add some extra data\nAnd let's talk about <title> tags\nover here\n") | ||||
|  | ||||
|  | ||||
|     time.sleep(1) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(get_last_message_from_smtp_server(), policy=email_policy) | ||||
|  | ||||
|     assert not msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'text/plain' | ||||
|     body = msg.get_content() | ||||
|     # nothing is escaped, raw html stuff in text/plain | ||||
|     assert 'talk about <title> tags' in body | ||||
|     assert '(added)' in body | ||||
|     assert '<br' not in body | ||||
|     assert '<' not in body | ||||
|     assert '<pre' not in body | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_check_plaintext_document_html_notifications(client, live_server, measure_memory_usage): | ||||
|     """When following a plaintext document, notification in Plain Text format is sent correctly""" | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("    Some nice plain text\nwhich we add some extra data\nover here\n") | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|     notification_body = f"""{default_notification_body}""" | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", | ||||
|               "application-notification_format": 'html', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Change the content | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("    Some nice plain text\nwhich we add some extra data\nAnd let's talk about <title> tags\nover here\n") | ||||
|  | ||||
|  | ||||
|     time.sleep(2) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(get_last_message_from_smtp_server(), policy=email_policy) | ||||
|  | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|  | ||||
|  | ||||
|     assert 'And let\'s talk about <title> tags\r\n' in text_content | ||||
|     assert '<br' not in text_content | ||||
|     assert '<span' not in text_content | ||||
|  | ||||
|  | ||||
|     assert 'talk about <title>' not in html_content  # the html part, should have got marked up to < etc | ||||
|     assert 'talk about <title>' in html_content | ||||
|     # Should be the HTML, but not HTML Color | ||||
|     assert 'background-color' not in html_content | ||||
|     assert '<br>(added) And let's talk about <title> tags<br>' in html_content | ||||
|     assert '<br' not in html_content | ||||
|     assert '<pre role="article"' in html_content # Should have got wrapped nicely in email_helpers.py | ||||
|  | ||||
|     # And now for the whitespace retention | ||||
|     assert '    Some nice plain text' in html_content | ||||
|     assert '(added) And let' in html_content # just to show a single whitespace didnt get touched | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_check_plaintext_document_html_color_notifications(client, live_server, measure_memory_usage): | ||||
|     """When following a plaintext document, notification in Plain Text format is sent correctly""" | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("Some nice plain text\nwhich we add some extra data\nover here\n") | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|     notification_body = f"""{default_notification_body}""" | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", | ||||
|               "application-notification_format": 'htmlcolor', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Change the content | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("Some nice plain text\nwhich we add some extra data\nAnd let's talk about <title> tags\nover here\n") | ||||
|  | ||||
|     time.sleep(1) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(get_last_message_from_smtp_server(), policy=email_policy) | ||||
|  | ||||
|     # The email should have two bodies (multipart/alternative) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # First part should be text/plain | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|  | ||||
|  | ||||
|     assert 'And let\'s talk about <title> tags\r\n' in text_content | ||||
|     assert '<br' not in text_content | ||||
|     assert '<span' not in text_content | ||||
|  | ||||
|     assert 'talk about <title>' not in html_content  # the html part, should have got marked up to < etc | ||||
|     assert 'talk about <title>' in html_content | ||||
|     # Should be the HTML, but not HTML Color | ||||
|     assert 'background-color' in html_content | ||||
|     assert '(added) And let' not in html_content | ||||
|     assert '<br' not in html_content | ||||
|     assert '<br>' in html_content | ||||
|     assert '<pre role="article"' in html_content # Should have got wrapped nicely in email_helpers.py | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_check_html_document_plaintext_notification(client, live_server, measure_memory_usage): | ||||
|     """When following a HTML document, notification in Plain Text format is sent correctly""" | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("<html><body>some stuff<br>and more stuff<br>and even more stuff<br></body></html>") | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com' | ||||
|     notification_body = f"""{default_notification_body}""" | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": f"{notification_body}\nMore output test\n{ALL_MARKUP_TOKENS}", | ||||
|               "application-notification_format": 'text', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', content_type="text/html", _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("<html><body>sxome stuff<br>and more stuff<br>lets slip this in<br>and this in<br>and even more stuff<br><tag></body></html>") | ||||
|  | ||||
|     time.sleep(0.1) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|  | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(get_last_message_from_smtp_server(), policy=email_policy) | ||||
|  | ||||
|     assert not msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'text/plain' | ||||
|     body = msg.get_content() | ||||
|  | ||||
|     assert '<tag>' in body # Should have got converted from original HTML to plaintext | ||||
|     assert '(changed) some stuff\r\n' in body | ||||
|     assert '(into) sxome stuff\r\n' in body | ||||
|     assert '(added) lets slip this in\r\n' in body | ||||
|     assert '(added) and this in\r\n' in body | ||||
|     assert ' ' not in body | ||||
|  | ||||
|  | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_check_html_notification_with_apprise_format_is_html(client, live_server, measure_memory_usage): | ||||
|     ##  live_server_setup(live_server) # Setup on conftest per function | ||||
|     set_original_response() | ||||
|  | ||||
|     notification_url = f'mailto://changedetection@{smtp_test_server}:11025/?to=fff@home.com&format=html' | ||||
|  | ||||
|     ##################### | ||||
|     # Set this up for when we remove the notification from the watch, it should fallback with these details | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-notification_urls": notification_url, | ||||
|               "application-notification_title": "fallback-title " + default_notification_title, | ||||
|               "application-notification_body": "some text\nfallback-body<br> " + default_notification_body, | ||||
|               "application-notification_format": 'html', | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
| @@ -136,47 +715,40 @@ def test_check_notification_email_formats_default_Text_override_HTML(client, liv | ||||
|     wait_for_all_checks(client) | ||||
|     set_longer_modified_response() | ||||
|     time.sleep(2) | ||||
|  | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     time.sleep(3) | ||||
|     msg = get_last_message_from_smtp_server() | ||||
|     assert len(msg) >= 1 | ||||
|     #    with open('/tmp/m.txt', 'w') as f: | ||||
|     #        f.write(msg) | ||||
|  | ||||
|     # The email should not have two bodies, should be TEXT only | ||||
|     msg_raw = get_last_message_from_smtp_server() | ||||
|     assert len(msg_raw) >= 1 | ||||
|  | ||||
|     assert 'Content-Type: text/plain' in msg | ||||
|     assert '(added) So let\'s see what happens.\r\n' in msg  # The plaintext part with \r\n | ||||
|     # Parse the email properly using Python's email library | ||||
|     msg = message_from_string(msg_raw, policy=email_policy) | ||||
|  | ||||
|     set_original_response() | ||||
|     # Now override as HTML format | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             "notification_format": 'HTML', | ||||
|             'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     # The email should have two bodies (multipart/alternative with text/plain and text/html) | ||||
|     assert msg.is_multipart() | ||||
|     assert msg.get_content_type() == 'multipart/alternative' | ||||
|  | ||||
|     time.sleep(3) | ||||
|     msg = get_last_message_from_smtp_server() | ||||
|     assert len(msg) >= 1 | ||||
|     # Get the parts | ||||
|     parts = list(msg.iter_parts()) | ||||
|     assert len(parts) == 2 | ||||
|  | ||||
|     # The email should have two bodies, and the text/html part should be <br> | ||||
|     assert 'Content-Type: text/plain' in msg | ||||
|     assert '(removed) So let\'s see what happens.\r\n' in msg  # The plaintext part with \n | ||||
|     assert 'Content-Type: text/html' in msg | ||||
|     assert '(removed) So let\'s see what happens.<br>' in msg  # the html part | ||||
|     # First part should be text/plain (the auto-generated plaintext version) | ||||
|     text_part = parts[0] | ||||
|     assert text_part.get_content_type() == 'text/plain' | ||||
|     text_content = text_part.get_content() | ||||
|     assert '(added) So let\'s see what happens.\r\n' in text_content  # The plaintext part | ||||
|     assert 'fallback-body\r\n' in text_content  # The plaintext part | ||||
|     assert CUSTOM_LINEBREAK_PLACEHOLDER not in text_content | ||||
|  | ||||
|     # https://github.com/dgtlmoon/changedetection.io/issues/2103 | ||||
|     assert '<h1>Test</h1>' in msg | ||||
|     assert '<' not in msg | ||||
|     assert 'Content-Type: text/html' in msg | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     # Second part should be text/html | ||||
|     html_part = parts[1] | ||||
|     assert html_part.get_content_type() == 'text/html' | ||||
|     html_content = html_part.get_content() | ||||
|     assert 'some text<br>' in html_content  # We converted \n from the notification body | ||||
|     assert 'fallback-body<br>' in html_content  # kept the original <br> | ||||
|     assert '(added) So let\'s see what happens.<br>' in html_content  # the html part | ||||
|     assert CUSTOM_LINEBREAK_PLACEHOLDER not in html_content | ||||
|     delete_all_watches(client) | ||||
| @@ -2,7 +2,7 @@ from .util import live_server_setup, wait_for_all_checks | ||||
| from flask import url_for | ||||
| import time | ||||
|  | ||||
| def test_check_access_control(app, client, live_server): | ||||
| def test_check_access_control(app, client, live_server, measure_memory_usage): | ||||
|     # Still doesnt work, but this is closer. | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
|   | ||||
| @@ -3,9 +3,12 @@ | ||||
| import os.path | ||||
|  | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output | ||||
| from .util import live_server_setup, wait_for_all_checks, wait_for_notification_endpoint_output, delete_all_watches | ||||
| import time | ||||
|  | ||||
| from ..diff import ADDED_PLACEMARKER_OPEN | ||||
|  | ||||
|  | ||||
| def set_original(excluding=None, add_line=None): | ||||
|     test_return_data = """<html> | ||||
|      <body> | ||||
| @@ -44,12 +47,8 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory | ||||
|     set_original() | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -61,7 +60,8 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory | ||||
|         data={"trigger_text": 'The golden line', | ||||
|               "url": test_url, | ||||
|               'fetch_backend': "html_requests", | ||||
|               'filter_text_removed': 'y'}, | ||||
|               'filter_text_removed': 'y', | ||||
|               "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| @@ -74,7 +74,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory | ||||
|     wait_for_all_checks(client) | ||||
|     time.sleep(0.5) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     # The trigger line is REMOVED,  this should trigger | ||||
|     set_original(excluding='The golden line') | ||||
| @@ -83,7 +83,7 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|     time.sleep(1) | ||||
|  | ||||
| @@ -97,23 +97,21 @@ def test_check_removed_line_contains_trigger(client, live_server, measure_memory | ||||
|     wait_for_all_checks(client) | ||||
|     time.sleep(1) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     # Remove it again, and we should get a trigger | ||||
|     set_original(excluding='The golden line') | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_check_add_line_contains_trigger(client, live_server, measure_memory_usage): | ||||
|      | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
| @@ -126,6 +124,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|               "application-notification_body": 'triggered text was -{{triggered_text}}- ### 网站监测 内容更新了 ####', | ||||
|               # https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation | ||||
|               "application-notification_urls": test_notification_url, | ||||
|               "application-notification_format": 'text', | ||||
|               "application-minutes_between_check": 180, | ||||
|               "application-fetch_backend": "html_requests" | ||||
|               }, | ||||
| @@ -136,12 +135,8 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|     set_original() | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -154,7 +149,8 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|               'processor': 'text_json_diff', | ||||
|               'fetch_backend': "html_requests", | ||||
|               'filter_text_removed': '', | ||||
|               'filter_text_added': 'y'}, | ||||
|               'filter_text_added': 'y', | ||||
|               "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| @@ -167,7 +163,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     # The trigger line is ADDED,  this should trigger | ||||
|     set_original(add_line='<p>Oh yes please</p>') | ||||
| @@ -175,15 +171,15 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|  | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|     # Takes a moment for apprise to fire | ||||
|     wait_for_notification_endpoint_output() | ||||
|     assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file" | ||||
|     with open("test-datastore/notification.txt", 'rb') as f: | ||||
|         response = f.read() | ||||
|         assert ADDED_PLACEMARKER_OPEN.encode('utf-8') not in response #  _apply_diff_filtering shouldnt add something here | ||||
|         assert b'-Oh yes please' in response | ||||
|         assert '网站监测 内容更新了'.encode('utf-8') in response | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, delete_all_watches | ||||
|  | ||||
| import json | ||||
| import uuid | ||||
| @@ -276,8 +276,7 @@ def test_access_denied(client, live_server, measure_memory_usage): | ||||
|     assert res.status_code == 200 | ||||
|  | ||||
|     # Cleanup everything | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
| @@ -371,7 +370,7 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     ###################################################### | ||||
|  | ||||
|     # HTTP PUT try a field that doenst exist | ||||
|     # HTTP PUT try a field that doesn't exist | ||||
|  | ||||
|     # HTTP PUT an update | ||||
|     res = client.put( | ||||
| @@ -384,9 +383,19 @@ def test_api_watch_PUT_update(client, live_server, measure_memory_usage): | ||||
|     # Message will come from `flask_expects_json` | ||||
|     assert b'Additional properties are not allowed' in res.data | ||||
|  | ||||
|  | ||||
|     # Try a XSS URL | ||||
|     res = client.put( | ||||
|         url_for("watch", uuid=watch_uuid), | ||||
|         headers={'x-api-key': api_key, 'content-type': 'application/json'}, | ||||
|         data=json.dumps({ | ||||
|             'url': 'javascript:alert(document.domain)' | ||||
|         }), | ||||
|     ) | ||||
|     assert res.status_code == 400 | ||||
|  | ||||
|     # Cleanup everything | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_api_import(client, live_server, measure_memory_usage): | ||||
| @@ -396,6 +405,7 @@ def test_api_import(client, live_server, measure_memory_usage): | ||||
|     res = client.post( | ||||
|         url_for("import") + "?tag=import-test", | ||||
|         data='https://website1.com\r\nhttps://website2.com', | ||||
|         # We removed  'content-type': 'text/plain', the Import API should assume this if none is set #3547 #3542 | ||||
|         headers={'x-api-key': api_key}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|   | ||||
| @@ -4,7 +4,7 @@ from flask import url_for | ||||
| from .util import live_server_setup | ||||
| import json | ||||
|  | ||||
| def test_api_notifications_crud(client, live_server): | ||||
| def test_api_notifications_crud(client, live_server, measure_memory_usage): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|   | ||||
							
								
								
									
										199
									
								
								changedetectionio/tests/test_api_openapi.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										199
									
								
								changedetectionio/tests/test_api_openapi.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,199 @@ | ||||
| #!/usr/bin/env python3 | ||||
| """ | ||||
| OpenAPI validation tests for ChangeDetection.io API | ||||
|  | ||||
| This test file specifically verifies that OpenAPI validation is working correctly | ||||
| by testing various scenarios that should trigger validation errors. | ||||
| """ | ||||
|  | ||||
| import time | ||||
| import json | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_invalid_content_type_on_create_watch(client, live_server, measure_memory_usage): | ||||
|     """Test that creating a watch with invalid content-type triggers OpenAPI validation error.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|     # Try to create a watch with JSON data but without proper content-type header | ||||
|     res = client.post( | ||||
|         url_for("createwatch"), | ||||
|         data=json.dumps({"url": "https://example.com", "title": "Test Watch"}), | ||||
|         headers={'x-api-key': api_key},  # Missing 'content-type': 'application/json' | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # Should get 400 error due to OpenAPI validation failure | ||||
|     assert res.status_code == 400, f"Expected 400 but got {res.status_code}" | ||||
|     assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_missing_required_field_create_watch(client, live_server, measure_memory_usage): | ||||
|     """Test that creating a watch without required URL field triggers OpenAPI validation error.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|     # Try to create a watch without the required 'url' field | ||||
|     res = client.post( | ||||
|         url_for("createwatch"), | ||||
|         data=json.dumps({"title": "Test Watch Without URL"}),  # Missing required 'url' field | ||||
|         headers={'x-api-key': api_key, 'content-type': 'application/json'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # Should get 400 error due to missing required field | ||||
|     assert res.status_code == 400, f"Expected 400 but got {res.status_code}" | ||||
|     assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_invalid_field_in_request_body(client, live_server, measure_memory_usage): | ||||
|     """Test that including invalid fields triggers OpenAPI validation error.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|     # First create a valid watch | ||||
|     res = client.post( | ||||
|         url_for("createwatch"), | ||||
|         data=json.dumps({"url": "https://example.com", "title": "Test Watch"}), | ||||
|         headers={'x-api-key': api_key, 'content-type': 'application/json'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert res.status_code == 201, "Watch creation should succeed" | ||||
|  | ||||
|     # Get the watch list to find the UUID | ||||
|     res = client.get( | ||||
|         url_for("createwatch"), | ||||
|         headers={'x-api-key': api_key} | ||||
|     ) | ||||
|     assert res.status_code == 200 | ||||
|     watch_uuid = list(res.json.keys())[0] | ||||
|  | ||||
|     # Now try to update the watch with an invalid field | ||||
|     res = client.put( | ||||
|         url_for("watch", uuid=watch_uuid), | ||||
|         headers={'x-api-key': api_key, 'content-type': 'application/json'}, | ||||
|         data=json.dumps({ | ||||
|             "title": "Updated title", | ||||
|             "invalid_field_that_doesnt_exist": "this should cause validation error" | ||||
|         }), | ||||
|     ) | ||||
|  | ||||
|     # Should get 400 error due to invalid field (this will be caught by internal validation) | ||||
|     # Note: This tests the flow where OpenAPI validation passes but internal validation catches it | ||||
|     assert res.status_code == 400, f"Expected 400 but got {res.status_code}" | ||||
|     assert b"Additional properties are not allowed" in res.data, "Should contain validation error about additional properties" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_import_wrong_content_type(client, live_server, measure_memory_usage): | ||||
|     """Test that import endpoint with wrong content-type triggers OpenAPI validation error.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|     # Try to import URLs with JSON content-type instead of text/plain | ||||
|     res = client.post( | ||||
|         url_for("import") + "?tag=test-import", | ||||
|         data='https://website1.com\nhttps://website2.com', | ||||
|         headers={'x-api-key': api_key, 'content-type': 'application/json'},  # Wrong content-type | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # Should get 400 error due to content-type mismatch | ||||
|     assert res.status_code == 400, f"Expected 400 but got {res.status_code}" | ||||
|     assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_import_correct_content_type_succeeds(client, live_server, measure_memory_usage): | ||||
|     """Test that import endpoint with correct content-type succeeds (positive test).""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|     # Import URLs with correct text/plain content-type | ||||
|     res = client.post( | ||||
|         url_for("import") + "?tag=test-import", | ||||
|         data='https://website1.com\nhttps://website2.com', | ||||
|         headers={'x-api-key': api_key, 'content-type': 'text/plain'},  # Correct content-type | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # Should succeed | ||||
|     assert res.status_code == 200, f"Expected 200 but got {res.status_code}" | ||||
|     assert len(res.json) == 2, "Should import 2 URLs" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_get_requests_bypass_validation(client, live_server, measure_memory_usage): | ||||
|     """Test that GET requests bypass OpenAPI validation entirely.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|     # Disable API token requirement first | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-fetch_backend": "html_requests", | ||||
|             "application-api_access_token_enabled": "" | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     # Make GET request to list watches - should succeed even without API key or content-type | ||||
|     res = client.get(url_for("createwatch"))  # No headers needed for GET | ||||
|     assert res.status_code == 200, f"GET requests should succeed without OpenAPI validation, got {res.status_code}" | ||||
|  | ||||
|     # Should return JSON with watch list (empty in this case) | ||||
|     assert isinstance(res.json, dict), "Should return JSON dictionary for watch list" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_create_tag_missing_required_title(client, live_server, measure_memory_usage): | ||||
|     """Test that creating a tag without required title triggers OpenAPI validation error.""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|     # Try to create a tag without the required 'title' field | ||||
|     res = client.post( | ||||
|         url_for("tag"), | ||||
|         data=json.dumps({"notification_urls": ["mailto:test@example.com"]}),  # Missing required 'title' field | ||||
|         headers={'x-api-key': api_key, 'content-type': 'application/json'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # Should get 400 error due to missing required field | ||||
|     assert res.status_code == 400, f"Expected 400 but got {res.status_code}" | ||||
|     assert b"OpenAPI validation failed" in res.data, "Should contain OpenAPI validation error message" | ||||
|  | ||||
|  | ||||
| def test_openapi_validation_watch_update_allows_partial_updates(client, live_server, measure_memory_usage): | ||||
|     """Test that watch updates allow partial updates without requiring all fields (positive test).""" | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|     # First create a valid watch | ||||
|     res = client.post( | ||||
|         url_for("createwatch"), | ||||
|         data=json.dumps({"url": "https://example.com", "title": "Test Watch"}), | ||||
|         headers={'x-api-key': api_key, 'content-type': 'application/json'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert res.status_code == 201, "Watch creation should succeed" | ||||
|  | ||||
|     # Get the watch list to find the UUID | ||||
|     res = client.get( | ||||
|         url_for("createwatch"), | ||||
|         headers={'x-api-key': api_key} | ||||
|     ) | ||||
|     assert res.status_code == 200 | ||||
|     watch_uuid = list(res.json.keys())[0] | ||||
|  | ||||
|     # Update only the title (partial update) - should succeed | ||||
|     res = client.put( | ||||
|         url_for("watch", uuid=watch_uuid), | ||||
|         headers={'x-api-key': api_key, 'content-type': 'application/json'}, | ||||
|         data=json.dumps({"title": "Updated Title Only"}),  # Only updating title, not URL | ||||
|     ) | ||||
|  | ||||
|     # Should succeed because UpdateWatch schema allows partial updates | ||||
|     assert res.status_code == 200, f"Partial updates should succeed, got {res.status_code}" | ||||
|  | ||||
|     # Verify the update worked | ||||
|     res = client.get( | ||||
|         url_for("watch", uuid=watch_uuid), | ||||
|         headers={'x-api-key': api_key} | ||||
|     ) | ||||
|     assert res.status_code == 200 | ||||
|     assert res.json.get('title') == 'Updated Title Only', "Title should be updated" | ||||
|     assert res.json.get('url') == 'https://example.com', "URL should remain unchanged" | ||||
| @@ -6,7 +6,7 @@ import time | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def test_api_search(client, live_server): | ||||
| def test_api_search(client, live_server, measure_memory_usage): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|  | ||||
|   | ||||
| @@ -12,18 +12,14 @@ def test_basic_auth(client, live_server, measure_memory_usage): | ||||
|     # This page will echo back any auth info | ||||
|     test_url = url_for('test_basicauth_method', _external=True).replace("//","//myuser:mypass@") | ||||
|     time.sleep(1) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     time.sleep(1) | ||||
|     # Check form validation | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|         data={"include_filters": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters": "", "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|   | ||||
| @@ -86,12 +86,8 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Should get a notice that it's available | ||||
| @@ -129,12 +125,8 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'ldjson-price-track-offer' not in res.data | ||||
| @@ -146,12 +138,8 @@ def test_check_ldjson_price_autodetect(client, live_server, measure_memory_usage | ||||
| def _test_runner_check_bad_format_ignored(live_server, client, has_ldjson_price_data): | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     for k,v in client.application.config.get('DATASTORE').data['watching'].items(): | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, \ | ||||
|     extract_UUID_from_client | ||||
|     extract_UUID_from_client, delete_all_watches | ||||
|  | ||||
| sleep_time_for_fetch_thread = 3 | ||||
|  | ||||
| @@ -38,9 +38,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|         # Give the thread time to pick it up | ||||
|         wait_for_all_checks(client) | ||||
|  | ||||
|         # It should report nothing found (no new 'unviewed' class) | ||||
|         # It should report nothing found (no new 'has-unread-changes' class) | ||||
|         res = client.get(url_for("watchlist.index")) | ||||
|         assert b'unviewed' not in res.data | ||||
|         assert b'has-unread-changes' not in res.data | ||||
|         assert b'test-endpoint' in res.data | ||||
|  | ||||
|         # Default no password set, this stuff should be always available. | ||||
| @@ -74,9 +74,9 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     res = client.get(url_for("ui.ui_edit.watch_get_latest_html", uuid=uuid)) | ||||
|     assert b'which has this one new line' in res.data | ||||
|  | ||||
|     # Now something should be ready, indicated by having a 'unviewed' class | ||||
|     # Now something should be ready, indicated by having a 'has-unread-changes' class | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|     # #75, and it should be in the RSS feed | ||||
|     rss_token = extract_rss_token_from_UI(client) | ||||
| @@ -90,7 +90,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|  | ||||
|     assert expected_url.encode('utf-8') in res.data | ||||
| # | ||||
|     # Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times | ||||
|     # Following the 'diff' link, it should no longer display as 'has-unread-changes' even after we recheck it a few times | ||||
|     res = client.get(url_for("ui.ui_views.diff_history_page", uuid=uuid)) | ||||
|     assert b'selected=""' in res.data, "Confirm diff history page loaded" | ||||
|  | ||||
| @@ -111,12 +111,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|         # Give the thread time to pick it up | ||||
|         wait_for_all_checks(client) | ||||
|  | ||||
|         # It should report nothing found (no new 'unviewed' class) | ||||
|         # It should report nothing found (no new 'has-unread-changes' class) | ||||
|         res = client.get(url_for("watchlist.index")) | ||||
|  | ||||
|  | ||||
|         assert b'unviewed' not in res.data | ||||
|         assert b'class="has-unviewed' not in res.data | ||||
|         assert b'has-unread-changes' not in res.data | ||||
|         assert b'class="has-unread-changes' not in res.data | ||||
|         assert b'head title' in res.data  # Should be ON by default | ||||
|         assert b'test-endpoint' in res.data | ||||
|  | ||||
| @@ -140,8 +140,8 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'class="has-unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|     assert b'class="has-unread-changes' in res.data | ||||
|     assert b'head title' not in res.data  # should now be off | ||||
|  | ||||
|  | ||||
| @@ -151,8 +151,8 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|     # hit the mark all viewed link | ||||
|     res = client.get(url_for("ui.mark_all_viewed"), follow_redirects=True) | ||||
|  | ||||
|     assert b'class="has-unviewed' not in res.data | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'class="has-unread-changes' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     # #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again | ||||
|     client.get(url_for("ui.clear_watch_history", uuid=uuid)) | ||||
| @@ -163,5 +163,219 @@ def test_check_basic_change_detection_functionality(client, live_server, measure | ||||
|  | ||||
|     # | ||||
|     # Cleanup everything | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| # Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that | ||||
| def test_requests_timeout(client, live_server, measure_memory_usage): | ||||
|     delay = 2 | ||||
|     test_url = url_for('test_endpoint', delay=delay, _external=True) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-ui-use_page_title_in_list": "", | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               "requests-timeout": delay - 1, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # requests takes >2 sec but we timeout at 1 second | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'Read timed out. (read timeout=1)' in res.data | ||||
|  | ||||
|     ##### Now set a longer timeout | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={"application-ui-use_page_title_in_list": "", | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               "requests-timeout": delay + 1, # timeout should be a second more than the reply time | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'Read timed out' not in res.data | ||||
|  | ||||
| def test_non_text_mime_or_downloads(client, live_server, measure_memory_usage): | ||||
|     """ | ||||
|  | ||||
|     https://github.com/dgtlmoon/changedetection.io/issues/3434 | ||||
|     I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8, | ||||
|     but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog), | ||||
|     changedetection somehow ignores all line breaks and treats the document file as if everything is on one line. | ||||
|  | ||||
|     WHAT THIS DOES - makes the system rely on 'magic' to determine what is it | ||||
|  | ||||
|     :param client: | ||||
|     :param live_server: | ||||
|     :param measure_memory_usage: | ||||
|     :return: | ||||
|     """ | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("""some random text that should be split by line | ||||
| and not parsed with html_to_text | ||||
| this way we know that it correctly parsed as plain text | ||||
| \r\n | ||||
| ok\r\n | ||||
| got it\r\n | ||||
| """) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', content_type="application/octet-stream", _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     ### check the front end | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_views.preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"some random text that should be split by line\n" in res.data | ||||
|     #### | ||||
|  | ||||
|     # Check the snapshot by API that it has linefeeds too | ||||
|     watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|     res = client.get( | ||||
|         url_for("watchhistory", uuid=watch_uuid), | ||||
|         headers={'x-api-key': api_key}, | ||||
|     ) | ||||
|  | ||||
|     # Fetch a snapshot by timestamp, check the right one was found | ||||
|     res = client.get( | ||||
|         url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]), | ||||
|         headers={'x-api-key': api_key}, | ||||
|     ) | ||||
|     assert b"some random text that should be split by line\n" in res.data | ||||
|  | ||||
|  | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_standard_text_plain(client, live_server, measure_memory_usage): | ||||
|     """ | ||||
|  | ||||
|     https://github.com/dgtlmoon/changedetection.io/issues/3434 | ||||
|     I noticed that a watched website can be monitored fine as long as the server sends content-type: text/plain; charset=utf-8, | ||||
|     but once the server sends content-type: application/octet-stream (which is usually done to force the browser to show the Download dialog), | ||||
|     changedetection somehow ignores all line breaks and treats the document file as if everything is on one line. | ||||
|  | ||||
|     The real bug here can be that it will try to process plain-text as HTML, losing <etc> | ||||
|  | ||||
|     :param client: | ||||
|     :param live_server: | ||||
|     :param measure_memory_usage: | ||||
|     :return: | ||||
|     """ | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("""some random text that should be split by line | ||||
| and not parsed with html_to_text | ||||
| <title>Even this title should stay because we are just plain text</title> | ||||
| this way we know that it correctly parsed as plain text | ||||
| \r\n | ||||
| ok\r\n | ||||
| got it\r\n | ||||
| """) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     ### check the front end | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_views.preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"some random text that should be split by line\n" in res.data | ||||
|     #### | ||||
|  | ||||
|     # Check the snapshot by API that it has linefeeds too | ||||
|     watch_uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) | ||||
|     api_key = live_server.app.config['DATASTORE'].data['settings']['application'].get('api_access_token') | ||||
|     res = client.get( | ||||
|         url_for("watchhistory", uuid=watch_uuid), | ||||
|         headers={'x-api-key': api_key}, | ||||
|     ) | ||||
|  | ||||
|     # Fetch a snapshot by timestamp, check the right one was found | ||||
|     res = client.get( | ||||
|         url_for("watchsinglehistory", uuid=watch_uuid, timestamp=list(res.json.keys())[-1]), | ||||
|         headers={'x-api-key': api_key}, | ||||
|     ) | ||||
|     assert b"some random text that should be split by line\n" in res.data | ||||
|     assert b"<title>Even this title should stay because we are just plain text</title>" in res.data | ||||
|  | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| # Server says its plaintext, we should always treat it as plaintext | ||||
| def test_plaintext_even_if_xml_content(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("""<?xml version="1.0" encoding="utf-8"?> | ||||
| <resources xmlns:tools="http://schemas.android.com/tools"> | ||||
|     <!--Activity and fragment titles--> | ||||
|     <string name="feed_update_receiver_name">Abonnementen bijwerken</string> | ||||
| </resources> | ||||
| """) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_views.preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b'<string name="feed_update_receiver_name"' in res.data | ||||
|  | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| # Server says its plaintext, we should always treat it as plaintext, and then if they have a filter, try to apply that | ||||
| def test_plaintext_even_if_xml_content_and_can_apply_filters(client, live_server, measure_memory_usage): | ||||
|  | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("""<?xml version="1.0" encoding="utf-8"?> | ||||
| <resources xmlns:tools="http://schemas.android.com/tools"> | ||||
|     <!--Activity and fragment titles--> | ||||
|     <string name="feed_update_receiver_name">Abonnementen bijwerken</string> | ||||
|     <foobar>ok man</foobar> | ||||
| </resources> | ||||
| """) | ||||
|  | ||||
|     test_url=url_for('test_endpoint', content_type="text/plain", _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"include_filters": ['//string']}) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_views.preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b'<string name="feed_update_receiver_name"' in res.data | ||||
|     assert b'<foobar' not in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|   | ||||
| @@ -58,6 +58,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""): | ||||
|  | ||||
|     has_watch_update = False | ||||
|     has_unviewed_update = False | ||||
|     got_general_stats_update = False | ||||
|  | ||||
|     for i in range(10): | ||||
|         # Get received events | ||||
| @@ -65,15 +66,11 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""): | ||||
|  | ||||
|         if received: | ||||
|             logger.info(f"Received {len(received)} events after {i+1} seconds") | ||||
|  | ||||
|             # Check for watch_update events with unviewed=True | ||||
|             for event in received: | ||||
|                 if event['name'] == 'watch_update': | ||||
|                     has_watch_update = True | ||||
|                     if event['args'][0]['watch'].get('unviewed', False): | ||||
|                         has_unviewed_update = True | ||||
|                         logger.info("Found unviewed update event!") | ||||
|                         break | ||||
|                 if event['name'] == 'general_stats_update': | ||||
|                     got_general_stats_update = True | ||||
|  | ||||
|         if has_unviewed_update: | ||||
|             break | ||||
| @@ -92,7 +89,7 @@ def run_socketio_watch_update_test(client, live_server, password_mode=""): | ||||
|     assert has_watch_update, "No watch_update events received" | ||||
|  | ||||
|     # Verify we received an unviewed event | ||||
|     assert has_unviewed_update, "No watch_update event with unviewed=True received" | ||||
|     assert got_general_stats_update, "Got general stats update event" | ||||
|  | ||||
|     # Alternatively, check directly if the watch in the datastore is marked as unviewed | ||||
|     from changedetectionio.flask_app import app | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, delete_all_watches | ||||
| from changedetectionio import html_tools | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
| @@ -70,12 +70,8 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -86,7 +82,8 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|         data={"text_should_not_be_present": ignore_text, | ||||
|               "url": test_url, | ||||
|               'fetch_backend': "html_requests" | ||||
|               'fetch_backend': "html_requests", | ||||
|               "time_between_check_use_default": "y" | ||||
|               }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -106,9 +103,9 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     # It should report nothing found (no new 'has-unread-changes' class) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
|  | ||||
|     # The page changed, BUT the text is still there, just the rest of it changes, we should not see a change | ||||
| @@ -119,9 +116,9 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     # It should report nothing found (no new 'has-unread-changes' class) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
|  | ||||
|     # 2548 | ||||
| @@ -130,7 +127,7 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|  | ||||
|     # Now we set a change where the text is gone AND its different content, it should now trigger | ||||
| @@ -138,10 +135,9 @@ def test_check_block_changedetection_text_NOT_present(client, live_server, measu | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|   | ||||
| @@ -14,12 +14,8 @@ def test_clone_functionality(client, live_server, measure_memory_usage): | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # So that we can be sure the same history doesnt carry over | ||||
|   | ||||
| @@ -3,7 +3,7 @@ import json | ||||
| import time | ||||
|  | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, delete_all_watches | ||||
| from ..model import CONDITIONS_MATCH_LOGIC_DEFAULT | ||||
|  | ||||
|  | ||||
| @@ -47,11 +47,11 @@ def set_number_out_of_range_response(number="150"): | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| # def test_setup(client, live_server): | ||||
| # def test_setup(client, live_server, measure_memory_usage): | ||||
|     """Test that both text and number conditions work together with AND logic.""" | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|  | ||||
| def test_conditions_with_text_and_number(client, live_server): | ||||
| def test_conditions_with_text_and_number(client, live_server, measure_memory_usage): | ||||
|     """Test that both text and number conditions work together with AND logic.""" | ||||
|      | ||||
|     set_original_response("50") | ||||
| @@ -60,12 +60,8 @@ def test_conditions_with_text_and_number(client, live_server): | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Configure the watch with two conditions connected with AND: | ||||
| @@ -105,6 +101,7 @@ def test_conditions_with_text_and_number(client, live_server): | ||||
|             "conditions-5-operator": "contains_regex", | ||||
|             "conditions-5-field": "page_filtered_text", | ||||
|             "conditions-5-value": "\d", | ||||
|             "time_between_check_use_default": "y", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -124,7 +121,7 @@ def test_conditions_with_text_and_number(client, live_server): | ||||
|     time.sleep(2) | ||||
|     # 75 is > 20 and < 100 and contains "5" | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|  | ||||
|     # Case 2: Change with one condition violated | ||||
| @@ -140,25 +137,20 @@ def test_conditions_with_text_and_number(client, live_server): | ||||
|  | ||||
|     # Should NOT be marked as having changes since not all conditions are met | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| # The 'validate' button next to each rule row | ||||
| def test_condition_validate_rule_row(client, live_server): | ||||
| def test_condition_validate_rule_row(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     set_original_response("50") | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) | ||||
| @@ -229,12 +221,8 @@ def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -288,7 +276,8 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|             "conditions_match_logic": CONDITIONS_MATCH_LOGIC_DEFAULT,  # ALL = AND logic | ||||
|             "conditions-0-field": "levenshtein_ratio", | ||||
|             "conditions-0-operator": "<", | ||||
|             "conditions-0-value": "0.8" # needs to be more of a diff to trigger a change | ||||
|             "conditions-0-value": "0.8", # needs to be more of a diff to trigger a change | ||||
|             "time_between_check_use_default": "y" | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -297,7 +286,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|  | ||||
|     # Check the content saved initially, even tho a condition was set - this is the first snapshot so shouldnt be affected by conditions | ||||
|     res = client.get( | ||||
| @@ -324,7 +313,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data #because this will be like 0.90 not 0.8 threshold | ||||
|     assert b'has-unread-changes' not in res.data #because this will be like 0.90 not 0.8 threshold | ||||
|  | ||||
|     ############### Now change it a MORE THAN 50% | ||||
|     test_return_data = """<html> | ||||
| @@ -343,7 +332,7 @@ def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|     assert b'Queued 1 watch for rechecking.' in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("ui.form_delete", uuid="all"), | ||||
|   | ||||
| @@ -81,12 +81,8 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
| @@ -95,7 +91,7 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|         data={"include_filters": include_filters, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters": include_filters, "url": test_url, "tags": "", "headers": "", 'fetch_backend': "html_requests", "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| @@ -116,10 +112,10 @@ def test_check_markup_include_filters_restriction(client, live_server, measure_m | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # It should have 'unviewed' still | ||||
|     # It should have 'has-unread-changes' still | ||||
|     # Because it should be looking at only that 'sametext' id | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|  | ||||
| # Tests the whole stack works with the CSS Filter | ||||
| @@ -138,12 +134,8 @@ def test_check_multiple_filters(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Goto the edit page, add our ignore text | ||||
| @@ -154,7 +146,8 @@ def test_check_multiple_filters(client, live_server, measure_memory_usage): | ||||
|               "url": test_url, | ||||
|               "tags": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_requests"}, | ||||
|               'fetch_backend': "html_requests", | ||||
|               "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| @@ -192,12 +185,8 @@ def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usa | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Goto the edit page, add our ignore text | ||||
| @@ -208,7 +197,8 @@ def test_filter_is_empty_help_suggestion(client, live_server, measure_memory_usa | ||||
|               "url": test_url, | ||||
|               "tags": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_requests"}, | ||||
|               'fetch_backend': "html_requests", | ||||
|               "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|   | ||||
| @@ -5,7 +5,7 @@ import time | ||||
| from flask import url_for | ||||
|  | ||||
| from ..html_tools import * | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, delete_all_watches | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -171,6 +171,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage): | ||||
|             "tags": "", | ||||
|             "headers": "", | ||||
|             "fetch_backend": "html_requests", | ||||
|             "time_between_check_use_default": "y", | ||||
|         }, | ||||
|         follow_redirects=True, | ||||
|     ) | ||||
| @@ -189,7 +190,7 @@ def test_element_removal_full(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # so that we set the state to 'unviewed' after all the edits | ||||
|     # so that we set the state to 'has-unread-changes' after all the edits | ||||
|     client.get(url_for("ui.ui_views.diff_history_page", uuid="first")) | ||||
|  | ||||
|     #  Make a change to header/footer/nav | ||||
| @@ -208,47 +209,32 @@ def test_element_removal_full(client, live_server, measure_memory_usage): | ||||
|  | ||||
| # Re #2752 | ||||
| def test_element_removal_nth_offset_no_shift(client, live_server, measure_memory_usage): | ||||
|      | ||||
|  | ||||
|     set_response_with_multiple_index() | ||||
|     subtractive_selectors_data = [""" | ||||
| body > table > tr:nth-child(1) > th:nth-child(2) | ||||
|     subtractive_selectors_data = [ | ||||
| ### css style ### | ||||
| """body > table > tr:nth-child(1) > th:nth-child(2) | ||||
| body > table >  tr:nth-child(2) > td:nth-child(2) | ||||
| body > table > tr:nth-child(3) > td:nth-child(2) | ||||
| body > table > tr:nth-child(1) > th:nth-child(3) | ||||
| body > table >  tr:nth-child(2) > td:nth-child(3) | ||||
| body > table > tr:nth-child(3) > td:nth-child(3)""", | ||||
| ### second type, xpath ### | ||||
| """//body/table/tr[1]/th[2] | ||||
| //body/table/tr[2]/td[2] | ||||
| //body/table/tr[3]/td[2] | ||||
| //body/table/tr[1]/th[3] | ||||
| //body/table/tr[2]/td[3] | ||||
| //body/table/tr[3]/td[3]"""] | ||||
|      | ||||
|     test_url = url_for("test_endpoint", _external=True) | ||||
|  | ||||
|     for selector_list in subtractive_selectors_data: | ||||
|  | ||||
|         res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|         assert b'Deleted' in res.data | ||||
|         delete_all_watches(client) | ||||
|  | ||||
|         # Add our URL to the import page | ||||
|         test_url = url_for("test_endpoint", _external=True) | ||||
|         res = client.post( | ||||
|             url_for("imports.import_page"), data={"urls": test_url}, follow_redirects=True | ||||
|         ) | ||||
|         assert b"1 Imported" in res.data | ||||
|         wait_for_all_checks(client) | ||||
|  | ||||
|         res = client.post( | ||||
|             url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|             data={ | ||||
|                 "subtractive_selectors": selector_list, | ||||
|                 "url": test_url, | ||||
|                 "tags": "", | ||||
|                 "fetch_backend": "html_requests", | ||||
|             }, | ||||
|             follow_redirects=True, | ||||
|         ) | ||||
|         assert b"Updated watch." in res.data | ||||
|         uuid = client.application.config.get('DATASTORE').add_watch(url=test_url, extras={"subtractive_selectors": selector_list.splitlines()}) | ||||
|         client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|         wait_for_all_checks(client) | ||||
|  | ||||
|         res = client.get( | ||||
| @@ -256,6 +242,7 @@ body > table > tr:nth-child(3) > td:nth-child(3)""", | ||||
|             follow_redirects=True | ||||
|         ) | ||||
|  | ||||
|         # the filters above should have removed this but they never say to remove the "emil" column | ||||
|         assert b"Tobias" not in res.data | ||||
|         assert b"Linus" not in res.data | ||||
|         assert b"Person 2" not in res.data | ||||
|   | ||||
| @@ -28,11 +28,8 @@ def test_check_encoding_detection(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', content_type="text/html", _external=True) | ||||
|     client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -59,11 +56,8 @@ def test_check_encoding_detection_missing_content_type_header(client, live_serve | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
| import time | ||||
|  | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, delete_all_watches | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -19,19 +19,15 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text): | ||||
|                        status_code=http_code, | ||||
|                        _external=True) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     # no change | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'has-unread-changes' not in res.data | ||||
|     assert bytes(expected_text.encode('utf-8')) in res.data | ||||
|  | ||||
|  | ||||
| @@ -47,8 +43,7 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text): | ||||
|     #assert b'Error Screenshot' in res.data | ||||
|  | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|  | ||||
| def test_http_error_handler(client, live_server, measure_memory_usage): | ||||
| @@ -56,8 +51,7 @@ def test_http_error_handler(client, live_server, measure_memory_usage): | ||||
|     _runner_test_http_errors(client, live_server, 404, 'Page not found') | ||||
|     _runner_test_http_errors(client, live_server, 500, '(Internal server error) received') | ||||
|     _runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400') | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| # Just to be sure error text is properly handled | ||||
| def test_DNS_errors(client, live_server, measure_memory_usage): | ||||
| @@ -87,8 +81,7 @@ def test_DNS_errors(client, live_server, measure_memory_usage): | ||||
|     assert found_name_resolution_error | ||||
|     # Should always record that we tried | ||||
|     assert bytes("just now".encode('utf-8')) in res.data | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| # Re 1513 | ||||
| def test_low_level_errors_clear_correctly(client, live_server, measure_memory_usage): | ||||
| @@ -127,7 +120,8 @@ def test_low_level_errors_clear_correctly(client, live_server, measure_memory_us | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             "fetch_backend": "html_requests"}, | ||||
|             "fetch_backend": "html_requests", | ||||
|             "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
| @@ -144,5 +138,4 @@ def test_low_level_errors_clear_correctly(client, live_server, measure_memory_us | ||||
|     ) | ||||
|     assert not found_name_resolution_error | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import live_server_setup, wait_for_all_checks, delete_all_watches | ||||
|  | ||||
| from ..html_tools import * | ||||
|  | ||||
| @@ -76,12 +76,8 @@ def test_check_filter_multiline(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
| @@ -95,7 +91,8 @@ def test_check_filter_multiline(client, live_server, measure_memory_usage): | ||||
|               "url": test_url, | ||||
|               "tags": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_requests" | ||||
|               'fetch_backend': "html_requests", | ||||
|               "time_between_check_use_default": "y" | ||||
|               }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -130,12 +127,8 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -149,7 +142,8 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag | ||||
|               "url": test_url, | ||||
|               "tags": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_requests" | ||||
|               'fetch_backend': "html_requests", | ||||
|               "time_between_check_use_default": "y" | ||||
|               }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -172,10 +166,10 @@ def test_check_filter_and_regex_extract(client, live_server, measure_memory_usag | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # It should have 'unviewed' still | ||||
|     # It should have 'has-unread-changes' still | ||||
|     # Because it should be looking at only that 'sametext' id | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     assert b'has-unread-changes' in res.data | ||||
|  | ||||
|     # Check HTML conversion detected and workd | ||||
|     res = client.get( | ||||
| @@ -210,23 +204,19 @@ def test_regex_error_handling(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     ### test regex error handling | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|         data={"extract_text": '/something bad\d{3/XYZ', | ||||
|               "url": test_url, | ||||
|               "fetch_backend": "html_requests"}, | ||||
|               "fetch_backend": "html_requests", | ||||
|               "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b'is not a valid regular expression.' in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|   | ||||
| @@ -86,15 +86,18 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se | ||||
|                                                    "Diff Full: {{diff_full}}\n" | ||||
|                                                    "Diff as Patch: {{diff_patch}}\n" | ||||
|                                                    ":-)", | ||||
|                               "notification_format": "Text"} | ||||
|                               "notification_format": 'text'} | ||||
|  | ||||
|     notification_form_data.update({ | ||||
|         "url": test_url, | ||||
|         "tags": "my tag", | ||||
|         "title": "my title", | ||||
|         "headers": "", | ||||
|         "include_filters": '.ticket-available', | ||||
|         "fetch_backend": "html_requests"}) | ||||
|         # preprended with extra filter that intentionally doesn't match any entry, | ||||
|         # notification should still be sent even if first filter does not match (PR#3516) | ||||
|         "include_filters": ".non-matching-selector\n.ticket-available", | ||||
|         "fetch_backend": "html_requests", | ||||
|         "time_between_check_use_default": "y"}) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|   | ||||
| @@ -1,10 +1,8 @@ | ||||
| import os | ||||
| import time | ||||
| from loguru import logger | ||||
| from flask import url_for | ||||
| from .util import set_original_response, live_server_setup, extract_UUID_from_client, wait_for_all_checks, \ | ||||
|     wait_for_notification_endpoint_output | ||||
| from changedetectionio.model import App | ||||
| from .util import set_original_response,  wait_for_all_checks, wait_for_notification_endpoint_output | ||||
| from ..notification import valid_notification_formats | ||||
|  | ||||
|  | ||||
| def set_response_with_filter(): | ||||
| @@ -23,13 +21,14 @@ def set_response_with_filter(): | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
| def run_filter_test(client, live_server, content_filter): | ||||
| def run_filter_test(client, live_server, content_filter, app_notification_format): | ||||
|  | ||||
|     # Response WITHOUT the filter ID element | ||||
|     set_original_response() | ||||
|     live_server.app.config['DATASTORE'].data['settings']['application']['notification_format'] = app_notification_format | ||||
|  | ||||
|     # Goto the edit page, add our ignore text | ||||
|     notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'json') | ||||
|     notification_url = url_for('test_notification_endpoint', _external=True).replace('http', 'post') | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
| @@ -42,13 +41,8 @@ def run_filter_test(client, live_server, content_filter): | ||||
|     if os.path.isfile("test-datastore/notification.txt"): | ||||
|         os.unlink("test-datastore/notification.txt") | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) | ||||
| @@ -69,9 +63,10 @@ def run_filter_test(client, live_server, content_filter): | ||||
|                                        "Diff Full: {{diff_full}}\n" | ||||
|                                        "Diff as Patch: {{diff_patch}}\n" | ||||
|                                        ":-)", | ||||
|                   "notification_format": "Text", | ||||
|                   "notification_format": 'text', | ||||
|                   "fetch_backend": "html_requests", | ||||
|                   "filter_failure_notification_send": 'y', | ||||
|                   "time_between_check_use_default": "y", | ||||
|                   "headers": "", | ||||
|                   "tags": "my tag", | ||||
|                   "title": "my title 123", | ||||
| @@ -131,8 +126,23 @@ def run_filter_test(client, live_server, content_filter): | ||||
|     with open("test-datastore/notification.txt", 'r') as f: | ||||
|         notification = f.read() | ||||
|  | ||||
|     assert 'CSS/xPath filter was not present in the page' in notification | ||||
|     assert content_filter.replace('"', '\\"') in notification | ||||
|     assert 'Your configured CSS/xPath filters' in notification | ||||
|  | ||||
|  | ||||
|     # Text (or HTML conversion) markup to make the notifications a little nicer should have worked | ||||
|     if app_notification_format.startswith('html'): | ||||
|         # apprise should have used sax-escape (' instead of ", " etc), lets check it worked | ||||
|  | ||||
|         from apprise.conversion import convert_between | ||||
|         from apprise.common import NotifyFormat | ||||
|         escaped_filter = convert_between(NotifyFormat.TEXT, NotifyFormat.HTML, content_filter) | ||||
|  | ||||
|         assert escaped_filter in notification or escaped_filter.replace('"', '"') in notification | ||||
|         assert 'a href="' in notification # Quotes should still be there so the link works | ||||
|  | ||||
|     else: | ||||
|         assert 'a href' not in notification | ||||
|         assert content_filter in notification | ||||
|  | ||||
|     # Remove it and prove that it doesn't trigger when not expected | ||||
|     # It should register a change, but no 'filter not found' | ||||
| @@ -163,14 +173,20 @@ def run_filter_test(client, live_server, content_filter): | ||||
|     os.unlink("test-datastore/notification.txt") | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| def test_check_include_filters_failure_notification(client, live_server, measure_memory_usage): | ||||
| #   #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     run_filter_test(client, live_server,'#nope-doesnt-exist') | ||||
|     #   #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     run_filter_test(client=client, live_server=live_server, content_filter='#nope-doesnt-exist', app_notification_format=valid_notification_formats.get('htmlcolor')) | ||||
|     # Check markup send conversion didnt affect plaintext preference | ||||
|     run_filter_test(client=client, live_server=live_server, content_filter='#nope-doesnt-exist', app_notification_format=valid_notification_formats.get('text')) | ||||
|  | ||||
| def test_check_xpath_filter_failure_notification(client, live_server, measure_memory_usage): | ||||
| #   #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     run_filter_test(client, live_server, '//*[@id="nope-doesnt-exist"]') | ||||
|     #   #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     run_filter_test(client=client, live_server=live_server, content_filter='//*[@id="nope-doesnt-exist"]', app_notification_format=valid_notification_formats.get('htmlcolor')) | ||||
|  | ||||
| # Test that notification is never sent | ||||
|  | ||||
| def test_basic_markup_from_text(client, live_server, measure_memory_usage): | ||||
|     # Test the notification error templates convert to HTML if needed (link activate) | ||||
|     from ..notification.handler import markup_text_links_to_html | ||||
|     x = markup_text_links_to_html("hello https://google.com") | ||||
|     assert 'a href' in x | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client | ||||
| from .util import live_server_setup, wait_for_all_checks, extract_rss_token_from_UI, get_UUID_for_tag_name, extract_UUID_from_client, delete_all_watches | ||||
| import os | ||||
|  | ||||
|  | ||||
| @@ -127,8 +127,7 @@ def test_setup_group_tag(client, live_server, measure_memory_usage): | ||||
|     assert b"should-be-excluded" not in res.data | ||||
|     assert res.status_code == 200 | ||||
|     assert b"first-imported=1" in res.data | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_tag_import_singular(client, live_server, measure_memory_usage): | ||||
|      | ||||
| @@ -147,8 +146,7 @@ def test_tag_import_singular(client, live_server, measure_memory_usage): | ||||
|     ) | ||||
|     # Should be only 1 tag because they both had the same | ||||
|     assert res.data.count(b'test-tag') == 1 | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_tag_add_in_ui(client, live_server, measure_memory_usage): | ||||
|      | ||||
| @@ -164,8 +162,7 @@ def test_tag_add_in_ui(client, live_server, measure_memory_usage): | ||||
|     res = client.get(url_for("tags.delete_all"), follow_redirects=True) | ||||
|     assert b'All tags deleted' in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_group_tag_notification(client, live_server, measure_memory_usage): | ||||
|      | ||||
| @@ -198,7 +195,7 @@ def test_group_tag_notification(client, live_server, measure_memory_usage): | ||||
|                                                    "Diff as Patch: {{diff_patch}}\n" | ||||
|                                                    ":-)", | ||||
|                               "notification_screenshot": True, | ||||
|                               "notification_format": "Text", | ||||
|                               "notification_format": 'text', | ||||
|                               "title": "test-tag"} | ||||
|  | ||||
|     res = client.post( | ||||
| @@ -232,8 +229,7 @@ def test_group_tag_notification(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     #@todo Test that multiple notifications fired | ||||
|     #@todo Test that each of multiple notifications with different settings | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_limit_tag_ui(client, live_server, measure_memory_usage): | ||||
|  | ||||
| @@ -264,15 +260,12 @@ def test_limit_tag_ui(client, live_server, measure_memory_usage): | ||||
|     client.get(url_for('ui.mark_all_viewed', tag=tag_uuid), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     with open('/tmp/fuck.html', 'wb') as f: | ||||
|         f.write(res.data) | ||||
|     # Should be only 1 unviewed | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert res.data.count(b' unviewed ') == 1 | ||||
|  | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|     res = client.get(url_for("tags.delete_all"), follow_redirects=True) | ||||
|     assert b'All tags deleted' in res.data | ||||
|  | ||||
| @@ -299,8 +292,7 @@ def test_clone_tag_on_import(client, live_server, measure_memory_usage): | ||||
|     # 2 times plus the top link to tag | ||||
|     assert res.data.count(b'test-tag') == 3 | ||||
|     assert res.data.count(b'another-tag') == 3 | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
| def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usage): | ||||
|      | ||||
| @@ -327,8 +319,7 @@ def test_clone_tag_on_quickwatchform_add(client, live_server, measure_memory_usa | ||||
|     # 2 times plus the top link to tag | ||||
|     assert res.data.count(b'test-tag') == 3 | ||||
|     assert res.data.count(b'another-tag') == 3 | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|  | ||||
|     res = client.get(url_for("tags.delete_all"), follow_redirects=True) | ||||
|     assert b'All tags deleted' in res.data | ||||
| @@ -391,12 +382,8 @@ def test_order_of_filters_tag_filter_and_watch_filter(client, live_server, measu | ||||
|         f.write(d) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     filters = [ | ||||
| @@ -424,7 +411,8 @@ def test_order_of_filters_tag_filter_and_watch_filter(client, live_server, measu | ||||
|             "url": test_url, | ||||
|             "tags": "test-tag-keep-order", | ||||
|             "headers": "", | ||||
|             'fetch_backend': "html_requests"}, | ||||
|             'fetch_backend': "html_requests", | ||||
|             "time_between_check_use_default": "y"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| @@ -481,5 +469,4 @@ the {test} appeared before. {test in res.data[:n]=} | ||||
|         """ | ||||
|         n += t_index + len(test) | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|   | ||||
| @@ -3,9 +3,8 @@ | ||||
| import time | ||||
| import os | ||||
| import json | ||||
| import logging | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
| from .util import wait_for_all_checks, delete_all_watches | ||||
| from urllib.parse import urlparse, parse_qs | ||||
|  | ||||
| def test_consistent_history(client, live_server, measure_memory_usage): | ||||
| @@ -81,19 +80,15 @@ def test_consistent_history(client, live_server, measure_memory_usage): | ||||
|         assert '"default"' not in f.read(), "'default' probably shouldnt be here, it came from when the 'default' Watch vars were accidently being saved" | ||||
|  | ||||
|  | ||||
| def test_check_text_history_view(client, live_server): | ||||
| def test_check_text_history_view(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("<html>test-one</html>") | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -122,5 +117,4 @@ def test_check_text_history_view(client, live_server): | ||||
|     assert b'test-two' in res.data | ||||
|     assert b'test-one' not in res.data | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|     delete_all_watches(client) | ||||
|   | ||||
| @@ -27,12 +27,8 @@ def test_ignore(client, live_server, measure_memory_usage): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     set_original_ignore_response() | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
| @@ -58,3 +54,35 @@ def test_ignore(client, live_server, measure_memory_usage): | ||||
|     # Should be in base.html | ||||
|     assert b'csrftoken' in res.data | ||||
|  | ||||
|  | ||||
| def test_strip_ignore_lines(client, live_server, measure_memory_usage): | ||||
|    #  live_server_setup(live_server) # Setup on conftest per function | ||||
|     set_original_ignore_response() | ||||
|  | ||||
|  | ||||
|     # Goto the settings page, add our ignore text | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-ignore_whitespace": "y", | ||||
|             "application-strip_ignored_lines": "y", | ||||
|             "application-global_ignore_text": "Which is across multiple", | ||||
|             'application-fetch_backend': "html_requests" | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     uuid = client.application.config.get('DATASTORE').add_watch(url=test_url) | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|     uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) | ||||
|  | ||||
|     # It should not be in the preview anymore | ||||
|     res = client.get(url_for("ui.ui_views.preview_page", uuid=uuid)) | ||||
|     assert b'<div class="ignored">' not in res.data | ||||
|     assert b'Which is across multiple' not in res.data | ||||
|   | ||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user