mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 08:34:57 +00:00 
			
		
		
		
	Compare commits
	
		
			252 Commits
		
	
	
		
			update-sto
			...
			dependabot
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					00e9832fd0 | ||
| 
						 | 
					2e4f40b172 | ||
| 
						 | 
					80b614afa1 | ||
| 
						 | 
					d18029ffe4 | ||
| 
						 | 
					9a44509134 | ||
| 
						 | 
					33ab4c8891 | ||
| 
						 | 
					e1028f822d | ||
| 
						 | 
					ae1cd61e61 | ||
| 
						 | 
					a5fe1a771f | ||
| 
						 | 
					b0980f45b8 | ||
| 
						 | 
					afadaf5467 | ||
| 
						 | 
					db11f601a1 | ||
| 
						 | 
					ef04840dd2 | ||
| 
						 | 
					1628586553 | ||
| 
						 | 
					a23c07ba94 | ||
| 
						 | 
					431fd168a1 | ||
| 
						 | 
					7dbd0b75b2 | ||
| 
						 | 
					ae532c82e8 | ||
| 
						 | 
					ab0b85d088 | ||
| 
						 | 
					66aec365c2 | ||
| 
						 | 
					e09cea60ef | ||
| 
						 | 
					f304ae19db | ||
| 
						 | 
					2116b2cb93 | ||
| 
						 | 
					8f580ac96b | ||
| 
						 | 
					a8cadc3d16 | ||
| 
						 | 
					c9290d73e0 | ||
| 
						 | 
					2db5e906e9 | ||
| 
						 | 
					0751bd371a | ||
| 
						 | 
					3ffa0805e9 | ||
| 
						 | 
					3335270692 | ||
| 
						 | 
					a7573b10ec | ||
| 
						 | 
					df945ad743 | ||
| 
						 | 
					4536e95205 | ||
| 
						 | 
					1479d7bd46 | ||
| 
						 | 
					9ba2094f75 | ||
| 
						 | 
					8aa012ba8e | ||
| 
						 | 
					8bc6b10db1 | ||
| 
						 | 
					76d799c95b | ||
| 
						 | 
					7c8bdfcc9f | ||
| 
						 | 
					01a938d7ce | ||
| 
						 | 
					e44853c439 | ||
| 
						 | 
					3830bec891 | ||
| 
						 | 
					88ab663330 | ||
| 
						 | 
					68335b95c3 | ||
| 
						 | 
					7bbfa0ef32 | ||
| 
						 | 
					e233d52931 | ||
| 
						 | 
					181d32e82a | ||
| 
						 | 
					a51614f83d | ||
| 
						 | 
					07f98d6bd3 | ||
| 
						 | 
					f71550da4d | ||
| 
						 | 
					8c3d0d7e31 | ||
| 
						 | 
					46658a85d6 | ||
| 
						 | 
					d699652955 | ||
| 
						 | 
					9e88db5d9b | ||
| 
						 | 
					5d9c102aff | ||
| 
						 | 
					cb1c36d97d | ||
| 
						 | 
					cc29ba5ea9 | ||
| 
						 | 
					6f371b1bc6 | ||
| 
						 | 
					785dabd071 | ||
| 
						 | 
					09914d54a0 | ||
| 
						 | 
					58b5586674 | ||
| 
						 | 
					cb02ccc8b4 | ||
| 
						 | 
					ec692ed727 | ||
| 
						 | 
					2fb2ea573e | ||
| 
						 | 
					ada2dc6112 | ||
| 
						 | 
					ad9024a4f0 | ||
| 
						 | 
					047c10e23c | ||
| 
						 | 
					4f83164544 | ||
| 
						 | 
					6f926ed595 | ||
| 
						 | 
					249dc55212 | ||
| 
						 | 
					46252bc6f3 | ||
| 
						 | 
					64350a2e78 | ||
| 
						 | 
					2902c63a3b | ||
| 
						 | 
					55b8588f1f | ||
| 
						 | 
					02ecc4ae9a | ||
| 
						 | 
					3ee50b7832 | ||
| 
						 | 
					66ddd87ee4 | ||
| 
						 | 
					233189e4f7 | ||
| 
						 | 
					b237fd7201 | ||
| 
						 | 
					3c81efe2f4 | ||
| 
						 | 
					0fcfb94690 | ||
| 
						 | 
					bb6d4c2756 | ||
| 
						 | 
					b59ce190ac | ||
| 
						 | 
					80be1a30f2 | ||
| 
						 | 
					93b4f79006 | ||
| 
						 | 
					3009e46617 | ||
| 
						 | 
					8f040a1a84 | ||
| 
						 | 
					4dbab8d77a | ||
| 
						 | 
					cde42c8a49 | ||
| 
						 | 
					3b9d19df43 | ||
| 
						 | 
					6ad4acc9fc | ||
| 
						 | 
					3e59521f48 | ||
| 
						 | 
					0970c087c8 | ||
| 
						 | 
					676c550e6e | ||
| 
						 | 
					78fa47f6f8 | ||
| 
						 | 
					4aa5bb6da3 | ||
| 
						 | 
					f7dfc9bbb8 | ||
| 
						 | 
					584b6e378d | ||
| 
						 | 
					754febfd33 | ||
| 
						 | 
					0c9c475f32 | ||
| 
						 | 
					e4baca1127 | ||
| 
						 | 
					bb61a35a54 | ||
| 
						 | 
					4b9ae5a97c | ||
| 
						 | 
					c8caa0662d | ||
| 
						 | 
					f4e8d1963f | ||
| 
						 | 
					45d5e961dc | ||
| 
						 | 
					45f2863966 | ||
| 
						 | 
					01c1ac4c0c | ||
| 
						 | 
					b2f9aec383 | ||
| 
						 | 
					a95aa67aef | ||
| 
						 | 
					cbeefeccbb | ||
| 
						 | 
					2b72d38235 | ||
| 
						 | 
					8fe7aec3c6 | ||
| 
						 | 
					6e1f5a8503 | ||
| 
						 | 
					b74b76c9f9 | ||
| 
						 | 
					a27265450c | ||
| 
						 | 
					cc5455c3dc | ||
| 
						 | 
					9db7fb83eb | ||
| 
						 | 
					f0061110c9 | ||
| 
						 | 
					a13fedc0d6 | ||
| 
						 | 
					7576bec66a | ||
| 
						 | 
					7672190923 | ||
| 
						 | 
					0ade4307b0 | ||
| 
						 | 
					8c03b65dc6 | ||
| 
						 | 
					8a07459e43 | ||
| 
						 | 
					cd8e115118 | ||
| 
						 | 
					4ff7b20fcf | ||
| 
						 | 
					8120f00148 | ||
| 
						 | 
					127abf49f1 | ||
| 
						 | 
					db81c3c5e2 | ||
| 
						 | 
					9952af7a52 | ||
| 
						 | 
					790577c1b6 | ||
| 
						 | 
					bab362fb7d | ||
| 
						 | 
					a177d02406 | ||
| 
						 | 
					8b8f280565 | ||
| 
						 | 
					e752875504 | ||
| 
						 | 
					0a4562fc09 | ||
| 
						 | 
					c84ac2eab1 | ||
| 
						 | 
					3ae07ac633 | ||
| 
						 | 
					8379fdb1f8 | ||
| 
						 | 
					3f77e075b9 | ||
| 
						 | 
					685bd01156 | ||
| 
						 | 
					20bcca578a | ||
| 
						 | 
					f05f143b46 | ||
| 
						 | 
					d7f00679a0 | ||
| 
						 | 
					b7da6f0ca7 | ||
| 
						 | 
					e4a81ebe08 | ||
| 
						 | 
					a4edc46af0 | ||
| 
						 | 
					767db3b79b | ||
| 
						 | 
					4f6e9dcc56 | ||
| 
						 | 
					aa4e182549 | ||
| 
						 | 
					fe1f7c30e1 | ||
| 
						 | 
					e5ed1ae349 | ||
| 
						 | 
					d1b1dd70f4 | ||
| 
						 | 
					93b14c9fc8 | ||
| 
						 | 
					c9c5de20d8 | ||
| 
						 | 
					011fa3540e | ||
| 
						 | 
					c3c3671f8b | ||
| 
						 | 
					5980bd9bcd | ||
| 
						 | 
					438871429c | ||
| 
						 | 
					173ce5bfa2 | ||
| 
						 | 
					106b1f85fa | ||
| 
						 | 
					a5c7f343d0 | ||
| 
						 | 
					401886bcda | ||
| 
						 | 
					c66fca9de9 | ||
| 
						 | 
					daee4c5c17 | ||
| 
						 | 
					af5d0b6963 | ||
| 
						 | 
					f92dd81c8f | ||
| 
						 | 
					55cdcfe3ea | ||
| 
						 | 
					2f7520a6c5 | ||
| 
						 | 
					4fdc5d7da2 | ||
| 
						 | 
					308f30b2e8 | ||
| 
						 | 
					4fa2042d12 | ||
| 
						 | 
					2a4e1bad4e | ||
| 
						 | 
					8a317eead5 | ||
| 
						 | 
					b58094877f | ||
| 
						 | 
					afe252126c | ||
| 
						 | 
					342e6119f1 | ||
| 
						 | 
					e4ff87e970 | ||
| 
						 | 
					e45a544f15 | ||
| 
						 | 
					9a5abaa17a | ||
| 
						 | 
					b8ecfff861 | ||
| 
						 | 
					58e2a41c95 | ||
| 
						 | 
					a7214db9c3 | ||
| 
						 | 
					b9da4af64f | ||
| 
						 | 
					b77105be7b | ||
| 
						 | 
					3d5a544ea6 | ||
| 
						 | 
					4f362385e1 | ||
| 
						 | 
					a01d6169d2 | ||
| 
						 | 
					9beda3911d | ||
| 
						 | 
					5ed596bfa9 | ||
| 
						 | 
					99ca8787ab | ||
| 
						 | 
					8f1a6feb90 | ||
| 
						 | 
					c0e229201b | ||
| 
						 | 
					66bc7fbc04 | ||
| 
						 | 
					530bd40ca5 | ||
| 
						 | 
					36004cf74b | ||
| 
						 | 
					c7374245e1 | ||
| 
						 | 
					59df59e9cd | ||
| 
						 | 
					c0c2898b91 | ||
| 
						 | 
					abac660bac | ||
| 
						 | 
					26de64d873 | ||
| 
						 | 
					79d9a8ca28 | ||
| 
						 | 
					5c391fbcad | ||
| 
						 | 
					d7e24f64a5 | ||
| 
						 | 
					d6427d823f | ||
| 
						 | 
					47eb874f47 | ||
| 
						 | 
					37019355fd | ||
| 
						 | 
					a8e7f8236e | ||
| 
						 | 
					2414b61fcb | ||
| 
						 | 
					a63ffa89b1 | ||
| 
						 | 
					59e93c29d0 | ||
| 
						 | 
					d7173bb96e | ||
| 
						 | 
					d544e11a20 | ||
| 
						 | 
					7f0c19c61c | ||
| 
						 | 
					30e84f1030 | ||
| 
						 | 
					d5af91d8f7 | ||
| 
						 | 
					4b18c633ba | ||
| 
						 | 
					08728d7d03 | ||
| 
						 | 
					73f3beda00 | ||
| 
						 | 
					7b8d335c43 | ||
| 
						 | 
					ba0b6071e6 | ||
| 
						 | 
					a6603d5ad6 | ||
| 
						 | 
					26833781a7 | ||
| 
						 | 
					f3ed9bdbb5 | ||
| 
						 | 
					0f65178190 | ||
| 
						 | 
					a58fc82575 | ||
| 
						 | 
					2575c03ae0 | ||
| 
						 | 
					9b7372fff0 | ||
| 
						 | 
					fcd6ebe0ee | ||
| 
						 | 
					c162ec9d52 | ||
| 
						 | 
					bb7f7f473b | ||
| 
						 | 
					a9ca511004 | ||
| 
						 | 
					8df61f5eaa | ||
| 
						 | 
					162f573967 | ||
| 
						 | 
					eada0ef08d | ||
| 
						 | 
					f57bc10973 | ||
| 
						 | 
					d2e8f822d6 | ||
| 
						 | 
					5fd8200fd9 | ||
| 
						 | 
					d0da8c9825 | ||
| 
						 | 
					fd7574d21b | ||
| 
						 | 
					c70706a27b | ||
| 
						 | 
					968c364999 | ||
| 
						 | 
					031cb76b7d | ||
| 
						 | 
					af568d064c | ||
| 
						 | 
					a75f57de43 | ||
| 
						 | 
					72a1c3dda1 | ||
| 
						 | 
					ffde79ecac | ||
| 
						 | 
					66ad43b2df | ||
| 
						 | 
					6b0e56ca80 | ||
| 
						 | 
					5a2d84d8b4 | ||
| 
						 | 
					a941156f26 | 
@@ -29,3 +29,34 @@ venv/
 | 
			
		||||
 | 
			
		||||
# Visual Studio
 | 
			
		||||
.vscode/
 | 
			
		||||
 | 
			
		||||
# Test and development files
 | 
			
		||||
test-datastore/
 | 
			
		||||
tests/
 | 
			
		||||
*.md
 | 
			
		||||
!README.md
 | 
			
		||||
 | 
			
		||||
# Temporary and log files
 | 
			
		||||
*.log
 | 
			
		||||
*.tmp
 | 
			
		||||
tmp/
 | 
			
		||||
temp/
 | 
			
		||||
 | 
			
		||||
# Training data and large files
 | 
			
		||||
train-data/
 | 
			
		||||
works-data/
 | 
			
		||||
 | 
			
		||||
# Container files
 | 
			
		||||
Dockerfile*
 | 
			
		||||
docker-compose*.yml
 | 
			
		||||
.dockerignore
 | 
			
		||||
 | 
			
		||||
# Development certificates and keys
 | 
			
		||||
*.pem
 | 
			
		||||
*.key
 | 
			
		||||
*.crt
 | 
			
		||||
profile_output.prof
 | 
			
		||||
 | 
			
		||||
# Large binary files that shouldn't be in container
 | 
			
		||||
*.pdf
 | 
			
		||||
chrome.json
 | 
			
		||||
							
								
								
									
										51
									
								
								.github/actions/extract-memory-report/action.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										51
									
								
								.github/actions/extract-memory-report/action.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							@@ -0,0 +1,51 @@
 | 
			
		||||
name: 'Extract Memory Test Report'
 | 
			
		||||
description: 'Extracts and displays memory test report from a container'
 | 
			
		||||
inputs:
 | 
			
		||||
  container-name:
 | 
			
		||||
    description: 'Name of the container to extract logs from'
 | 
			
		||||
    required: true
 | 
			
		||||
  python-version:
 | 
			
		||||
    description: 'Python version for artifact naming'
 | 
			
		||||
    required: true
 | 
			
		||||
  output-dir:
 | 
			
		||||
    description: 'Directory to store output logs'
 | 
			
		||||
    required: false
 | 
			
		||||
    default: 'output-logs'
 | 
			
		||||
 | 
			
		||||
runs:
 | 
			
		||||
  using: "composite"
 | 
			
		||||
  steps:
 | 
			
		||||
    - name: Create output directory
 | 
			
		||||
      shell: bash
 | 
			
		||||
      run: |
 | 
			
		||||
        mkdir -p ${{ inputs.output-dir }}
 | 
			
		||||
 | 
			
		||||
    - name: Dump container log
 | 
			
		||||
      shell: bash
 | 
			
		||||
      run: |
 | 
			
		||||
        echo "Disabled for now"
 | 
			
		||||
#        return
 | 
			
		||||
#        docker logs ${{ inputs.container-name }} > ${{ inputs.output-dir }}/${{ inputs.container-name }}-stdout-${{ inputs.python-version }}.txt 2>&1 || echo "Could not get stdout"
 | 
			
		||||
#        docker logs ${{ inputs.container-name }} 2> ${{ inputs.output-dir }}/${{ inputs.container-name }}-stderr-${{ inputs.python-version }}.txt || echo "Could not get stderr"
 | 
			
		||||
 | 
			
		||||
    - name: Extract and display memory test report
 | 
			
		||||
      shell: bash
 | 
			
		||||
      run: |
 | 
			
		||||
        echo "Disabled for now"
 | 
			
		||||
#        echo "Extracting test-memory.log from container..."
 | 
			
		||||
#        docker cp ${{ inputs.container-name }}:/app/changedetectionio/test-memory.log ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log || echo "test-memory.log not found in container"
 | 
			
		||||
#
 | 
			
		||||
#        echo "=== Top 10 Highest Peak Memory Tests ==="
 | 
			
		||||
#        if [ -f ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log ]; then
 | 
			
		||||
#          grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | \
 | 
			
		||||
#            sed 's/.*Peak memory: //' | \
 | 
			
		||||
#            paste -d'|' - <(grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log) | \
 | 
			
		||||
#            sort -t'|' -k1 -nr | \
 | 
			
		||||
#            cut -d'|' -f2 | \
 | 
			
		||||
#            head -10
 | 
			
		||||
#          echo ""
 | 
			
		||||
#          echo "=== Full Memory Test Report ==="
 | 
			
		||||
#          cat ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log
 | 
			
		||||
#        else
 | 
			
		||||
#          echo "No memory log available"
 | 
			
		||||
#        fi
 | 
			
		||||
							
								
								
									
										8
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
								
							@@ -4,11 +4,11 @@ updates:
 | 
			
		||||
    directory: /
 | 
			
		||||
    schedule:
 | 
			
		||||
      interval: "weekly"
 | 
			
		||||
    "caronc/apprise":
 | 
			
		||||
      versioning-strategy: "increase"
 | 
			
		||||
      schedule:
 | 
			
		||||
        interval: "daily"
 | 
			
		||||
    groups:
 | 
			
		||||
      all:
 | 
			
		||||
        patterns:
 | 
			
		||||
        - "*"
 | 
			
		||||
  - package-ecosystem: pip
 | 
			
		||||
    directory: /
 | 
			
		||||
    schedule:
 | 
			
		||||
      interval: "weekly"
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										6
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
								
							@@ -2,7 +2,7 @@
 | 
			
		||||
# Test that we can still build on Alpine (musl modified libc https://musl.libc.org/)
 | 
			
		||||
# Some packages wont install via pypi because they dont have a wheel available under this architecture.
 | 
			
		||||
 | 
			
		||||
FROM ghcr.io/linuxserver/baseimage-alpine:3.21
 | 
			
		||||
FROM ghcr.io/linuxserver/baseimage-alpine:3.22
 | 
			
		||||
ENV PYTHONUNBUFFERED=1
 | 
			
		||||
 | 
			
		||||
COPY requirements.txt /requirements.txt
 | 
			
		||||
@@ -18,17 +18,19 @@ RUN \
 | 
			
		||||
    libxslt-dev \
 | 
			
		||||
    openssl-dev \
 | 
			
		||||
    python3-dev \
 | 
			
		||||
    file \
 | 
			
		||||
    zip \
 | 
			
		||||
    zlib-dev && \
 | 
			
		||||
  apk add --update --no-cache \
 | 
			
		||||
    libjpeg \
 | 
			
		||||
    libxslt \
 | 
			
		||||
    file \
 | 
			
		||||
    nodejs \
 | 
			
		||||
    poppler-utils \
 | 
			
		||||
    python3 && \
 | 
			
		||||
  echo "**** pip3 install test of changedetection.io ****" && \
 | 
			
		||||
  python3 -m venv /lsiopy  && \
 | 
			
		||||
  pip install -U pip wheel setuptools && \
 | 
			
		||||
  pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.21/ -r /requirements.txt && \
 | 
			
		||||
  pip install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.22/ -r /requirements.txt && \
 | 
			
		||||
  apk del --purge \
 | 
			
		||||
    build-dependencies
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										8
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										8
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							@@ -30,11 +30,11 @@ jobs:
 | 
			
		||||
 | 
			
		||||
    steps:
 | 
			
		||||
    - name: Checkout repository
 | 
			
		||||
      uses: actions/checkout@v4
 | 
			
		||||
      uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
    # Initializes the CodeQL tools for scanning.
 | 
			
		||||
    - name: Initialize CodeQL
 | 
			
		||||
      uses: github/codeql-action/init@v3
 | 
			
		||||
      uses: github/codeql-action/init@v4
 | 
			
		||||
      with:
 | 
			
		||||
        languages: ${{ matrix.language }}
 | 
			
		||||
        # If you wish to specify custom queries, you can do so here or in a config file.
 | 
			
		||||
@@ -45,7 +45,7 @@ jobs:
 | 
			
		||||
    # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java).
 | 
			
		||||
    # If this step fails, then you should remove it and run the build manually (see below)
 | 
			
		||||
    - name: Autobuild
 | 
			
		||||
      uses: github/codeql-action/autobuild@v3
 | 
			
		||||
      uses: github/codeql-action/autobuild@v4
 | 
			
		||||
 | 
			
		||||
    # ℹ️ Command-line programs to run using the OS shell.
 | 
			
		||||
    # 📚 https://git.io/JvXDl
 | 
			
		||||
@@ -59,4 +59,4 @@ jobs:
 | 
			
		||||
    #   make release
 | 
			
		||||
 | 
			
		||||
    - name: Perform CodeQL Analysis
 | 
			
		||||
      uses: github/codeql-action/analyze@v3
 | 
			
		||||
      uses: github/codeql-action/analyze@v4
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										30
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										30
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							@@ -39,12 +39,20 @@ jobs:
 | 
			
		||||
    # Or if we are in a tagged release scenario.
 | 
			
		||||
    if: ${{ github.event.workflow_run.conclusion == 'success' }} || ${{ github.event.release.tag_name }} != ''
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v4
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
      - name: Set up Python 3.11
 | 
			
		||||
        uses: actions/setup-python@v5
 | 
			
		||||
        uses: actions/setup-python@v6
 | 
			
		||||
        with:
 | 
			
		||||
          python-version: 3.11
 | 
			
		||||
 | 
			
		||||
      - name: Cache pip packages
 | 
			
		||||
        uses: actions/cache@v4
 | 
			
		||||
        with:
 | 
			
		||||
          path: ~/.cache/pip
 | 
			
		||||
          key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
 | 
			
		||||
          restore-keys: |
 | 
			
		||||
            ${{ runner.os }}-pip-
 | 
			
		||||
 | 
			
		||||
      - name: Install dependencies
 | 
			
		||||
        run: |
 | 
			
		||||
          python -m pip install --upgrade pip
 | 
			
		||||
@@ -95,7 +103,7 @@ jobs:
 | 
			
		||||
          push: true
 | 
			
		||||
          tags: |
 | 
			
		||||
            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
 | 
			
		||||
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
 | 
			
		||||
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
 | 
			
		||||
          cache-from: type=gha
 | 
			
		||||
          cache-to: type=gha,mode=max
 | 
			
		||||
 | 
			
		||||
@@ -103,6 +111,13 @@ jobs:
 | 
			
		||||
#          provenance: false
 | 
			
		||||
 | 
			
		||||
      # A new tagged release is required, which builds :tag and :latest
 | 
			
		||||
      - name: Debug release info
 | 
			
		||||
        if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
 | 
			
		||||
        run: |
 | 
			
		||||
          echo "Release tag: ${{ github.event.release.tag_name }}"
 | 
			
		||||
          echo "Github ref: ${{ github.ref }}"
 | 
			
		||||
          echo "Github ref name: ${{ github.ref_name }}"
 | 
			
		||||
          
 | 
			
		||||
      - name: Docker meta :tag
 | 
			
		||||
        if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
 | 
			
		||||
        uses: docker/metadata-action@v5
 | 
			
		||||
@@ -112,9 +127,10 @@ jobs:
 | 
			
		||||
                ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io
 | 
			
		||||
                ghcr.io/dgtlmoon/changedetection.io
 | 
			
		||||
            tags: |
 | 
			
		||||
                type=semver,pattern={{version}}
 | 
			
		||||
                type=semver,pattern={{major}}.{{minor}}
 | 
			
		||||
                type=semver,pattern={{major}}
 | 
			
		||||
                type=semver,pattern={{version}},value=${{ github.event.release.tag_name }}
 | 
			
		||||
                type=semver,pattern={{major}}.{{minor}},value=${{ github.event.release.tag_name }}
 | 
			
		||||
                type=semver,pattern={{major}},value=${{ github.event.release.tag_name }}
 | 
			
		||||
                type=raw,value=latest
 | 
			
		||||
 | 
			
		||||
      - name: Build and push :tag
 | 
			
		||||
        id: docker_build_tag_release
 | 
			
		||||
@@ -125,7 +141,7 @@ jobs:
 | 
			
		||||
          file: ./Dockerfile
 | 
			
		||||
          push: true
 | 
			
		||||
          tags: ${{ steps.meta.outputs.tags }}
 | 
			
		||||
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
 | 
			
		||||
          platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8
 | 
			
		||||
          cache-from: type=gha
 | 
			
		||||
          cache-to: type=gha,mode=max
 | 
			
		||||
# Looks like this was disabled
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										39
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										39
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							@@ -7,9 +7,9 @@ jobs:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
 | 
			
		||||
    steps:
 | 
			
		||||
    - uses: actions/checkout@v4
 | 
			
		||||
    - uses: actions/checkout@v5
 | 
			
		||||
    - name: Set up Python
 | 
			
		||||
      uses: actions/setup-python@v5
 | 
			
		||||
      uses: actions/setup-python@v6
 | 
			
		||||
      with:
 | 
			
		||||
        python-version: "3.11"
 | 
			
		||||
    - name: Install pypa/build
 | 
			
		||||
@@ -21,39 +21,60 @@ jobs:
 | 
			
		||||
    - name: Build a binary wheel and a source tarball
 | 
			
		||||
      run: python3 -m build
 | 
			
		||||
    - name: Store the distribution packages
 | 
			
		||||
      uses: actions/upload-artifact@v4
 | 
			
		||||
      uses: actions/upload-artifact@v5
 | 
			
		||||
      with:
 | 
			
		||||
        name: python-package-distributions
 | 
			
		||||
        path: dist/
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  test-pypi-package:
 | 
			
		||||
    name: Test the built 📦 package works basically.
 | 
			
		||||
    name: Test the built package works basically.
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs:
 | 
			
		||||
    - build
 | 
			
		||||
    steps:
 | 
			
		||||
    - name: Download all the dists
 | 
			
		||||
      uses: actions/download-artifact@v4
 | 
			
		||||
      uses: actions/download-artifact@v6
 | 
			
		||||
      with:
 | 
			
		||||
        name: python-package-distributions
 | 
			
		||||
        path: dist/
 | 
			
		||||
    - name: Set up Python 3.11
 | 
			
		||||
      uses: actions/setup-python@v5
 | 
			
		||||
      uses: actions/setup-python@v6
 | 
			
		||||
      with:
 | 
			
		||||
        python-version: '3.11'
 | 
			
		||||
 | 
			
		||||
    - name: Test that the basic pip built package runs without error
 | 
			
		||||
      run: |
 | 
			
		||||
        set -ex
 | 
			
		||||
        ls -alR 
 | 
			
		||||
        
 | 
			
		||||
        # Find and install the first .whl file
 | 
			
		||||
        find dist -type f -name "*.whl" -exec pip3 install {} \; -quit
 | 
			
		||||
        # Install the first wheel found in dist/
 | 
			
		||||
        WHEEL=$(find dist -type f -name "*.whl" -print -quit)
 | 
			
		||||
        echo Installing $WHEEL
 | 
			
		||||
        python3 -m pip install --upgrade pip
 | 
			
		||||
        python3 -m pip install "$WHEEL"
 | 
			
		||||
        changedetection.io -d /tmp -p 10000 &
 | 
			
		||||
        
 | 
			
		||||
        sleep 3
 | 
			
		||||
        curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null
 | 
			
		||||
        curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null
 | 
			
		||||
        
 | 
			
		||||
        # --- API test ---
 | 
			
		||||
        # This also means that the docs/api-spec.yml was shipped and could be read
 | 
			
		||||
        test -f /tmp/url-watches.json
 | 
			
		||||
        API_KEY=$(jq -r '.. | .api_access_token? // empty' /tmp/url-watches.json)
 | 
			
		||||
        echo Test API KEY is $API_KEY
 | 
			
		||||
        curl -X POST "http://127.0.0.1:10000/api/v1/watch" \
 | 
			
		||||
          -H "x-api-key: ${API_KEY}" \
 | 
			
		||||
          -H "Content-Type: application/json" \
 | 
			
		||||
          --show-error --fail \
 | 
			
		||||
          --retry 6 --retry-delay 1 --retry-connrefused \
 | 
			
		||||
          -d '{
 | 
			
		||||
            "url": "https://example.com",
 | 
			
		||||
            "title": "Example Site Monitor",
 | 
			
		||||
            "time_between_check": { "hours": 1 }
 | 
			
		||||
          }'
 | 
			
		||||
          
 | 
			
		||||
        killall changedetection.io
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -72,7 +93,7 @@ jobs:
 | 
			
		||||
 | 
			
		||||
    steps:
 | 
			
		||||
    - name: Download all the dists
 | 
			
		||||
      uses: actions/download-artifact@v4
 | 
			
		||||
      uses: actions/download-artifact@v6
 | 
			
		||||
      with:
 | 
			
		||||
        name: python-package-distributions
 | 
			
		||||
        path: dist/
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										52
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										52
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							@@ -23,15 +23,41 @@ on:
 | 
			
		||||
  # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing
 | 
			
		||||
  # @todo: some kind of path filter for requirements.txt and Dockerfile
 | 
			
		||||
jobs:
 | 
			
		||||
  test-container-build:
 | 
			
		||||
  builder:
 | 
			
		||||
    name: Build ${{ matrix.platform }} (${{ matrix.dockerfile == './Dockerfile' && 'main' || 'alpine' }})
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    strategy:
 | 
			
		||||
      matrix:
 | 
			
		||||
        include:
 | 
			
		||||
          # Main Dockerfile platforms
 | 
			
		||||
          - platform: linux/amd64
 | 
			
		||||
            dockerfile: ./Dockerfile
 | 
			
		||||
          - platform: linux/arm64
 | 
			
		||||
            dockerfile: ./Dockerfile
 | 
			
		||||
          - platform: linux/arm/v7
 | 
			
		||||
            dockerfile: ./Dockerfile
 | 
			
		||||
          - platform: linux/arm/v8
 | 
			
		||||
            dockerfile: ./Dockerfile
 | 
			
		||||
          # Alpine Dockerfile platforms (musl via alpine check)
 | 
			
		||||
          - platform: linux/amd64
 | 
			
		||||
            dockerfile: ./.github/test/Dockerfile-alpine
 | 
			
		||||
          - platform: linux/arm64
 | 
			
		||||
            dockerfile: ./.github/test/Dockerfile-alpine
 | 
			
		||||
    steps:
 | 
			
		||||
        - uses: actions/checkout@v4
 | 
			
		||||
        - uses: actions/checkout@v5
 | 
			
		||||
        - name: Set up Python 3.11
 | 
			
		||||
          uses: actions/setup-python@v5
 | 
			
		||||
          uses: actions/setup-python@v6
 | 
			
		||||
          with:
 | 
			
		||||
            python-version: 3.11
 | 
			
		||||
 | 
			
		||||
        - name: Cache pip packages
 | 
			
		||||
          uses: actions/cache@v4
 | 
			
		||||
          with:
 | 
			
		||||
            path: ~/.cache/pip
 | 
			
		||||
            key: ${{ runner.os }}-pip-${{ hashFiles('requirements.txt') }}
 | 
			
		||||
            restore-keys: |
 | 
			
		||||
              ${{ runner.os }}-pip-
 | 
			
		||||
 | 
			
		||||
        # Just test that the build works, some libraries won't compile on ARM/rPi etc
 | 
			
		||||
        - name: Set up QEMU
 | 
			
		||||
          uses: docker/setup-qemu-action@v3
 | 
			
		||||
@@ -47,24 +73,14 @@ jobs:
 | 
			
		||||
            version: latest
 | 
			
		||||
            driver-opts: image=moby/buildkit:master
 | 
			
		||||
 | 
			
		||||
        # https://github.com/dgtlmoon/changedetection.io/pull/1067
 | 
			
		||||
        # Check we can still build under alpine/musl
 | 
			
		||||
        - name: Test that the docker containers can build (musl via alpine check)
 | 
			
		||||
          id: docker_build_musl
 | 
			
		||||
          uses: docker/build-push-action@v6
 | 
			
		||||
          with:
 | 
			
		||||
            context: ./
 | 
			
		||||
            file: ./.github/test/Dockerfile-alpine
 | 
			
		||||
            platforms: linux/amd64,linux/arm64
 | 
			
		||||
 | 
			
		||||
        - name: Test that the docker containers can build
 | 
			
		||||
        - name: Test that the docker containers can build (${{ matrix.platform }} - ${{ matrix.dockerfile }})
 | 
			
		||||
          id: docker_build
 | 
			
		||||
          uses: docker/build-push-action@v6
 | 
			
		||||
          # https://github.com/docker/build-push-action#customizing
 | 
			
		||||
          with:
 | 
			
		||||
            context: ./
 | 
			
		||||
            file: ./Dockerfile
 | 
			
		||||
            platforms: linux/amd64,linux/arm64,linux/arm/v7,linux/arm/v8,linux/arm64/v8
 | 
			
		||||
            cache-from: type=local,src=/tmp/.buildx-cache
 | 
			
		||||
            cache-to: type=local,dest=/tmp/.buildx-cache
 | 
			
		||||
            file: ${{ matrix.dockerfile }}
 | 
			
		||||
            platforms: ${{ matrix.platform }}
 | 
			
		||||
            cache-from: type=gha
 | 
			
		||||
            cache-to: type=gha,mode=max
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										28
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										28
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							@@ -7,16 +7,22 @@ jobs:
 | 
			
		||||
  lint-code:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v4
 | 
			
		||||
      - name: Lint with flake8
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
      - name: Lint with Ruff
 | 
			
		||||
        run: |
 | 
			
		||||
          pip3 install flake8
 | 
			
		||||
          # stop the build if there are Python syntax errors or undefined names
 | 
			
		||||
          flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
 | 
			
		||||
          # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
 | 
			
		||||
          flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
 | 
			
		||||
          pip install ruff
 | 
			
		||||
          # Check for syntax errors and undefined names
 | 
			
		||||
          ruff check . --select E9,F63,F7,F82
 | 
			
		||||
          # Complete check with errors treated as warnings
 | 
			
		||||
          ruff check . --exit-zero
 | 
			
		||||
      - name: Validate OpenAPI spec
 | 
			
		||||
        run: |
 | 
			
		||||
          pip install openapi-spec-validator
 | 
			
		||||
          python3 -c "from openapi_spec_validator import validate_spec; import yaml; validate_spec(yaml.safe_load(open('docs/api-spec.yaml')))"
 | 
			
		||||
 | 
			
		||||
  test-application-3-10:
 | 
			
		||||
    # Only run on push to master (including PR merges)
 | 
			
		||||
    if: github.event_name == 'push' && github.ref == 'refs/heads/master'
 | 
			
		||||
    needs: lint-code
 | 
			
		||||
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
 | 
			
		||||
    with:
 | 
			
		||||
@@ -24,12 +30,15 @@ jobs:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
  test-application-3-11:
 | 
			
		||||
    # Always run
 | 
			
		||||
    needs: lint-code
 | 
			
		||||
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
 | 
			
		||||
    with:
 | 
			
		||||
      python-version: '3.11'
 | 
			
		||||
 | 
			
		||||
  test-application-3-12:
 | 
			
		||||
    # Only run on push to master (including PR merges)
 | 
			
		||||
    if: github.event_name == 'push' && github.ref == 'refs/heads/master'
 | 
			
		||||
    needs: lint-code
 | 
			
		||||
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
 | 
			
		||||
    with:
 | 
			
		||||
@@ -37,9 +46,10 @@ jobs:
 | 
			
		||||
      skip-pypuppeteer: true
 | 
			
		||||
 | 
			
		||||
  test-application-3-13:
 | 
			
		||||
    # Only run on push to master (including PR merges)
 | 
			
		||||
    if: github.event_name == 'push' && github.ref == 'refs/heads/master'
 | 
			
		||||
    needs: lint-code
 | 
			
		||||
    uses: ./.github/workflows/test-stack-reusable-workflow.yml
 | 
			
		||||
    with:
 | 
			
		||||
      python-version: '3.13'
 | 
			
		||||
      skip-pypuppeteer: true
 | 
			
		||||
      
 | 
			
		||||
      skip-pypuppeteer: true
 | 
			
		||||
							
								
								
									
										459
									
								
								.github/workflows/test-stack-reusable-workflow.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										459
									
								
								.github/workflows/test-stack-reusable-workflow.yml
									
									
									
									
										vendored
									
									
								
							@@ -15,138 +15,294 @@ on:
 | 
			
		||||
        default: false
 | 
			
		||||
 | 
			
		||||
jobs:
 | 
			
		||||
  test-application:
 | 
			
		||||
  # Build the Docker image once and share it with all test jobs
 | 
			
		||||
  build:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v4
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
      # Mainly just for link/flake8
 | 
			
		||||
      - name: Set up Python ${{ env.PYTHON_VERSION }}
 | 
			
		||||
        uses: actions/setup-python@v5
 | 
			
		||||
        uses: actions/setup-python@v6
 | 
			
		||||
        with:
 | 
			
		||||
          python-version: ${{ env.PYTHON_VERSION }}
 | 
			
		||||
 | 
			
		||||
      - name: Cache pip packages
 | 
			
		||||
        uses: actions/cache@v4
 | 
			
		||||
        with:
 | 
			
		||||
          path: ~/.cache/pip
 | 
			
		||||
          key: ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-${{ hashFiles('requirements.txt') }}
 | 
			
		||||
          restore-keys: |
 | 
			
		||||
            ${{ runner.os }}-pip-py${{ env.PYTHON_VERSION }}-
 | 
			
		||||
            ${{ runner.os }}-pip-
 | 
			
		||||
 | 
			
		||||
      - name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
 | 
			
		||||
        run: |
 | 
			
		||||
          echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
 | 
			
		||||
          # Build a changedetection.io container and start testing inside
 | 
			
		||||
          docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
 | 
			
		||||
          # Debug info
 | 
			
		||||
          docker run test-changedetectionio  bash -c 'pip list'         
 | 
			
		||||
          docker run test-changedetectionio bash -c 'pip list'
 | 
			
		||||
 | 
			
		||||
      - name: We should be Python ${{ env.PYTHON_VERSION }} ...
 | 
			
		||||
        run: |         
 | 
			
		||||
          docker run test-changedetectionio  bash -c 'python3 --version'
 | 
			
		||||
 | 
			
		||||
      - name: Spin up ancillary testable services
 | 
			
		||||
        run: |
 | 
			
		||||
          
 | 
			
		||||
          docker network create changedet-network
 | 
			
		||||
          
 | 
			
		||||
          # Selenium
 | 
			
		||||
          docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4
 | 
			
		||||
          
 | 
			
		||||
          # SocketPuppetBrowser + Extra for custom browser test
 | 
			
		||||
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest                    
 | 
			
		||||
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url  -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
 | 
			
		||||
          docker run test-changedetectionio bash -c 'python3 --version'
 | 
			
		||||
 | 
			
		||||
      - name: Spin up ancillary SMTP+Echo message test server
 | 
			
		||||
      - name: Save Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          # Debug SMTP server/echo message back server
 | 
			
		||||
          docker run --network changedet-network -d -p 11025:11025 -p 11080:11080  --hostname mailserver test-changedetectionio  bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
 | 
			
		||||
          docker ps
 | 
			
		||||
          docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
      - name: Show docker container state and other debug info
 | 
			
		||||
      - name: Upload Docker image artifact
 | 
			
		||||
        uses: actions/upload-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp/test-changedetectionio.tar
 | 
			
		||||
          retention-days: 1
 | 
			
		||||
 | 
			
		||||
  # Unit tests (lightweight, no ancillary services needed)
 | 
			
		||||
  unit-tests:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs: build
 | 
			
		||||
    timeout-minutes: 10
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
      - name: Download Docker image artifact
 | 
			
		||||
        uses: actions/download-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp
 | 
			
		||||
 | 
			
		||||
      - name: Load Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          set -x
 | 
			
		||||
          echo "Running processes in docker..."
 | 
			
		||||
          docker ps
 | 
			
		||||
          docker load -i /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
      - name: Run Unit Tests
 | 
			
		||||
        run: |
 | 
			
		||||
          # Unit tests
 | 
			
		||||
          docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
 | 
			
		||||
          docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
 | 
			
		||||
          docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
 | 
			
		||||
          docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
 | 
			
		||||
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
 | 
			
		||||
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
 | 
			
		||||
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
 | 
			
		||||
          docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
 | 
			
		||||
 | 
			
		||||
      - name: Test built container with Pytest (generally as requests/plaintext fetching)
 | 
			
		||||
  # Basic pytest tests with ancillary services
 | 
			
		||||
  basic-tests:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs: build
 | 
			
		||||
    timeout-minutes: 25
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
      - name: Download Docker image artifact
 | 
			
		||||
        uses: actions/download-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp
 | 
			
		||||
 | 
			
		||||
      - name: Load Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          # All tests
 | 
			
		||||
          echo "run test with pytest"
 | 
			
		||||
          # The default pytest logger_level is TRACE
 | 
			
		||||
          # To change logger_level for pytest(test/conftest.py),
 | 
			
		||||
          # append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG'
 | 
			
		||||
          docker run --name test-cdio-basic-tests --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio && ./run_basic_tests.sh'
 | 
			
		||||
          docker load -i /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
# PLAYWRIGHT/NODE-> CDP
 | 
			
		||||
      - name: Playwright and SocketPuppetBrowser - Specific tests in built container
 | 
			
		||||
      - name: Test built container with Pytest
 | 
			
		||||
        run: |
 | 
			
		||||
          # Playwright via Sockpuppetbrowser fetch
 | 
			
		||||
          # tests/visualselector/test_fetch_data.py will do browser steps  
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
 | 
			
		||||
          docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
 | 
			
		||||
          docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
 | 
			
		||||
 | 
			
		||||
      - name: Extract memory report and logs
 | 
			
		||||
        if: always()
 | 
			
		||||
        uses: ./.github/actions/extract-memory-report
 | 
			
		||||
        with:
 | 
			
		||||
          container-name: test-cdio-basic-tests
 | 
			
		||||
          python-version: ${{ env.PYTHON_VERSION }}
 | 
			
		||||
 | 
			
		||||
      - name: Playwright and SocketPuppetBrowser - Headers and requests
 | 
			
		||||
        run: |       
 | 
			
		||||
          # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
 | 
			
		||||
          docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py; pwd;find .'
 | 
			
		||||
      - name: Store test artifacts
 | 
			
		||||
        if: always()
 | 
			
		||||
        uses: actions/upload-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: output-logs
 | 
			
		||||
 | 
			
		||||
      - name: Playwright and SocketPuppetBrowser - Restock detection
 | 
			
		||||
        run: |                            
 | 
			
		||||
          # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
 | 
			
		||||
          docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
 | 
			
		||||
  # Playwright tests
 | 
			
		||||
  playwright-tests:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs: build
 | 
			
		||||
    timeout-minutes: 10
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
# STRAIGHT TO CDP
 | 
			
		||||
      - name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container
 | 
			
		||||
        if: ${{ inputs.skip-pypuppeteer == false }}
 | 
			
		||||
      - name: Download Docker image artifact
 | 
			
		||||
        uses: actions/download-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp
 | 
			
		||||
 | 
			
		||||
      - name: Load Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          # Playwright via Sockpuppetbrowser fetch 
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
 | 
			
		||||
          docker load -i /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
      - name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks
 | 
			
		||||
        if: ${{ inputs.skip-pypuppeteer == false }}
 | 
			
		||||
      - name: Spin up ancillary services
 | 
			
		||||
        run: |
 | 
			
		||||
          # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
 | 
			
		||||
          docker run --name "changedet" --hostname changedet --rm  -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py'
 | 
			
		||||
          docker network create changedet-network
 | 
			
		||||
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
 | 
			
		||||
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
 | 
			
		||||
 | 
			
		||||
      - name: Pyppeteer and SocketPuppetBrowser - Restock detection
 | 
			
		||||
        if: ${{ inputs.skip-pypuppeteer == false }}
 | 
			
		||||
        run: |                            
 | 
			
		||||
          # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
 | 
			
		||||
          docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet"  -e "FAST_PUPPETEER_CHROME_FETCHER=True"  -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
 | 
			
		||||
      - name: Playwright - Specific tests in built container
 | 
			
		||||
        run: |
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
 | 
			
		||||
 | 
			
		||||
      - name: Playwright - Headers and requests
 | 
			
		||||
        run: |
 | 
			
		||||
          docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
 | 
			
		||||
 | 
			
		||||
      - name: Playwright - Restock detection
 | 
			
		||||
        run: |
 | 
			
		||||
          docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
 | 
			
		||||
 | 
			
		||||
  # Pyppeteer tests
 | 
			
		||||
  pyppeteer-tests:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs: build
 | 
			
		||||
    if: ${{ inputs.skip-pypuppeteer == false }}
 | 
			
		||||
    timeout-minutes: 10
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
      - name: Download Docker image artifact
 | 
			
		||||
        uses: actions/download-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp
 | 
			
		||||
 | 
			
		||||
      - name: Load Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          docker load -i /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
      - name: Spin up ancillary services
 | 
			
		||||
        run: |
 | 
			
		||||
          docker network create changedet-network
 | 
			
		||||
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
 | 
			
		||||
 | 
			
		||||
      - name: Pyppeteer - Specific tests in built container
 | 
			
		||||
        run: |
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
 | 
			
		||||
          docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
 | 
			
		||||
 | 
			
		||||
      - name: Pyppeteer - Headers and requests checks
 | 
			
		||||
        run: |
 | 
			
		||||
          docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
 | 
			
		||||
 | 
			
		||||
      - name: Pyppeteer - Restock detection
 | 
			
		||||
        run: |
 | 
			
		||||
          docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
 | 
			
		||||
 | 
			
		||||
  # Selenium tests
 | 
			
		||||
  selenium-tests:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs: build
 | 
			
		||||
    timeout-minutes: 10
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
      - name: Download Docker image artifact
 | 
			
		||||
        uses: actions/download-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp
 | 
			
		||||
 | 
			
		||||
      - name: Load Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          docker load -i /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
      - name: Spin up ancillary services
 | 
			
		||||
        run: |
 | 
			
		||||
          docker network create changedet-network
 | 
			
		||||
          docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
 | 
			
		||||
          sleep 3
 | 
			
		||||
 | 
			
		||||
      - name: Specific tests for headers and requests checks with Selenium
 | 
			
		||||
        run: |
 | 
			
		||||
 | 
			
		||||
          docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
 | 
			
		||||
 | 
			
		||||
# SELENIUM
 | 
			
		||||
      - name: Specific tests in built container for Selenium
 | 
			
		||||
        run: |
 | 
			
		||||
          # Selenium fetch
 | 
			
		||||
          docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
 | 
			
		||||
          docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
 | 
			
		||||
 | 
			
		||||
      - name: Specific tests in built container for headers and requests checks with Selenium
 | 
			
		||||
 | 
			
		||||
  # SMTP tests
 | 
			
		||||
  smtp-tests:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs: build
 | 
			
		||||
    timeout-minutes: 10
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
      - name: Download Docker image artifact
 | 
			
		||||
        uses: actions/download-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp
 | 
			
		||||
 | 
			
		||||
      - name: Load Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py'
 | 
			
		||||
          docker load -i /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
      - name: Spin up SMTP test server
 | 
			
		||||
        run: |
 | 
			
		||||
          docker network create changedet-network
 | 
			
		||||
          docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
 | 
			
		||||
 | 
			
		||||
# OTHER STUFF
 | 
			
		||||
      - name: Test SMTP notification mime types
 | 
			
		||||
        run: |
 | 
			
		||||
          # SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above
 | 
			
		||||
          # "mailserver" hostname defined above
 | 
			
		||||
          docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
 | 
			
		||||
 | 
			
		||||
      # @todo Add a test via playwright/puppeteer
 | 
			
		||||
      # squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py
 | 
			
		||||
      - name: Test proxy squid style interaction
 | 
			
		||||
  # Proxy tests
 | 
			
		||||
  proxy-tests:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs: build
 | 
			
		||||
    timeout-minutes: 10
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
      - name: Download Docker image artifact
 | 
			
		||||
        uses: actions/download-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp
 | 
			
		||||
 | 
			
		||||
      - name: Load Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          docker load -i /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
      - name: Spin up services
 | 
			
		||||
        run: |
 | 
			
		||||
          docker network create changedet-network
 | 
			
		||||
          docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
 | 
			
		||||
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
 | 
			
		||||
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
 | 
			
		||||
 | 
			
		||||
      - name: Test proxy Squid style interaction
 | 
			
		||||
        run: |
 | 
			
		||||
          cd changedetectionio
 | 
			
		||||
          ./run_proxy_tests.sh
 | 
			
		||||
          docker ps
 | 
			
		||||
          cd ..
 | 
			
		||||
 | 
			
		||||
      - name: Test proxy SOCKS5 style interaction
 | 
			
		||||
@@ -155,87 +311,132 @@ jobs:
 | 
			
		||||
          ./run_socks_proxy_tests.sh
 | 
			
		||||
          cd ..
 | 
			
		||||
 | 
			
		||||
  # Custom browser URL tests
 | 
			
		||||
  custom-browser-tests:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs: build
 | 
			
		||||
    timeout-minutes: 10
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
      - name: Download Docker image artifact
 | 
			
		||||
        uses: actions/download-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp
 | 
			
		||||
 | 
			
		||||
      - name: Load Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          docker load -i /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
      - name: Spin up ancillary services
 | 
			
		||||
        run: |
 | 
			
		||||
          docker network create changedet-network
 | 
			
		||||
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
 | 
			
		||||
          docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
 | 
			
		||||
 | 
			
		||||
      - name: Test custom browser URL
 | 
			
		||||
        run: |
 | 
			
		||||
          cd changedetectionio
 | 
			
		||||
          ./run_custom_browser_url_tests.sh
 | 
			
		||||
          cd ..
 | 
			
		||||
 | 
			
		||||
      - name: Test changedetection.io container starts+runs basically without error
 | 
			
		||||
  # Container startup tests
 | 
			
		||||
  container-tests:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs: build
 | 
			
		||||
    timeout-minutes: 10
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
      - name: Download Docker image artifact
 | 
			
		||||
        uses: actions/download-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp
 | 
			
		||||
 | 
			
		||||
      - name: Load Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          docker run --name test-changedetectionio -p 5556:5000  -d test-changedetectionio
 | 
			
		||||
          docker load -i /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
      - name: Test container starts+runs basically without error
 | 
			
		||||
        run: |
 | 
			
		||||
          docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio
 | 
			
		||||
          sleep 3
 | 
			
		||||
          # Should return 0 (no error) when grep finds it
 | 
			
		||||
          curl --retry-connrefused --retry 6  -s http://localhost:5556 |grep -q checkbox-uuid
 | 
			
		||||
          
 | 
			
		||||
          # and IPv6
 | 
			
		||||
          curl --retry-connrefused --retry 6  -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
 | 
			
		||||
 | 
			
		||||
          # Check whether TRACE log is enabled.
 | 
			
		||||
          # Also, check whether TRACE is came from STDERR
 | 
			
		||||
          docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1
 | 
			
		||||
          # Check whether DEBUG is came from STDOUT
 | 
			
		||||
          curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid
 | 
			
		||||
          curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
 | 
			
		||||
          docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
 | 
			
		||||
          docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
 | 
			
		||||
 | 
			
		||||
          docker kill test-changedetectionio
 | 
			
		||||
 | 
			
		||||
      - name: Test changedetection.io SIGTERM and SIGINT signal shutdown
 | 
			
		||||
      - name: Test HTTPS SSL mode
 | 
			
		||||
        run: |
 | 
			
		||||
          openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost"
 | 
			
		||||
          docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio
 | 
			
		||||
          sleep 3
 | 
			
		||||
          curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid
 | 
			
		||||
          docker kill test-changedetectionio-ssl
 | 
			
		||||
 | 
			
		||||
      - name: Test IPv6 Mode
 | 
			
		||||
        run: |
 | 
			
		||||
          docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio
 | 
			
		||||
          sleep 3
 | 
			
		||||
          curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid
 | 
			
		||||
          docker kill test-changedetectionio-ipv6
 | 
			
		||||
 | 
			
		||||
  # Signal tests
 | 
			
		||||
  signal-tests:
 | 
			
		||||
    runs-on: ubuntu-latest
 | 
			
		||||
    needs: build
 | 
			
		||||
    timeout-minutes: 10
 | 
			
		||||
    env:
 | 
			
		||||
      PYTHON_VERSION: ${{ inputs.python-version }}
 | 
			
		||||
    steps:
 | 
			
		||||
      - uses: actions/checkout@v5
 | 
			
		||||
 | 
			
		||||
      - name: Download Docker image artifact
 | 
			
		||||
        uses: actions/download-artifact@v5
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-changedetectionio-${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: /tmp
 | 
			
		||||
 | 
			
		||||
      - name: Load Docker image
 | 
			
		||||
        run: |
 | 
			
		||||
          docker load -i /tmp/test-changedetectionio.tar
 | 
			
		||||
 | 
			
		||||
      - name: Test SIGTERM and SIGINT signal shutdown
 | 
			
		||||
        run: |
 | 
			
		||||
          
 | 
			
		||||
          echo SIGINT Shutdown request test
 | 
			
		||||
          docker run --name sig-test -d test-changedetectionio
 | 
			
		||||
          sleep 3
 | 
			
		||||
          echo ">>> Sending SIGINT to sig-test container"
 | 
			
		||||
          docker kill --signal=SIGINT sig-test
 | 
			
		||||
          sleep 3
 | 
			
		||||
          # invert the check (it should be not 0/not running)
 | 
			
		||||
          docker ps
 | 
			
		||||
          # check signal catch(STDERR) log. Because of
 | 
			
		||||
          # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
 | 
			
		||||
          docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1
 | 
			
		||||
          test -z "`docker ps|grep sig-test`"
 | 
			
		||||
          if [ $? -ne 0 ]
 | 
			
		||||
          then
 | 
			
		||||
          if [ $? -ne 0 ]; then
 | 
			
		||||
            echo "Looks like container was running when it shouldnt be"
 | 
			
		||||
            docker ps
 | 
			
		||||
            exit 1
 | 
			
		||||
          fi
 | 
			
		||||
          
 | 
			
		||||
          # @todo - scan the container log to see the right "graceful shutdown" text exists 
 | 
			
		||||
          docker rm sig-test
 | 
			
		||||
          
 | 
			
		||||
 | 
			
		||||
          echo SIGTERM Shutdown request test
 | 
			
		||||
          docker run --name sig-test -d test-changedetectionio
 | 
			
		||||
          sleep 3
 | 
			
		||||
          echo ">>> Sending SIGTERM to sig-test container"
 | 
			
		||||
          docker kill --signal=SIGTERM sig-test
 | 
			
		||||
          sleep 3
 | 
			
		||||
          # invert the check (it should be not 0/not running)
 | 
			
		||||
          docker ps
 | 
			
		||||
          # check signal catch(STDERR) log. Because of
 | 
			
		||||
          # changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
 | 
			
		||||
          docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1
 | 
			
		||||
          test -z "`docker ps|grep sig-test`"
 | 
			
		||||
          if [ $? -ne 0 ]
 | 
			
		||||
          then
 | 
			
		||||
          if [ $? -ne 0 ]; then
 | 
			
		||||
            echo "Looks like container was running when it shouldnt be"
 | 
			
		||||
            docker ps
 | 
			
		||||
            exit 1
 | 
			
		||||
          fi
 | 
			
		||||
          
 | 
			
		||||
          # @todo - scan the container log to see the right "graceful shutdown" text exists           
 | 
			
		||||
          docker rm sig-test
 | 
			
		||||
 | 
			
		||||
      - name: Dump container log
 | 
			
		||||
        if: always()
 | 
			
		||||
        run: |
 | 
			
		||||
          mkdir output-logs
 | 
			
		||||
          docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
 | 
			
		||||
          docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
 | 
			
		||||
 | 
			
		||||
      - name: Store everything including test-datastore
 | 
			
		||||
        if: always()
 | 
			
		||||
        uses: actions/upload-artifact@v4
 | 
			
		||||
        with:
 | 
			
		||||
          name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
 | 
			
		||||
          path: .
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -16,6 +16,7 @@ dist/
 | 
			
		||||
.env
 | 
			
		||||
.venv/
 | 
			
		||||
venv/
 | 
			
		||||
.python-version
 | 
			
		||||
 | 
			
		||||
# IDEs
 | 
			
		||||
.idea
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										9
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,9 @@
 | 
			
		||||
repos:
 | 
			
		||||
  - repo: https://github.com/astral-sh/ruff-pre-commit
 | 
			
		||||
    rev: v0.11.2
 | 
			
		||||
    hooks:
 | 
			
		||||
      # Lint (and apply safe fixes)
 | 
			
		||||
      - id: ruff
 | 
			
		||||
        args: [--fix]
 | 
			
		||||
      # Fomrat
 | 
			
		||||
      - id: ruff-format
 | 
			
		||||
							
								
								
									
										48
									
								
								.ruff.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								.ruff.toml
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,48 @@
 | 
			
		||||
# Minimum supported version
 | 
			
		||||
target-version = "py310"
 | 
			
		||||
 | 
			
		||||
# Formatting options
 | 
			
		||||
line-length = 100
 | 
			
		||||
indent-width = 4
 | 
			
		||||
 | 
			
		||||
exclude = [
 | 
			
		||||
    "__pycache__",
 | 
			
		||||
    ".eggs",
 | 
			
		||||
    ".git",
 | 
			
		||||
    ".tox",
 | 
			
		||||
    ".venv",
 | 
			
		||||
    "*.egg-info",
 | 
			
		||||
    "*.pyc",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[lint]
 | 
			
		||||
# https://docs.astral.sh/ruff/rules/
 | 
			
		||||
select = [
 | 
			
		||||
    "B", # flake8-bugbear
 | 
			
		||||
    "B9",
 | 
			
		||||
    "C", 
 | 
			
		||||
    "E", # pycodestyle
 | 
			
		||||
    "F", # Pyflakes
 | 
			
		||||
    "I", # isort
 | 
			
		||||
    "N", # pep8-naming
 | 
			
		||||
    "UP", # pyupgrade
 | 
			
		||||
    "W", # pycodestyle
 | 
			
		||||
]
 | 
			
		||||
ignore = [
 | 
			
		||||
    "B007", # unused-loop-control-variable
 | 
			
		||||
    "B909", # loop-iterator-mutation
 | 
			
		||||
    "E203", # whitespace-before-punctuation
 | 
			
		||||
    "E266", # multiple-leading-hashes-for-block-comment
 | 
			
		||||
    "E501", # redundant-backslash
 | 
			
		||||
    "F403", # undefined-local-with-import-star
 | 
			
		||||
    "N802", # invalid-function-name
 | 
			
		||||
    "N806", # non-lowercase-variable-in-function
 | 
			
		||||
    "N815", # mixed-case-variable-in-class-scope
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
[lint.mccabe]
 | 
			
		||||
max-complexity = 12
 | 
			
		||||
 | 
			
		||||
[format]
 | 
			
		||||
indent-style = "space"
 | 
			
		||||
quote-style = "preserve"
 | 
			
		||||
							
								
								
									
										44
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								Dockerfile
									
									
									
									
									
								
							@@ -5,7 +5,6 @@ ARG PYTHON_VERSION=3.11
 | 
			
		||||
FROM python:${PYTHON_VERSION}-slim-bookworm AS builder
 | 
			
		||||
 | 
			
		||||
# See `cryptography` pin comment in requirements.txt
 | 
			
		||||
ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1
 | 
			
		||||
 | 
			
		||||
RUN apt-get update && apt-get install -y --no-install-recommends \
 | 
			
		||||
    g++ \
 | 
			
		||||
@@ -16,6 +15,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 | 
			
		||||
    libssl-dev \
 | 
			
		||||
    libxslt-dev \
 | 
			
		||||
    make \
 | 
			
		||||
    patch \
 | 
			
		||||
    pkg-config \
 | 
			
		||||
    zlib1g-dev
 | 
			
		||||
 | 
			
		||||
RUN mkdir /install
 | 
			
		||||
@@ -23,14 +24,37 @@ WORKDIR /install
 | 
			
		||||
 | 
			
		||||
COPY requirements.txt /requirements.txt
 | 
			
		||||
 | 
			
		||||
# --extra-index-url https://www.piwheels.org/simple  is for cryptography module to be prebuilt (or rustc etc needs to be installed)
 | 
			
		||||
RUN pip install --extra-index-url https://www.piwheels.org/simple  --target=/dependencies -r /requirements.txt
 | 
			
		||||
# Use cache mounts and multiple wheel sources for faster ARM builds
 | 
			
		||||
ENV PIP_CACHE_DIR=/tmp/pip-cache
 | 
			
		||||
# Help Rust find OpenSSL for cryptography package compilation on ARM
 | 
			
		||||
ENV PKG_CONFIG_PATH="/usr/lib/pkgconfig:/usr/lib/arm-linux-gnueabihf/pkgconfig:/usr/lib/aarch64-linux-gnu/pkgconfig"
 | 
			
		||||
ENV PKG_CONFIG_ALLOW_SYSTEM_CFLAGS=1
 | 
			
		||||
ENV OPENSSL_DIR="/usr"
 | 
			
		||||
ENV OPENSSL_LIB_DIR="/usr/lib/arm-linux-gnueabihf"
 | 
			
		||||
ENV OPENSSL_INCLUDE_DIR="/usr/include/openssl"
 | 
			
		||||
# Additional environment variables for cryptography Rust build
 | 
			
		||||
ENV CRYPTOGRAPHY_DONT_BUILD_RUST=1
 | 
			
		||||
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
 | 
			
		||||
  pip install \
 | 
			
		||||
  --prefer-binary \
 | 
			
		||||
  --extra-index-url https://www.piwheels.org/simple \
 | 
			
		||||
  --extra-index-url https://pypi.anaconda.org/ARM-software/simple \
 | 
			
		||||
  --cache-dir=/tmp/pip-cache \
 | 
			
		||||
  --target=/dependencies \
 | 
			
		||||
  -r /requirements.txt
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Playwright is an alternative to Selenium
 | 
			
		||||
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
 | 
			
		||||
# https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported)
 | 
			
		||||
RUN pip install --target=/dependencies playwright~=1.48.0 \
 | 
			
		||||
    || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
 | 
			
		||||
RUN --mount=type=cache,id=pip,sharing=locked,target=/tmp/pip-cache \
 | 
			
		||||
  pip install \
 | 
			
		||||
  --prefer-binary \
 | 
			
		||||
  --cache-dir=/tmp/pip-cache \
 | 
			
		||||
  --target=/dependencies \
 | 
			
		||||
  playwright~=1.48.0 \
 | 
			
		||||
  || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Final image stage
 | 
			
		||||
FROM python:${PYTHON_VERSION}-slim-bookworm
 | 
			
		||||
@@ -42,6 +66,8 @@ RUN apt-get update && apt-get install -y --no-install-recommends \
 | 
			
		||||
    locales \
 | 
			
		||||
    # For pdftohtml
 | 
			
		||||
    poppler-utils \
 | 
			
		||||
    # favicon type detection and other uses
 | 
			
		||||
    file \
 | 
			
		||||
    zlib1g \
 | 
			
		||||
    && apt-get clean && rm -rf /var/lib/apt/lists/*
 | 
			
		||||
 | 
			
		||||
@@ -62,6 +88,11 @@ EXPOSE 5000
 | 
			
		||||
 | 
			
		||||
# The actual flask app module
 | 
			
		||||
COPY changedetectionio /app/changedetectionio
 | 
			
		||||
 | 
			
		||||
# Also for OpenAPI validation wrapper - needs the YML
 | 
			
		||||
RUN [ ! -d "/app/docs" ] && mkdir /app/docs
 | 
			
		||||
COPY docs/api-spec.yaml /app/docs/api-spec.yaml
 | 
			
		||||
 | 
			
		||||
# Starting wrapper
 | 
			
		||||
COPY changedetection.py /app/changedetection.py
 | 
			
		||||
 | 
			
		||||
@@ -70,6 +101,9 @@ COPY changedetection.py /app/changedetection.py
 | 
			
		||||
ARG LOGGER_LEVEL=''
 | 
			
		||||
ENV LOGGER_LEVEL="$LOGGER_LEVEL"
 | 
			
		||||
 | 
			
		||||
# Default
 | 
			
		||||
ENV LC_ALL=en_US.UTF-8
 | 
			
		||||
 | 
			
		||||
WORKDIR /app
 | 
			
		||||
CMD ["python", "./changedetection.py", "-d", "/datastore"]
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										2
									
								
								LICENSE
									
									
									
									
									
								
							
							
						
						
									
										2
									
								
								LICENSE
									
									
									
									
									
								
							@@ -186,7 +186,7 @@
 | 
			
		||||
      same "printed page" as the copyright notice for easier
 | 
			
		||||
      identification within third-party archives.
 | 
			
		||||
 | 
			
		||||
   Copyright [yyyy] [name of copyright owner]
 | 
			
		||||
   Copyright 2025 Web Technologies s.r.o.
 | 
			
		||||
 | 
			
		||||
   Licensed under the Apache License, Version 2.0 (the "License");
 | 
			
		||||
   you may not use this file except in compliance with the License.
 | 
			
		||||
 
 | 
			
		||||
@@ -1,13 +1,17 @@
 | 
			
		||||
recursive-include changedetectionio/api *
 | 
			
		||||
include docs/api-spec.yaml
 | 
			
		||||
recursive-include changedetectionio/blueprint *
 | 
			
		||||
recursive-include changedetectionio/content_fetchers *
 | 
			
		||||
recursive-include changedetectionio/conditions *
 | 
			
		||||
recursive-include changedetectionio/content_fetchers *
 | 
			
		||||
recursive-include changedetectionio/jinja2_custom *
 | 
			
		||||
recursive-include changedetectionio/model *
 | 
			
		||||
recursive-include changedetectionio/notification *
 | 
			
		||||
recursive-include changedetectionio/processors *
 | 
			
		||||
recursive-include changedetectionio/realtime *
 | 
			
		||||
recursive-include changedetectionio/static *
 | 
			
		||||
recursive-include changedetectionio/templates *
 | 
			
		||||
recursive-include changedetectionio/tests *
 | 
			
		||||
recursive-include changedetectionio/widgets *
 | 
			
		||||
prune changedetectionio/static/package-lock.json
 | 
			
		||||
prune changedetectionio/static/styles/node_modules
 | 
			
		||||
prune changedetectionio/static/styles/package-lock.json
 | 
			
		||||
 
 | 
			
		||||
@@ -1,11 +1,21 @@
 | 
			
		||||
## Web Site Change Detection, Monitoring and Notification.
 | 
			
		||||
# Monitor website changes
 | 
			
		||||
 | 
			
		||||
Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more
 | 
			
		||||
Detect WebPage Changes Automatically — Monitor Web Page Changes in Real Time
 | 
			
		||||
 | 
			
		||||
Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more.
 | 
			
		||||
 | 
			
		||||
Detect web page content changes and get instant alerts.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
[Changedetection.io is the best tool to monitor web-pages for changes](https://changedetection.io) Track website content changes and receive notifications via Discord, Email, Slack, Telegram and 90+ more
 | 
			
		||||
 | 
			
		||||
Ideal for monitoring price changes, content edits, conditional changes and more.
 | 
			
		||||
 | 
			
		||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring, list of websites with changes"  title="Self-hosted web page change monitoring, list of websites with changes"  />](https://changedetection.io)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
[**Don't have time? Let us host it for you! try our extremely affordable subscription use our proxies and support!**](https://changedetection.io) 
 | 
			
		||||
[**Don't have time? Try our extremely affordable subscription use our proxies and support!**](https://changedetection.io) 
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
### Target specific parts of the webpage using the Visual Selector tool.
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										20
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										20
									
								
								README.md
									
									
									
									
									
								
							@@ -1,11 +1,13 @@
 | 
			
		||||
## Web Site Change Detection, Restock monitoring and notifications.
 | 
			
		||||
# Detect Website Changes Automatically — Monitor Web Page Changes in Real Time
 | 
			
		||||
 | 
			
		||||
**_Detect website content changes and perform meaningful actions - trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._**
 | 
			
		||||
Monitor websites for updates — get notified via Discord, Email, Slack, Telegram, Webhook and many more.
 | 
			
		||||
 | 
			
		||||
_Live your data-life pro-actively._ 
 | 
			
		||||
**Detect web page content changes and get instant alerts.**  
 | 
			
		||||
 | 
			
		||||
Ideal for monitoring price changes, content edits, conditional changes and more.
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web site page change monitoring"  title="Self-hosted web site page change monitoring"  />](https://changedetection.io?src=github)
 | 
			
		||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Web site page change monitoring"  title="Web site page change monitoring"  />](https://changedetection.io?src=github)
 | 
			
		||||
 | 
			
		||||
[![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)
 | 
			
		||||
 | 
			
		||||
@@ -13,6 +15,7 @@ _Live your data-life pro-actively._
 | 
			
		||||
 | 
			
		||||
[**Get started with website page change monitoring straight away. Don't have time? Try our $8.99/month subscription, use our proxies and support!**](https://changedetection.io) , _half the price of other website change monitoring services!_
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
- Chrome browser included.
 | 
			
		||||
- Nothing to install, access via browser login after signup.
 | 
			
		||||
- Super fast, no registration needed setup.
 | 
			
		||||
@@ -99,9 +102,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W
 | 
			
		||||
- Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration)
 | 
			
		||||
- Send a screenshot with the notification when a change is detected in the web page
 | 
			
		||||
 | 
			
		||||
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link.
 | 
			
		||||
 | 
			
		||||
[Oxylabs](https://oxylabs.go2cloud.org/SH2d) is also an excellent proxy provider and well worth using, they offer Residental, ISP, Rotating and many other proxy types to suit your project. 
 | 
			
		||||
We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $150 using our signup link.
 | 
			
		||||
 | 
			
		||||
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
 | 
			
		||||
 | 
			
		||||
@@ -279,7 +280,10 @@ Excel import is recommended - that way you can better organise tags/groups of we
 | 
			
		||||
 | 
			
		||||
## API Support
 | 
			
		||||
 | 
			
		||||
Supports managing the website watch list [via our API](https://changedetection.io/docs/api_v1/index.html)
 | 
			
		||||
Full REST API for programmatic management of watches, tags, notifications and more. 
 | 
			
		||||
 | 
			
		||||
- **[Interactive API Documentation](https://changedetection.io/docs/api_v1/index.html)** - Complete API reference with live testing
 | 
			
		||||
- **[OpenAPI Specification](docs/api-spec.yaml)** - Generate SDKs for any programming language
 | 
			
		||||
 | 
			
		||||
## Support us
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										98
									
								
								changedetectionio/PLUGIN_README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								changedetectionio/PLUGIN_README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,98 @@
 | 
			
		||||
# Creating Plugins for changedetection.io
 | 
			
		||||
 | 
			
		||||
This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways.
 | 
			
		||||
 | 
			
		||||
## Plugin Types
 | 
			
		||||
 | 
			
		||||
### UI Stats Tab Plugins
 | 
			
		||||
 | 
			
		||||
These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch.
 | 
			
		||||
 | 
			
		||||
#### Creating a UI Stats Tab Plugin
 | 
			
		||||
 | 
			
		||||
1. Create a Python file in a directory that will be loaded by the plugin system.
 | 
			
		||||
 | 
			
		||||
2. Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
import pluggy
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
 | 
			
		||||
 | 
			
		||||
@global_hookimpl
 | 
			
		||||
def ui_edit_stats_extras(watch):
 | 
			
		||||
    """Add custom content to the stats tab"""
 | 
			
		||||
    # Calculate or retrieve your stats
 | 
			
		||||
    my_stat = calculate_something(watch)
 | 
			
		||||
    
 | 
			
		||||
    # Return HTML content as a string
 | 
			
		||||
    html = f"""
 | 
			
		||||
    <div class="my-plugin-stats">
 | 
			
		||||
        <h4>My Plugin Statistics</h4>
 | 
			
		||||
        <p>My statistic: {my_stat}</p>
 | 
			
		||||
    </div>
 | 
			
		||||
    """
 | 
			
		||||
    return html
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
3. The HTML you return will be included in the Stats tab.
 | 
			
		||||
 | 
			
		||||
## Plugin Loading
 | 
			
		||||
 | 
			
		||||
Plugins can be loaded from:
 | 
			
		||||
 | 
			
		||||
1. Built-in plugin directories in the codebase
 | 
			
		||||
2. External packages using setuptools entry points
 | 
			
		||||
 | 
			
		||||
To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`.
 | 
			
		||||
 | 
			
		||||
## Example Plugin
 | 
			
		||||
 | 
			
		||||
Here's a simple example of a plugin that adds a word count statistic to the Stats tab:
 | 
			
		||||
 | 
			
		||||
```python
 | 
			
		||||
import pluggy
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
 | 
			
		||||
 | 
			
		||||
def count_words_in_history(watch):
 | 
			
		||||
    """Count words in the latest snapshot"""
 | 
			
		||||
    try:
 | 
			
		||||
        if not watch.history.keys():
 | 
			
		||||
            return 0
 | 
			
		||||
            
 | 
			
		||||
        latest_key = list(watch.history.keys())[-1]
 | 
			
		||||
        latest_content = watch.get_history_snapshot(latest_key)
 | 
			
		||||
        return len(latest_content.split())
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.error(f"Error counting words: {str(e)}")
 | 
			
		||||
        return 0
 | 
			
		||||
 | 
			
		||||
@global_hookimpl
 | 
			
		||||
def ui_edit_stats_extras(watch):
 | 
			
		||||
    """Add word count to the Stats tab"""
 | 
			
		||||
    word_count = count_words_in_history(watch)
 | 
			
		||||
    
 | 
			
		||||
    html = f"""
 | 
			
		||||
    <div class="word-count-stats">
 | 
			
		||||
        <h4>Content Analysis</h4>
 | 
			
		||||
        <table class="pure-table">
 | 
			
		||||
            <tbody>
 | 
			
		||||
                <tr>
 | 
			
		||||
                    <td>Word count (latest snapshot)</td>
 | 
			
		||||
                    <td>{word_count}</td>
 | 
			
		||||
                </tr>
 | 
			
		||||
            </tbody>
 | 
			
		||||
        </table>
 | 
			
		||||
    </div>
 | 
			
		||||
    """
 | 
			
		||||
    return html
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Testing Your Plugin
 | 
			
		||||
 | 
			
		||||
1. Place your plugin in one of the directories scanned by the plugin system
 | 
			
		||||
2. Restart changedetection.io
 | 
			
		||||
3. Go to the Edit page of a watch and check the Stats tab to see your content
 | 
			
		||||
@@ -2,20 +2,19 @@
 | 
			
		||||
 | 
			
		||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
 | 
			
		||||
 | 
			
		||||
__version__ = '0.49.14'
 | 
			
		||||
__version__ = '0.50.38'
 | 
			
		||||
 | 
			
		||||
from changedetectionio.strtobool import strtobool
 | 
			
		||||
from json.decoder import JSONDecodeError
 | 
			
		||||
import os
 | 
			
		||||
os.environ['EVENTLET_NO_GREENDNS'] = 'yes'
 | 
			
		||||
import eventlet
 | 
			
		||||
import eventlet.wsgi
 | 
			
		||||
import getopt
 | 
			
		||||
import platform
 | 
			
		||||
import signal
 | 
			
		||||
import socket
 | 
			
		||||
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
# Eventlet completely removed - using threading mode for SocketIO
 | 
			
		||||
# This provides better Python 3.12+ compatibility and eliminates eventlet/asyncio conflicts
 | 
			
		||||
from changedetectionio import store
 | 
			
		||||
from changedetectionio.flask_app import changedetection_app
 | 
			
		||||
from loguru import logger
 | 
			
		||||
@@ -30,13 +29,43 @@ def get_version():
 | 
			
		||||
# Parent wrapper or OS sends us a SIGTERM/SIGINT, do everything required for a clean shutdown
 | 
			
		||||
def sigshutdown_handler(_signo, _stack_frame):
 | 
			
		||||
    name = signal.Signals(_signo).name
 | 
			
		||||
    logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Saving DB to disk and calling shutdown')
 | 
			
		||||
    datastore.sync_to_json()
 | 
			
		||||
    logger.success('Sync JSON to disk complete.')
 | 
			
		||||
    # This will throw a SystemExit exception, because eventlet.wsgi.server doesn't know how to deal with it.
 | 
			
		||||
    # Solution: move to gevent or other server in the future (#2014)
 | 
			
		||||
    datastore.stop_thread = True
 | 
			
		||||
    logger.critical(f'Shutdown: Got Signal - {name} ({_signo}), Fast shutdown initiated')
 | 
			
		||||
    
 | 
			
		||||
    # Set exit flag immediately to stop all loops
 | 
			
		||||
    app.config.exit.set()
 | 
			
		||||
    datastore.stop_thread = True
 | 
			
		||||
    
 | 
			
		||||
    # Shutdown workers and queues immediately
 | 
			
		||||
    try:
 | 
			
		||||
        from changedetectionio import worker_handler
 | 
			
		||||
        worker_handler.shutdown_workers()
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.error(f"Error shutting down workers: {str(e)}")
 | 
			
		||||
    
 | 
			
		||||
    # Close janus queues properly
 | 
			
		||||
    try:
 | 
			
		||||
        from changedetectionio.flask_app import update_q, notification_q
 | 
			
		||||
        update_q.close()
 | 
			
		||||
        notification_q.close()
 | 
			
		||||
        logger.debug("Janus queues closed successfully")
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.critical(f"CRITICAL: Failed to close janus queues: {e}")
 | 
			
		||||
    
 | 
			
		||||
    # Shutdown socketio server fast
 | 
			
		||||
    from changedetectionio.flask_app import socketio_server
 | 
			
		||||
    if socketio_server and hasattr(socketio_server, 'shutdown'):
 | 
			
		||||
        try:
 | 
			
		||||
            socketio_server.shutdown()
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.error(f"Error shutting down Socket.IO server: {str(e)}")
 | 
			
		||||
    
 | 
			
		||||
    # Save data quickly
 | 
			
		||||
    try:
 | 
			
		||||
        datastore.sync_to_json()
 | 
			
		||||
        logger.success('Fast sync to disk complete.')
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.error(f"Error syncing to disk: {str(e)}")
 | 
			
		||||
    
 | 
			
		||||
    sys.exit()
 | 
			
		||||
 | 
			
		||||
def main():
 | 
			
		||||
@@ -45,9 +74,8 @@ def main():
 | 
			
		||||
 | 
			
		||||
    datastore_path = None
 | 
			
		||||
    do_cleanup = False
 | 
			
		||||
    host = ''
 | 
			
		||||
    ipv6_enabled = False
 | 
			
		||||
    port = os.environ.get('PORT') or 5000
 | 
			
		||||
    host = os.environ.get("LISTEN_HOST", "0.0.0.0").strip()
 | 
			
		||||
    port = int(os.environ.get('PORT', 5000))
 | 
			
		||||
    ssl_mode = False
 | 
			
		||||
 | 
			
		||||
    # On Windows, create and use a default path.
 | 
			
		||||
@@ -88,10 +116,6 @@ def main():
 | 
			
		||||
        if opt == '-d':
 | 
			
		||||
            datastore_path = arg
 | 
			
		||||
 | 
			
		||||
        if opt == '-6':
 | 
			
		||||
            logger.success("Enabling IPv6 listen support")
 | 
			
		||||
            ipv6_enabled = True
 | 
			
		||||
 | 
			
		||||
        # Cleanup (remove text files that arent in the index)
 | 
			
		||||
        if opt == '-c':
 | 
			
		||||
            do_cleanup = True
 | 
			
		||||
@@ -103,10 +127,24 @@ def main():
 | 
			
		||||
        if opt == '-l':
 | 
			
		||||
            logger_level = int(arg) if arg.isdigit() else arg.upper()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    logger.success(f"changedetection.io version {get_version()} starting.")
 | 
			
		||||
    # Launch using SocketIO run method for proper integration (if enabled)
 | 
			
		||||
    ssl_cert_file = os.getenv("SSL_CERT_FILE", 'cert.pem')
 | 
			
		||||
    ssl_privkey_file = os.getenv("SSL_PRIVKEY_FILE", 'privkey.pem')
 | 
			
		||||
    if os.getenv("SSL_CERT_FILE") and os.getenv("SSL_PRIVKEY_FILE"):
 | 
			
		||||
        ssl_mode = True
 | 
			
		||||
 | 
			
		||||
    # SSL mode could have been set by -s too, therefor fallback to default values
 | 
			
		||||
    if ssl_mode:
 | 
			
		||||
        if not os.path.isfile(ssl_cert_file) or not os.path.isfile(ssl_privkey_file):
 | 
			
		||||
            logger.critical(f"Cannot start SSL/HTTPS mode, Please be sure that {ssl_cert_file}' and '{ssl_privkey_file}' exist in in {os.getcwd()}")
 | 
			
		||||
            os._exit(2)
 | 
			
		||||
 | 
			
		||||
    # Without this, a logger will be duplicated
 | 
			
		||||
    logger.remove()
 | 
			
		||||
    try:
 | 
			
		||||
        log_level_for_stdout = { 'DEBUG', 'SUCCESS' }
 | 
			
		||||
        log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' }
 | 
			
		||||
        logger.configure(handlers=[
 | 
			
		||||
            {"sink": sys.stdout, "level": logger_level,
 | 
			
		||||
             "filter" : lambda record: record['level'].name in log_level_for_stdout},
 | 
			
		||||
@@ -143,6 +181,11 @@ def main():
 | 
			
		||||
 | 
			
		||||
    app = changedetection_app(app_config, datastore)
 | 
			
		||||
 | 
			
		||||
    # Get the SocketIO instance from the Flask app (created in flask_app.py)
 | 
			
		||||
    from changedetectionio.flask_app import socketio_server
 | 
			
		||||
    global socketio
 | 
			
		||||
    socketio = socketio_server
 | 
			
		||||
 | 
			
		||||
    signal.signal(signal.SIGTERM, sigshutdown_handler)
 | 
			
		||||
    signal.signal(signal.SIGINT, sigshutdown_handler)
 | 
			
		||||
    
 | 
			
		||||
@@ -167,10 +210,11 @@ def main():
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    @app.context_processor
 | 
			
		||||
    def inject_version():
 | 
			
		||||
    def inject_template_globals():
 | 
			
		||||
        return dict(right_sticky="v{}".format(datastore.data['version_tag']),
 | 
			
		||||
                    new_version_available=app.config['NEW_VERSION_AVAILABLE'],
 | 
			
		||||
                    has_password=datastore.data['settings']['application']['password'] != False
 | 
			
		||||
                    has_password=datastore.data['settings']['application']['password'] != False,
 | 
			
		||||
                    socket_io_enabled=datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
 | 
			
		||||
                    )
 | 
			
		||||
 | 
			
		||||
    # Monitored websites will not receive a Referer header when a user clicks on an outgoing link.
 | 
			
		||||
@@ -194,15 +238,21 @@ def main():
 | 
			
		||||
        from werkzeug.middleware.proxy_fix import ProxyFix
 | 
			
		||||
        app.wsgi_app = ProxyFix(app.wsgi_app, x_prefix=1, x_host=1)
 | 
			
		||||
 | 
			
		||||
    s_type = socket.AF_INET6 if ipv6_enabled else socket.AF_INET
 | 
			
		||||
 | 
			
		||||
    if ssl_mode:
 | 
			
		||||
        # @todo finalise SSL config, but this should get you in the right direction if you need it.
 | 
			
		||||
        eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), s_type),
 | 
			
		||||
                                               certfile='cert.pem',
 | 
			
		||||
                                               keyfile='privkey.pem',
 | 
			
		||||
                                               server_side=True), app)
 | 
			
		||||
 | 
			
		||||
    # SocketIO instance is already initialized in flask_app.py
 | 
			
		||||
    if socketio_server:
 | 
			
		||||
        if ssl_mode:
 | 
			
		||||
            logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
 | 
			
		||||
            socketio.run(app, host=host, port=int(port), debug=False,
 | 
			
		||||
                         ssl_context=(ssl_cert_file, ssl_privkey_file), allow_unsafe_werkzeug=True)
 | 
			
		||||
        else:
 | 
			
		||||
            socketio.run(app, host=host, port=int(port), debug=False, allow_unsafe_werkzeug=True)
 | 
			
		||||
    else:
 | 
			
		||||
        eventlet.wsgi.server(eventlet.listen((host, int(port)), s_type), app)
 | 
			
		||||
 | 
			
		||||
        # Run Flask app without Socket.IO if disabled
 | 
			
		||||
        logger.info("Starting Flask app without Socket.IO server")
 | 
			
		||||
        if ssl_mode:
 | 
			
		||||
            logger.success(f"SSL mode enabled, attempting to start with '{ssl_cert_file}' and '{ssl_privkey_file}' in {os.getcwd()}")
 | 
			
		||||
            app.run(host=host, port=int(port), debug=False,
 | 
			
		||||
                    ssl_context=(ssl_cert_file, ssl_privkey_file))
 | 
			
		||||
        else:
 | 
			
		||||
            app.run(host=host, port=int(port), debug=False)
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,22 @@
 | 
			
		||||
import os
 | 
			
		||||
from changedetectionio.strtobool import strtobool
 | 
			
		||||
from flask_restful import abort, Resource
 | 
			
		||||
from flask import request
 | 
			
		||||
import validators
 | 
			
		||||
from . import auth
 | 
			
		||||
from functools import wraps
 | 
			
		||||
from . import auth, validate_openapi_request
 | 
			
		||||
from ..validate_url import is_safe_valid_url
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def default_content_type(content_type='text/plain'):
 | 
			
		||||
    """Decorator to set a default Content-Type header if none is provided."""
 | 
			
		||||
    def decorator(f):
 | 
			
		||||
        @wraps(f)
 | 
			
		||||
        def wrapper(*args, **kwargs):
 | 
			
		||||
            if not request.content_type:
 | 
			
		||||
                # Set default content type in the request environment
 | 
			
		||||
                request.environ['CONTENT_TYPE'] = content_type
 | 
			
		||||
            return f(*args, **kwargs)
 | 
			
		||||
        return wrapper
 | 
			
		||||
    return decorator
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Import(Resource):
 | 
			
		||||
@@ -12,17 +25,10 @@ class Import(Resource):
 | 
			
		||||
        self.datastore = kwargs['datastore']
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @default_content_type('text/plain') #3547 #3542
 | 
			
		||||
    @validate_openapi_request('importWatches')
 | 
			
		||||
    def post(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {post} /api/v1/import Import a list of watched URLs
 | 
			
		||||
        @apiDescription Accepts a line-feed separated list of URLs to import, additionally with ?tag_uuids=(tag  id), ?tag=(name), ?proxy={key}, ?dedupe=true (default true) one URL per line.
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/import --data-binary @list-of-sites.txt -H"x-api-key:8a111a21bc2f8f1dd9b9353bbd46049a"
 | 
			
		||||
        @apiName Import
 | 
			
		||||
        @apiGroup Watch
 | 
			
		||||
        @apiSuccess (200) {List} OK List of watch UUIDs added
 | 
			
		||||
        @apiSuccess (500) {String} ERR Some other error
 | 
			
		||||
        """
 | 
			
		||||
        """Import a list of watched URLs."""
 | 
			
		||||
 | 
			
		||||
        extras = {}
 | 
			
		||||
 | 
			
		||||
@@ -43,14 +49,13 @@ class Import(Resource):
 | 
			
		||||
 | 
			
		||||
        urls = request.get_data().decode('utf8').splitlines()
 | 
			
		||||
        added = []
 | 
			
		||||
        allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
 | 
			
		||||
        for url in urls:
 | 
			
		||||
            url = url.strip()
 | 
			
		||||
            if not len(url):
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            # If hosts that only contain alphanumerics are allowed ("localhost" for example)
 | 
			
		||||
            if not validators.url(url, simple_host=allow_simplehost):
 | 
			
		||||
            if not is_safe_valid_url(url):
 | 
			
		||||
                return f"Invalid or unsupported URL - {url}", 400
 | 
			
		||||
 | 
			
		||||
            if dedupe and self.datastore.url_exists(url):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,7 @@
 | 
			
		||||
from flask_expects_json import expects_json
 | 
			
		||||
from flask_restful import Resource
 | 
			
		||||
from . import auth
 | 
			
		||||
from flask_restful import abort, Resource
 | 
			
		||||
from flask_restful import Resource, abort
 | 
			
		||||
from flask import request
 | 
			
		||||
from . import auth
 | 
			
		||||
from . import auth, validate_openapi_request
 | 
			
		||||
from . import schema_create_notification_urls, schema_delete_notification_urls
 | 
			
		||||
 | 
			
		||||
class Notifications(Resource):
 | 
			
		||||
@@ -12,19 +10,9 @@ class Notifications(Resource):
 | 
			
		||||
        self.datastore = kwargs['datastore']
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('getNotifications')
 | 
			
		||||
    def get(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {get} /api/v1/notifications Return Notification URL List
 | 
			
		||||
        @apiDescription Return the Notification URL List from the configuration
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
            HTTP/1.0 200
 | 
			
		||||
            {
 | 
			
		||||
                'notification_urls': ["notification-urls-list"]
 | 
			
		||||
            }
 | 
			
		||||
        @apiName Get
 | 
			
		||||
        @apiGroup Notifications
 | 
			
		||||
        """
 | 
			
		||||
        """Return Notification URL List."""
 | 
			
		||||
 | 
			
		||||
        notification_urls = self.datastore.data.get('settings', {}).get('application', {}).get('notification_urls', [])        
 | 
			
		||||
 | 
			
		||||
@@ -33,18 +21,10 @@ class Notifications(Resource):
 | 
			
		||||
               }, 200
 | 
			
		||||
    
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('addNotifications')
 | 
			
		||||
    @expects_json(schema_create_notification_urls)
 | 
			
		||||
    def post(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {post} /api/v1/notifications Create Notification URLs
 | 
			
		||||
        @apiDescription Add one or more notification URLs from the configuration
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/notifications/batch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
 | 
			
		||||
        @apiName CreateBatch
 | 
			
		||||
        @apiGroup Notifications
 | 
			
		||||
        @apiSuccess (201) {Object[]} notification_urls List of added notification URLs
 | 
			
		||||
        @apiError (400) {String} Invalid input
 | 
			
		||||
        """
 | 
			
		||||
        """Create Notification URLs."""
 | 
			
		||||
 | 
			
		||||
        json_data = request.get_json()
 | 
			
		||||
        notification_urls = json_data.get("notification_urls", [])
 | 
			
		||||
@@ -69,18 +49,10 @@ class Notifications(Resource):
 | 
			
		||||
        return {'notification_urls': added_urls}, 201
 | 
			
		||||
    
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('replaceNotifications')
 | 
			
		||||
    @expects_json(schema_create_notification_urls)
 | 
			
		||||
    def put(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {put} /api/v1/notifications Replace Notification URLs
 | 
			
		||||
        @apiDescription Replace all notification URLs with the provided list (can be empty)
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl -X PUT http://localhost:5000/api/v1/notifications -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
 | 
			
		||||
        @apiName Replace
 | 
			
		||||
        @apiGroup Notifications
 | 
			
		||||
        @apiSuccess (200) {Object[]} notification_urls List of current notification URLs
 | 
			
		||||
        @apiError (400) {String} Invalid input
 | 
			
		||||
        """
 | 
			
		||||
        """Replace Notification URLs."""
 | 
			
		||||
        json_data = request.get_json()
 | 
			
		||||
        notification_urls = json_data.get("notification_urls", [])
 | 
			
		||||
 | 
			
		||||
@@ -100,19 +72,10 @@ class Notifications(Resource):
 | 
			
		||||
        return {'notification_urls': clean_urls}, 200
 | 
			
		||||
        
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('deleteNotifications')
 | 
			
		||||
    @expects_json(schema_delete_notification_urls)
 | 
			
		||||
    def delete(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {delete} /api/v1/notifications Delete Notification URLs
 | 
			
		||||
        @apiDescription Deletes one or more notification URLs from the configuration
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/notifications -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"notification_urls": ["url1", "url2"]}'
 | 
			
		||||
        @apiParam {String[]} notification_urls The notification URLs to delete.
 | 
			
		||||
        @apiName Delete
 | 
			
		||||
        @apiGroup Notifications
 | 
			
		||||
        @apiSuccess (204) {String} OK Deleted
 | 
			
		||||
        @apiError (400) {String} No matching notification URLs found.
 | 
			
		||||
        """
 | 
			
		||||
        """Delete Notification URLs."""
 | 
			
		||||
 | 
			
		||||
        json_data = request.get_json()
 | 
			
		||||
        urls_to_delete = json_data.get("notification_urls", [])
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
from flask_restful import Resource, abort
 | 
			
		||||
from flask import request
 | 
			
		||||
from . import auth
 | 
			
		||||
from . import auth, validate_openapi_request
 | 
			
		||||
 | 
			
		||||
class Search(Resource):
 | 
			
		||||
    def __init__(self, **kwargs):
 | 
			
		||||
@@ -8,21 +8,9 @@ class Search(Resource):
 | 
			
		||||
        self.datastore = kwargs['datastore']
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('searchWatches')
 | 
			
		||||
    def get(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {get} /api/v1/search Search for watches
 | 
			
		||||
        @apiDescription Search watches by URL or title text
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl "http://localhost:5000/api/v1/search?q=https://example.com/page1" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
            curl "http://localhost:5000/api/v1/search?q=https://example.com/page1?tag=Favourites" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
            curl "http://localhost:5000/api/v1/search?q=https://example.com?partial=true" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
        @apiName Search
 | 
			
		||||
        @apiGroup Watch Management
 | 
			
		||||
        @apiQuery {String} q Search query to match against watch URLs and titles
 | 
			
		||||
        @apiQuery {String} [tag] Optional name of tag to limit results (name not UUID)
 | 
			
		||||
        @apiQuery {String} [partial] Allow partial matching of URL query
 | 
			
		||||
        @apiSuccess (200) {Object} JSON Object containing matched watches
 | 
			
		||||
        """
 | 
			
		||||
        """Search for watches by URL or title text."""
 | 
			
		||||
        query = request.args.get('q', '').strip()
 | 
			
		||||
        tag_limit = request.args.get('tag', '').strip()
 | 
			
		||||
        from changedetectionio.strtobool import strtobool
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
from flask_restful import Resource
 | 
			
		||||
from . import auth
 | 
			
		||||
from . import auth, validate_openapi_request
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SystemInfo(Resource):
 | 
			
		||||
@@ -9,23 +9,9 @@ class SystemInfo(Resource):
 | 
			
		||||
        self.update_q = kwargs['update_q']
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('getSystemInfo')
 | 
			
		||||
    def get(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {get} /api/v1/systeminfo Return system info
 | 
			
		||||
        @apiDescription Return some info about the current system state
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/systeminfo -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
            HTTP/1.0 200
 | 
			
		||||
            {
 | 
			
		||||
                'queue_size': 10 ,
 | 
			
		||||
                'overdue_watches': ["watch-uuid-list"],
 | 
			
		||||
                'uptime': 38344.55,
 | 
			
		||||
                'watch_count': 800,
 | 
			
		||||
                'version': "0.40.1"
 | 
			
		||||
            }
 | 
			
		||||
        @apiName Get Info
 | 
			
		||||
        @apiGroup System Information
 | 
			
		||||
        """
 | 
			
		||||
        """Return system info."""
 | 
			
		||||
        import time
 | 
			
		||||
        overdue_watches = []
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,39 +1,46 @@
 | 
			
		||||
from changedetectionio import queuedWatchMetaData
 | 
			
		||||
from changedetectionio import worker_handler
 | 
			
		||||
from flask_expects_json import expects_json
 | 
			
		||||
from flask_restful import abort, Resource
 | 
			
		||||
 | 
			
		||||
from flask import request
 | 
			
		||||
from . import auth
 | 
			
		||||
 | 
			
		||||
# Import schemas from __init__.py
 | 
			
		||||
from . import schema_tag, schema_create_tag, schema_update_tag
 | 
			
		||||
from . import schema_tag, schema_create_tag, schema_update_tag, validate_openapi_request
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Tag(Resource):
 | 
			
		||||
    def __init__(self, **kwargs):
 | 
			
		||||
        # datastore is a black box dependency
 | 
			
		||||
        self.datastore = kwargs['datastore']
 | 
			
		||||
        self.update_q = kwargs['update_q']
 | 
			
		||||
 | 
			
		||||
    # Get information about a single tag
 | 
			
		||||
    # curl http://localhost:5000/api/v1/tag/<string:uuid>
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('getTag')
 | 
			
		||||
    def get(self, uuid):
 | 
			
		||||
        """
 | 
			
		||||
        @api {get} /api/v1/tag/:uuid Single tag - get data or toggle notification muting.
 | 
			
		||||
        @apiDescription Retrieve tag information and set notification_muted status
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
            curl "http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=muted" -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
        @apiName Tag
 | 
			
		||||
        @apiGroup Tag
 | 
			
		||||
        @apiParam {uuid} uuid Tag unique ID.
 | 
			
		||||
        @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state
 | 
			
		||||
        @apiSuccess (200) {String} OK When muted operation OR full JSON object of the tag
 | 
			
		||||
        @apiSuccess (200) {JSON} TagJSON JSON Full JSON object of the tag
 | 
			
		||||
        """
 | 
			
		||||
        """Get data for a single tag/group, toggle notification muting, or recheck all."""
 | 
			
		||||
        from copy import deepcopy
 | 
			
		||||
        tag = deepcopy(self.datastore.data['settings']['application']['tags'].get(uuid))
 | 
			
		||||
        if not tag:
 | 
			
		||||
            abort(404, message=f'No tag exists with the UUID of {uuid}')
 | 
			
		||||
 | 
			
		||||
        if request.args.get('recheck'):
 | 
			
		||||
            # Recheck all, including muted
 | 
			
		||||
            # Get most overdue first
 | 
			
		||||
            i=0
 | 
			
		||||
            for k in sorted(self.datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
 | 
			
		||||
                watch_uuid = k[0]
 | 
			
		||||
                watch = k[1]
 | 
			
		||||
                if not watch['paused'] and tag['uuid'] not in watch['tags']:
 | 
			
		||||
                    continue
 | 
			
		||||
                worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
 | 
			
		||||
                i+=1
 | 
			
		||||
 | 
			
		||||
            return f"OK, {i} watches queued", 200
 | 
			
		||||
 | 
			
		||||
        if request.args.get('muted', '') == 'muted':
 | 
			
		||||
            self.datastore.data['settings']['application']['tags'][uuid]['notification_muted'] = True
 | 
			
		||||
            return "OK", 200
 | 
			
		||||
@@ -44,16 +51,9 @@ class Tag(Resource):
 | 
			
		||||
        return tag
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('deleteTag')
 | 
			
		||||
    def delete(self, uuid):
 | 
			
		||||
        """
 | 
			
		||||
        @api {delete} /api/v1/tag/:uuid Delete a tag and remove it from all watches
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
        @apiParam {uuid} uuid Tag unique ID.
 | 
			
		||||
        @apiName DeleteTag
 | 
			
		||||
        @apiGroup Tag
 | 
			
		||||
        @apiSuccess (200) {String} OK Was deleted
 | 
			
		||||
        """
 | 
			
		||||
        """Delete a tag/group and remove it from all watches."""
 | 
			
		||||
        if not self.datastore.data['settings']['application']['tags'].get(uuid):
 | 
			
		||||
            abort(400, message='No tag exists with the UUID of {}'.format(uuid))
 | 
			
		||||
 | 
			
		||||
@@ -68,21 +68,10 @@ class Tag(Resource):
 | 
			
		||||
        return 'OK', 204
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('updateTag')
 | 
			
		||||
    @expects_json(schema_update_tag)
 | 
			
		||||
    def put(self, uuid):
 | 
			
		||||
        """
 | 
			
		||||
        @api {put} /api/v1/tag/:uuid Update tag information
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            Update (PUT)
 | 
			
		||||
            curl http://localhost:5000/api/v1/tag/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"title": "New Tag Title"}'
 | 
			
		||||
 | 
			
		||||
        @apiDescription Updates an existing tag using JSON
 | 
			
		||||
        @apiParam {uuid} uuid Tag unique ID.
 | 
			
		||||
        @apiName UpdateTag
 | 
			
		||||
        @apiGroup Tag
 | 
			
		||||
        @apiSuccess (200) {String} OK Was updated
 | 
			
		||||
        @apiSuccess (500) {String} ERR Some other error
 | 
			
		||||
        """
 | 
			
		||||
        """Update tag information."""
 | 
			
		||||
        tag = self.datastore.data['settings']['application']['tags'].get(uuid)
 | 
			
		||||
        if not tag:
 | 
			
		||||
            abort(404, message='No tag exists with the UUID of {}'.format(uuid))
 | 
			
		||||
@@ -94,17 +83,10 @@ class Tag(Resource):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('createTag')
 | 
			
		||||
    # Only cares for {'title': 'xxxx'}
 | 
			
		||||
    def post(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {post} /api/v1/watch Create a single tag
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"name": "Work related"}'
 | 
			
		||||
        @apiName Create
 | 
			
		||||
        @apiGroup Tag
 | 
			
		||||
        @apiSuccess (200) {String} OK Was created
 | 
			
		||||
        @apiSuccess (500) {String} ERR Some other error
 | 
			
		||||
        """
 | 
			
		||||
        """Create a single tag/group."""
 | 
			
		||||
 | 
			
		||||
        json_data = request.get_json()
 | 
			
		||||
        title = json_data.get("title",'').strip()
 | 
			
		||||
@@ -122,28 +104,9 @@ class Tags(Resource):
 | 
			
		||||
        self.datastore = kwargs['datastore']
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('listTags')
 | 
			
		||||
    def get(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {get} /api/v1/tags List tags
 | 
			
		||||
        @apiDescription Return list of available tags
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/tags -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
            {
 | 
			
		||||
                "cc0cfffa-f449-477b-83ea-0caafd1dc091": {
 | 
			
		||||
                    "title": "Tech News",
 | 
			
		||||
                    "notification_muted": false,
 | 
			
		||||
                    "date_created": 1677103794
 | 
			
		||||
                },
 | 
			
		||||
                "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
 | 
			
		||||
                    "title": "Shopping",
 | 
			
		||||
                    "notification_muted": true,
 | 
			
		||||
                    "date_created": 1676662819
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
        @apiName ListTags
 | 
			
		||||
        @apiGroup Tag Management
 | 
			
		||||
        @apiSuccess (200) {String} OK JSON dict
 | 
			
		||||
        """
 | 
			
		||||
        """List tags/groups."""
 | 
			
		||||
        result = {}
 | 
			
		||||
        for uuid, tag in self.datastore.data['settings']['application']['tags'].items():
 | 
			
		||||
            result[uuid] = {
 | 
			
		||||
 
 | 
			
		||||
@@ -1,16 +1,50 @@
 | 
			
		||||
import os
 | 
			
		||||
from changedetectionio.strtobool import strtobool
 | 
			
		||||
 | 
			
		||||
from changedetectionio.validate_url import is_safe_valid_url
 | 
			
		||||
 | 
			
		||||
from flask_expects_json import expects_json
 | 
			
		||||
from changedetectionio import queuedWatchMetaData
 | 
			
		||||
from changedetectionio import worker_handler
 | 
			
		||||
from flask_restful import abort, Resource
 | 
			
		||||
from flask import request, make_response
 | 
			
		||||
import validators
 | 
			
		||||
from flask import request, make_response, send_from_directory
 | 
			
		||||
from . import auth
 | 
			
		||||
import copy
 | 
			
		||||
 | 
			
		||||
# Import schemas from __init__.py
 | 
			
		||||
from . import schema, schema_create_watch, schema_update_watch
 | 
			
		||||
from . import schema, schema_create_watch, schema_update_watch, validate_openapi_request
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def validate_time_between_check_required(json_data):
 | 
			
		||||
    """
 | 
			
		||||
    Validate that at least one time interval is specified when not using default settings.
 | 
			
		||||
    Returns None if valid, or error message string if invalid.
 | 
			
		||||
    Defaults to using global settings if time_between_check_use_default is not provided.
 | 
			
		||||
    """
 | 
			
		||||
    # Default to using global settings if not specified
 | 
			
		||||
    use_default = json_data.get('time_between_check_use_default', True)
 | 
			
		||||
 | 
			
		||||
    # If using default settings, no validation needed
 | 
			
		||||
    if use_default:
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    # If not using defaults, check if time_between_check exists and has at least one non-zero value
 | 
			
		||||
    time_check = json_data.get('time_between_check')
 | 
			
		||||
    if not time_check:
 | 
			
		||||
        # No time_between_check provided and not using defaults - this is an error
 | 
			
		||||
        return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings."
 | 
			
		||||
 | 
			
		||||
    # time_between_check exists, check if it has at least one non-zero value
 | 
			
		||||
    if any([
 | 
			
		||||
        (time_check.get('weeks') or 0) > 0,
 | 
			
		||||
        (time_check.get('days') or 0) > 0,
 | 
			
		||||
        (time_check.get('hours') or 0) > 0,
 | 
			
		||||
        (time_check.get('minutes') or 0) > 0,
 | 
			
		||||
        (time_check.get('seconds') or 0) > 0
 | 
			
		||||
    ]):
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    # time_between_check exists but all values are 0 or empty - this is an error
 | 
			
		||||
    return "At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings."
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Watch(Resource):
 | 
			
		||||
@@ -24,30 +58,16 @@ class Watch(Resource):
 | 
			
		||||
    # @todo - version2 - ?muted and ?paused should be able to be called together, return the watch struct not "OK"
 | 
			
		||||
    # ?recheck=true
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('getWatch')
 | 
			
		||||
    def get(self, uuid):
 | 
			
		||||
        """
 | 
			
		||||
        @api {get} /api/v1/watch/:uuid Single watch - get data, recheck, pause, mute.
 | 
			
		||||
        @apiDescription Retrieve watch information and set muted/paused status
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091  -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
            curl "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?muted=unmuted"  -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
            curl "http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091?paused=unpaused"  -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
        @apiName Watch
 | 
			
		||||
        @apiGroup Watch
 | 
			
		||||
        @apiParam {uuid} uuid Watch unique ID.
 | 
			
		||||
        @apiQuery {Boolean} [recheck] Recheck this watch `recheck=1`
 | 
			
		||||
        @apiQuery {String} [paused] =`paused` or =`unpaused` , Sets the PAUSED state
 | 
			
		||||
        @apiQuery {String} [muted] =`muted` or =`unmuted` , Sets the MUTE NOTIFICATIONS state
 | 
			
		||||
        @apiSuccess (200) {String} OK When paused/muted/recheck operation OR full JSON object of the watch
 | 
			
		||||
        @apiSuccess (200) {JSON} WatchJSON JSON Full JSON object of the watch
 | 
			
		||||
        """
 | 
			
		||||
        """Get information about a single watch, recheck, pause, or mute."""
 | 
			
		||||
        from copy import deepcopy
 | 
			
		||||
        watch = deepcopy(self.datastore.data['watching'].get(uuid))
 | 
			
		||||
        if not watch:
 | 
			
		||||
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
 | 
			
		||||
 | 
			
		||||
        if request.args.get('recheck'):
 | 
			
		||||
            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
            worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
            return "OK", 200
 | 
			
		||||
        if request.args.get('paused', '') == 'paused':
 | 
			
		||||
            self.datastore.data['watching'].get(uuid).pause()
 | 
			
		||||
@@ -68,19 +88,14 @@ class Watch(Resource):
 | 
			
		||||
        # attr .last_changed will check for the last written text snapshot on change
 | 
			
		||||
        watch['last_changed'] = watch.last_changed
 | 
			
		||||
        watch['viewed'] = watch.viewed
 | 
			
		||||
        watch['link'] = watch.link,
 | 
			
		||||
 | 
			
		||||
        return watch
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('deleteWatch')
 | 
			
		||||
    def delete(self, uuid):
 | 
			
		||||
        """
 | 
			
		||||
        @api {delete} /api/v1/watch/:uuid Delete a watch and related history
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X DELETE -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
        @apiParam {uuid} uuid Watch unique ID.
 | 
			
		||||
        @apiName Delete
 | 
			
		||||
        @apiGroup Watch
 | 
			
		||||
        @apiSuccess (200) {String} OK Was deleted
 | 
			
		||||
        """
 | 
			
		||||
        """Delete a watch and related history."""
 | 
			
		||||
        if not self.datastore.data['watching'].get(uuid):
 | 
			
		||||
            abort(400, message='No watch exists with the UUID of {}'.format(uuid))
 | 
			
		||||
 | 
			
		||||
@@ -88,21 +103,10 @@ class Watch(Resource):
 | 
			
		||||
        return 'OK', 204
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('updateWatch')
 | 
			
		||||
    @expects_json(schema_update_watch)
 | 
			
		||||
    def put(self, uuid):
 | 
			
		||||
        """
 | 
			
		||||
        @api {put} /api/v1/watch/:uuid Update watch information
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            Update (PUT)
 | 
			
		||||
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091 -X PUT -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "new list"}'
 | 
			
		||||
 | 
			
		||||
        @apiDescription Updates an existing watch using JSON, accepts the same structure as returned in <a href="#api-Watch-Watch">get single watch information</a>
 | 
			
		||||
        @apiParam {uuid} uuid Watch unique ID.
 | 
			
		||||
        @apiName Update a watch
 | 
			
		||||
        @apiGroup Watch
 | 
			
		||||
        @apiSuccess (200) {String} OK Was updated
 | 
			
		||||
        @apiSuccess (500) {String} ERR Some other error
 | 
			
		||||
        """
 | 
			
		||||
        """Update watch information."""
 | 
			
		||||
        watch = self.datastore.data['watching'].get(uuid)
 | 
			
		||||
        if not watch:
 | 
			
		||||
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
 | 
			
		||||
@@ -112,6 +116,15 @@ class Watch(Resource):
 | 
			
		||||
            if not request.json.get('proxy') in plist:
 | 
			
		||||
                return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
 | 
			
		||||
 | 
			
		||||
        # Validate time_between_check when not using defaults
 | 
			
		||||
        validation_error = validate_time_between_check_required(request.json)
 | 
			
		||||
        if validation_error:
 | 
			
		||||
            return validation_error, 400
 | 
			
		||||
 | 
			
		||||
        # XSS etc protection
 | 
			
		||||
        if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
 | 
			
		||||
            return "Invalid URL", 400
 | 
			
		||||
 | 
			
		||||
        watch.update(request.json)
 | 
			
		||||
 | 
			
		||||
        return "OK", 200
 | 
			
		||||
@@ -125,22 +138,9 @@ class WatchHistory(Resource):
 | 
			
		||||
    # Get a list of available history for a watch by UUID
 | 
			
		||||
    # curl http://localhost:5000/api/v1/watch/<string:uuid>/history
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('getWatchHistory')
 | 
			
		||||
    def get(self, uuid):
 | 
			
		||||
        """
 | 
			
		||||
        @api {get} /api/v1/watch/<string:uuid>/history Get a list of all historical snapshots available for a watch
 | 
			
		||||
        @apiDescription Requires `uuid`, returns list
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
 | 
			
		||||
            {
 | 
			
		||||
                "1676649279": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/cb7e9be8258368262246910e6a2a4c30.txt",
 | 
			
		||||
                "1677092785": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/e20db368d6fc633e34f559ff67bb4044.txt",
 | 
			
		||||
                "1677103794": "/tmp/data/6a4b7d5c-fee4-4616-9f43-4ac97046b595/02efdd37dacdae96554a8cc85dc9c945.txt"
 | 
			
		||||
            }
 | 
			
		||||
        @apiName Get list of available stored snapshots for watch
 | 
			
		||||
        @apiGroup Watch History
 | 
			
		||||
        @apiSuccess (200) {String} OK
 | 
			
		||||
        @apiSuccess (404) {String} ERR Not found
 | 
			
		||||
        """
 | 
			
		||||
        """Get a list of all historical snapshots available for a watch."""
 | 
			
		||||
        watch = self.datastore.data['watching'].get(uuid)
 | 
			
		||||
        if not watch:
 | 
			
		||||
            abort(404, message='No watch exists with the UUID of {}'.format(uuid))
 | 
			
		||||
@@ -153,18 +153,9 @@ class WatchSingleHistory(Resource):
 | 
			
		||||
        self.datastore = kwargs['datastore']
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('getWatchSnapshot')
 | 
			
		||||
    def get(self, uuid, timestamp):
 | 
			
		||||
        """
 | 
			
		||||
        @api {get} /api/v1/watch/<string:uuid>/history/<int:timestamp> Get single snapshot from watch
 | 
			
		||||
        @apiDescription Requires watch `uuid` and `timestamp`. `timestamp` of "`latest`" for latest available snapshot, or <a href="#api-Watch_History-Get_list_of_available_stored_snapshots_for_watch">use the list returned here</a>
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/watch/cc0cfffa-f449-477b-83ea-0caafd1dc091/history/1677092977 -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json"
 | 
			
		||||
        @apiName Get single snapshot content
 | 
			
		||||
        @apiGroup Watch History
 | 
			
		||||
        @apiParam {String} [html]       Optional Set to =1 to return the last HTML (only stores last 2 snapshots, use `latest` as timestamp)
 | 
			
		||||
        @apiSuccess (200) {String} OK
 | 
			
		||||
        @apiSuccess (404) {String} ERR Not found
 | 
			
		||||
        """
 | 
			
		||||
        """Get single snapshot from watch."""
 | 
			
		||||
        watch = self.datastore.data['watching'].get(uuid)
 | 
			
		||||
        if not watch:
 | 
			
		||||
            abort(404, message=f"No watch exists with the UUID of {uuid}")
 | 
			
		||||
@@ -190,6 +181,39 @@ class WatchSingleHistory(Resource):
 | 
			
		||||
 | 
			
		||||
        return response
 | 
			
		||||
 | 
			
		||||
class WatchFavicon(Resource):
 | 
			
		||||
    def __init__(self, **kwargs):
 | 
			
		||||
        # datastore is a black box dependency
 | 
			
		||||
        self.datastore = kwargs['datastore']
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('getWatchFavicon')
 | 
			
		||||
    def get(self, uuid):
 | 
			
		||||
        """Get favicon for a watch."""
 | 
			
		||||
        watch = self.datastore.data['watching'].get(uuid)
 | 
			
		||||
        if not watch:
 | 
			
		||||
            abort(404, message=f"No watch exists with the UUID of {uuid}")
 | 
			
		||||
 | 
			
		||||
        favicon_filename = watch.get_favicon_filename()
 | 
			
		||||
        if favicon_filename:
 | 
			
		||||
            try:
 | 
			
		||||
                import magic
 | 
			
		||||
                mime = magic.from_file(
 | 
			
		||||
                    os.path.join(watch.watch_data_dir, favicon_filename),
 | 
			
		||||
                    mime=True
 | 
			
		||||
                )
 | 
			
		||||
            except ImportError:
 | 
			
		||||
                # Fallback, no python-magic
 | 
			
		||||
                import mimetypes
 | 
			
		||||
                mime, encoding = mimetypes.guess_type(favicon_filename)
 | 
			
		||||
 | 
			
		||||
            response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
 | 
			
		||||
            response.headers['Content-type'] = mime
 | 
			
		||||
            response.headers['Cache-Control'] = 'max-age=300, must-revalidate'  # Cache for 5 minutes, then revalidate
 | 
			
		||||
            return response
 | 
			
		||||
 | 
			
		||||
        abort(404, message=f'No Favicon available for {uuid}')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class CreateWatch(Resource):
 | 
			
		||||
    def __init__(self, **kwargs):
 | 
			
		||||
@@ -198,25 +222,15 @@ class CreateWatch(Resource):
 | 
			
		||||
        self.update_q = kwargs['update_q']
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('createWatch')
 | 
			
		||||
    @expects_json(schema_create_watch)
 | 
			
		||||
    def post(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {post} /api/v1/watch Create a single watch
 | 
			
		||||
        @apiDescription Requires atleast `url` set, can accept the same structure as <a href="#api-Watch-Watch">get single watch information</a> to create.
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45" -H "Content-Type: application/json" -d '{"url": "https://my-nice.com" , "tag": "nice list"}'
 | 
			
		||||
        @apiName Create
 | 
			
		||||
        @apiGroup Watch
 | 
			
		||||
        @apiSuccess (200) {String} OK Was created
 | 
			
		||||
        @apiSuccess (500) {String} ERR Some other error
 | 
			
		||||
        """
 | 
			
		||||
        """Create a single watch."""
 | 
			
		||||
 | 
			
		||||
        json_data = request.get_json()
 | 
			
		||||
        url = json_data['url'].strip()
 | 
			
		||||
 | 
			
		||||
        # If hosts that only contain alphanumerics are allowed ("localhost" for example)
 | 
			
		||||
        allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
 | 
			
		||||
        if not validators.url(url, simple_host=allow_simplehost):
 | 
			
		||||
        if not is_safe_valid_url(url):
 | 
			
		||||
            return "Invalid or unsupported URL", 400
 | 
			
		||||
 | 
			
		||||
        if json_data.get('proxy'):
 | 
			
		||||
@@ -224,6 +238,11 @@ class CreateWatch(Resource):
 | 
			
		||||
            if not json_data.get('proxy') in plist:
 | 
			
		||||
                return "Invalid proxy choice, currently supported proxies are '{}'".format(', '.join(plist)), 400
 | 
			
		||||
 | 
			
		||||
        # Validate time_between_check when not using defaults
 | 
			
		||||
        validation_error = validate_time_between_check_required(json_data)
 | 
			
		||||
        if validation_error:
 | 
			
		||||
            return validation_error, 400
 | 
			
		||||
 | 
			
		||||
        extras = copy.deepcopy(json_data)
 | 
			
		||||
 | 
			
		||||
        # Because we renamed 'tag' to 'tags' but don't want to change the API (can do this in v2 of the API)
 | 
			
		||||
@@ -236,41 +255,15 @@ class CreateWatch(Resource):
 | 
			
		||||
 | 
			
		||||
        new_uuid = self.datastore.add_watch(url=url, extras=extras, tag=tags)
 | 
			
		||||
        if new_uuid:
 | 
			
		||||
            self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
 | 
			
		||||
            worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
 | 
			
		||||
            return {'uuid': new_uuid}, 201
 | 
			
		||||
        else:
 | 
			
		||||
            return "Invalid or unsupported URL", 400
 | 
			
		||||
 | 
			
		||||
    @auth.check_token
 | 
			
		||||
    @validate_openapi_request('listWatches')
 | 
			
		||||
    def get(self):
 | 
			
		||||
        """
 | 
			
		||||
        @api {get} /api/v1/watch List watches
 | 
			
		||||
        @apiDescription Return concise list of available watches and some very basic info
 | 
			
		||||
        @apiExample {curl} Example usage:
 | 
			
		||||
            curl http://localhost:5000/api/v1/watch -H"x-api-key:813031b16330fe25e3780cf0325daa45"
 | 
			
		||||
            {
 | 
			
		||||
                "6a4b7d5c-fee4-4616-9f43-4ac97046b595": {
 | 
			
		||||
                    "last_changed": 1677103794,
 | 
			
		||||
                    "last_checked": 1677103794,
 | 
			
		||||
                    "last_error": false,
 | 
			
		||||
                    "title": "",
 | 
			
		||||
                    "url": "http://www.quotationspage.com/random.php"
 | 
			
		||||
                },
 | 
			
		||||
                "e6f5fd5c-dbfe-468b-b8f3-f9d6ff5ad69b": {
 | 
			
		||||
                    "last_changed": 0,
 | 
			
		||||
                    "last_checked": 1676662819,
 | 
			
		||||
                    "last_error": false,
 | 
			
		||||
                    "title": "QuickLook",
 | 
			
		||||
                    "url": "https://github.com/QL-Win/QuickLook/tags"
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
        @apiParam {String} [recheck_all]       Optional Set to =1 to force recheck of all watches
 | 
			
		||||
        @apiParam {String} [tag]               Optional name of tag to limit results
 | 
			
		||||
        @apiName ListWatches
 | 
			
		||||
        @apiGroup Watch Management
 | 
			
		||||
        @apiSuccess (200) {String} OK JSON dict
 | 
			
		||||
        """
 | 
			
		||||
        """List watches."""
 | 
			
		||||
        list = {}
 | 
			
		||||
 | 
			
		||||
        tag_limit = request.args.get('tag', '').lower()
 | 
			
		||||
@@ -284,6 +277,8 @@ class CreateWatch(Resource):
 | 
			
		||||
                'last_changed': watch.last_changed,
 | 
			
		||||
                'last_checked': watch['last_checked'],
 | 
			
		||||
                'last_error': watch['last_error'],
 | 
			
		||||
                'link': watch.link,
 | 
			
		||||
                'page_title': watch['page_title'],
 | 
			
		||||
                'title': watch['title'],
 | 
			
		||||
                'url': watch['url'],
 | 
			
		||||
                'viewed': watch.viewed
 | 
			
		||||
@@ -291,7 +286,7 @@ class CreateWatch(Resource):
 | 
			
		||||
 | 
			
		||||
        if request.args.get('recheck_all'):
 | 
			
		||||
            for uuid in self.datastore.data['watching'].keys():
 | 
			
		||||
                self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
                worker_handler.queue_item_async_safe(self.update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
            return {'status': "OK"}, 200
 | 
			
		||||
 | 
			
		||||
        return list, 200
 | 
			
		||||
@@ -1,4 +1,7 @@
 | 
			
		||||
import copy
 | 
			
		||||
import functools
 | 
			
		||||
from flask import request, abort
 | 
			
		||||
from loguru import logger
 | 
			
		||||
from . import api_schema
 | 
			
		||||
from ..model import watch_base
 | 
			
		||||
 | 
			
		||||
@@ -8,6 +11,7 @@ schema = api_schema.build_watch_json_schema(watch_base_config)
 | 
			
		||||
 | 
			
		||||
schema_create_watch = copy.deepcopy(schema)
 | 
			
		||||
schema_create_watch['required'] = ['url']
 | 
			
		||||
del schema_create_watch['properties']['last_viewed']
 | 
			
		||||
 | 
			
		||||
schema_update_watch = copy.deepcopy(schema)
 | 
			
		||||
schema_update_watch['additionalProperties'] = False
 | 
			
		||||
@@ -25,9 +29,58 @@ schema_create_notification_urls['required'] = ['notification_urls']
 | 
			
		||||
schema_delete_notification_urls = copy.deepcopy(schema_notification_urls)
 | 
			
		||||
schema_delete_notification_urls['required'] = ['notification_urls']
 | 
			
		||||
 | 
			
		||||
@functools.cache
 | 
			
		||||
def get_openapi_spec():
 | 
			
		||||
    """Lazy load OpenAPI spec and dependencies only when validation is needed."""
 | 
			
		||||
    import os
 | 
			
		||||
    import yaml  # Lazy import - only loaded when API validation is actually used
 | 
			
		||||
    from openapi_core import OpenAPI  # Lazy import - saves ~10.7 MB on startup
 | 
			
		||||
 | 
			
		||||
    spec_path = os.path.join(os.path.dirname(__file__), '../../docs/api-spec.yaml')
 | 
			
		||||
    if not os.path.exists(spec_path):
 | 
			
		||||
        # Possibly for pip3 packages
 | 
			
		||||
        spec_path = os.path.join(os.path.dirname(__file__), '../docs/api-spec.yaml')
 | 
			
		||||
 | 
			
		||||
    with open(spec_path, 'r') as f:
 | 
			
		||||
        spec_dict = yaml.safe_load(f)
 | 
			
		||||
    _openapi_spec = OpenAPI.from_dict(spec_dict)
 | 
			
		||||
    return _openapi_spec
 | 
			
		||||
 | 
			
		||||
def validate_openapi_request(operation_id):
 | 
			
		||||
    """Decorator to validate incoming requests against OpenAPI spec."""
 | 
			
		||||
    def decorator(f):
 | 
			
		||||
        @functools.wraps(f)
 | 
			
		||||
        def wrapper(*args, **kwargs):
 | 
			
		||||
            try:
 | 
			
		||||
                # Skip OpenAPI validation for GET requests since they don't have request bodies
 | 
			
		||||
                if request.method.upper() != 'GET':
 | 
			
		||||
                    # Lazy import - only loaded when actually validating a request
 | 
			
		||||
                    from openapi_core.contrib.flask import FlaskOpenAPIRequest
 | 
			
		||||
 | 
			
		||||
                    spec = get_openapi_spec()
 | 
			
		||||
                    openapi_request = FlaskOpenAPIRequest(request)
 | 
			
		||||
                    result = spec.unmarshal_request(openapi_request)
 | 
			
		||||
                    if result.errors:
 | 
			
		||||
                        from werkzeug.exceptions import BadRequest
 | 
			
		||||
                        error_details = []
 | 
			
		||||
                        for error in result.errors:
 | 
			
		||||
                            error_details.append(str(error))
 | 
			
		||||
                        raise BadRequest(f"OpenAPI validation failed: {error_details}")
 | 
			
		||||
            except BadRequest:
 | 
			
		||||
                # Re-raise BadRequest exceptions (validation failures)
 | 
			
		||||
                raise
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                # If OpenAPI spec loading fails, log but don't break existing functionality
 | 
			
		||||
                logger.critical(f"OpenAPI validation warning for {operation_id}: {e}")
 | 
			
		||||
                abort(500)
 | 
			
		||||
            return f(*args, **kwargs)
 | 
			
		||||
        return wrapper
 | 
			
		||||
    return decorator
 | 
			
		||||
 | 
			
		||||
# Import all API resources
 | 
			
		||||
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch
 | 
			
		||||
from .Watch import Watch, WatchHistory, WatchSingleHistory, CreateWatch, WatchFavicon
 | 
			
		||||
from .Tags import Tags, Tag
 | 
			
		||||
from .Import import Import
 | 
			
		||||
from .SystemInfo import SystemInfo
 | 
			
		||||
from .Notifications import Notifications
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -78,6 +78,13 @@ def build_watch_json_schema(d):
 | 
			
		||||
              ]:
 | 
			
		||||
        schema['properties'][v]['anyOf'].append({'type': 'string', "maxLength": 5000})
 | 
			
		||||
 | 
			
		||||
    for v in ['last_viewed']:
 | 
			
		||||
        schema['properties'][v] = {
 | 
			
		||||
            "type": "integer",
 | 
			
		||||
            "description": "Unix timestamp in seconds of the last time the watch was viewed.",
 | 
			
		||||
            "minimum": 0
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
    # None or Boolean
 | 
			
		||||
    schema['properties']['track_ldjson_price_data']['anyOf'].append({'type': 'boolean'})
 | 
			
		||||
 | 
			
		||||
@@ -89,7 +96,10 @@ def build_watch_json_schema(d):
 | 
			
		||||
                                                           "enum": ["html_requests", "html_webdriver"]
 | 
			
		||||
                                                           })
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    schema['properties']['processor'] = {"anyOf": [
 | 
			
		||||
        {"type": "string", "enum": ["restock_diff", "text_json_diff"]},
 | 
			
		||||
        {"type": "null"}
 | 
			
		||||
    ]}
 | 
			
		||||
 | 
			
		||||
    # All headers must be key/value type dict
 | 
			
		||||
    schema['properties']['headers'] = {
 | 
			
		||||
@@ -112,6 +122,12 @@ def build_watch_json_schema(d):
 | 
			
		||||
 | 
			
		||||
    schema['properties']['time_between_check'] = build_time_between_check_json_schema()
 | 
			
		||||
 | 
			
		||||
    schema['properties']['time_between_check_use_default'] = {
 | 
			
		||||
        "type": "boolean",
 | 
			
		||||
        "default": True,
 | 
			
		||||
        "description": "Whether to use global settings for time between checks - defaults to true if not set"
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    schema['properties']['browser_steps'] = {
 | 
			
		||||
        "anyOf": [
 | 
			
		||||
            {
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										496
									
								
								changedetectionio/async_update_worker.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										496
									
								
								changedetectionio/async_update_worker.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,496 @@
 | 
			
		||||
from .processors.exceptions import ProcessorException
 | 
			
		||||
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
 | 
			
		||||
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
 | 
			
		||||
from changedetectionio import html_tools
 | 
			
		||||
from changedetectionio.flask_app import watch_check_update
 | 
			
		||||
 | 
			
		||||
import asyncio
 | 
			
		||||
import importlib
 | 
			
		||||
import os
 | 
			
		||||
import queue
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
# Async version of update_worker
 | 
			
		||||
# Processes jobs from AsyncSignalPriorityQueue instead of threaded queue
 | 
			
		||||
 | 
			
		||||
async def async_update_worker(worker_id, q, notification_q, app, datastore):
    """
    Async worker function that processes watch check jobs from the queue.

    Runs as a long-lived asyncio task: it pulls queued watch checks, runs the
    configured processor for each watch, persists results/history, and queues
    notifications. Exits when ``app.config.exit`` is set.

    Args:
        worker_id: Unique identifier for this worker
        q: AsyncSignalPriorityQueue containing jobs to process
        notification_q: Standard queue for notifications
        app: Flask application instance
        datastore: Application datastore
    """
    # Set a descriptive name for this task (helps when inspecting asyncio.all_tasks())
    task = asyncio.current_task()
    if task:
        task.set_name(f"async-worker-{worker_id}")

    logger.info(f"Starting async worker {worker_id}")

    while not app.config.exit.is_set():
        update_handler = None
        watch = None

        try:
            # Use native janus async interface - no threads needed!
            # The 1s timeout doubles as the poll interval for the exit flag.
            queued_item_data = await asyncio.wait_for(q.async_get(), timeout=1.0)

        except asyncio.TimeoutError:
            # No jobs available, continue loop
            continue
        except Exception as e:
            logger.critical(f"CRITICAL: Worker {worker_id} failed to get queue item: {type(e).__name__}: {e}")

            # Log queue health for debugging
            try:
                queue_size = q.qsize()
                is_empty = q.empty()
                logger.critical(f"CRITICAL: Worker {worker_id} queue health - size: {queue_size}, empty: {is_empty}")
            except Exception as health_e:
                logger.critical(f"CRITICAL: Worker {worker_id} queue health check failed: {health_e}")

            await asyncio.sleep(0.1)
            continue

        uuid = queued_item_data.item.get('uuid')
        # Whole seconds; also used as the history snapshot timestamp below.
        fetch_start_time = round(time.time())

        # Mark this UUID as being processed
        from changedetectionio import worker_handler
        worker_handler.set_uuid_processing(uuid, processing=True)

        try:
            # Watch may have been deleted while queued; also skip watches with no URL.
            if uuid in list(datastore.data['watching'].keys()) and datastore.data['watching'][uuid].get('url'):
                changed_detected = False
                contents = b''
                process_changedetection_results = True
                update_obj = {}

                # Clear last errors
                datastore.data['watching'][uuid]['browser_steps_last_error_step'] = None
                datastore.data['watching'][uuid]['last_checked'] = fetch_start_time

                watch = datastore.data['watching'].get(uuid)

                logger.info(f"Worker {worker_id} processing watch UUID {uuid} Priority {queued_item_data.priority} URL {watch['url']}")

                try:
                    # Signal listeners (e.g. UI) that a check for this watch started.
                    watch_check_update.send(watch_uuid=uuid)

                    # Processor is what we are using for detecting the "Change"
                    processor = watch.get('processor', 'text_json_diff')

                    # Init a new 'difference_detection_processor'
                    # Processors are resolved dynamically by name under changedetectionio.processors.*
                    processor_module_name = f"changedetectionio.processors.{processor}.processor"
                    try:
                        processor_module = importlib.import_module(processor_module_name)
                    except ModuleNotFoundError as e:
                        # NOTE(review): print() is inconsistent with the loguru logging used
                        # everywhere else in this worker — consider logger.critical here.
                        print(f"Processor module '{processor}' not found.")
                        raise e

                    update_handler = processor_module.perform_site_check(datastore=datastore,
                                                                         watch_uuid=uuid)

                    # All fetchers are now async, so call directly
                    await update_handler.call_browser()

                    # Run change detection (this is synchronous)
                    changed_detected, update_obj, contents = update_handler.run_changedetection(watch=watch)

                # Each handler below records a human-readable last_error on the watch and
                # sets process_changedetection_results=False so no history entry is written.
                except PermissionError as e:
                    logger.critical(f"File permission error updating file, watch: {uuid}")
                    logger.critical(str(e))
                    process_changedetection_results = False

                except ProcessorException as e:
                    # Keep the screenshot/xpath data so the UI can still show the page state.
                    if e.screenshot:
                        watch.save_screenshot(screenshot=e.screenshot)
                    if e.xpath_data:
                        watch.save_xpath_data(data=e.xpath_data)
                    datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
                    process_changedetection_results = False

                except content_fetchers_exceptions.ReplyWithContentButNoText as e:
                    extra_help = ""
                    if e.has_filters:
                        # Maybe it contains an image? offer a more specific hint in the error text.
                        has_img = html_tools.include_filters(include_filters='img',
                                                             html_content=e.html_content)
                        if has_img:
                            extra_help = ", it's possible that the filters you have give an empty result or contain only an image."
                        else:
                            extra_help = ", it's possible that the filters were found, but contained no usable text."

                    datastore.update_watch(uuid=uuid, update_obj={
                        'last_error': f"Got HTML content but no text found (With {e.status_code} reply code){extra_help}"
                    })

                    if e.screenshot:
                        watch.save_screenshot(screenshot=e.screenshot, as_error=True)

                    if e.xpath_data:
                        watch.save_xpath_data(data=e.xpath_data)

                    process_changedetection_results = False

                except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
                    # Map the common HTTP error codes to friendlier messages.
                    if e.status_code == 403:
                        err_text = "Error - 403 (Access denied) received"
                    elif e.status_code == 404:
                        err_text = "Error - 404 (Page not found) received"
                    elif e.status_code == 407:
                        err_text = "Error - 407 (Proxy authentication required) received, did you need a username and password for the proxy?"
                    elif e.status_code == 500:
                        err_text = "Error - 500 (Internal server error) received from the web site"
                    else:
                        extra = ' (Access denied or blocked)' if str(e.status_code).startswith('4') else ''
                        err_text = f"Error - Request returned a HTTP error code {e.status_code}{extra}"

                    if e.screenshot:
                        watch.save_screenshot(screenshot=e.screenshot, as_error=True)
                    if e.xpath_data:
                        watch.save_xpath_data(data=e.xpath_data, as_error=True)
                    if e.page_text:
                        watch.save_error_text(contents=e.page_text)

                    datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
                    process_changedetection_results = False

                except FilterNotFoundInResponse as e:
                    # Watch may have been deleted mid-check; bail out of this iteration.
                    if not datastore.data['watching'].get(uuid):
                        continue

                    err_text = "Warning, no filters were found, no change detection ran - Did the page change layout? update your Visual Filter if necessary."
                    datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})

                    # Filter wasnt found, but we should still update the visual selector so that they can have a chance to set it up again
                    if e.screenshot:
                        watch.save_screenshot(screenshot=e.screenshot)

                    if e.xpath_data:
                        watch.save_xpath_data(data=e.xpath_data)

                    # Only when enabled, send the notification
                    if watch.get('filter_failure_notification_send', False):
                        c = watch.get('consecutive_filter_failures', 0)
                        c += 1
                        # Send notification if we reached the threshold?
                        threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
                        logger.debug(f"Filter for {uuid} not found, consecutive_filter_failures: {c} of threshold {threshold}")
                        if c >= threshold:
                            if not watch.get('notification_muted'):
                                logger.debug(f"Sending filter failed notification for {uuid}")
                                await send_filter_failure_notification(uuid, notification_q, datastore)
                            # Counter resets even when muted, so the next notification
                            # only fires after another full run of failures.
                            c = 0
                            logger.debug(f"Reset filter failure count back to zero")

                        datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})
                    else:
                        logger.trace(f"{uuid} - filter_failure_notification_send not enabled, skipping")

                    process_changedetection_results = False

                except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
                    # Yes fine, so nothing todo, don't continue to process.
                    process_changedetection_results = False
                    changed_detected = False

                except content_fetchers_exceptions.BrowserConnectError as e:
                    datastore.update_watch(uuid=uuid,
                                         update_obj={'last_error': e.msg})
                    process_changedetection_results = False

                except content_fetchers_exceptions.BrowserFetchTimedOut as e:
                    datastore.update_watch(uuid=uuid,
                                         update_obj={'last_error': e.msg})
                    process_changedetection_results = False

                except content_fetchers_exceptions.BrowserStepsStepException as e:
                    if not datastore.data['watching'].get(uuid):
                        continue

                    # Step numbers are 0-based internally; shown to the user as 1-based.
                    error_step = e.step_n + 1
                    from playwright._impl._errors import TimeoutError, Error

                    # Generally enough info for TimeoutError (couldnt locate the element after default seconds)
                    err_text = f"Browser step at position {error_step} could not run, check the watch, add a delay if necessary, view Browser Steps to see screenshot at that step."

                    if e.original_e.name == "TimeoutError":
                        # Just the first line is enough, the rest is the stack trace
                        err_text += " Could not find the target."
                    else:
                        # Other Error, more info is good.
                        err_text += " " + str(e.original_e).splitlines()[0]

                    logger.debug(f"BrowserSteps exception at step {error_step} {str(e.original_e)}")

                    datastore.update_watch(uuid=uuid,
                                         update_obj={'last_error': err_text,
                                                   'browser_steps_last_error_step': error_step})

                    # Same consecutive-failure/notification logic as FilterNotFoundInResponse.
                    if watch.get('filter_failure_notification_send', False):
                        c = watch.get('consecutive_filter_failures', 0)
                        c += 1
                        # Send notification if we reached the threshold?
                        threshold = datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
                        logger.error(f"Step for {uuid} not found, consecutive_filter_failures: {c}")
                        if threshold > 0 and c >= threshold:
                            if not watch.get('notification_muted'):
                                await send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n, notification_q=notification_q, datastore=datastore)
                            c = 0

                        datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c})

                    process_changedetection_results = False

                except content_fetchers_exceptions.EmptyReply as e:
                    # Some kind of custom to-str handler in the exception handler that does this?
                    err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
                    datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                'last_check_status': e.status_code})
                    process_changedetection_results = False

                except content_fetchers_exceptions.ScreenshotUnavailable as e:
                    err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
                    datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                'last_check_status': e.status_code})
                    process_changedetection_results = False

                except content_fetchers_exceptions.JSActionExceptions as e:
                    err_text = "Error running JS Actions - Page request - "+e.message
                    if e.screenshot:
                        watch.save_screenshot(screenshot=e.screenshot, as_error=True)
                    datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                'last_check_status': e.status_code})
                    process_changedetection_results = False

                except content_fetchers_exceptions.PageUnloadable as e:
                    err_text = "Page request from server didnt respond correctly"
                    if e.message:
                        err_text = "{} - {}".format(err_text, e.message)

                    if e.screenshot:
                        watch.save_screenshot(screenshot=e.screenshot, as_error=True)

                    # has_ldjson_price_data is reset because the page could not be loaded at all.
                    datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
                                                                'last_check_status': e.status_code,
                                                                'has_ldjson_price_data': None})
                    process_changedetection_results = False

                except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
                    err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
                    datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
                    process_changedetection_results = False
                    logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}")

                except Exception as e:
                    # Catch-all so one bad watch never kills the worker loop.
                    logger.error(f"Worker {worker_id} exception processing watch UUID: {uuid}")
                    logger.error(str(e))
                    datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)})
                    process_changedetection_results = False

                else:
                    # Successful check: clear error state and record the content-type.
                    if not datastore.data['watching'].get(uuid):
                        continue

                    update_obj['content-type'] = update_handler.fetcher.get_all_headers().get('content-type', '').lower()

                    if not watch.get('ignore_status_codes'):
                        update_obj['consecutive_filter_failures'] = 0

                    update_obj['last_error'] = False
                    cleanup_error_artifacts(uuid, datastore)

                if not datastore.data['watching'].get(uuid):
                    continue

                if process_changedetection_results:
                    try:
                        datastore.update_watch(uuid=uuid, update_obj=update_obj)

                        # A snapshot is also saved on the very first check (history_n == 0).
                        if changed_detected or not watch.history_n:
                            if update_handler.screenshot:
                                watch.save_screenshot(screenshot=update_handler.screenshot)

                            if update_handler.xpath_data:
                                watch.save_xpath_data(data=update_handler.xpath_data)

                            # Ensure unique timestamp for history
                            if watch.newest_history_key and int(fetch_start_time) == int(watch.newest_history_key):
                                logger.warning(f"Timestamp {fetch_start_time} already exists, waiting 1 seconds")
                                fetch_start_time += 1
                                await asyncio.sleep(1)

                            watch.save_history_text(contents=contents,
                                                    timestamp=int(fetch_start_time),
                                                    snapshot_id=update_obj.get('previous_md5', 'none'))

                            empty_pages_are_a_change = datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
                            if update_handler.fetcher.content or (not update_handler.fetcher.content and empty_pages_are_a_change):
                                watch.save_last_fetched_html(contents=update_handler.fetcher.content, timestamp=int(fetch_start_time))

                            # Explicitly delete large content variables to free memory IMMEDIATELY after saving
                            # These are no longer needed after being saved to history
                            del contents

                            # Send notifications on second+ check
                            if watch.history_n >= 2:
                                logger.info(f"Change detected in UUID {uuid} - {watch['url']}")
                                if not watch.get('notification_muted'):
                                    await send_content_changed_notification(uuid, notification_q, datastore)

                    except Exception as e:
                        logger.critical(f"Worker {worker_id} exception in process_changedetection_results")
                        logger.critical(str(e))
                        datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

                # Always record attempt count
                count = watch.get('check_count', 0) + 1

                # Always record page title (used in notifications, and can change even when the content is the same)
                try:
                    page_title = html_tools.extract_title(data=update_handler.fetcher.content)
                    logger.debug(f"UUID: {uuid} Page <title> is '{page_title}'")
                    datastore.update_watch(uuid=uuid, update_obj={'page_title': page_title})
                except Exception as e:
                    logger.warning(f"UUID: {uuid} Exception when extracting <title> - {str(e)}")

                # Record server header
                try:
                    # Truncated to 255 chars; best-effort, failures are ignored.
                    server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255]
                    datastore.update_watch(uuid=uuid, update_obj={'remote_server_reply': server_header})
                except Exception as e:
                    pass

                # Store favicon if necessary
                if update_handler.fetcher.favicon_blob and update_handler.fetcher.favicon_blob.get('base64'):
                    watch.bump_favicon(url=update_handler.fetcher.favicon_blob.get('url'),
                                       favicon_base_64=update_handler.fetcher.favicon_blob.get('base64')
                                       )

                datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - fetch_start_time, 3),
                                                               'check_count': count})

                # NOW clear fetcher content - after all processing is complete
                # This is the last point where we need the fetcher data
                if update_handler and hasattr(update_handler, 'fetcher') and update_handler.fetcher:
                    update_handler.fetcher.clear_content()
                    logger.debug(f"Cleared fetcher content for UUID {uuid}")

        except Exception as e:
            logger.error(f"Worker {worker_id} unexpected error processing {uuid}: {e}")
            logger.error(f"Worker {worker_id} traceback:", exc_info=True)

            # Also update the watch with error information
            if datastore and uuid in datastore.data['watching']:
                datastore.update_watch(uuid=uuid, update_obj={'last_error': f"Worker error: {str(e)}"})

        finally:
            # Always cleanup - this runs whether there was an exception or not
            if uuid:
                try:
                    # Mark UUID as no longer being processed
                    worker_handler.set_uuid_processing(uuid, processing=False)

                    # Send completion signal
                    if watch:
                        #logger.info(f"Worker {worker_id} sending completion signal for UUID {watch['uuid']}")
                        watch_check_update.send(watch_uuid=watch['uuid'])

                    # Explicitly clean up update_handler and all its references
                    if update_handler:
                        # Clear fetcher content using the proper method
                        if hasattr(update_handler, 'fetcher') and update_handler.fetcher:
                            update_handler.fetcher.clear_content()

                        # Clear processor references
                        if hasattr(update_handler, 'content_processor'):
                            update_handler.content_processor = None

                        update_handler = None

                    # Clear local contents variable if it still exists
                    if 'contents' in locals():
                        del contents

                    # Note: We don't set watch = None here because:
                    # 1. watch is just a local reference to datastore.data['watching'][uuid]
                    # 2. Setting it to None doesn't affect the datastore
                    # 3. GC can't collect the object anyway (still referenced by datastore)
                    # 4. It would just cause confusion

                    logger.debug(f"Worker {worker_id} completed watch {uuid} in {time.time()-fetch_start_time:.2f}s")
                except Exception as cleanup_error:
                    logger.error(f"Worker {worker_id} error during cleanup: {cleanup_error}")

            # Brief pause before continuing to avoid tight error loops (only on error)
            # NOTE(review): `except ... as e` unbinds `e` when the handler exits (PEP 3110),
            # so 'e' may never appear in locals() here and the 1s error back-off may never
            # trigger — confirm the intended behavior.
            if 'e' in locals():
                await asyncio.sleep(1.0)
            else:
                # Small yield for normal completion
                await asyncio.sleep(0.01)

        # Check if we should exit
        if app.config.exit.is_set():
            break

    # Check if we're in pytest environment - if so, be more gentle with logging
    import sys
    in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ

    if not in_pytest:
        logger.info(f"Worker {worker_id} shutting down")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cleanup_error_artifacts(uuid, datastore):
    """Remove stale error artifact files for a watch.

    Deletes the error screenshot and error text file left over from a
    previous failed check, so a now-successful watch no longer shows them.
    Missing files are simply skipped.
    """
    watch_dir = os.path.join(datastore.datastore_path, uuid)
    for filename in ("last-error-screenshot.png", "last-error.txt"):
        candidate = os.path.join(watch_dir, filename)
        if os.path.isfile(candidate):
            os.unlink(candidate)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def send_content_changed_notification(watch_uuid, notification_q, datastore):
    """Queue a 'content changed' notification for *watch_uuid*.

    Delegates to the notification service; any failure is logged rather
    than propagated so a notification error never aborts the check.
    """
    try:
        from changedetectionio.notification_service import create_notification_service

        # Bind a service to this datastore/queue and hand off the event.
        service = create_notification_service(datastore, notification_q)
        service.send_content_changed_notification(watch_uuid)
    except Exception as e:
        logger.error(f"Error sending notification for {watch_uuid}: {e}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def send_filter_failure_notification(watch_uuid, notification_q, datastore):
    """Queue a 'filter failure' notification for *watch_uuid*.

    Delegates to the notification service; any failure is logged rather
    than propagated so a notification error never aborts the check.
    """
    try:
        from changedetectionio.notification_service import create_notification_service

        # Bind a service to this datastore/queue and hand off the event.
        service = create_notification_service(datastore, notification_q)
        service.send_filter_failure_notification(watch_uuid)
    except Exception as e:
        logger.error(f"Error sending filter failure notification for {watch_uuid}: {e}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
async def send_step_failure_notification(watch_uuid, step_n, notification_q, datastore):
 | 
			
		||||
    """Helper function to send step failure notifications using the new notification service"""
 | 
			
		||||
    try:
 | 
			
		||||
        from changedetectionio.notification_service import create_notification_service
 | 
			
		||||
        
 | 
			
		||||
        # Create notification service instance
 | 
			
		||||
        notification_service = create_notification_service(datastore, notification_q)
 | 
			
		||||
        
 | 
			
		||||
        notification_service.send_step_failure_notification(watch_uuid, step_n)
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.error(f"Error sending step failure notification for {watch_uuid}: {e}")
 | 
			
		||||
@@ -25,42 +25,53 @@ io_interface_context = None
 | 
			
		||||
import json
 | 
			
		||||
import hashlib
 | 
			
		||||
from flask import Response
 | 
			
		||||
import asyncio
 | 
			
		||||
import threading
 | 
			
		||||
 | 
			
		||||
def run_async_in_browser_loop(coro):
 | 
			
		||||
    """Run async coroutine using the existing async worker event loop"""
 | 
			
		||||
    from changedetectionio import worker_handler
 | 
			
		||||
    
 | 
			
		||||
    # Use the existing async worker event loop instead of creating a new one
 | 
			
		||||
    if worker_handler.USE_ASYNC_WORKERS and worker_handler.async_loop and not worker_handler.async_loop.is_closed():
 | 
			
		||||
        logger.debug("Browser steps using existing async worker event loop")
 | 
			
		||||
        future = asyncio.run_coroutine_threadsafe(coro, worker_handler.async_loop)
 | 
			
		||||
        return future.result()
 | 
			
		||||
    else:
 | 
			
		||||
        # Fallback: create a new event loop (for sync workers or if async loop not available)
 | 
			
		||||
        logger.debug("Browser steps creating temporary event loop")
 | 
			
		||||
        loop = asyncio.new_event_loop()
 | 
			
		||||
        asyncio.set_event_loop(loop)
 | 
			
		||||
        try:
 | 
			
		||||
            return loop.run_until_complete(coro)
 | 
			
		||||
        finally:
 | 
			
		||||
            loop.close()
 | 
			
		||||
 | 
			
		||||
def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
    browser_steps_blueprint = Blueprint('browser_steps', __name__, template_folder="templates")
 | 
			
		||||
 | 
			
		||||
    def start_browsersteps_session(watch_uuid):
 | 
			
		||||
        from . import nonContext
 | 
			
		||||
    async def start_browsersteps_session(watch_uuid):
 | 
			
		||||
        from . import browser_steps
 | 
			
		||||
        import time
 | 
			
		||||
        global io_interface_context
 | 
			
		||||
        from playwright.async_api import async_playwright
 | 
			
		||||
 | 
			
		||||
        # We keep the playwright session open for many minutes
 | 
			
		||||
        keepalive_seconds = int(os.getenv('BROWSERSTEPS_MINUTES_KEEPALIVE', 10)) * 60
 | 
			
		||||
 | 
			
		||||
        browsersteps_start_session = {'start_time': time.time()}
 | 
			
		||||
 | 
			
		||||
        # You can only have one of these running
 | 
			
		||||
        # This should be very fine to leave running for the life of the application
 | 
			
		||||
        # @idea - Make it global so the pool of watch fetchers can use it also
 | 
			
		||||
        if not io_interface_context:
 | 
			
		||||
            io_interface_context = nonContext.c_sync_playwright()
 | 
			
		||||
            # Start the Playwright context, which is actually a nodejs sub-process and communicates over STDIN/STDOUT pipes
 | 
			
		||||
            io_interface_context = io_interface_context.start()
 | 
			
		||||
        # Create a new async playwright instance for browser steps
 | 
			
		||||
        playwright_instance = async_playwright()
 | 
			
		||||
        playwright_context = await playwright_instance.start()
 | 
			
		||||
 | 
			
		||||
        keepalive_ms = ((keepalive_seconds + 3) * 1000)
 | 
			
		||||
        base_url = os.getenv('PLAYWRIGHT_DRIVER_URL', '').strip('"')
 | 
			
		||||
        a = "?" if not '?' in base_url else '&'
 | 
			
		||||
        base_url += a + f"timeout={keepalive_ms}"
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            if 'ECONNREFUSED' in str(e):
 | 
			
		||||
                return make_response('Unable to start the Playwright Browser session, is it running?', 401)
 | 
			
		||||
            else:
 | 
			
		||||
                # Other errors, bad URL syntax, bad reply etc
 | 
			
		||||
                return make_response(str(e), 401)
 | 
			
		||||
        browser = await playwright_context.chromium.connect_over_cdp(base_url, timeout=keepalive_ms)
 | 
			
		||||
        browsersteps_start_session['browser'] = browser
 | 
			
		||||
        browsersteps_start_session['playwright_context'] = playwright_context
 | 
			
		||||
 | 
			
		||||
        proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
 | 
			
		||||
        proxy = None
 | 
			
		||||
@@ -82,15 +93,20 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
                logger.debug(f"Browser Steps: UUID {watch_uuid} selected proxy {proxy_url}")
 | 
			
		||||
 | 
			
		||||
        # Tell Playwright to connect to Chrome and setup a new session via our stepper interface
 | 
			
		||||
        browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
 | 
			
		||||
            playwright_browser=browsersteps_start_session['browser'],
 | 
			
		||||
        browserstepper = browser_steps.browsersteps_live_ui(
 | 
			
		||||
            playwright_browser=browser,
 | 
			
		||||
            proxy=proxy,
 | 
			
		||||
            start_url=datastore.data['watching'][watch_uuid].link,
 | 
			
		||||
            headers=datastore.data['watching'][watch_uuid].get('headers')
 | 
			
		||||
        )
 | 
			
		||||
        
 | 
			
		||||
        # Initialize the async connection
 | 
			
		||||
        await browserstepper.connect(proxy=proxy)
 | 
			
		||||
        
 | 
			
		||||
        browsersteps_start_session['browserstepper'] = browserstepper
 | 
			
		||||
 | 
			
		||||
        # For test
 | 
			
		||||
        #browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
 | 
			
		||||
        #await browsersteps_start_session['browserstepper'].action_goto_url(value="http://example.com?time="+str(time.time()))
 | 
			
		||||
 | 
			
		||||
        return browsersteps_start_session
 | 
			
		||||
 | 
			
		||||
@@ -99,7 +115,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
    @browser_steps_blueprint.route("/browsersteps_start_session", methods=['GET'])
 | 
			
		||||
    def browsersteps_start_session():
 | 
			
		||||
        # A new session was requested, return sessionID
 | 
			
		||||
 | 
			
		||||
        import asyncio
 | 
			
		||||
        import uuid
 | 
			
		||||
        browsersteps_session_id = str(uuid.uuid4())
 | 
			
		||||
        watch_uuid = request.args.get('uuid')
 | 
			
		||||
@@ -109,7 +125,19 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
 | 
			
		||||
        logger.debug("Starting connection with playwright")
 | 
			
		||||
        logger.debug("browser_steps.py connecting")
 | 
			
		||||
        browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            # Run the async function in the dedicated browser steps event loop
 | 
			
		||||
            browsersteps_sessions[browsersteps_session_id] = run_async_in_browser_loop(
 | 
			
		||||
                start_browsersteps_session(watch_uuid)
 | 
			
		||||
            )
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            if 'ECONNREFUSED' in str(e):
 | 
			
		||||
                return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
 | 
			
		||||
            else:
 | 
			
		||||
                # Other errors, bad URL syntax, bad reply etc
 | 
			
		||||
                return make_response(str(e), 401)
 | 
			
		||||
 | 
			
		||||
        logger.debug("Starting connection with playwright - done")
 | 
			
		||||
        return {'browsersteps_session_id': browsersteps_session_id}
 | 
			
		||||
 | 
			
		||||
@@ -166,12 +194,15 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
            step_optional_value = request.form.get('optional_value')
 | 
			
		||||
            is_last_step = strtobool(request.form.get('is_last_step'))
 | 
			
		||||
 | 
			
		||||
            # @todo try.. accept.. nice errors not popups..
 | 
			
		||||
            try:
 | 
			
		||||
 | 
			
		||||
                browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation,
 | 
			
		||||
                                         selector=step_selector,
 | 
			
		||||
                                         optional_value=step_optional_value)
 | 
			
		||||
                # Run the async call_action method in the dedicated browser steps event loop
 | 
			
		||||
                run_async_in_browser_loop(
 | 
			
		||||
                    browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(
 | 
			
		||||
                        action_name=step_operation,
 | 
			
		||||
                        selector=step_selector,
 | 
			
		||||
                        optional_value=step_optional_value
 | 
			
		||||
                    )
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.error(f"Exception when calling step operation {step_operation} {str(e)}")
 | 
			
		||||
@@ -185,7 +216,11 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
 | 
			
		||||
        # Screenshots and other info only needed on requesting a step (POST)
 | 
			
		||||
        try:
 | 
			
		||||
            (screenshot, xpath_data) = browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
 | 
			
		||||
            # Run the async get_current_state method in the dedicated browser steps event loop
 | 
			
		||||
            (screenshot, xpath_data) = run_async_in_browser_loop(
 | 
			
		||||
                browsersteps_sessions[browsersteps_session_id]['browserstepper'].get_current_state()
 | 
			
		||||
            )
 | 
			
		||||
                
 | 
			
		||||
            if is_last_step:
 | 
			
		||||
                watch = datastore.data['watching'].get(uuid)
 | 
			
		||||
                u = browsersteps_sessions[browsersteps_session_id]['browserstepper'].page.url
 | 
			
		||||
@@ -193,13 +228,10 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
                    watch.save_screenshot(screenshot=screenshot)
 | 
			
		||||
                    watch.save_xpath_data(data=xpath_data)
 | 
			
		||||
 | 
			
		||||
        except playwright._impl._api_types.Error as e:
 | 
			
		||||
            return make_response("Browser session ran out of time :( Please reload this page."+str(e), 401)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            return make_response("Error fetching screenshot and element data - " + str(e), 401)
 | 
			
		||||
            return make_response(f"Error fetching screenshot and element data - {str(e)}", 401)
 | 
			
		||||
 | 
			
		||||
        # SEND THIS BACK TO THE BROWSER
 | 
			
		||||
 | 
			
		||||
        output = {
 | 
			
		||||
            "screenshot": f"data:image/jpeg;base64,{base64.b64encode(screenshot).decode('ascii')}",
 | 
			
		||||
            "xpath_data": xpath_data,
 | 
			
		||||
 
 | 
			
		||||
@@ -6,7 +6,7 @@ from loguru import logger
 | 
			
		||||
 | 
			
		||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT
 | 
			
		||||
from changedetectionio.content_fetchers.base import manage_user_agent
 | 
			
		||||
from changedetectionio.safe_jinja import render as jinja_render
 | 
			
		||||
from changedetectionio.jinja2_custom import render as jinja_render
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -35,6 +35,7 @@ browser_step_ui_config = {'Choose one': '0 0',
 | 
			
		||||
                          'Make all child elements visible': '1 0',
 | 
			
		||||
                          'Press Enter': '0 0',
 | 
			
		||||
                          'Select by label': '1 1',
 | 
			
		||||
                          '<select> by option text': '1 1',
 | 
			
		||||
                          'Scroll down': '0 0',
 | 
			
		||||
                          'Uncheck checkbox': '1 0',
 | 
			
		||||
                          'Wait for seconds': '0 1',
 | 
			
		||||
@@ -54,14 +55,17 @@ browser_step_ui_config = {'Choose one': '0 0',
 | 
			
		||||
class steppable_browser_interface():
 | 
			
		||||
    page = None
 | 
			
		||||
    start_url = None
 | 
			
		||||
 | 
			
		||||
    action_timeout = 10 * 1000
 | 
			
		||||
 | 
			
		||||
    def __init__(self, start_url):
 | 
			
		||||
        self.start_url = start_url
 | 
			
		||||
 | 
			
		||||
    # Convert and perform "Click Button" for example
 | 
			
		||||
    def call_action(self, action_name, selector=None, optional_value=None):
 | 
			
		||||
    async def call_action(self, action_name, selector=None, optional_value=None):
 | 
			
		||||
        if self.page is None:
 | 
			
		||||
            logger.warning("Cannot call action on None page object")
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        now = time.time()
 | 
			
		||||
        call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower())
 | 
			
		||||
        if call_action_name == 'choose_one':
 | 
			
		||||
@@ -72,136 +76,216 @@ class steppable_browser_interface():
 | 
			
		||||
        if selector and selector.startswith('/') and not selector.startswith('//'):
 | 
			
		||||
            selector = "xpath=" + selector
 | 
			
		||||
 | 
			
		||||
        # Check if action handler exists
 | 
			
		||||
        if not hasattr(self, "action_" + call_action_name):
 | 
			
		||||
            logger.warning(f"Action handler for '{call_action_name}' not found")
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        action_handler = getattr(self, "action_" + call_action_name)
 | 
			
		||||
 | 
			
		||||
        # Support for Jinja2 variables in the value and selector
 | 
			
		||||
 | 
			
		||||
        if selector and ('{%' in selector or '{{' in selector):
 | 
			
		||||
            selector = jinja_render(template_str=selector)
 | 
			
		||||
 | 
			
		||||
        if optional_value and ('{%' in optional_value or '{{' in optional_value):
 | 
			
		||||
            optional_value = jinja_render(template_str=optional_value)
 | 
			
		||||
 | 
			
		||||
        action_handler(selector, optional_value)
 | 
			
		||||
        self.page.wait_for_timeout(1.5 * 1000)
 | 
			
		||||
        # Trigger click and cautiously handle potential navigation
 | 
			
		||||
        # This means the page redirects/reloads/changes JS etc etc
 | 
			
		||||
        if call_action_name.startswith('click_'):
 | 
			
		||||
            try:
 | 
			
		||||
                # Set up navigation expectation before the click (like sync version)
 | 
			
		||||
                async with self.page.expect_event("framenavigated", timeout=3000) as navigation_info:
 | 
			
		||||
                    await action_handler(selector, optional_value)
 | 
			
		||||
                
 | 
			
		||||
                # Check if navigation actually occurred
 | 
			
		||||
                try:
 | 
			
		||||
                    await navigation_info.value  # This waits for the navigation promise
 | 
			
		||||
                    logger.debug(f"Navigation occurred on {call_action_name}.")
 | 
			
		||||
                except Exception:
 | 
			
		||||
                    logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.")
 | 
			
		||||
                    
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                # If expect_event itself times out, that means no navigation occurred - that's OK
 | 
			
		||||
                if "framenavigated" in str(e) and "exceeded" in str(e):
 | 
			
		||||
                    logger.debug(f"No navigation occurred within timeout when calling {call_action_name}, that's OK, continuing.")
 | 
			
		||||
                else:
 | 
			
		||||
                    raise e
 | 
			
		||||
        else:
 | 
			
		||||
            # Some other action that probably a navigation is not expected
 | 
			
		||||
            await action_handler(selector, optional_value)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        # Safely wait for timeout
 | 
			
		||||
        await self.page.wait_for_timeout(1.5 * 1000)
 | 
			
		||||
        logger.debug(f"Call action done in {time.time()-now:.2f}s")
 | 
			
		||||
 | 
			
		||||
    def action_goto_url(self, selector=None, value=None):
 | 
			
		||||
        # self.page.set_viewport_size({"width": 1280, "height": 5000})
 | 
			
		||||
    async def action_goto_url(self, selector=None, value=None):
 | 
			
		||||
        if not value:
 | 
			
		||||
            logger.warning("No URL provided for goto_url action")
 | 
			
		||||
            return None
 | 
			
		||||
            
 | 
			
		||||
        now = time.time()
 | 
			
		||||
        response = self.page.goto(value, timeout=0, wait_until='load')
 | 
			
		||||
        # Should be the same as the puppeteer_fetch.js methods, means, load with no timeout set (skip timeout)
 | 
			
		||||
        #and also wait for seconds ?
 | 
			
		||||
        #await page.waitForTimeout(1000);
 | 
			
		||||
        #await page.waitForTimeout(extra_wait_ms);
 | 
			
		||||
        response = await self.page.goto(value, timeout=0, wait_until='load')
 | 
			
		||||
        logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
 | 
			
		||||
        return response
 | 
			
		||||
 | 
			
		||||
    # Incase they request to go back to the start
 | 
			
		||||
    def action_goto_site(self, selector=None, value=None):
 | 
			
		||||
        return self.action_goto_url(value=self.start_url)
 | 
			
		||||
    async def action_goto_site(self, selector=None, value=None):
 | 
			
		||||
        return await self.action_goto_url(value=re.sub(r'^source:', '', self.start_url, flags=re.IGNORECASE))
 | 
			
		||||
 | 
			
		||||
    def action_click_element_containing_text(self, selector=None, value=''):
 | 
			
		||||
    async def action_click_element_containing_text(self, selector=None, value=''):
 | 
			
		||||
        logger.debug("Clicking element containing text")
 | 
			
		||||
        if not len(value.strip()):
 | 
			
		||||
        if not value or not len(value.strip()):
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        elem = self.page.get_by_text(value)
 | 
			
		||||
        if elem.count():
 | 
			
		||||
            elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
 | 
			
		||||
        if await elem.count():
 | 
			
		||||
            await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
 | 
			
		||||
 | 
			
		||||
    def action_click_element_containing_text_if_exists(self, selector=None, value=''):
 | 
			
		||||
 | 
			
		||||
    async def action_click_element_containing_text_if_exists(self, selector=None, value=''):
 | 
			
		||||
        logger.debug("Clicking element containing text if exists")
 | 
			
		||||
        if not len(value.strip()):
 | 
			
		||||
        if not value or not len(value.strip()):
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        elem = self.page.get_by_text(value)
 | 
			
		||||
        logger.debug(f"Clicking element containing text - {elem.count()} elements found")
 | 
			
		||||
        if elem.count():
 | 
			
		||||
            elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
 | 
			
		||||
        else:
 | 
			
		||||
        count = await elem.count()
 | 
			
		||||
        logger.debug(f"Clicking element containing text - {count} elements found")
 | 
			
		||||
        if count:
 | 
			
		||||
            await elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
 | 
			
		||||
                
 | 
			
		||||
 | 
			
		||||
    async def action_enter_text_in_field(self, selector, value):
 | 
			
		||||
        if not selector or not len(selector.strip()):
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
    def action_enter_text_in_field(self, selector, value):
 | 
			
		||||
        if not len(selector.strip()):
 | 
			
		||||
            return
 | 
			
		||||
        await self.page.fill(selector, value, timeout=self.action_timeout)
 | 
			
		||||
 | 
			
		||||
        self.page.fill(selector, value, timeout=self.action_timeout)
 | 
			
		||||
    async def action_execute_js(self, selector, value):
 | 
			
		||||
        if not value:
 | 
			
		||||
            return None
 | 
			
		||||
            
 | 
			
		||||
        return await self.page.evaluate(value)
 | 
			
		||||
 | 
			
		||||
    def action_execute_js(self, selector, value):
 | 
			
		||||
        response = self.page.evaluate(value)
 | 
			
		||||
        return response
 | 
			
		||||
 | 
			
		||||
    def action_click_element(self, selector, value):
 | 
			
		||||
    async def action_click_element(self, selector, value):
 | 
			
		||||
        logger.debug("Clicking element")
 | 
			
		||||
        if not len(selector.strip()):
 | 
			
		||||
        if not selector or not len(selector.strip()):
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
 | 
			
		||||
        await self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
 | 
			
		||||
 | 
			
		||||
    def action_click_element_if_exists(self, selector, value):
 | 
			
		||||
    async def action_click_element_if_exists(self, selector, value):
 | 
			
		||||
        import playwright._impl._errors as _api_types
 | 
			
		||||
        logger.debug("Clicking element if exists")
 | 
			
		||||
        if not len(selector.strip()):
 | 
			
		||||
        if not selector or not len(selector.strip()):
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        try:
 | 
			
		||||
            self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
 | 
			
		||||
        except _api_types.TimeoutError as e:
 | 
			
		||||
            await self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
 | 
			
		||||
        except _api_types.TimeoutError:
 | 
			
		||||
            return
 | 
			
		||||
        except _api_types.Error as e:
 | 
			
		||||
        except _api_types.Error:
 | 
			
		||||
            # Element was there, but page redrew and now its long long gone
 | 
			
		||||
            return
 | 
			
		||||
                
 | 
			
		||||
 | 
			
		||||
    def action_click_x_y(self, selector, value):
 | 
			
		||||
        if not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
 | 
			
		||||
            raise Exception("'Click X,Y' step should be in the format of '100 , 90'")
 | 
			
		||||
    async def action_click_x_y(self, selector, value):
 | 
			
		||||
        if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
 | 
			
		||||
            logger.warning("'Click X,Y' step should be in the format of '100 , 90'")
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        x, y = value.strip().split(',')
 | 
			
		||||
        x = int(float(x.strip()))
 | 
			
		||||
        y = int(float(y.strip()))
 | 
			
		||||
        self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
 | 
			
		||||
        try:
 | 
			
		||||
            x, y = value.strip().split(',')
 | 
			
		||||
            x = int(float(x.strip()))
 | 
			
		||||
            y = int(float(y.strip()))
 | 
			
		||||
            
 | 
			
		||||
            await self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
 | 
			
		||||
                
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.error(f"Error parsing x,y coordinates: {str(e)}")
 | 
			
		||||
 | 
			
		||||
    def action_scroll_down(self, selector, value):
 | 
			
		||||
    async def action__select_by_option_text(self, selector, value):
 | 
			
		||||
        if not selector or not len(selector.strip()):
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        await self.page.select_option(selector, label=value, timeout=self.action_timeout)
 | 
			
		||||
 | 
			
		||||
    async def action_scroll_down(self, selector, value):
 | 
			
		||||
        # Some sites this doesnt work on for some reason
 | 
			
		||||
        self.page.mouse.wheel(0, 600)
 | 
			
		||||
        self.page.wait_for_timeout(1000)
 | 
			
		||||
        await self.page.mouse.wheel(0, 600)
 | 
			
		||||
        await self.page.wait_for_timeout(1000)
 | 
			
		||||
 | 
			
		||||
    def action_wait_for_seconds(self, selector, value):
 | 
			
		||||
        self.page.wait_for_timeout(float(value.strip()) * 1000)
 | 
			
		||||
    async def action_wait_for_seconds(self, selector, value):
 | 
			
		||||
        try:
 | 
			
		||||
            seconds = float(value.strip()) if value else 1.0
 | 
			
		||||
            await self.page.wait_for_timeout(seconds * 1000)
 | 
			
		||||
        except (ValueError, TypeError) as e:
 | 
			
		||||
            logger.error(f"Invalid value for wait_for_seconds: {str(e)}")
 | 
			
		||||
 | 
			
		||||
    def action_wait_for_text(self, selector, value):
 | 
			
		||||
    async def action_wait_for_text(self, selector, value):
 | 
			
		||||
        if not value:
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        import json
 | 
			
		||||
        v = json.dumps(value)
 | 
			
		||||
        self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000)
 | 
			
		||||
        await self.page.wait_for_function(
 | 
			
		||||
            f'document.querySelector("body").innerText.includes({v});',
 | 
			
		||||
            timeout=30000
 | 
			
		||||
        )
 | 
			
		||||
            
 | 
			
		||||
 | 
			
		||||
    def action_wait_for_text_in_element(self, selector, value):
 | 
			
		||||
    async def action_wait_for_text_in_element(self, selector, value):
 | 
			
		||||
        if not selector or not value:
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        import json
 | 
			
		||||
        s = json.dumps(selector)
 | 
			
		||||
        v = json.dumps(value)
 | 
			
		||||
        self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000)
 | 
			
		||||
        
 | 
			
		||||
        await self.page.wait_for_function(
 | 
			
		||||
            f'document.querySelector({s}).innerText.includes({v});',
 | 
			
		||||
            timeout=30000
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    # @todo - in the future make some popout interface to capture what needs to be set
 | 
			
		||||
    # https://playwright.dev/python/docs/api/class-keyboard
 | 
			
		||||
    def action_press_enter(self, selector, value):
 | 
			
		||||
        self.page.keyboard.press("Enter", delay=randint(200, 500))
 | 
			
		||||
    async def action_press_enter(self, selector, value):
 | 
			
		||||
        await self.page.keyboard.press("Enter", delay=randint(200, 500))
 | 
			
		||||
            
 | 
			
		||||
 | 
			
		||||
    def action_press_page_up(self, selector, value):
 | 
			
		||||
        self.page.keyboard.press("PageUp", delay=randint(200, 500))
 | 
			
		||||
    async def action_press_page_up(self, selector, value):
 | 
			
		||||
        await self.page.keyboard.press("PageUp", delay=randint(200, 500))
 | 
			
		||||
 | 
			
		||||
    def action_press_page_down(self, selector, value):
 | 
			
		||||
        self.page.keyboard.press("PageDown", delay=randint(200, 500))
 | 
			
		||||
    async def action_press_page_down(self, selector, value):
 | 
			
		||||
        await self.page.keyboard.press("PageDown", delay=randint(200, 500))
 | 
			
		||||
 | 
			
		||||
    def action_check_checkbox(self, selector, value):
 | 
			
		||||
        self.page.locator(selector).check(timeout=self.action_timeout)
 | 
			
		||||
    async def action_check_checkbox(self, selector, value):
 | 
			
		||||
        if not selector:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
    def action_uncheck_checkbox(self, selector, value):
 | 
			
		||||
        self.page.locator(selector).uncheck(timeout=self.action_timeout)
 | 
			
		||||
        await self.page.locator(selector).check(timeout=self.action_timeout)
 | 
			
		||||
 | 
			
		||||
    def action_remove_elements(self, selector, value):
 | 
			
		||||
    async def action_uncheck_checkbox(self, selector, value):
 | 
			
		||||
        if not selector:
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        await self.page.locator(selector).uncheck(timeout=self.action_timeout)
 | 
			
		||||
            
 | 
			
		||||
 | 
			
		||||
    async def action_remove_elements(self, selector, value):
 | 
			
		||||
        """Removes all elements matching the given selector from the DOM."""
 | 
			
		||||
        self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
 | 
			
		||||
        if not selector:
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        await self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
 | 
			
		||||
 | 
			
		||||
    def action_make_all_child_elements_visible(self, selector, value):
 | 
			
		||||
    async def action_make_all_child_elements_visible(self, selector, value):
 | 
			
		||||
        """Recursively makes all child elements inside the given selector fully visible."""
 | 
			
		||||
        self.page.locator(selector).locator("*").evaluate_all("""
 | 
			
		||||
        if not selector:
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        await self.page.locator(selector).locator("*").evaluate_all("""
 | 
			
		||||
            els => els.forEach(el => {
 | 
			
		||||
                el.style.display = 'block';   // Forces it to be displayed
 | 
			
		||||
                el.style.visibility = 'visible';   // Ensures it's not hidden
 | 
			
		||||
@@ -224,7 +308,9 @@ class browsersteps_live_ui(steppable_browser_interface):
 | 
			
		||||
    # bump and kill this if idle after X sec
 | 
			
		||||
    age_start = 0
 | 
			
		||||
    headers = {}
 | 
			
		||||
 | 
			
		||||
    # Track if resources are properly cleaned up
 | 
			
		||||
    _is_cleaned_up = False
 | 
			
		||||
    
 | 
			
		||||
    # use a special driver, maybe locally etc
 | 
			
		||||
    command_executor = os.getenv(
 | 
			
		||||
        "PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL"
 | 
			
		||||
@@ -243,17 +329,23 @@ class browsersteps_live_ui(steppable_browser_interface):
 | 
			
		||||
        self.age_start = time.time()
 | 
			
		||||
        self.playwright_browser = playwright_browser
 | 
			
		||||
        self.start_url = start_url
 | 
			
		||||
        if self.context is None:
 | 
			
		||||
            self.connect(proxy=proxy)
 | 
			
		||||
        self._is_cleaned_up = False
 | 
			
		||||
        self.proxy = proxy
 | 
			
		||||
        # Note: connect() is now async and must be called separately
 | 
			
		||||
 | 
			
		||||
    def __del__(self):
 | 
			
		||||
        # Ensure cleanup happens if object is garbage collected
 | 
			
		||||
        # Note: cleanup is now async, so we can only mark as cleaned up here
 | 
			
		||||
        self._is_cleaned_up = True
 | 
			
		||||
 | 
			
		||||
    # Connect and setup a new context
 | 
			
		||||
    def connect(self, proxy=None):
 | 
			
		||||
    async def connect(self, proxy=None):
 | 
			
		||||
        # Should only get called once - test that
 | 
			
		||||
        keep_open = 1000 * 60 * 5
 | 
			
		||||
        now = time.time()
 | 
			
		||||
 | 
			
		||||
        # @todo handle multiple contexts, bind a unique id from the browser on each req?
 | 
			
		||||
        self.context = self.playwright_browser.new_context(
 | 
			
		||||
        self.context = await self.playwright_browser.new_context(
 | 
			
		||||
            accept_downloads=False,  # Should never be needed
 | 
			
		||||
            bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others
 | 
			
		||||
            extra_http_headers=self.headers,
 | 
			
		||||
@@ -264,69 +356,142 @@ class browsersteps_live_ui(steppable_browser_interface):
 | 
			
		||||
            user_agent=manage_user_agent(headers=self.headers),
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        self.page = self.context.new_page()
 | 
			
		||||
        self.page = await self.context.new_page()
 | 
			
		||||
 | 
			
		||||
        # self.page.set_default_navigation_timeout(keep_open)
 | 
			
		||||
        self.page.set_default_timeout(keep_open)
 | 
			
		||||
        # @todo probably this doesnt work
 | 
			
		||||
        self.page.on(
 | 
			
		||||
            "close",
 | 
			
		||||
            self.mark_as_closed,
 | 
			
		||||
        )
 | 
			
		||||
        # Set event handlers
 | 
			
		||||
        self.page.on("close", self.mark_as_closed)
 | 
			
		||||
        # Listen for all console events and handle errors
 | 
			
		||||
        self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
 | 
			
		||||
 | 
			
		||||
        logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
 | 
			
		||||
        self.page.wait_for_timeout(1 * 1000)
 | 
			
		||||
 | 
			
		||||
        await self.page.wait_for_timeout(1 * 1000)
 | 
			
		||||
 | 
			
		||||
    def mark_as_closed(self):
 | 
			
		||||
        logger.debug("Page closed, cleaning up..")
 | 
			
		||||
        # Note: This is called from a sync context (event handler)
 | 
			
		||||
        # so we'll just mark as cleaned up and let __del__ handle the rest
 | 
			
		||||
        self._is_cleaned_up = True
 | 
			
		||||
 | 
			
		||||
    async def cleanup(self):
 | 
			
		||||
        """Properly clean up all resources to prevent memory leaks"""
 | 
			
		||||
        if self._is_cleaned_up:
 | 
			
		||||
            return
 | 
			
		||||
            
 | 
			
		||||
        logger.debug("Cleaning up browser steps resources")
 | 
			
		||||
        
 | 
			
		||||
        # Clean up page
 | 
			
		||||
        if hasattr(self, 'page') and self.page is not None:
 | 
			
		||||
            try:
 | 
			
		||||
                # Force garbage collection before closing
 | 
			
		||||
                await self.page.request_gc()
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.debug(f"Error during page garbage collection: {str(e)}")
 | 
			
		||||
                
 | 
			
		||||
            try:
 | 
			
		||||
                # Remove event listeners before closing
 | 
			
		||||
                self.page.remove_listener("close", self.mark_as_closed)
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.debug(f"Error removing event listeners: {str(e)}")
 | 
			
		||||
                
 | 
			
		||||
            try:
 | 
			
		||||
                await self.page.close()
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.debug(f"Error closing page: {str(e)}")
 | 
			
		||||
            
 | 
			
		||||
            self.page = None
 | 
			
		||||
 | 
			
		||||
        # Clean up context
 | 
			
		||||
        if hasattr(self, 'context') and self.context is not None:
 | 
			
		||||
            try:
 | 
			
		||||
                await self.context.close()
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.debug(f"Error closing context: {str(e)}")
 | 
			
		||||
            
 | 
			
		||||
            self.context = None
 | 
			
		||||
            
 | 
			
		||||
        self._is_cleaned_up = True
 | 
			
		||||
        logger.debug("Browser steps resources cleanup complete")
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def has_expired(self):
 | 
			
		||||
        if not self.page:
 | 
			
		||||
        if not self.page or self._is_cleaned_up:
 | 
			
		||||
            return True
 | 
			
		||||
        
 | 
			
		||||
        # Check if session has expired based on age
 | 
			
		||||
        max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10))  # Default 10 minutes
 | 
			
		||||
        if (time.time() - self.age_start) > max_age_seconds:
 | 
			
		||||
            logger.debug(f"Browser steps session expired after {max_age_seconds} seconds")
 | 
			
		||||
            return True
 | 
			
		||||
            
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def get_current_state(self):
 | 
			
		||||
    async def get_current_state(self):
 | 
			
		||||
        """Return the screenshot and interactive elements mapping, generally always called after action_()"""
 | 
			
		||||
        import importlib.resources
 | 
			
		||||
        import json
 | 
			
		||||
        # because we for now only run browser steps in playwright mode (not puppeteer mode)
 | 
			
		||||
        from changedetectionio.content_fetchers.playwright import capture_full_page
 | 
			
		||||
        from changedetectionio.content_fetchers.playwright import capture_full_page_async
 | 
			
		||||
 | 
			
		||||
        # Safety check - don't proceed if resources are cleaned up
 | 
			
		||||
        if self._is_cleaned_up or self.page is None:
 | 
			
		||||
            logger.warning("Attempted to get current state after cleanup")
 | 
			
		||||
            return (None, None)
 | 
			
		||||
 | 
			
		||||
        xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
 | 
			
		||||
 | 
			
		||||
        now = time.time()
 | 
			
		||||
        self.page.wait_for_timeout(1 * 1000)
 | 
			
		||||
        await self.page.wait_for_timeout(1 * 1000)
 | 
			
		||||
 | 
			
		||||
        screenshot = capture_full_page(page=self.page)
 | 
			
		||||
        screenshot = None
 | 
			
		||||
        xpath_data = None
 | 
			
		||||
        
 | 
			
		||||
        try:
 | 
			
		||||
            # Get screenshot first
 | 
			
		||||
            screenshot = await capture_full_page_async(page=self.page)
 | 
			
		||||
            if not screenshot:
 | 
			
		||||
                logger.error("No screenshot was retrieved :((")
 | 
			
		||||
 | 
			
		||||
        logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
 | 
			
		||||
            logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
 | 
			
		||||
 | 
			
		||||
        now = time.time()
 | 
			
		||||
        self.page.evaluate("var include_filters=''")
 | 
			
		||||
        # Go find the interactive elements
 | 
			
		||||
        # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
 | 
			
		||||
            # Then get interactive elements
 | 
			
		||||
            now = time.time()
 | 
			
		||||
            await self.page.evaluate("var include_filters=''")
 | 
			
		||||
            await self.page.request_gc()
 | 
			
		||||
 | 
			
		||||
        self.page.request_gc()
 | 
			
		||||
            scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
 | 
			
		||||
 | 
			
		||||
        scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
 | 
			
		||||
            MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
 | 
			
		||||
            xpath_data = json.loads(await self.page.evaluate(xpath_element_js, {
 | 
			
		||||
                "visualselector_xpath_selectors": scan_elements,
 | 
			
		||||
                "max_height": MAX_TOTAL_HEIGHT
 | 
			
		||||
            }))
 | 
			
		||||
            await self.page.request_gc()
 | 
			
		||||
 | 
			
		||||
        MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
 | 
			
		||||
        xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
 | 
			
		||||
            "visualselector_xpath_selectors": scan_elements,
 | 
			
		||||
            "max_height": MAX_TOTAL_HEIGHT
 | 
			
		||||
        }))
 | 
			
		||||
        self.page.request_gc()
 | 
			
		||||
            # Sort elements by size
 | 
			
		||||
            xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
 | 
			
		||||
            logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
 | 
			
		||||
            
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.error(f"Error getting current state: {str(e)}")
 | 
			
		||||
            # If the page has navigated (common with logins) then the context is destroyed on navigation, continue
 | 
			
		||||
            # I'm not sure that this is required anymore because we have the "expect navigation wrapper" at the top
 | 
			
		||||
            if "Execution context was destroyed" in str(e):
 | 
			
		||||
                logger.debug("Execution context was destroyed, most likely because of navigation, continuing...")
 | 
			
		||||
            pass
 | 
			
		||||
 | 
			
		||||
        # So the JS will find the smallest one first
 | 
			
		||||
        xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
 | 
			
		||||
        logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
 | 
			
		||||
 | 
			
		||||
        # playwright._impl._api_types.Error: Browser closed.
 | 
			
		||||
        # @todo show some countdown timer?
 | 
			
		||||
            # Attempt recovery - force garbage collection
 | 
			
		||||
            try:
 | 
			
		||||
                await self.page.request_gc()
 | 
			
		||||
            except:
 | 
			
		||||
                pass
 | 
			
		||||
        
 | 
			
		||||
        # Request garbage collection one final time
 | 
			
		||||
        try:
 | 
			
		||||
            await self.page.request_gc()
 | 
			
		||||
        except:
 | 
			
		||||
            pass
 | 
			
		||||
            
 | 
			
		||||
        return (screenshot, xpath_data)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,17 +0,0 @@
 | 
			
		||||
from playwright.sync_api import PlaywrightContextManager
 | 
			
		||||
 | 
			
		||||
# So playwright wants to run as a context manager, but we do something horrible and hacky
 | 
			
		||||
# we are holding the session open for as long as possible, then shutting it down, and opening a new one
 | 
			
		||||
# So it means we don't get to use PlaywrightContextManager' __enter__ __exit__
 | 
			
		||||
# To work around this, make goodbye() act the same as the __exit__()
 | 
			
		||||
#
 | 
			
		||||
# But actually I think this is because the context is opened correctly with __enter__() but we timeout the connection
 | 
			
		||||
# then theres some lock condition where we cant destroy it without it hanging
 | 
			
		||||
 | 
			
		||||
class c_PlaywrightContextManager(PlaywrightContextManager):
 | 
			
		||||
 | 
			
		||||
    def goodbye(self) -> None:
 | 
			
		||||
        self.__exit__()
 | 
			
		||||
 | 
			
		||||
def c_sync_playwright() -> PlaywrightContextManager:
 | 
			
		||||
    return c_PlaywrightContextManager()
 | 
			
		||||
@@ -33,7 +33,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
    def long_task(uuid, preferred_proxy):
 | 
			
		||||
        import time
 | 
			
		||||
        from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions
 | 
			
		||||
        from changedetectionio.safe_jinja import render as jinja_render
 | 
			
		||||
        from changedetectionio.jinja2_custom import render as jinja_render
 | 
			
		||||
 | 
			
		||||
        status = {'status': '', 'length': 0, 'text': ''}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,7 @@
 | 
			
		||||
from flask import Blueprint, request, redirect, url_for, flash, render_template
 | 
			
		||||
from changedetectionio.store import ChangeDetectionStore
 | 
			
		||||
from changedetectionio.auth_decorator import login_optionally_required
 | 
			
		||||
from changedetectionio import worker_handler
 | 
			
		||||
from changedetectionio.blueprint.imports.importer import (
 | 
			
		||||
    import_url_list, 
 | 
			
		||||
    import_distill_io_json, 
 | 
			
		||||
@@ -24,7 +25,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
                importer_handler = import_url_list()
 | 
			
		||||
                importer_handler.run(data=request.values.get('urls'), flash=flash, datastore=datastore, processor=request.values.get('processor', 'text_json_diff'))
 | 
			
		||||
                for uuid in importer_handler.new_uuids:
 | 
			
		||||
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
                    worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
 | 
			
		||||
                if len(importer_handler.remaining_data) == 0:
 | 
			
		||||
                    return redirect(url_for('watchlist.index'))
 | 
			
		||||
@@ -37,7 +38,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
                d_importer = import_distill_io_json()
 | 
			
		||||
                d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore)
 | 
			
		||||
                for uuid in d_importer.new_uuids:
 | 
			
		||||
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
                    worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
 | 
			
		||||
            # XLSX importer
 | 
			
		||||
            if request.files and request.files.get('xlsx_file'):
 | 
			
		||||
@@ -60,7 +61,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
                    w_importer.run(data=file, flash=flash, datastore=datastore)
 | 
			
		||||
 | 
			
		||||
                for uuid in w_importer.new_uuids:
 | 
			
		||||
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
                    worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
 | 
			
		||||
        # Could be some remaining, or we could be on GET
 | 
			
		||||
        form = forms.importForm(formdata=request.form if request.method == 'POST' else None)
 | 
			
		||||
 
 | 
			
		||||
@@ -4,6 +4,7 @@ from flask import Blueprint, flash, redirect, url_for
 | 
			
		||||
from flask_login import login_required
 | 
			
		||||
from changedetectionio.store import ChangeDetectionStore
 | 
			
		||||
from changedetectionio import queuedWatchMetaData
 | 
			
		||||
from changedetectionio import worker_handler
 | 
			
		||||
from queue import PriorityQueue
 | 
			
		||||
 | 
			
		||||
PRICE_DATA_TRACK_ACCEPT = 'accepted'
 | 
			
		||||
@@ -19,7 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue
 | 
			
		||||
        datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT
 | 
			
		||||
        datastore.data['watching'][uuid]['processor'] = 'restock_diff'
 | 
			
		||||
        datastore.data['watching'][uuid].clear_watch()
 | 
			
		||||
        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
        worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
        return redirect(url_for("watchlist.index"))
 | 
			
		||||
 | 
			
		||||
    @login_required
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,6 @@
 | 
			
		||||
 | 
			
		||||
from changedetectionio.safe_jinja import render as jinja_render
 | 
			
		||||
from changedetectionio.jinja2_custom import render as jinja_render
 | 
			
		||||
from changedetectionio.notification.handler import apply_service_tweaks
 | 
			
		||||
from changedetectionio.store import ChangeDetectionStore
 | 
			
		||||
from feedgen.feed import FeedGenerator
 | 
			
		||||
from flask import Blueprint, make_response, request, url_for, redirect
 | 
			
		||||
@@ -108,18 +109,25 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
 | 
			
		||||
                fe.link(link=diff_link)
 | 
			
		||||
 | 
			
		||||
                # @todo watch should be a getter - watch.get('title') (internally if URL else..)
 | 
			
		||||
                # Same logic as watch-overview.html
 | 
			
		||||
                if datastore.data['settings']['application']['ui'].get('use_page_title_in_list') or watch.get('use_page_title_in_list'):
 | 
			
		||||
                    watch_label = watch.label
 | 
			
		||||
                else:
 | 
			
		||||
                    watch_label = watch.get('url')
 | 
			
		||||
 | 
			
		||||
                watch_title = watch.get('title') if watch.get('title') else watch.get('url')
 | 
			
		||||
                fe.title(title=watch_title)
 | 
			
		||||
                fe.title(title=watch_label)
 | 
			
		||||
                try:
 | 
			
		||||
 | 
			
		||||
                    html_diff = diff.render_diff(previous_version_file_contents=watch.get_history_snapshot(dates[-2]),
 | 
			
		||||
                                                 newest_version_file_contents=watch.get_history_snapshot(dates[-1]),
 | 
			
		||||
                                                 include_equal=False,
 | 
			
		||||
                                                 line_feed_sep="<br>",
 | 
			
		||||
                                                 html_colour=html_colour_enable
 | 
			
		||||
                                                 line_feed_sep="<br>"
 | 
			
		||||
                                                 )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
                    requested_output_format = 'htmlcolor' if html_colour_enable else 'html'
 | 
			
		||||
                    html_diff = apply_service_tweaks(url='', n_body=html_diff, n_title=None, requested_output_format=requested_output_format)
 | 
			
		||||
 | 
			
		||||
                except FileNotFoundError as e:
 | 
			
		||||
                    html_diff = f"History snapshot file for watch {watch.get('uuid')}@{watch.last_changed} - '{watch.get('title')} not found."
 | 
			
		||||
 | 
			
		||||
@@ -127,7 +135,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
                # @todo User could decide if <link> goes to the diff page, or to the watch link
 | 
			
		||||
                rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n"
 | 
			
		||||
 | 
			
		||||
                content = jinja_render(template_str=rss_template, watch_title=watch_title, html_diff=html_diff, watch_url=watch.link)
 | 
			
		||||
                content = jinja_render(template_str=rss_template, watch_title=watch_label, html_diff=html_diff, watch_url=watch.link)
 | 
			
		||||
 | 
			
		||||
                # Out of range chars could also break feedgen
 | 
			
		||||
                if scan_invalid_chars_in_rss(content):
 | 
			
		||||
 
 | 
			
		||||
@@ -67,7 +67,32 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
                    del (app_update['password'])
 | 
			
		||||
 | 
			
		||||
                datastore.data['settings']['application'].update(app_update)
 | 
			
		||||
                
 | 
			
		||||
                # Handle dynamic worker count adjustment
 | 
			
		||||
                old_worker_count = datastore.data['settings']['requests'].get('workers', 1)
 | 
			
		||||
                new_worker_count = form.data['requests'].get('workers', 1)
 | 
			
		||||
                
 | 
			
		||||
                datastore.data['settings']['requests'].update(form.data['requests'])
 | 
			
		||||
                
 | 
			
		||||
                # Adjust worker count if it changed
 | 
			
		||||
                if new_worker_count != old_worker_count:
 | 
			
		||||
                    from changedetectionio import worker_handler
 | 
			
		||||
                    from changedetectionio.flask_app import update_q, notification_q, app, datastore as ds
 | 
			
		||||
                    
 | 
			
		||||
                    result = worker_handler.adjust_async_worker_count(
 | 
			
		||||
                        new_count=new_worker_count,
 | 
			
		||||
                        update_q=update_q,
 | 
			
		||||
                        notification_q=notification_q,
 | 
			
		||||
                        app=app,
 | 
			
		||||
                        datastore=ds
 | 
			
		||||
                    )
 | 
			
		||||
                    
 | 
			
		||||
                    if result['status'] == 'success':
 | 
			
		||||
                        flash(f"Worker count adjusted: {result['message']}", 'notice')
 | 
			
		||||
                    elif result['status'] == 'not_supported':
 | 
			
		||||
                        flash("Dynamic worker adjustment not supported for sync workers", 'warning')
 | 
			
		||||
                    elif result['status'] == 'error':
 | 
			
		||||
                        flash(f"Error adjusting workers: {result['message']}", 'error')
 | 
			
		||||
 | 
			
		||||
                if not os.getenv("SALTED_PASS", False) and len(form.application.form.password.encrypted_password):
 | 
			
		||||
                    datastore.data['settings']['application']['password'] = form.application.form.password.encrypted_password
 | 
			
		||||
@@ -94,7 +119,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
                                hide_remove_pass=os.getenv("SALTED_PASS", False),
 | 
			
		||||
                                min_system_recheck_seconds=int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3)),
 | 
			
		||||
                                settings_application=datastore.data['settings']['application'],
 | 
			
		||||
                                timezone_default_config=datastore.data['settings']['application'].get('timezone'),
 | 
			
		||||
                                timezone_default_config=datastore.data['settings']['application'].get('scheduler_timezone_default'),
 | 
			
		||||
                                utc_time=utc_time,
 | 
			
		||||
                                )
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,7 +1,7 @@
 | 
			
		||||
{% extends 'base.html' %}
 | 
			
		||||
 | 
			
		||||
{% block content %}
 | 
			
		||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form %}
 | 
			
		||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, render_ternary_field, render_fieldlist_with_inline_errors %}
 | 
			
		||||
{% from '_common_fields.html' import render_common_settings_form %}
 | 
			
		||||
<script>
 | 
			
		||||
    const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="global-settings")}}";
 | 
			
		||||
@@ -72,33 +72,23 @@
 | 
			
		||||
                        <span class="pure-form-message-inline">Allow access to view watch diff page when password is enabled (Good for sharing the diff page)
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.application.form.pager_size) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.application.form.rss_content_format) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.application.form.extract_title_as_title) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Note: This will automatically apply to all existing watches.</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                {% if form.requests.proxy %}
 | 
			
		||||
                    <div class="pure-control-group inline-radio">
 | 
			
		||||
                        {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
 | 
			
		||||
                        <span class="pure-form-message-inline">
 | 
			
		||||
                        Choose a default proxy for all watches
 | 
			
		||||
                        </span>
 | 
			
		||||
                    <div class="grey-form-border">
 | 
			
		||||
                        <div class="pure-control-group">
 | 
			
		||||
                            {{ render_checkbox_field(form.application.form.rss_hide_muted_watches) }}
 | 
			
		||||
                        </div>
 | 
			
		||||
                        <div class="pure-control-group">
 | 
			
		||||
                            {{ render_field(form.application.form.rss_content_format) }}
 | 
			
		||||
                            <span class="pure-form-message-inline">Love RSS? Does your reader support HTML? Set it here</span>
 | 
			
		||||
                        </div>
 | 
			
		||||
                        <div class="pure-control-group">
 | 
			
		||||
                            {{ render_checkbox_field(form.application.form.rss_reader_mode) }}
 | 
			
		||||
                            <span class="pure-form-message-inline">Transforms RSS/RDF feed watches into beautiful text only</span>
 | 
			
		||||
                        </div>
 | 
			
		||||
                    </div>
 | 
			
		||||
                {% endif %}
 | 
			
		||||
                </fieldset>
 | 
			
		||||
            </div>
 | 
			
		||||
 | 
			
		||||
@@ -135,6 +125,16 @@
 | 
			
		||||
                        {{ render_field(form.application.form.webdriver_delay) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                </fieldset>
 | 
			
		||||
                <div class="pure-control-group">
 | 
			
		||||
                    {{ render_field(form.requests.form.workers) }}
 | 
			
		||||
                    {% set worker_info = get_worker_status_info() %}
 | 
			
		||||
                    <span class="pure-form-message-inline">Number of concurrent workers to process watches. More workers = faster processing but higher memory usage.<br>
 | 
			
		||||
                    Currently running: <strong>{{ worker_info.count }}</strong> operational {{ worker_info.type }} workers{% if worker_info.active_workers > 0 %} ({{ worker_info.active_workers }} actively processing){% endif %}.</span>
 | 
			
		||||
                </div>
 | 
			
		||||
                <div class="pure-control-group">
 | 
			
		||||
                    {{ render_field(form.requests.form.timeout) }}
 | 
			
		||||
                    <span class="pure-form-message-inline">For regular plain requests (not chrome based), maximum number of seconds until timeout, 1-999.<br>
 | 
			
		||||
                </div>
 | 
			
		||||
                <div class="pure-control-group inline-radio">
 | 
			
		||||
                    {{ render_field(form.requests.form.default_ua) }}
 | 
			
		||||
                    <span class="pure-form-message-inline">
 | 
			
		||||
@@ -193,11 +193,17 @@ nav
 | 
			
		||||
                        </ul>
 | 
			
		||||
                     </span>
 | 
			
		||||
                    </fieldset>
 | 
			
		||||
                    <fieldset class="pure-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.application.form.strip_ignored_lines) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Remove any text that appears in the "Ignore text" from the output (otherwise its just ignored for change-detection)<br>
 | 
			
		||||
                        <i>Note:</i> Changing this will change the status of your existing watches, possibly trigger alerts etc.
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </fieldset>
 | 
			
		||||
           </div>
 | 
			
		||||
 | 
			
		||||
            <div class="tab-pane-inner" id="api">
 | 
			
		||||
                <h4>API Access</h4>
 | 
			
		||||
                <p>Drive your changedetection.io via API, More about <a href="https://github.com/dgtlmoon/changedetection.io/wiki/API-Reference">API access here</a></p>
 | 
			
		||||
                <p>Drive your changedetection.io via API, More about <a href="https://changedetection.io/docs/api_v1/index.html">API access and examples here</a>.</p>
 | 
			
		||||
 | 
			
		||||
                <div class="pure-control-group">
 | 
			
		||||
                    {{ render_checkbox_field(form.application.form.api_access_token_enabled) }}
 | 
			
		||||
@@ -232,11 +238,9 @@ nav
 | 
			
		||||
                    <p><strong>UTC Time & Date from Server:</strong> <span id="utc-time" >{{ utc_time }}</span></p>
 | 
			
		||||
                    <p><strong>Local Time & Date in Browser:</strong> <span class="local-time" data-utc="{{ utc_time }}"></span></p>
 | 
			
		||||
                    <p>
 | 
			
		||||
                       {{ render_field(form.application.form.timezone) }}
 | 
			
		||||
                       {{ render_field(form.application.form.scheduler_timezone_default) }}
 | 
			
		||||
                        <datalist id="timezones" style="display: none;">
 | 
			
		||||
                            {% for tz_name in available_timezones %}
 | 
			
		||||
                                <option value="{{ tz_name }}">{{ tz_name }}</option>
 | 
			
		||||
                            {% endfor %}
 | 
			
		||||
                            {%- for timezone in available_timezones -%}<option value="{{ timezone }}">{{ timezone }}</option>{%- endfor -%}
 | 
			
		||||
                        </datalist>
 | 
			
		||||
                    </p>
 | 
			
		||||
                </div>
 | 
			
		||||
@@ -246,6 +250,22 @@ nav
 | 
			
		||||
                    {{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
 | 
			
		||||
                    <span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
 | 
			
		||||
                </div>
 | 
			
		||||
                <div class="pure-control-group">
 | 
			
		||||
                    {{ render_checkbox_field(form.application.form.ui.form.socket_io_enabled, class="socket_io_enabled") }}
 | 
			
		||||
                    <span class="pure-form-message-inline">Realtime UI Updates Enabled - (Restart required if this is changed)</span>
 | 
			
		||||
                </div>
 | 
			
		||||
                <div class="pure-control-group">
 | 
			
		||||
                    {{ render_checkbox_field(form.application.form.ui.form.favicons_enabled, class="") }}
 | 
			
		||||
                    <span class="pure-form-message-inline">Enable or Disable Favicons next to the watch list</span>
 | 
			
		||||
                </div>
 | 
			
		||||
                <div class="pure-control-group">
 | 
			
		||||
                    {{ render_checkbox_field(form.application.form.ui.use_page_title_in_list) }}
 | 
			
		||||
                </div>
 | 
			
		||||
                <div class="pure-control-group">
 | 
			
		||||
                    {{ render_field(form.application.form.pager_size) }}
 | 
			
		||||
                    <span class="pure-form-message-inline">Number of items per page in the watch overview list, 0 to disable.</span>
 | 
			
		||||
                </div>
 | 
			
		||||
 | 
			
		||||
            </div>
 | 
			
		||||
            <div class="tab-pane-inner" id="proxies">
 | 
			
		||||
                <div id="recommended-proxy">
 | 
			
		||||
@@ -294,23 +314,33 @@ nav
 | 
			
		||||
               <p><strong>Tip</strong>: "Residential" and "Mobile" proxy type can be more successfull than "Data Center" for blocked websites.
 | 
			
		||||
 | 
			
		||||
                <div class="pure-control-group" id="extra-proxies-setting">
 | 
			
		||||
                {{ render_field(form.requests.form.extra_proxies) }}
 | 
			
		||||
                {{ render_fieldlist_with_inline_errors(form.requests.form.extra_proxies) }}
 | 
			
		||||
                <span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span><br>
 | 
			
		||||
                <span class="pure-form-message-inline">SOCKS5 proxies with authentication are only supported with 'plain requests' fetcher, for other fetchers you should whitelist the IP access instead</span>
 | 
			
		||||
                {% if form.requests.proxy %}
 | 
			
		||||
                <div>
 | 
			
		||||
                <br>
 | 
			
		||||
                    <div class="inline-radio">
 | 
			
		||||
                        {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Choose a default proxy for all watches</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                </div>
 | 
			
		||||
                {% endif %}
 | 
			
		||||
                </div>
 | 
			
		||||
                <div class="pure-control-group" id="extra-browsers-setting">
 | 
			
		||||
                    <p>
 | 
			
		||||
                    <span class="pure-form-message-inline"><i>Extra Browsers</i> can be attached to further defeat CAPTCHA's on websites that are particularly hard to scrape.</span><br>
 | 
			
		||||
                    <span class="pure-form-message-inline">Simply paste the connection address into the box, <a href="https://changedetection.io/tutorial/using-bright-datas-scraping-browser-pass-captchas-and-other-protection-when-monitoring">More instructions and examples here</a> </span>
 | 
			
		||||
                    </p>
 | 
			
		||||
                    {{ render_field(form.requests.form.extra_browsers) }}
 | 
			
		||||
                    {{ render_fieldlist_with_inline_errors(form.requests.form.extra_browsers) }}
 | 
			
		||||
                </div>
 | 
			
		||||
            
 | 
			
		||||
            </div>
 | 
			
		||||
            <div id="actions">
 | 
			
		||||
                <div class="pure-control-group">
 | 
			
		||||
                    {{ render_button(form.save_button) }}
 | 
			
		||||
                    <a href="{{url_for('watchlist.index')}}" class="pure-button button-small button-cancel">Back</a>
 | 
			
		||||
                    <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-small button-error">Clear Snapshot History</a>
 | 
			
		||||
                    <a href="{{url_for('watchlist.index')}}" class="pure-button button-cancel">Back</a>
 | 
			
		||||
                    <a href="{{url_for('ui.clear_all_history')}}" class="pure-button button-error">Clear Snapshot History</a>
 | 
			
		||||
                </div>
 | 
			
		||||
            </div>
 | 
			
		||||
        </form>
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
{% extends 'base.html' %}
 | 
			
		||||
{% block content %}
 | 
			
		||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button %}
 | 
			
		||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_ternary_field %}
 | 
			
		||||
{% from '_common_fields.html' import render_common_settings_form %}
 | 
			
		||||
<script>
 | 
			
		||||
    const notification_base_url="{{url_for('ui.ui_notification.ajax_callback_send_notification_test', mode="group-settings")}}";
 | 
			
		||||
@@ -64,7 +64,7 @@
 | 
			
		||||
            <div class="tab-pane-inner" id="notifications">
 | 
			
		||||
                <fieldset>
 | 
			
		||||
                    <div  class="pure-control-group inline-radio">
 | 
			
		||||
                      {{ render_checkbox_field(form.notification_muted) }}
 | 
			
		||||
                      {{ render_ternary_field(form.notification_muted, BooleanField=True) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    {% if 1 %}
 | 
			
		||||
                    <div class="pure-control-group inline-radio">
 | 
			
		||||
 
 | 
			
		||||
@@ -10,7 +10,7 @@
 | 
			
		||||
            <legend>Add a new organisational tag</legend>
 | 
			
		||||
            <div id="watch-add-wrapper-zone">
 | 
			
		||||
                <div>
 | 
			
		||||
                    {{ render_simple_field(form.name, placeholder="watch label / tag") }}
 | 
			
		||||
                    {{ render_simple_field(form.name, placeholder="Watch group / tag") }}
 | 
			
		||||
                </div>
 | 
			
		||||
                <div>
 | 
			
		||||
                    {{ render_simple_field(form.save_button, title="Save" ) }}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,14 +1,112 @@
 | 
			
		||||
import time
 | 
			
		||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, session
 | 
			
		||||
from loguru import logger
 | 
			
		||||
from functools import wraps
 | 
			
		||||
 | 
			
		||||
from changedetectionio.store import ChangeDetectionStore
 | 
			
		||||
from changedetectionio.blueprint.ui.edit import construct_blueprint as construct_edit_blueprint
 | 
			
		||||
from changedetectionio.blueprint.ui.notification import construct_blueprint as construct_notification_blueprint
 | 
			
		||||
from changedetectionio.blueprint.ui.views import construct_blueprint as construct_views_blueprint
 | 
			
		||||
 | 
			
		||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_update_threads, queuedWatchMetaData):
 | 
			
		||||
def _handle_operations(op, uuids, datastore, worker_handler, update_q, queuedWatchMetaData, watch_check_update, extra_data=None, emit_flash=True):
 | 
			
		||||
    from flask import request, flash
 | 
			
		||||
 | 
			
		||||
    if op == 'delete':
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            if datastore.data['watching'].get(uuid):
 | 
			
		||||
                datastore.delete(uuid)
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches deleted")
 | 
			
		||||
 | 
			
		||||
    elif op == 'pause':
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            if datastore.data['watching'].get(uuid):
 | 
			
		||||
                datastore.data['watching'][uuid]['paused'] = True
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches paused")
 | 
			
		||||
 | 
			
		||||
    elif op == 'unpause':
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            if datastore.data['watching'].get(uuid):
 | 
			
		||||
                datastore.data['watching'][uuid.strip()]['paused'] = False
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches unpaused")
 | 
			
		||||
 | 
			
		||||
    elif (op == 'mark-viewed'):
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            if datastore.data['watching'].get(uuid):
 | 
			
		||||
                datastore.set_last_viewed(uuid, int(time.time()))
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches updated")
 | 
			
		||||
 | 
			
		||||
    elif (op == 'mute'):
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            if datastore.data['watching'].get(uuid):
 | 
			
		||||
                datastore.data['watching'][uuid]['notification_muted'] = True
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches muted")
 | 
			
		||||
 | 
			
		||||
    elif (op == 'unmute'):
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            if datastore.data['watching'].get(uuid):
 | 
			
		||||
                datastore.data['watching'][uuid]['notification_muted'] = False
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches un-muted")
 | 
			
		||||
 | 
			
		||||
    elif (op == 'recheck'):
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            if datastore.data['watching'].get(uuid):
 | 
			
		||||
                # Recheck and require a full reprocessing
 | 
			
		||||
                worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches queued for rechecking")
 | 
			
		||||
 | 
			
		||||
    elif (op == 'clear-errors'):
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            if datastore.data['watching'].get(uuid):
 | 
			
		||||
                datastore.data['watching'][uuid]["last_error"] = False
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches errors cleared")
 | 
			
		||||
 | 
			
		||||
    elif (op == 'clear-history'):
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            if datastore.data['watching'].get(uuid):
 | 
			
		||||
                datastore.clear_watch_history(uuid)
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches cleared/reset.")
 | 
			
		||||
 | 
			
		||||
    elif (op == 'notification-default'):
 | 
			
		||||
        from changedetectionio.notification import (
 | 
			
		||||
            USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
 | 
			
		||||
        )
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            if datastore.data['watching'].get(uuid):
 | 
			
		||||
                datastore.data['watching'][uuid]['notification_title'] = None
 | 
			
		||||
                datastore.data['watching'][uuid]['notification_body'] = None
 | 
			
		||||
                datastore.data['watching'][uuid]['notification_urls'] = []
 | 
			
		||||
                datastore.data['watching'][uuid]['notification_format'] = USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches set to use default notification settings")
 | 
			
		||||
 | 
			
		||||
    elif (op == 'assign-tag'):
 | 
			
		||||
        op_extradata = extra_data
 | 
			
		||||
        if op_extradata:
 | 
			
		||||
            tag_uuid = datastore.add_tag(title=op_extradata)
 | 
			
		||||
            if op_extradata and tag_uuid:
 | 
			
		||||
                for uuid in uuids:
 | 
			
		||||
                    if datastore.data['watching'].get(uuid):
 | 
			
		||||
                        # Bug in old versions caused by bad edit page/tag handler
 | 
			
		||||
                        if isinstance(datastore.data['watching'][uuid]['tags'], str):
 | 
			
		||||
                            datastore.data['watching'][uuid]['tags'] = []
 | 
			
		||||
 | 
			
		||||
                        datastore.data['watching'][uuid]['tags'].append(tag_uuid)
 | 
			
		||||
        if emit_flash:
 | 
			
		||||
            flash(f"{len(uuids)} watches were tagged")
 | 
			
		||||
 | 
			
		||||
    if uuids:
 | 
			
		||||
        for uuid in uuids:
 | 
			
		||||
            watch_check_update.send(watch_uuid=uuid)
 | 
			
		||||
 | 
			
		||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, worker_handler, queuedWatchMetaData, watch_check_update):
 | 
			
		||||
    ui_blueprint = Blueprint('ui', __name__, template_folder="templates")
 | 
			
		||||
    
 | 
			
		||||
    # Register the edit blueprint
 | 
			
		||||
@@ -20,9 +118,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
 | 
			
		||||
    ui_blueprint.register_blueprint(notification_blueprint)
 | 
			
		||||
    
 | 
			
		||||
    # Register the views blueprint
 | 
			
		||||
    views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData)
 | 
			
		||||
    views_blueprint = construct_views_blueprint(datastore, update_q, queuedWatchMetaData, watch_check_update)
 | 
			
		||||
    ui_blueprint.register_blueprint(views_blueprint)
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    # Import the login decorator
 | 
			
		||||
    from changedetectionio.auth_decorator import login_optionally_required
 | 
			
		||||
 | 
			
		||||
@@ -35,7 +133,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
 | 
			
		||||
            flash('Watch not found', 'error')
 | 
			
		||||
        else:
 | 
			
		||||
            flash("Cleared snapshot history for watch {}".format(uuid))
 | 
			
		||||
 | 
			
		||||
        return redirect(url_for('watchlist.index'))
 | 
			
		||||
 | 
			
		||||
    @ui_blueprint.route("/clear_history", methods=['GET', 'POST'])
 | 
			
		||||
@@ -47,7 +144,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
 | 
			
		||||
            if confirmtext == 'clear':
 | 
			
		||||
                for uuid in datastore.data['watching'].keys():
 | 
			
		||||
                    datastore.clear_watch_history(uuid)
 | 
			
		||||
 | 
			
		||||
                flash("Cleared snapshot history for all watches")
 | 
			
		||||
            else:
 | 
			
		||||
                flash('Incorrect confirmation text.', 'error')
 | 
			
		||||
@@ -63,12 +159,20 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
 | 
			
		||||
    def mark_all_viewed():
 | 
			
		||||
        # Save the current newest history as the most recently viewed
 | 
			
		||||
        with_errors = request.args.get('with_errors') == "1"
 | 
			
		||||
        tag_limit = request.args.get('tag')
 | 
			
		||||
        logger.debug(f"Limiting to tag {tag_limit}")
 | 
			
		||||
        now = int(time.time())
 | 
			
		||||
        for watch_uuid, watch in datastore.data['watching'].items():
 | 
			
		||||
            if with_errors and not watch.get('last_error'):
 | 
			
		||||
                continue
 | 
			
		||||
            datastore.set_last_viewed(watch_uuid, int(time.time()))
 | 
			
		||||
 | 
			
		||||
        return redirect(url_for('watchlist.index'))
 | 
			
		||||
            if tag_limit and ( not watch.get('tags') or tag_limit not in watch['tags'] ):
 | 
			
		||||
                logger.debug(f"Skipping watch {watch_uuid}")
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            datastore.set_last_viewed(watch_uuid, now)
 | 
			
		||||
 | 
			
		||||
        return redirect(url_for('watchlist.index', tag=tag_limit))
 | 
			
		||||
 | 
			
		||||
    @ui_blueprint.route("/delete", methods=['GET'])
 | 
			
		||||
    @login_optionally_required
 | 
			
		||||
@@ -98,7 +202,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
 | 
			
		||||
        new_uuid = datastore.clone(uuid)
 | 
			
		||||
 | 
			
		||||
        if not datastore.data['watching'].get(uuid).get('paused'):
 | 
			
		||||
            update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
 | 
			
		||||
            worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid}))
 | 
			
		||||
 | 
			
		||||
        flash('Cloned, you are editing the new watch.')
 | 
			
		||||
 | 
			
		||||
@@ -114,13 +218,11 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
 | 
			
		||||
 | 
			
		||||
        i = 0
 | 
			
		||||
 | 
			
		||||
        running_uuids = []
 | 
			
		||||
        for t in running_update_threads:
 | 
			
		||||
            running_uuids.append(t.current_uuid)
 | 
			
		||||
        running_uuids = worker_handler.get_running_uuids()
 | 
			
		||||
 | 
			
		||||
        if uuid:
 | 
			
		||||
            if uuid not in running_uuids:
 | 
			
		||||
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
                worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
                i += 1
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
@@ -137,7 +239,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
 | 
			
		||||
                        if tag != None and tag not in watch['tags']:
 | 
			
		||||
                            continue
 | 
			
		||||
 | 
			
		||||
                        update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
 | 
			
		||||
                        worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
 | 
			
		||||
                        i += 1
 | 
			
		||||
 | 
			
		||||
        if i == 1:
 | 
			
		||||
@@ -153,100 +255,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
 | 
			
		||||
    @login_optionally_required
 | 
			
		||||
    def form_watch_list_checkbox_operations():
 | 
			
		||||
        op = request.form['op']
 | 
			
		||||
        uuids = request.form.getlist('uuids')
 | 
			
		||||
 | 
			
		||||
        if (op == 'delete'):
 | 
			
		||||
            for uuid in uuids:
 | 
			
		||||
                uuid = uuid.strip()
 | 
			
		||||
                if datastore.data['watching'].get(uuid):
 | 
			
		||||
                    datastore.delete(uuid.strip())
 | 
			
		||||
            flash("{} watches deleted".format(len(uuids)))
 | 
			
		||||
 | 
			
		||||
        elif (op == 'pause'):
 | 
			
		||||
            for uuid in uuids:
 | 
			
		||||
                uuid = uuid.strip()
 | 
			
		||||
                if datastore.data['watching'].get(uuid):
 | 
			
		||||
                    datastore.data['watching'][uuid.strip()]['paused'] = True
 | 
			
		||||
            flash("{} watches paused".format(len(uuids)))
 | 
			
		||||
 | 
			
		||||
        elif (op == 'unpause'):
 | 
			
		||||
            for uuid in uuids:
 | 
			
		||||
                uuid = uuid.strip()
 | 
			
		||||
                if datastore.data['watching'].get(uuid):
 | 
			
		||||
                    datastore.data['watching'][uuid.strip()]['paused'] = False
 | 
			
		||||
            flash("{} watches unpaused".format(len(uuids)))
 | 
			
		||||
 | 
			
		||||
        elif (op == 'mark-viewed'):
 | 
			
		||||
            for uuid in uuids:
 | 
			
		||||
                uuid = uuid.strip()
 | 
			
		||||
                if datastore.data['watching'].get(uuid):
 | 
			
		||||
                    datastore.set_last_viewed(uuid, int(time.time()))
 | 
			
		||||
            flash("{} watches updated".format(len(uuids)))
 | 
			
		||||
 | 
			
		||||
        elif (op == 'mute'):
 | 
			
		||||
            for uuid in uuids:
 | 
			
		||||
                uuid = uuid.strip()
 | 
			
		||||
                if datastore.data['watching'].get(uuid):
 | 
			
		||||
                    datastore.data['watching'][uuid.strip()]['notification_muted'] = True
 | 
			
		||||
            flash("{} watches muted".format(len(uuids)))
 | 
			
		||||
 | 
			
		||||
        elif (op == 'unmute'):
 | 
			
		||||
            for uuid in uuids:
 | 
			
		||||
                uuid = uuid.strip()
 | 
			
		||||
                if datastore.data['watching'].get(uuid):
 | 
			
		||||
                    datastore.data['watching'][uuid.strip()]['notification_muted'] = False
 | 
			
		||||
            flash("{} watches un-muted".format(len(uuids)))
 | 
			
		||||
 | 
			
		||||
        elif (op == 'recheck'):
 | 
			
		||||
            for uuid in uuids:
 | 
			
		||||
                uuid = uuid.strip()
 | 
			
		||||
                if datastore.data['watching'].get(uuid):
 | 
			
		||||
                    # Recheck and require a full reprocessing
 | 
			
		||||
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
            flash("{} watches queued for rechecking".format(len(uuids)))
 | 
			
		||||
 | 
			
		||||
        elif (op == 'clear-errors'):
 | 
			
		||||
            for uuid in uuids:
 | 
			
		||||
                uuid = uuid.strip()
 | 
			
		||||
                if datastore.data['watching'].get(uuid):
 | 
			
		||||
                    datastore.data['watching'][uuid]["last_error"] = False
 | 
			
		||||
            flash(f"{len(uuids)} watches errors cleared")
 | 
			
		||||
 | 
			
		||||
        elif (op == 'clear-history'):
 | 
			
		||||
            for uuid in uuids:
 | 
			
		||||
                uuid = uuid.strip()
 | 
			
		||||
                if datastore.data['watching'].get(uuid):
 | 
			
		||||
                    datastore.clear_watch_history(uuid)
 | 
			
		||||
            flash("{} watches cleared/reset.".format(len(uuids)))
 | 
			
		||||
 | 
			
		||||
        elif (op == 'notification-default'):
 | 
			
		||||
            from changedetectionio.notification import (
 | 
			
		||||
                default_notification_format_for_watch
 | 
			
		||||
            )
 | 
			
		||||
            for uuid in uuids:
 | 
			
		||||
                uuid = uuid.strip()
 | 
			
		||||
                if datastore.data['watching'].get(uuid):
 | 
			
		||||
                    datastore.data['watching'][uuid.strip()]['notification_title'] = None
 | 
			
		||||
                    datastore.data['watching'][uuid.strip()]['notification_body'] = None
 | 
			
		||||
                    datastore.data['watching'][uuid.strip()]['notification_urls'] = []
 | 
			
		||||
                    datastore.data['watching'][uuid.strip()]['notification_format'] = default_notification_format_for_watch
 | 
			
		||||
            flash("{} watches set to use default notification settings".format(len(uuids)))
 | 
			
		||||
 | 
			
		||||
        elif (op == 'assign-tag'):
 | 
			
		||||
            op_extradata = request.form.get('op_extradata', '').strip()
 | 
			
		||||
            if op_extradata:
 | 
			
		||||
                tag_uuid = datastore.add_tag(title=op_extradata)
 | 
			
		||||
                if op_extradata and tag_uuid:
 | 
			
		||||
                    for uuid in uuids:
 | 
			
		||||
                        uuid = uuid.strip()
 | 
			
		||||
                        if datastore.data['watching'].get(uuid):
 | 
			
		||||
                            # Bug in old versions caused by bad edit page/tag handler
 | 
			
		||||
                            if isinstance(datastore.data['watching'][uuid]['tags'], str):
 | 
			
		||||
                                datastore.data['watching'][uuid]['tags'] = []
 | 
			
		||||
 | 
			
		||||
                            datastore.data['watching'][uuid]['tags'].append(tag_uuid)
 | 
			
		||||
 | 
			
		||||
            flash(f"{len(uuids)} watches were tagged")
 | 
			
		||||
        uuids = [u.strip() for u in request.form.getlist('uuids') if u]
 | 
			
		||||
        extra_data = request.form.get('op_extradata', '').strip()
 | 
			
		||||
        _handle_operations(
 | 
			
		||||
            datastore=datastore,
 | 
			
		||||
            extra_data=extra_data,
 | 
			
		||||
            queuedWatchMetaData=queuedWatchMetaData,
 | 
			
		||||
            uuids=uuids,
 | 
			
		||||
            worker_handler=worker_handler,
 | 
			
		||||
            update_q=update_q,
 | 
			
		||||
            watch_check_update=watch_check_update,
 | 
			
		||||
            op=op,
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        return redirect(url_for('watchlist.index'))
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -9,6 +9,7 @@ from jinja2 import Environment, FileSystemLoader
 | 
			
		||||
from changedetectionio.store import ChangeDetectionStore
 | 
			
		||||
from changedetectionio.auth_decorator import login_optionally_required
 | 
			
		||||
from changedetectionio.time_handler import is_within_schedule
 | 
			
		||||
from changedetectionio import worker_handler
 | 
			
		||||
 | 
			
		||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
 | 
			
		||||
    edit_blueprint = Blueprint('ui_edit', __name__, template_folder="../ui/templates")
 | 
			
		||||
@@ -186,7 +187,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
 | 
			
		||||
            tz_name = time_schedule_limit.get('timezone')
 | 
			
		||||
            if not tz_name:
 | 
			
		||||
                tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')
 | 
			
		||||
                tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
 | 
			
		||||
 | 
			
		||||
            if time_schedule_limit and time_schedule_limit.get('enabled'):
 | 
			
		||||
                try:
 | 
			
		||||
@@ -201,7 +202,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
            #############################
 | 
			
		||||
            if not datastore.data['watching'][uuid].get('paused') and is_in_schedule:
 | 
			
		||||
                # Queue the watch for immediate recheck, with a higher priority
 | 
			
		||||
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
                worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid}))
 | 
			
		||||
 | 
			
		||||
            # Diff page [edit] link should go back to diff page
 | 
			
		||||
            if request.args.get("next") and request.args.get("next") == 'diff':
 | 
			
		||||
@@ -233,11 +234,15 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
 | 
			
		||||
            # Only works reliably with Playwright
 | 
			
		||||
 | 
			
		||||
            # Import the global plugin system
 | 
			
		||||
            from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras
 | 
			
		||||
            
 | 
			
		||||
            template_args = {
 | 
			
		||||
                'available_processors': processors.available_processors(),
 | 
			
		||||
                'available_timezones': sorted(available_timezones()),
 | 
			
		||||
                'browser_steps_config': browser_step_ui_config,
 | 
			
		||||
                'emailprefix': os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
 | 
			
		||||
                'extra_classes': 'checking-now' if worker_handler.is_watch_running(uuid) else '',
 | 
			
		||||
                'extra_notification_token_placeholder_info': datastore.get_unique_notification_token_placeholders_available(),
 | 
			
		||||
                'extra_processor_config': form.extra_tab_content(),
 | 
			
		||||
                'extra_title': f" - Edit - {watch.label}",
 | 
			
		||||
@@ -250,8 +255,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
                'settings_application': datastore.data['settings']['application'],
 | 
			
		||||
                'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'),
 | 
			
		||||
                'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'),
 | 
			
		||||
                'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch),
 | 
			
		||||
                'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid),
 | 
			
		||||
                'timezone_default_config': datastore.data['settings']['application'].get('timezone'),
 | 
			
		||||
                'timezone_default_config': datastore.data['settings']['application'].get('scheduler_timezone_default'),
 | 
			
		||||
                'using_global_webdriver_wait': not default['webdriver_delay'],
 | 
			
		||||
                'uuid': uuid,
 | 
			
		||||
                'watch': watch,
 | 
			
		||||
 
 | 
			
		||||
@@ -2,6 +2,7 @@ from flask import Blueprint, request, make_response
 | 
			
		||||
import random
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
from changedetectionio.notification_service import NotificationContextData, set_basic_notification_vars
 | 
			
		||||
from changedetectionio.store import ChangeDetectionStore
 | 
			
		||||
from changedetectionio.auth_decorator import login_optionally_required
 | 
			
		||||
 | 
			
		||||
@@ -19,6 +20,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
        import apprise
 | 
			
		||||
        from changedetectionio.notification.handler import process_notification
 | 
			
		||||
        from changedetectionio.notification.apprise_plugin.assets import apprise_asset
 | 
			
		||||
        from changedetectionio.jinja2_custom import render as jinja_render
 | 
			
		||||
 | 
			
		||||
        from changedetectionio.notification.apprise_plugin.custom_handlers import apprise_http_custom_handler
 | 
			
		||||
 | 
			
		||||
@@ -37,11 +39,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
            return make_response("Error: You must have atleast one watch configured for 'test notification' to work", 400)
 | 
			
		||||
 | 
			
		||||
        watch = datastore.data['watching'].get(watch_uuid)
 | 
			
		||||
 | 
			
		||||
        notification_urls = None
 | 
			
		||||
 | 
			
		||||
        if request.form.get('notification_urls'):
 | 
			
		||||
            notification_urls = request.form['notification_urls'].strip().splitlines()
 | 
			
		||||
        notification_urls = request.form.get('notification_urls','').strip().splitlines()
 | 
			
		||||
 | 
			
		||||
        if not notification_urls:
 | 
			
		||||
            logger.debug("Test notification - Trying by group/tag in the edit form if available")
 | 
			
		||||
@@ -61,20 +59,26 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
            return 'Error: No Notification URLs set/found'
 | 
			
		||||
 | 
			
		||||
        for n_url in notification_urls:
 | 
			
		||||
            # We are ONLY validating the apprise:// part here, convert all tags to something so as not to break apprise URLs
 | 
			
		||||
            generic_notification_context_data = NotificationContextData()
 | 
			
		||||
            generic_notification_context_data.set_random_for_validation()
 | 
			
		||||
            n_url = jinja_render(template_str=n_url, **generic_notification_context_data).strip()
 | 
			
		||||
            if len(n_url.strip()):
 | 
			
		||||
                if not apobj.add(n_url):
 | 
			
		||||
                    return f'Error:  {n_url} is not a valid AppRise URL.'
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            # use the same as when it is triggered, but then override it with the form test values
 | 
			
		||||
            n_object = {
 | 
			
		||||
            n_object = NotificationContextData({
 | 
			
		||||
                'watch_url': request.form.get('window_url', "https://changedetection.io"),
 | 
			
		||||
                'notification_urls': notification_urls
 | 
			
		||||
            }
 | 
			
		||||
            })
 | 
			
		||||
 | 
			
		||||
            # Only use if present, if not set in n_object it should use the default system value
 | 
			
		||||
            if 'notification_format' in request.form and request.form['notification_format'].strip():
 | 
			
		||||
                n_object['notification_format'] = request.form.get('notification_format', '').strip()
 | 
			
		||||
            else:
 | 
			
		||||
                n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
 | 
			
		||||
 | 
			
		||||
            if 'notification_title' in request.form and request.form['notification_title'].strip():
 | 
			
		||||
                n_object['notification_title'] = request.form.get('notification_title', '').strip()
 | 
			
		||||
@@ -91,7 +95,44 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 | 
			
		||||
                n_object['notification_body'] = "Test body"
 | 
			
		||||
 | 
			
		||||
            n_object['as_async'] = False
 | 
			
		||||
            n_object.update(watch.extra_notification_token_values())
 | 
			
		||||
 | 
			
		||||
            #  Same like in notification service, should be refactored
 | 
			
		||||
            dates = []
 | 
			
		||||
            trigger_text = ''
 | 
			
		||||
            snapshot_contents = ''
 | 
			
		||||
            if watch:
 | 
			
		||||
                watch_history = watch.history
 | 
			
		||||
                dates = list(watch_history.keys())
 | 
			
		||||
                trigger_text = watch.get('trigger_text', [])
 | 
			
		||||
                # Add text that was triggered
 | 
			
		||||
                if len(dates):
 | 
			
		||||
                    snapshot_contents = watch.get_history_snapshot(dates[-1])
 | 
			
		||||
                else:
 | 
			
		||||
                    snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."
 | 
			
		||||
 | 
			
		||||
                if len(trigger_text):
 | 
			
		||||
                    from . import html_tools
 | 
			
		||||
                    triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
 | 
			
		||||
                    if triggered_text:
 | 
			
		||||
                        triggered_text = '\n'.join(triggered_text)
 | 
			
		||||
 | 
			
		||||
            # Could be called as a 'test notification' with only 1 snapshot available
 | 
			
		||||
            prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
 | 
			
		||||
            current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
            if len(dates) > 1:
 | 
			
		||||
                prev_snapshot = watch.get_history_snapshot(dates[-2])
 | 
			
		||||
                current_snapshot = watch.get_history_snapshot(dates[-1])
 | 
			
		||||
 | 
			
		||||
            n_object.update(set_basic_notification_vars(snapshot_contents=snapshot_contents,
 | 
			
		||||
                                                        current_snapshot=current_snapshot,
 | 
			
		||||
                                                        prev_snapshot=prev_snapshot,
 | 
			
		||||
                                                        watch=watch,
 | 
			
		||||
                                                        triggered_text=trigger_text))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
            sent_obj = process_notification(n_object, datastore)
 | 
			
		||||
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,6 @@
 | 
			
		||||
{% extends 'base.html' %}
 | 
			
		||||
{% block content %}
 | 
			
		||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %}
 | 
			
		||||
{% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table, render_ternary_field %}
 | 
			
		||||
{% from '_common_fields.html' import render_common_settings_form %}
 | 
			
		||||
<script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 | 
			
		||||
<script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script>
 | 
			
		||||
@@ -72,15 +72,16 @@
 | 
			
		||||
                        <div class="pure-form-message">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></div>
 | 
			
		||||
                        <div class="pure-form-message">Variables are supported in the URL (<a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a>).</div>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.tags) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group inline-radio">
 | 
			
		||||
                        {{ render_field(form.processor) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.title, class="m-d") }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.tags) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span>
 | 
			
		||||
                        {{ render_field(form.title, class="m-d", placeholder=watch.label) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Automatically uses the page title if found, you can also use your own title/description here</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group time-between-check border-fieldset">
 | 
			
		||||
 | 
			
		||||
@@ -101,15 +102,16 @@
 | 
			
		||||
                        </div>
 | 
			
		||||
<br>
 | 
			
		||||
              </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.extract_title_as_title) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.filter_failure_notification_send) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">
 | 
			
		||||
                         Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_ternary_field(form.use_page_title_in_list) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                </fieldset>
 | 
			
		||||
            </div>
 | 
			
		||||
 | 
			
		||||
@@ -239,7 +241,7 @@ Math: {{ 1 + 1 }}") }}
 | 
			
		||||
                                </div>
 | 
			
		||||
                            </div>
 | 
			
		||||
                            <div id="browser-steps-fieldlist" >
 | 
			
		||||
                                <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
 | 
			
		||||
                                <span id="browser-seconds-remaining">Press "Play" to start.</span> <span style="font-size: 80%;"> (<a target="newwindow" href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span>
 | 
			
		||||
                                {{ render_field(form.browser_steps) }}
 | 
			
		||||
                            </div>
 | 
			
		||||
                        </div>
 | 
			
		||||
@@ -262,7 +264,7 @@ Math: {{ 1 + 1 }}") }}
 | 
			
		||||
            <div class="tab-pane-inner" id="notifications">
 | 
			
		||||
                <fieldset>
 | 
			
		||||
                    <div  class="pure-control-group inline-radio">
 | 
			
		||||
                      {{ render_checkbox_field(form.notification_muted) }}
 | 
			
		||||
                      {{ render_ternary_field(form.notification_muted, BooleanField=true) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    {% if watch_needs_selenium_or_playwright %}
 | 
			
		||||
                    <div class="pure-control-group inline-radio">
 | 
			
		||||
@@ -383,13 +385,13 @@ Math: {{ 1 + 1 }}") }}
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {% if watch_needs_selenium_or_playwright %}
 | 
			
		||||
                            {% if system_has_playwright_configured %}
 | 
			
		||||
                                {%  if visual_selector_data_ready %}
 | 
			
		||||
                            <span class="pure-form-message-inline" id="visual-selector-heading">
 | 
			
		||||
                                The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
 | 
			
		||||
                            </span>
 | 
			
		||||
 | 
			
		||||
                            <div id="selector-header">
 | 
			
		||||
                                <a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
 | 
			
		||||
                                <!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->
 | 
			
		||||
                                <i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i>
 | 
			
		||||
                            </div>
 | 
			
		||||
                            <div id="selector-wrapper" style="display: none">
 | 
			
		||||
@@ -400,9 +402,6 @@ Math: {{ 1 + 1 }}") }}
 | 
			
		||||
                                <canvas id="selector-canvas"></canvas>
 | 
			
		||||
                            </div>
 | 
			
		||||
                            <div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong> <span class="text">Loading...</span></div>
 | 
			
		||||
                            {% else %}
 | 
			
		||||
                                <strong>Error, The Visual selector data is not ready, it needs to complete atleast one fetch, please queue the item and reload.</strong>
 | 
			
		||||
                            {% endif %}
 | 
			
		||||
                        {% else %}
 | 
			
		||||
                            {# The watch needed chrome but system says that playwright is not ready #}
 | 
			
		||||
                            {{ playwright_warning() }}
 | 
			
		||||
@@ -453,6 +452,13 @@ Math: {{ 1 + 1 }}") }}
 | 
			
		||||
                        </tr>
 | 
			
		||||
                        </tbody>
 | 
			
		||||
                    </table>
 | 
			
		||||
 | 
			
		||||
                    {% if ui_edit_stats_extras %}
 | 
			
		||||
                    <div class="plugin-stats-extras"> <!-- from pluggy plugin -->
 | 
			
		||||
                        {{ ui_edit_stats_extras|safe }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
 | 
			
		||||
                    {% if watch.history_n %}
 | 
			
		||||
                        <p>
 | 
			
		||||
                             <a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a>
 | 
			
		||||
@@ -465,11 +471,11 @@ Math: {{ 1 + 1 }}") }}
 | 
			
		||||
                <div class="pure-control-group">
 | 
			
		||||
                    {{ render_button(form.save_button) }}
 | 
			
		||||
                    <a href="{{url_for('ui.form_delete', uuid=uuid)}}"
 | 
			
		||||
                       class="pure-button button-small button-error ">Delete</a>
 | 
			
		||||
                       class="pure-button button-error ">Delete</a>
 | 
			
		||||
                    {% if watch.history_n %}<a href="{{url_for('ui.clear_watch_history', uuid=uuid)}}"
 | 
			
		||||
                       class="pure-button button-small button-error ">Clear History</a>{% endif %}
 | 
			
		||||
                       class="pure-button button-error">Clear History</a>{% endif %}
 | 
			
		||||
                    <a href="{{url_for('ui.form_clone', uuid=uuid)}}"
 | 
			
		||||
                       class="pure-button button-small ">Clone & Edit</a>
 | 
			
		||||
                       class="pure-button">Clone & Edit</a>
 | 
			
		||||
                </div>
 | 
			
		||||
            </div>
 | 
			
		||||
        </form>
 | 
			
		||||
@@ -1,14 +1,14 @@
 | 
			
		||||
from flask import Blueprint, request, redirect, url_for, flash, render_template, make_response, send_from_directory, abort
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
from copy import deepcopy
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
from changedetectionio.store import ChangeDetectionStore
 | 
			
		||||
from changedetectionio.auth_decorator import login_optionally_required
 | 
			
		||||
from changedetectionio import html_tools
 | 
			
		||||
from changedetectionio import worker_handler
 | 
			
		||||
 | 
			
		||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData):
 | 
			
		||||
def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMetaData, watch_check_update):
 | 
			
		||||
    views_blueprint = Blueprint('ui_views', __name__, template_folder="../ui/templates")
 | 
			
		||||
    
 | 
			
		||||
    @views_blueprint.route("/preview/<string:uuid>", methods=['GET'])
 | 
			
		||||
@@ -77,9 +77,46 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
 | 
			
		||||
        return output
 | 
			
		||||
 | 
			
		||||
    @views_blueprint.route("/diff/<string:uuid>", methods=['GET', 'POST'])
 | 
			
		||||
    @views_blueprint.route("/diff/<string:uuid>", methods=['POST'])
 | 
			
		||||
    @login_optionally_required
 | 
			
		||||
    def diff_history_page(uuid):
 | 
			
		||||
    def diff_history_page_build_report(uuid):
 | 
			
		||||
        from changedetectionio import forms
 | 
			
		||||
 | 
			
		||||
        # More for testing, possible to return the first/only
 | 
			
		||||
        if uuid == 'first':
 | 
			
		||||
            uuid = list(datastore.data['watching'].keys()).pop()
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            watch = datastore.data['watching'][uuid]
 | 
			
		||||
        except KeyError:
 | 
			
		||||
            flash("No history found for the specified link, bad link?", "error")
 | 
			
		||||
            return redirect(url_for('watchlist.index'))
 | 
			
		||||
 | 
			
		||||
        # For submission of requesting an extract
 | 
			
		||||
        extract_form = forms.extractDataForm(formdata=request.form,
 | 
			
		||||
                                             data={'extract_regex': request.form.get('extract_regex', '')}
 | 
			
		||||
                                             )
 | 
			
		||||
        if not extract_form.validate():
 | 
			
		||||
            flash("An error occurred, please see below.", "error")
 | 
			
		||||
            return _render_diff_template(uuid, extract_form)
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            extract_regex = request.form.get('extract_regex', '').strip()
 | 
			
		||||
            output = watch.extract_regex_from_all_history(extract_regex)
 | 
			
		||||
            if output:
 | 
			
		||||
                watch_dir = os.path.join(datastore.datastore_path, uuid)
 | 
			
		||||
                response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
 | 
			
		||||
                response.headers['Content-type'] = 'text/csv'
 | 
			
		||||
                response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
 | 
			
		||||
                response.headers['Pragma'] = 'no-cache'
 | 
			
		||||
                response.headers['Expires'] = "0"
 | 
			
		||||
                return response
 | 
			
		||||
 | 
			
		||||
            flash('No matches found while scanning all of the watch history for that RegEx.', 'error')
 | 
			
		||||
        return redirect(url_for('ui.ui_views.diff_history_page', uuid=uuid) + '#extract')
 | 
			
		||||
 | 
			
		||||
    def _render_diff_template(uuid, extract_form=None):
 | 
			
		||||
        """Helper function to render the diff template with all required data"""
 | 
			
		||||
        from changedetectionio import forms
 | 
			
		||||
 | 
			
		||||
        # More for testing, possible to return the first/only
 | 
			
		||||
@@ -93,62 +130,36 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
            flash("No history found for the specified link, bad link?", "error")
 | 
			
		||||
            return redirect(url_for('watchlist.index'))
 | 
			
		||||
 | 
			
		||||
        # For submission of requesting an extract
 | 
			
		||||
        extract_form = forms.extractDataForm(request.form)
 | 
			
		||||
        if request.method == 'POST':
 | 
			
		||||
            if not extract_form.validate():
 | 
			
		||||
                flash("An error occurred, please see below.", "error")
 | 
			
		||||
 | 
			
		||||
            else:
 | 
			
		||||
                extract_regex = request.form.get('extract_regex').strip()
 | 
			
		||||
                output = watch.extract_regex_from_all_history(extract_regex)
 | 
			
		||||
                if output:
 | 
			
		||||
                    watch_dir = os.path.join(datastore.datastore_path, uuid)
 | 
			
		||||
                    response = make_response(send_from_directory(directory=watch_dir, path=output, as_attachment=True))
 | 
			
		||||
                    response.headers['Content-type'] = 'text/csv'
 | 
			
		||||
                    response.headers['Cache-Control'] = 'no-cache, no-store, must-revalidate'
 | 
			
		||||
                    response.headers['Pragma'] = 'no-cache'
 | 
			
		||||
                    response.headers['Expires'] = 0
 | 
			
		||||
                    return response
 | 
			
		||||
 | 
			
		||||
                flash('Nothing matches that RegEx', 'error')
 | 
			
		||||
                redirect(url_for('ui_views.diff_history_page', uuid=uuid)+'#extract')
 | 
			
		||||
        # Use provided form or create a new one
 | 
			
		||||
        if extract_form is None:
 | 
			
		||||
            extract_form = forms.extractDataForm(formdata=request.form,
 | 
			
		||||
                                                 data={'extract_regex': request.form.get('extract_regex', '')}
 | 
			
		||||
                                                 )
 | 
			
		||||
 | 
			
		||||
        history = watch.history
 | 
			
		||||
        dates = list(history.keys())
 | 
			
		||||
 | 
			
		||||
        if len(dates) < 2:
 | 
			
		||||
            flash("Not enough saved change detection snapshots to produce a report.", "error")
 | 
			
		||||
            return redirect(url_for('watchlist.index'))
 | 
			
		||||
        # If a "from_version" was requested, then find it (or the closest one)
 | 
			
		||||
        # Also set "from version" to be the closest version to the one that was last viewed.
 | 
			
		||||
 | 
			
		||||
        # Save the current newest history as the most recently viewed
 | 
			
		||||
        datastore.set_last_viewed(uuid, time.time())
 | 
			
		||||
        best_last_viewed_timestamp = watch.get_from_version_based_on_last_viewed
 | 
			
		||||
        from_version_timestamp = best_last_viewed_timestamp if best_last_viewed_timestamp else dates[-2]
 | 
			
		||||
        from_version = request.args.get('from_version', from_version_timestamp )
 | 
			
		||||
 | 
			
		||||
        # Read as binary and force decode as UTF-8
 | 
			
		||||
        # Windows may fail decode in python if we just use 'r' mode (chardet decode exception)
 | 
			
		||||
        from_version = request.args.get('from_version')
 | 
			
		||||
        from_version_index = -2  # second newest
 | 
			
		||||
        if from_version and from_version in dates:
 | 
			
		||||
            from_version_index = dates.index(from_version)
 | 
			
		||||
        else:
 | 
			
		||||
            from_version = dates[from_version_index]
 | 
			
		||||
        # Use the current one if nothing was specified
 | 
			
		||||
        to_version = request.args.get('to_version', str(dates[-1]))
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            from_version_file_contents = watch.get_history_snapshot(dates[from_version_index])
 | 
			
		||||
            to_version_file_contents = watch.get_history_snapshot(timestamp=to_version)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            from_version_file_contents = f"Unable to read to-version at index {dates[from_version_index]}.\n"
 | 
			
		||||
 | 
			
		||||
        to_version = request.args.get('to_version')
 | 
			
		||||
        to_version_index = -1
 | 
			
		||||
        if to_version and to_version in dates:
 | 
			
		||||
            to_version_index = dates.index(to_version)
 | 
			
		||||
        else:
 | 
			
		||||
            to_version = dates[to_version_index]
 | 
			
		||||
            logger.error(f"Unable to read watch history to-version for version {to_version}: {str(e)}")
 | 
			
		||||
            to_version_file_contents = f"Unable to read to-version at {to_version}.\n"
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            to_version_file_contents = watch.get_history_snapshot(dates[to_version_index])
 | 
			
		||||
            from_version_file_contents = watch.get_history_snapshot(timestamp=from_version)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            to_version_file_contents = "Unable to read to-version at index{}.\n".format(dates[to_version_index])
 | 
			
		||||
            logger.error(f"Unable to read watch history from-version for version {from_version}: {str(e)}")
 | 
			
		||||
            from_version_file_contents = f"Unable to read to-version {from_version}.\n"
 | 
			
		||||
 | 
			
		||||
        screenshot_url = watch.get_screenshot()
 | 
			
		||||
 | 
			
		||||
@@ -162,7 +173,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
        if datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False):
 | 
			
		||||
            password_enabled_and_share_is_off = not datastore.data['settings']['application'].get('shared_diff_access')
 | 
			
		||||
 | 
			
		||||
        output = render_template("diff.html",
 | 
			
		||||
        datastore.set_last_viewed(uuid, time.time())
 | 
			
		||||
 | 
			
		||||
        return render_template("diff.html",
 | 
			
		||||
                                 current_diff_url=watch['url'],
 | 
			
		||||
                                 from_version=str(from_version),
 | 
			
		||||
                                 to_version=str(to_version),
 | 
			
		||||
@@ -185,7 +198,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
                                 watch_a=watch
 | 
			
		||||
                                 )
 | 
			
		||||
 | 
			
		||||
        return output
 | 
			
		||||
    @views_blueprint.route("/diff/<string:uuid>", methods=['GET'])
 | 
			
		||||
    @login_optionally_required
 | 
			
		||||
    def diff_history_page(uuid):
 | 
			
		||||
        return _render_diff_template(uuid)
 | 
			
		||||
 | 
			
		||||
    @views_blueprint.route("/form/add/quickwatch", methods=['POST'])
 | 
			
		||||
    @login_optionally_required
 | 
			
		||||
@@ -212,7 +228,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
                return redirect(url_for('ui.ui_edit.edit_page', uuid=new_uuid, unpause_on_save=1, tag=request.args.get('tag')))
 | 
			
		||||
            else:
 | 
			
		||||
                # Straight into the queue.
 | 
			
		||||
                update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
 | 
			
		||||
                worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid}))
 | 
			
		||||
                flash("Watch added.")
 | 
			
		||||
 | 
			
		||||
        return redirect(url_for('watchlist.index', tag=request.args.get('tag','')))
 | 
			
		||||
 
 | 
			
		||||
@@ -44,12 +44,16 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
        # Sort by last_changed and add the uuid which is usually the key..
 | 
			
		||||
        sorted_watches = []
 | 
			
		||||
        with_errors = request.args.get('with_errors') == "1"
 | 
			
		||||
        unread_only = request.args.get('unread') == "1"
 | 
			
		||||
        errored_count = 0
 | 
			
		||||
        search_q = request.args.get('q').strip().lower() if request.args.get('q') else False
 | 
			
		||||
        for uuid, watch in datastore.data['watching'].items():
 | 
			
		||||
            if with_errors and not watch.get('last_error'):
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            if unread_only and (watch.viewed or watch.last_changed == 0) :
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            if active_tag_uuid and not active_tag_uuid in watch['tags']:
 | 
			
		||||
                    continue
 | 
			
		||||
            if watch.get('last_error'):
 | 
			
		||||
@@ -72,31 +76,32 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe
 | 
			
		||||
                                per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic")
 | 
			
		||||
 | 
			
		||||
        sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title'])
 | 
			
		||||
 | 
			
		||||
        output = render_template(
 | 
			
		||||
            "watch-overview.html",
 | 
			
		||||
                                 active_tag=active_tag,
 | 
			
		||||
                                 active_tag_uuid=active_tag_uuid,
 | 
			
		||||
                                 app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
 | 
			
		||||
                                 datastore=datastore,
 | 
			
		||||
                                 errored_count=errored_count,
 | 
			
		||||
                                 form=form,
 | 
			
		||||
                                 guid=datastore.data['app_guid'],
 | 
			
		||||
                                 has_proxies=datastore.proxy_list,
 | 
			
		||||
                                 has_unviewed=datastore.has_unviewed,
 | 
			
		||||
                                 hosted_sticky=os.getenv("SALTED_PASS", False) == False,
 | 
			
		||||
                                 now_time_server=time.time(),
 | 
			
		||||
                                 pagination=pagination,
 | 
			
		||||
                                 queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
 | 
			
		||||
                                 search_q=request.args.get('q', '').strip(),
 | 
			
		||||
                                 sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
 | 
			
		||||
                                 sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
 | 
			
		||||
                                 system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
 | 
			
		||||
                                 tags=sorted_tags,
 | 
			
		||||
                                 watches=sorted_watches
 | 
			
		||||
                                 )
 | 
			
		||||
            active_tag=active_tag,
 | 
			
		||||
            active_tag_uuid=active_tag_uuid,
 | 
			
		||||
            app_rss_token=datastore.data['settings']['application'].get('rss_access_token'),
 | 
			
		||||
            datastore=datastore,
 | 
			
		||||
            errored_count=errored_count,
 | 
			
		||||
            form=form,
 | 
			
		||||
            guid=datastore.data['app_guid'],
 | 
			
		||||
            has_proxies=datastore.proxy_list,
 | 
			
		||||
            hosted_sticky=os.getenv("SALTED_PASS", False) == False,
 | 
			
		||||
            now_time_server=round(time.time()),
 | 
			
		||||
            pagination=pagination,
 | 
			
		||||
            queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue],
 | 
			
		||||
            search_q=request.args.get('q', '').strip(),
 | 
			
		||||
            sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'),
 | 
			
		||||
            sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'),
 | 
			
		||||
            system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'),
 | 
			
		||||
            tags=sorted_tags,
 | 
			
		||||
            unread_changes_count=datastore.unread_changes_count,
 | 
			
		||||
            watches=sorted_watches
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        if session.get('share-link'):
 | 
			
		||||
            del(session['share-link'])
 | 
			
		||||
            del (session['share-link'])
 | 
			
		||||
 | 
			
		||||
        resp = make_response(output)
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,16 @@
 | 
			
		||||
{% extends 'base.html' %}
 | 
			
		||||
{% block content %}
 | 
			
		||||
{% from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title %}
 | 
			
		||||
{%- extends 'base.html' -%}
 | 
			
		||||
{%- block content -%}
 | 
			
		||||
{%- from '_helpers.html' import render_simple_field, render_field, render_nolabel_field, sort_by_title -%}
 | 
			
		||||
<script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script>
 | 
			
		||||
<script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script>
 | 
			
		||||
<script>let nowtimeserver={{ now_time_server }};</script>
 | 
			
		||||
 | 
			
		||||
<script>let favicon_baseURL="{{ url_for('static_content', group='favicon', filename="PLACEHOLDER")}}";</script>
 | 
			
		||||
<script>
 | 
			
		||||
// Initialize Feather icons after the page loads
 | 
			
		||||
document.addEventListener('DOMContentLoaded', function() {
 | 
			
		||||
    feather.replace();
 | 
			
		||||
});
 | 
			
		||||
</script>
 | 
			
		||||
<style>
 | 
			
		||||
.checking-now .last-checked {
 | 
			
		||||
    background-image: linear-gradient(to bottom, transparent 0%, rgba(0,0,0,0.05) 40%, rgba(0,0,0,0.1) 100%);
 | 
			
		||||
@@ -13,19 +19,20 @@
 | 
			
		||||
    transition: background-size 0.9s ease
 | 
			
		||||
}
 | 
			
		||||
</style>
 | 
			
		||||
<div class="box">
 | 
			
		||||
<div class="box" id="form-quick-watch-add">
 | 
			
		||||
 | 
			
		||||
    <form class="pure-form" action="{{ url_for('ui.ui_views.form_quick_watch_add', tag=active_tag_uuid) }}" method="POST" id="new-watch-form">
 | 
			
		||||
        <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
 | 
			
		||||
        <fieldset>
 | 
			
		||||
            <legend>Add a new change detection watch</legend>
 | 
			
		||||
            <legend>Add a new web page change detection watch</legend>
 | 
			
		||||
            <div id="watch-add-wrapper-zone">
 | 
			
		||||
 | 
			
		||||
                    {{ render_nolabel_field(form.url, placeholder="https://...", required=true) }}
 | 
			
		||||
                    {{ render_nolabel_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="watch label / tag") }}
 | 
			
		||||
                    {{ render_nolabel_field(form.watch_submit_button, title="Watch this URL!" ) }}
 | 
			
		||||
                    {{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }}
 | 
			
		||||
            </div>
 | 
			
		||||
            <div id="watch-group-tag">
 | 
			
		||||
               {{ render_field(form.tags, value=active_tag.title if active_tag_uuid else '', placeholder="Watch group / tag", class="transparent-field") }}
 | 
			
		||||
            </div>
 | 
			
		||||
            <div id="quick-watch-processor-type">
 | 
			
		||||
                {{ render_simple_field(form.processor) }}
 | 
			
		||||
            </div>
 | 
			
		||||
@@ -33,215 +40,227 @@
 | 
			
		||||
        </fieldset>
 | 
			
		||||
        <span style="color:#eee; font-size: 80%;"><img alt="Create a shareable link" style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" > Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></span>
 | 
			
		||||
    </form>
 | 
			
		||||
 | 
			
		||||
</div>
 | 
			
		||||
<div class="box">
 | 
			
		||||
    <form class="pure-form" action="{{ url_for('ui.form_watch_list_checkbox_operations') }}" method="POST" id="watch-list-form">
 | 
			
		||||
    <input type="hidden" name="csrf_token" value="{{ csrf_token() }}" >
 | 
			
		||||
    <input type="hidden" id="op_extradata" name="op_extradata" value="" >
 | 
			
		||||
    <div id="checkbox-operations">
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall"  name="op" value="pause">Pause</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall"  name="op" value="unpause">UnPause</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall"  name="op" value="mute">Mute</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall"  name="op" value="unmute">UnMute</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" name="op" value="recheck">Recheck</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag">Tag</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed">Mark viewed</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors">Clear errors</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history">Clear/reset history</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete">Delete</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall"  name="op" value="pause"><i data-feather="pause" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Pause</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall"  name="op" value="unpause"><i data-feather="play" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnPause</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall"  name="op" value="mute"><i data-feather="volume-x" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mute</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall"  name="op" value="unmute"><i data-feather="volume-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>UnMute</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" name="op" value="recheck"><i data-feather="refresh-cw" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Recheck</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag"><i data-feather="tag" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Tag</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed"><i data-feather="eye" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Mark viewed</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" name="op" value="notification-default"><i data-feather="bell" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Use default notification</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors"><i data-feather="x-circle" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear errors</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history"><i data-feather="trash-2" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Clear/reset history</button>
 | 
			
		||||
        <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete"><i data-feather="trash" style="width: 14px; height: 14px; stroke: white; margin-right: 4px;"></i>Delete</button>
 | 
			
		||||
    </div>
 | 
			
		||||
    {% if watches|length >= pagination.per_page %}
 | 
			
		||||
    {%- if watches|length >= pagination.per_page -%}
 | 
			
		||||
        {{ pagination.info }}
 | 
			
		||||
    {% endif %}
 | 
			
		||||
    {% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %}
 | 
			
		||||
    {%- endif -%}
 | 
			
		||||
    {%- if search_q -%}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{%- endif -%}
 | 
			
		||||
    <div>
 | 
			
		||||
        <a href="{{url_for('watchlist.index')}}" class="pure-button button-tag {{'active' if not active_tag_uuid }}">All</a>
 | 
			
		||||
 | 
			
		||||
    <!-- tag list -->
 | 
			
		||||
    {% for uuid, tag in tags %}
 | 
			
		||||
        {% if tag != "" %}
 | 
			
		||||
    {%- for uuid, tag in tags -%}
 | 
			
		||||
        {%- if tag != "" -%}
 | 
			
		||||
            <a href="{{url_for('watchlist.index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a>
 | 
			
		||||
        {% endif %}
 | 
			
		||||
    {% endfor %}
 | 
			
		||||
        {%- endif -%}
 | 
			
		||||
    {%- endfor -%}
 | 
			
		||||
    </div>
 | 
			
		||||
 | 
			
		||||
    {% set sort_order = sort_order or 'asc' %}
 | 
			
		||||
    {% set sort_attribute = sort_attribute or 'last_changed'  %}
 | 
			
		||||
    {% set pagination_page = request.args.get('page', 0) %}
 | 
			
		||||
    {% set cols_required = 6 %}
 | 
			
		||||
    {% set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") %}
 | 
			
		||||
    {% if any_has_restock_price_processor %}
 | 
			
		||||
        {% set cols_required = cols_required + 1 %}
 | 
			
		||||
    {% endif %}
 | 
			
		||||
 | 
			
		||||
    <div id="watch-table-wrapper">
 | 
			
		||||
 | 
			
		||||
        <table class="pure-table pure-table-striped watch-table">
 | 
			
		||||
    {%- set sort_order = sort_order or 'asc' -%}
 | 
			
		||||
    {%- set sort_attribute = sort_attribute or 'last_changed'  -%}
 | 
			
		||||
    {%- set pagination_page = request.args.get('page', 0) -%}
 | 
			
		||||
    {%- set cols_required = 6 -%}
 | 
			
		||||
    {%- set any_has_restock_price_processor = datastore.any_watches_have_processor_by_name("restock_diff") -%}
 | 
			
		||||
    {%- if any_has_restock_price_processor -%}
 | 
			
		||||
        {%- set cols_required = cols_required + 1 -%}
 | 
			
		||||
    {%- endif -%}
 | 
			
		||||
    {%- set ui_settings = datastore.data['settings']['application']['ui'] -%}
 | 
			
		||||
    {%- set wrapper_classes = [
 | 
			
		||||
        'has-unread-changes' if unread_changes_count else '',
 | 
			
		||||
        'has-error' if errored_count else '',
 | 
			
		||||
    ] -%}
 | 
			
		||||
    <div id="watch-table-wrapper" class="{{ wrapper_classes | reject('equalto', '') | join(' ') }}">
 | 
			
		||||
        {%- set table_classes = [
 | 
			
		||||
            'favicon-enabled' if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] else 'favicon-not-enabled',
 | 
			
		||||
        ] -%}
 | 
			
		||||
        <table class="pure-table pure-table-striped watch-table {{ table_classes | reject('equalto', '') | join(' ') }}">
 | 
			
		||||
            <thead>
 | 
			
		||||
            <tr>
 | 
			
		||||
                {% set link_order = "desc" if sort_order  == 'asc' else "asc" %}
 | 
			
		||||
                {% set arrow_span = "" %}
 | 
			
		||||
                {%- set link_order = "desc" if sort_order  == 'asc' else "asc" -%}
 | 
			
		||||
                {%- set arrow_span = "" -%}
 | 
			
		||||
                <th><input style="vertical-align: middle" type="checkbox" id="check-all" > <a class="{{ 'active '+link_order if sort_attribute == 'date_created' else 'inactive' }}"  href="{{url_for('watchlist.index', sort='date_created', order=link_order, tag=active_tag_uuid)}}"># <span class='arrow {{link_order}}'></span></a></th>
 | 
			
		||||
                <th class="empty-cell"></th>
 | 
			
		||||
                <th>
 | 
			
		||||
                    <a class="{{ 'active '+link_order if sort_attribute == 'paused' else 'inactive' }}" href="{{url_for('watchlist.index', sort='paused', order=link_order, tag=active_tag_uuid)}}"><i data-feather="pause" style="vertical-align: bottom; width: 14px; height: 14px;  margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a>
 | 
			
		||||
                     
 | 
			
		||||
                    <a class="{{ 'active '+link_order if sort_attribute == 'notification_muted' else 'inactive' }}" href="{{url_for('watchlist.index', sort='notification_muted', order=link_order, tag=active_tag_uuid)}}"><i data-feather="volume-2" style="vertical-align: bottom; width: 14px; height: 14px;  margin-right: 4px;"></i><span class='arrow {{link_order}}'></span></a>
 | 
			
		||||
                </th>
 | 
			
		||||
                <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('watchlist.index', sort='label', order=link_order, tag=active_tag_uuid)}}">Website <span class='arrow {{link_order}}'></span></a></th>
 | 
			
		||||
             {% if any_has_restock_price_processor %}
 | 
			
		||||
             {%- if any_has_restock_price_processor -%}
 | 
			
		||||
                <th>Restock & Price</th>
 | 
			
		||||
             {% endif %}
 | 
			
		||||
             {%- endif -%}
 | 
			
		||||
                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_checked', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Checked <span class='arrow {{link_order}}'></span></a></th>
 | 
			
		||||
                <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('watchlist.index', sort='last_changed', order=link_order, tag=active_tag_uuid)}}"><span class="hide-on-mobile">Last</span> Changed <span class='arrow {{link_order}}'></span></a></th>
 | 
			
		||||
                <th class="empty-cell"></th>
 | 
			
		||||
            </tr>
 | 
			
		||||
            </thead>
 | 
			
		||||
            <tbody>
 | 
			
		||||
            {% if not watches|length %}
 | 
			
		||||
            {%- if not watches|length -%}
 | 
			
		||||
            <tr>
 | 
			
		||||
                <td colspan="{{ cols_required }}" style="text-wrap: wrap;">No website watches configured, please add a URL in the box above, or <a href="{{ url_for('imports.import_page')}}" >import a list</a>.</td>
 | 
			
		||||
            </tr>
 | 
			
		||||
            {% endif %}
 | 
			
		||||
            {% for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) %}
 | 
			
		||||
            {%- endif -%}
 | 
			
		||||
 | 
			
		||||
                {% set is_unviewed = watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}
 | 
			
		||||
                {% set checking_now = is_checking_now(watch) %}
 | 
			
		||||
            <tr id="{{ watch.uuid }}"
 | 
			
		||||
                class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} processor-{{ watch['processor'] }}
 | 
			
		||||
                {% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
 | 
			
		||||
                {% if watch.last_notification_error is defined and watch.last_notification_error != False %}error{% endif %}
 | 
			
		||||
                {% if watch.paused is defined and watch.paused != False %}paused{% endif %}
 | 
			
		||||
                {% if is_unviewed %}unviewed{% endif %}
 | 
			
		||||
                {% if watch.has_restock_info %} has-restock-info {% if watch['restock']['in_stock'] %}in-stock{% else %}not-in-stock{% endif %} {% else %}no-restock-info{% endif %}
 | 
			
		||||
                {% if watch.uuid in queued_uuids %}queued{% endif %}
 | 
			
		||||
                {% if checking_now %}checking-now{% endif %}
 | 
			
		||||
                ">
 | 
			
		||||
                <td class="inline checkbox-uuid" ><input name="uuids"  type="checkbox" value="{{ watch.uuid}} " > <span>{{ loop.index+pagination.skip }}</span></td>
 | 
			
		||||
            {%- for watch in (watches|sort(attribute=sort_attribute, reverse=sort_order == 'asc'))|pagination_slice(skip=pagination.skip) -%}
 | 
			
		||||
                {%- set checking_now = is_checking_now(watch) -%}
 | 
			
		||||
                {%- set history_n = watch.history_n -%}
 | 
			
		||||
                {%- set favicon = watch.get_favicon_filename() -%}
 | 
			
		||||
                {%- set system_use_url_watchlist = datastore.data['settings']['application']['ui'].get('use_page_title_in_list')  -%}
 | 
			
		||||
                {#  Class settings mirrored in changedetectionio/static/js/realtime.js for the frontend #}
 | 
			
		||||
                {%- set row_classes = [
 | 
			
		||||
                    loop.cycle('pure-table-odd', 'pure-table-even'),
 | 
			
		||||
                    'processor-' ~ watch['processor'],
 | 
			
		||||
                    'has-error' if watch.compile_error_texts()|length > 2 else '',
 | 
			
		||||
                    'paused' if watch.paused is defined and watch.paused != False else '',
 | 
			
		||||
                    'unviewed' if watch.has_unviewed else '',
 | 
			
		||||
                    'has-restock-info' if watch.has_restock_info else 'no-restock-info',
 | 
			
		||||
                    'has-favicon' if favicon else '',
 | 
			
		||||
                    'in-stock' if watch.has_restock_info and watch['restock']['in_stock'] else '',
 | 
			
		||||
                    'not-in-stock' if watch.has_restock_info and not watch['restock']['in_stock'] else '',
 | 
			
		||||
                    'queued' if watch.uuid in queued_uuids else '',
 | 
			
		||||
                    'checking-now' if checking_now else '',
 | 
			
		||||
                    'notification_muted' if watch.notification_muted else '',
 | 
			
		||||
                    'single-history' if history_n == 1 else '',
 | 
			
		||||
                    'multiple-history' if history_n >= 2 else '',
 | 
			
		||||
                    'use-html-title' if system_use_url_watchlist else 'no-html-title',
 | 
			
		||||
                ] -%}
 | 
			
		||||
            <tr id="{{ watch.uuid }}" data-watch-uuid="{{ watch.uuid }}" class="{{ row_classes | reject('equalto', '') | join(' ') }}">
 | 
			
		||||
                <td class="inline checkbox-uuid" ><div><input name="uuids"  type="checkbox" value="{{ watch.uuid}} " > <span class="counter-i">{{ loop.index+pagination.skip }}</span></div></td>
 | 
			
		||||
                <td class="inline watch-controls">
 | 
			
		||||
                    {% if not watch.paused %}
 | 
			
		||||
                    <a class="state-off" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
 | 
			
		||||
                    {% else %}
 | 
			
		||||
                    <a class="state-on" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
                    {% set mute_label = 'UnMute notification' if watch.notification_muted else 'Mute notification' %}
 | 
			
		||||
                    <a class="link-mute state-{{'on' if watch.notification_muted else 'off'}}" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="{{ mute_label }}" title="{{ mute_label }}" class="icon icon-mute" ></a>
 | 
			
		||||
                </td>
 | 
			
		||||
                <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}}
 | 
			
		||||
                    <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a>
 | 
			
		||||
                    <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
 | 
			
		||||
 | 
			
		||||
                    {% if watch.get_fetch_backend == "html_webdriver"
 | 
			
		||||
                         or (  watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver'  )
 | 
			
		||||
                         or "extra_browser_" in watch.get_fetch_backend
 | 
			
		||||
                    %}
 | 
			
		||||
                    <img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
 | 
			
		||||
                    {%if watch.is_pdf  %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %}
 | 
			
		||||
                    {% if watch.has_browser_steps %}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" title="Browser Steps is enabled" >{% endif %}
 | 
			
		||||
                    {% if watch.last_error is defined and watch.last_error != False %}
 | 
			
		||||
                    <div class="fetch-error">{{ watch.last_error }}
 | 
			
		||||
 | 
			
		||||
                        {% if '403' in watch.last_error %}
 | 
			
		||||
                            {% if has_proxies %}
 | 
			
		||||
                                <a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a> 
 | 
			
		||||
                            {% endif %}
 | 
			
		||||
                            <a href="{{ url_for('settings.settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a>
 | 
			
		||||
                        
 | 
			
		||||
                        {% endif %}
 | 
			
		||||
                        {% if 'empty result or contain only an image' in watch.last_error %}
 | 
			
		||||
                            <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Detecting-changes-in-images">more help here</a>.
 | 
			
		||||
                        {% endif %}
 | 
			
		||||
                    <div>
 | 
			
		||||
                    <a class="ajax-op state-off pause-toggle" data-op="pause" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks" class="icon icon-pause" ></a>
 | 
			
		||||
                    <a class="ajax-op state-on pause-toggle"  data-op="pause" style="display: none" href="{{url_for('watchlist.index', op='pause', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='play.svg')}}" alt="UnPause checks" title="UnPause checks" class="icon icon-unpause" ></a>
 | 
			
		||||
                    <a class="ajax-op state-off mute-toggle" data-op="mute" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notification" title="Mute notification" class="icon icon-mute" ></a>
 | 
			
		||||
                    <a class="ajax-op state-on mute-toggle" data-op="mute"  style="display: none" href="{{url_for('watchlist.index', op='mute', uuid=watch.uuid, tag=active_tag_uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="UnMute notification" title="UnMute notification" class="icon icon-mute" ></a>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
                    {% if watch.last_notification_error is defined and watch.last_notification_error != False %}
 | 
			
		||||
                    <div class="fetch-error notification-error"><a href="{{url_for('settings.notification_logs')}}">{{ watch.last_notification_error }}</a></div>
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
 | 
			
		||||
                    {% if watch['processor'] == 'text_json_diff'  %}
 | 
			
		||||
                        {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  %}
 | 
			
		||||
                        <div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
 | 
			
		||||
                        {% endif %}
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
                    {% if watch['processor'] == 'restock_diff' %}
 | 
			
		||||
                        <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon" > Price</span>
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
                    {% for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() %}
 | 
			
		||||
                      <span class="watch-tag-list">{{ watch_tag.title }}</span>
 | 
			
		||||
                    {% endfor %}
 | 
			
		||||
                </td>
 | 
			
		||||
            <!-- @todo make it so any watch handler obj can expose this --->
 | 
			
		||||
{% if any_has_restock_price_processor %}
 | 
			
		||||
 | 
			
		||||
                <td class="title-col inline">
 | 
			
		||||
                    <div class="flex-wrapper">
 | 
			
		||||
                    {% if 'favicons_enabled' not in ui_settings or ui_settings['favicons_enabled'] %}
 | 
			
		||||
                        <div>{# A page might have hundreds of these images, set IMG options for lazy loading, don't set SRC if we dont have it so it doesnt fetch the placeholder'  #}
 | 
			
		||||
                            <img alt="Favicon thumbnail" class="favicon" loading="lazy" decoding="async" fetchpriority="low" {% if favicon %} src="{{url_for('static_content', group='favicon', filename=watch.uuid)}}" {% else %} src='data:image/svg+xml;utf8,%3Csvg xmlns="http://www.w3.org/2000/svg" width="7.087" height="7.087" viewBox="0 0 7.087 7.087"%3E%3Ccircle cx="3.543" cy="3.543" r="3.279" stroke="%23e1e1e1" stroke-width="0.45" fill="none" opacity="0.74"/%3E%3C/svg%3E' {%  endif %} />
 | 
			
		||||
                        </div>
 | 
			
		||||
                    {%  endif %}
 | 
			
		||||
                        <div>
 | 
			
		||||
                        <span class="watch-title">
 | 
			
		||||
                            {% if system_use_url_watchlist or watch.get('use_page_title_in_list') %}
 | 
			
		||||
                                {{ watch.label }}
 | 
			
		||||
                            {% else %}
 | 
			
		||||
                                {{ watch.get('title') or watch.link }}
 | 
			
		||||
                            {% endif %}
 | 
			
		||||
                           <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"> </a>
 | 
			
		||||
                        </span>
 | 
			
		||||
                            <div class="error-text" style="display:none;">{{ watch.compile_error_texts(has_proxies=datastore.proxy_list) }}</div>
 | 
			
		||||
                            {%- if watch['processor'] == 'text_json_diff'  -%}
 | 
			
		||||
                                {%- if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  -%}
 | 
			
		||||
                                <div class="ldjson-price-track-offer">Switch to Restock & Price watch mode? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div>
 | 
			
		||||
                                {%- endif -%}
 | 
			
		||||
                            {%- endif -%}
 | 
			
		||||
                            {%- if watch['processor'] == 'restock_diff' -%}
 | 
			
		||||
                                <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon" > Price</span>
 | 
			
		||||
                            {%- endif -%}
 | 
			
		||||
                            {%- for watch_tag_uuid, watch_tag in datastore.get_all_tags_for_watch(watch['uuid']).items() -%}
 | 
			
		||||
                              <span class="watch-tag-list">{{ watch_tag.title }}</span>
 | 
			
		||||
                            {%- endfor -%}
 | 
			
		||||
                        </div>
 | 
			
		||||
                    <div class="status-icons">
 | 
			
		||||
                            <a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
 | 
			
		||||
                            {%- if watch.get_fetch_backend == "html_webdriver"
 | 
			
		||||
                                 or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver'  )
 | 
			
		||||
                                 or "extra_browser_" in watch.get_fetch_backend
 | 
			
		||||
                            -%}
 | 
			
		||||
                            <img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
 | 
			
		||||
                            {%- endif -%}
 | 
			
		||||
                            {%- if watch.is_pdf  -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
 | 
			
		||||
                            {%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
 | 
			
		||||
 | 
			
		||||
                    </div>
 | 
			
		||||
                    </div>
 | 
			
		||||
                </td>
 | 
			
		||||
{%- if any_has_restock_price_processor -%}
 | 
			
		||||
                <td class="restock-and-price">
 | 
			
		||||
                    {% if watch['processor'] == 'restock_diff'  %}
 | 
			
		||||
                        {% if watch.has_restock_info %}
 | 
			
		||||
                    {%- if watch['processor'] == 'restock_diff'  -%}
 | 
			
		||||
                        {%- if watch.has_restock_info -%}
 | 
			
		||||
                            <span class="restock-label {{'in-stock' if watch['restock']['in_stock'] else 'not-in-stock' }}" title="Detecting restock and price">
 | 
			
		||||
                                <!-- maybe some object watch['processor'][restock_diff] or.. -->
 | 
			
		||||
                                 {% if watch['restock']['in_stock'] %} In stock {% else %} Not in stock {% endif %}
 | 
			
		||||
                                 {%- if watch['restock']['in_stock']-%}  In stock {%- else-%}  Not in stock {%- endif -%}
 | 
			
		||||
                            </span>
 | 
			
		||||
                        {% endif %}
 | 
			
		||||
                        {%- endif -%}
 | 
			
		||||
 | 
			
		||||
                        {% if watch.get('restock') and watch['restock']['price'] != None %}
 | 
			
		||||
                            {% if watch['restock']['price'] != None %}
 | 
			
		||||
                        {%- if watch.get('restock') and watch['restock']['price'] != None -%}
 | 
			
		||||
                            {%- if watch['restock']['price'] != None -%}
 | 
			
		||||
                                <span class="restock-label price" title="Price">
 | 
			
		||||
                                {{ watch['restock']['price']|format_number_locale }} {{ watch['restock']['currency'] }}
 | 
			
		||||
                                </span>
 | 
			
		||||
                            {% endif %}
 | 
			
		||||
                        {% elif not watch.has_restock_info %}
 | 
			
		||||
                            {%- endif -%}
 | 
			
		||||
                        {%- elif not watch.has_restock_info -%}
 | 
			
		||||
                            <span class="restock-label error">No information</span>
 | 
			
		||||
                        {% endif %}
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
                        {%- endif -%}
 | 
			
		||||
                    {%- endif -%}
 | 
			
		||||
                </td>
 | 
			
		||||
{% endif %}
 | 
			
		||||
{%- endif -%}
 | 
			
		||||
            {#last_checked becomes fetch-start-time#}
 | 
			
		||||
                <td class="last-checked" data-timestamp="{{ watch.last_checked }}" {% if checking_now %} data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" {% endif %} >
 | 
			
		||||
                    {% if checking_now %}
 | 
			
		||||
                        <span class="spinner"></span><span> Checking now</span>
 | 
			
		||||
                    {% else %}
 | 
			
		||||
                        {{watch|format_last_checked_time|safe}}</td>
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
 | 
			
		||||
                <td class="last-changed" data-timestamp="{{ watch.last_changed }}">{% if watch.history_n >=2 and watch.last_changed >0 %}
 | 
			
		||||
                    {{watch.last_changed|format_timestamp_timeago}}
 | 
			
		||||
                    {% else %}
 | 
			
		||||
                    Not yet
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
                <td class="last-checked" data-timestamp="{{ watch.last_checked }}" data-fetchduration={{ watch.fetch_time }} data-eta_complete="{{ watch.last_checked+watch.fetch_time }}" >
 | 
			
		||||
                    <div class="spinner-wrapper" style="display:none;" >
 | 
			
		||||
                        <span class="spinner"></span><span> Checking now</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <span class="innertext">{{watch|format_last_checked_time|safe}}</span>
 | 
			
		||||
                </td>
 | 
			
		||||
                <td>
 | 
			
		||||
                    <a {% if watch.uuid in queued_uuids %}disabled="true"{% endif %} href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}"
 | 
			
		||||
                       class="recheck pure-button pure-button-primary">{% if watch.uuid in queued_uuids %}Queued{% else %}Recheck{% endif %}</a>
 | 
			
		||||
                <td class="last-changed" data-timestamp="{{ watch.last_changed }}">{%- if watch.history_n >=2 and watch.last_changed >0 -%}
 | 
			
		||||
                    {{watch.last_changed|format_timestamp_timeago}}
 | 
			
		||||
                    {%- else -%}
 | 
			
		||||
                    Not yet
 | 
			
		||||
                    {%- endif -%}
 | 
			
		||||
                </td>
 | 
			
		||||
                <td class="buttons">
 | 
			
		||||
                    <div>
 | 
			
		||||
                    {%- set target_attr = ' target="' ~ watch.uuid ~ '"' if datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') else '' -%}
 | 
			
		||||
                    <a href="" class="already-in-queue-button recheck pure-button pure-button-primary" style="display: none;" disabled="disabled">Queued</a>
 | 
			
		||||
                    <a href="{{ url_for('ui.form_watch_checknow', uuid=watch.uuid, tag=request.args.get('tag')) }}" data-op='recheck' class="ajax-op recheck pure-button pure-button-primary">Recheck</a>
 | 
			
		||||
                    <a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
 | 
			
		||||
                    {% if watch.history_n >= 2 %}
 | 
			
		||||
 | 
			
		||||
                        {% set open_diff_in_new_tab = datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') %}
 | 
			
		||||
                        {% set target_attr = ' target="' ~ watch.uuid ~ '"' if open_diff_in_new_tab else '' %}
 | 
			
		||||
 | 
			
		||||
                        {%  if is_unviewed %}
 | 
			
		||||
                           <a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
 | 
			
		||||
                        {% else %}
 | 
			
		||||
                           <a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
 | 
			
		||||
                        {% endif %}
 | 
			
		||||
 | 
			
		||||
                    {% else %}
 | 
			
		||||
                        {% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
 | 
			
		||||
                            <a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary">Preview</a>
 | 
			
		||||
                        {% endif %}
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
                    <a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary history-link" style="display: none;">History</a>
 | 
			
		||||
                    <a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary preview-link" style="display: none;">Preview</a>
 | 
			
		||||
                    </div>
 | 
			
		||||
                </td>
 | 
			
		||||
            </tr>
 | 
			
		||||
            {% endfor %}
 | 
			
		||||
            {%- endfor -%}
 | 
			
		||||
            </tbody>
 | 
			
		||||
        </table>
 | 
			
		||||
        <ul id="post-list-buttons">
 | 
			
		||||
            {% if errored_count %}
 | 
			
		||||
            <li>
 | 
			
		||||
                <a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error ">With errors ({{ errored_count }})</a>
 | 
			
		||||
            <li id="post-list-with-errors" style="display: none;" >
 | 
			
		||||
                <a href="{{url_for('watchlist.index', with_errors=1, tag=request.args.get('tag')) }}" class="pure-button button-tag button-error">With errors ({{ errored_count }})</a>
 | 
			
		||||
            </li>
 | 
			
		||||
            {% endif %}
 | 
			
		||||
            {% if has_unviewed %}
 | 
			
		||||
            <li>
 | 
			
		||||
                <a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Mark all viewed</a>
 | 
			
		||||
            <li id="post-list-mark-views" style="display: none;" >
 | 
			
		||||
                <a href="{{url_for('ui.mark_all_viewed',with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed</a>
 | 
			
		||||
            </li>
 | 
			
		||||
        {%-  if active_tag_uuid -%}
 | 
			
		||||
            <li id="post-list-mark-views-tag">
 | 
			
		||||
                <a href="{{url_for('ui.mark_all_viewed', tag=active_tag_uuid) }}" class="pure-button button-tag " id="mark-all-viewed">Mark all viewed in '{{active_tag.title}}'</a>
 | 
			
		||||
            </li>
 | 
			
		||||
        {%-  endif -%}
 | 
			
		||||
            <li id="post-list-unread" style="display: none;" >
 | 
			
		||||
                <a href="{{url_for('watchlist.index', unread=1, tag=request.args.get('tag')) }}" class="pure-button button-tag">Unread (<span id="unread-tab-counter">{{ unread_changes_count }}</span>)</a>
 | 
			
		||||
            </li>
 | 
			
		||||
            {% endif %}
 | 
			
		||||
            <li>
 | 
			
		||||
               <a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Recheck
 | 
			
		||||
                all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a>
 | 
			
		||||
               <a href="{{ url_for('ui.form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag" id="recheck-all">Recheck
 | 
			
		||||
                all {% if active_tag_uuid %}  in '{{active_tag.title}}'{%endif%}</a>
 | 
			
		||||
            </li>
 | 
			
		||||
            <li>
 | 
			
		||||
                <a href="{{ url_for('rss.feed', tag=active_tag_uuid, token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='generic_feed-icon.svg')}}" height="15"></a>
 | 
			
		||||
@@ -251,4 +270,4 @@
 | 
			
		||||
    </div>
 | 
			
		||||
    </form>
 | 
			
		||||
</div>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
{%- endblock -%}
 | 
			
		||||
@@ -1,11 +1,9 @@
 | 
			
		||||
from flask import Blueprint
 | 
			
		||||
 | 
			
		||||
from json_logic.builtins import BUILTINS
 | 
			
		||||
 | 
			
		||||
from .exceptions import EmptyConditionRuleRowNotUsable
 | 
			
		||||
from .pluggy_interface import plugin_manager  # Import the pluggy plugin manager
 | 
			
		||||
from . import default_plugin
 | 
			
		||||
 | 
			
		||||
from loguru import logger
 | 
			
		||||
# List of all supported JSON Logic operators
 | 
			
		||||
operator_choices = [
 | 
			
		||||
    (None, "Choose one - Operator"),
 | 
			
		||||
@@ -16,7 +14,6 @@ operator_choices = [
 | 
			
		||||
    ("==", "Equals"),
 | 
			
		||||
    ("!=", "Not Equals"),
 | 
			
		||||
    ("in", "Contains"),
 | 
			
		||||
    ("!in", "Does Not Contain"),
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
# Fields available in the rules
 | 
			
		||||
@@ -102,12 +99,33 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat
 | 
			
		||||
        if complete_rules:
 | 
			
		||||
            # Give all plugins a chance to update the data dict again (that we will test the conditions against)
 | 
			
		||||
            for plugin in plugin_manager.get_plugins():
 | 
			
		||||
                new_execute_data = plugin.add_data(current_watch_uuid=current_watch_uuid,
 | 
			
		||||
                                                   application_datastruct=application_datastruct,
 | 
			
		||||
                                                   ephemeral_data=ephemeral_data)
 | 
			
		||||
                try:
 | 
			
		||||
                    import concurrent.futures
 | 
			
		||||
                    import time
 | 
			
		||||
                    
 | 
			
		||||
                    with concurrent.futures.ThreadPoolExecutor() as executor:
 | 
			
		||||
                        future = executor.submit(
 | 
			
		||||
                            plugin.add_data,
 | 
			
		||||
                            current_watch_uuid=current_watch_uuid,
 | 
			
		||||
                            application_datastruct=application_datastruct,
 | 
			
		||||
                            ephemeral_data=ephemeral_data
 | 
			
		||||
                        )
 | 
			
		||||
                        logger.debug(f"Trying plugin {plugin}....")
 | 
			
		||||
 | 
			
		||||
                if new_execute_data and isinstance(new_execute_data, dict):
 | 
			
		||||
                    EXECUTE_DATA.update(new_execute_data)
 | 
			
		||||
                        # Set a timeout of 10 seconds
 | 
			
		||||
                        try:
 | 
			
		||||
                            new_execute_data = future.result(timeout=10)
 | 
			
		||||
                            if new_execute_data and isinstance(new_execute_data, dict):
 | 
			
		||||
                                EXECUTE_DATA.update(new_execute_data)
 | 
			
		||||
 | 
			
		||||
                        except concurrent.futures.TimeoutError:
 | 
			
		||||
                            # The plugin took too long, abort processing for this watch
 | 
			
		||||
                            raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.")
 | 
			
		||||
                except Exception as e:
 | 
			
		||||
                    # Log the error but continue with the next plugin
 | 
			
		||||
                    import logging
 | 
			
		||||
                    logging.error(f"Error executing plugin {plugin.__class__.__name__}: {str(e)}")
 | 
			
		||||
                    continue
 | 
			
		||||
 | 
			
		||||
            # Create the ruleset
 | 
			
		||||
            ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules)
 | 
			
		||||
@@ -132,3 +150,18 @@ for plugin in plugin_manager.get_plugins():
 | 
			
		||||
    if isinstance(new_field_choices, list):
 | 
			
		||||
        field_choices.extend(new_field_choices)
 | 
			
		||||
 | 
			
		||||
def collect_ui_edit_stats_extras(watch):
 | 
			
		||||
    """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
 | 
			
		||||
    extras_content = []
 | 
			
		||||
    
 | 
			
		||||
    for plugin in plugin_manager.get_plugins():
 | 
			
		||||
        try:
 | 
			
		||||
            content = plugin.ui_edit_stats_extras(watch=watch)
 | 
			
		||||
            if content:
 | 
			
		||||
                extras_content.append(content)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            # Skip plugins that don't implement the hook or have errors
 | 
			
		||||
            pass
 | 
			
		||||
            
 | 
			
		||||
    return "\n".join(extras_content) if extras_content else ""
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -21,17 +21,21 @@ def register_operators():
 | 
			
		||||
    def length_max(_, text, strlen):
 | 
			
		||||
        return len(text) <= int(strlen)
 | 
			
		||||
 | 
			
		||||
    # ✅ Custom function for case-insensitive regex matching
 | 
			
		||||
    # Custom function for case-insensitive regex matching
 | 
			
		||||
    def contains_regex(_, text, pattern):
 | 
			
		||||
        """Returns True if `text` contains `pattern` (case-insensitive regex match)."""
 | 
			
		||||
        return bool(re.search(pattern, str(text), re.IGNORECASE))
 | 
			
		||||
 | 
			
		||||
    # ✅ Custom function for NOT matching case-insensitive regex
 | 
			
		||||
    # Custom function for NOT matching case-insensitive regex
 | 
			
		||||
    def not_contains_regex(_, text, pattern):
 | 
			
		||||
        """Returns True if `text` does NOT contain `pattern` (case-insensitive regex match)."""
 | 
			
		||||
        return not bool(re.search(pattern, str(text), re.IGNORECASE))
 | 
			
		||||
 | 
			
		||||
    def not_contains(_, text, pattern):
 | 
			
		||||
        return not pattern in text
 | 
			
		||||
 | 
			
		||||
    return {
 | 
			
		||||
        "!in": not_contains,
 | 
			
		||||
        "!contains_regex": not_contains_regex,
 | 
			
		||||
        "contains_regex": contains_regex,
 | 
			
		||||
        "ends_with": ends_with,
 | 
			
		||||
@@ -43,6 +47,7 @@ def register_operators():
 | 
			
		||||
@hookimpl
 | 
			
		||||
def register_operator_choices():
 | 
			
		||||
    return [
 | 
			
		||||
        ("!in", "Does NOT Contain"),
 | 
			
		||||
        ("starts_with", "Text Starts With"),
 | 
			
		||||
        ("ends_with", "Text Ends With"),
 | 
			
		||||
        ("length_min", "Length minimum"),
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,8 @@
 | 
			
		||||
import pluggy
 | 
			
		||||
from . import default_plugin  # Import the default plugin
 | 
			
		||||
import os
 | 
			
		||||
import importlib
 | 
			
		||||
import sys
 | 
			
		||||
from . import default_plugin
 | 
			
		||||
 | 
			
		||||
# ✅ Ensure that the namespace in HookspecMarker matches PluginManager
 | 
			
		||||
PLUGIN_NAMESPACE = "changedetectionio_conditions"
 | 
			
		||||
@@ -30,6 +33,11 @@ class ConditionsSpec:
 | 
			
		||||
    def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
 | 
			
		||||
        """Add to the datadict"""
 | 
			
		||||
        pass
 | 
			
		||||
        
 | 
			
		||||
    @hookspec
 | 
			
		||||
    def ui_edit_stats_extras(watch):
 | 
			
		||||
        """Return HTML content to add to the stats tab in the edit view"""
 | 
			
		||||
        pass
 | 
			
		||||
 | 
			
		||||
# ✅ Set up Pluggy Plugin Manager
 | 
			
		||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
 | 
			
		||||
@@ -40,5 +48,27 @@ plugin_manager.add_hookspecs(ConditionsSpec)
 | 
			
		||||
# ✅ Register built-in plugins manually
 | 
			
		||||
plugin_manager.register(default_plugin, "default_plugin")
 | 
			
		||||
 | 
			
		||||
# ✅ Load plugins from the plugins directory
 | 
			
		||||
def load_plugins_from_directory():
 | 
			
		||||
    plugins_dir = os.path.join(os.path.dirname(__file__), 'plugins')
 | 
			
		||||
    if not os.path.exists(plugins_dir):
 | 
			
		||||
        return
 | 
			
		||||
        
 | 
			
		||||
    # Get all Python files (excluding __init__.py)
 | 
			
		||||
    for filename in os.listdir(plugins_dir):
 | 
			
		||||
        if filename.endswith(".py") and filename != "__init__.py":
 | 
			
		||||
            module_name = filename[:-3]  # Remove .py extension
 | 
			
		||||
            module_path = f"changedetectionio.conditions.plugins.{module_name}"
 | 
			
		||||
            
 | 
			
		||||
            try:
 | 
			
		||||
                module = importlib.import_module(module_path)
 | 
			
		||||
                # Register the plugin with pluggy
 | 
			
		||||
                plugin_manager.register(module, module_name)
 | 
			
		||||
            except (ImportError, AttributeError) as e:
 | 
			
		||||
                print(f"Error loading plugin {module_name}: {e}")
 | 
			
		||||
 | 
			
		||||
# Load plugins from the plugins directory
 | 
			
		||||
load_plugins_from_directory()
 | 
			
		||||
 | 
			
		||||
# ✅ Discover installed plugins from external packages (if any)
 | 
			
		||||
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										1
									
								
								changedetectionio/conditions/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								changedetectionio/conditions/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1 @@
 | 
			
		||||
# Import plugins package to make them discoverable
 | 
			
		||||
							
								
								
									
										119
									
								
								changedetectionio/conditions/plugins/levenshtein_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								changedetectionio/conditions/plugins/levenshtein_plugin.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,119 @@
 | 
			
		||||
import pluggy
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS=100000
 | 
			
		||||
 | 
			
		||||
# Support both plugin systems
 | 
			
		||||
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
 | 
			
		||||
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
 | 
			
		||||
 | 
			
		||||
def levenshtein_ratio_recent_history(watch, incoming_text=None):
 | 
			
		||||
    try:
 | 
			
		||||
        from Levenshtein import ratio, distance
 | 
			
		||||
        k = list(watch.history.keys())
 | 
			
		||||
        a = None
 | 
			
		||||
        b = None
 | 
			
		||||
 | 
			
		||||
        # When called from ui_edit_stats_extras, we don't have incoming_text
 | 
			
		||||
        if incoming_text is None:
 | 
			
		||||
            a = watch.get_history_snapshot(timestamp=k[-1])  # Latest snapshot
 | 
			
		||||
            b = watch.get_history_snapshot(timestamp=k[-2])  # Previous snapshot
 | 
			
		||||
 | 
			
		||||
        # Needs atleast one snapshot
 | 
			
		||||
        elif len(k) >= 1: # Should be atleast one snapshot to compare against
 | 
			
		||||
            a = watch.get_history_snapshot(timestamp=k[-1]) # Latest saved snapshot
 | 
			
		||||
            b = incoming_text if incoming_text else k[-2]
 | 
			
		||||
 | 
			
		||||
        if a and b:
 | 
			
		||||
            distance_value = distance(a, b)
 | 
			
		||||
            ratio_value = ratio(a, b)
 | 
			
		||||
            return {
 | 
			
		||||
                'distance': distance_value,
 | 
			
		||||
                'ratio': ratio_value,
 | 
			
		||||
                'percent_similar': round(ratio_value * 100, 2)
 | 
			
		||||
            }
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.warning(f"Unable to calc similarity: {str(e)}")
 | 
			
		||||
 | 
			
		||||
    return ''
 | 
			
		||||
 | 
			
		||||
@conditions_hookimpl
 | 
			
		||||
def register_operators():
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
@conditions_hookimpl
 | 
			
		||||
def register_operator_choices():
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@conditions_hookimpl
 | 
			
		||||
def register_field_choices():
 | 
			
		||||
    return [
 | 
			
		||||
        ("levenshtein_ratio", "Levenshtein - Text similarity ratio"),
 | 
			
		||||
        ("levenshtein_distance", "Levenshtein - Text change distance"),
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
@conditions_hookimpl
 | 
			
		||||
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
 | 
			
		||||
    res = {}
 | 
			
		||||
    watch = application_datastruct['watching'].get(current_watch_uuid)
 | 
			
		||||
    # ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc
 | 
			
		||||
 | 
			
		||||
    if watch and 'text' in ephemeral_data:
 | 
			
		||||
        lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text',''))
 | 
			
		||||
        if isinstance(lev_data, dict):
 | 
			
		||||
            res['levenshtein_ratio'] = lev_data.get('ratio', 0)
 | 
			
		||||
            res['levenshtein_similarity'] = lev_data.get('percent_similar', 0)
 | 
			
		||||
            res['levenshtein_distance'] = lev_data.get('distance', 0)
 | 
			
		||||
 | 
			
		||||
    return res
 | 
			
		||||
 | 
			
		||||
@global_hookimpl
 | 
			
		||||
def ui_edit_stats_extras(watch):
 | 
			
		||||
    """Add Levenshtein stats to the UI using the global plugin system"""
 | 
			
		||||
    """Generate the HTML for Levenshtein stats - shared by both plugin systems"""
 | 
			
		||||
    if len(watch.history.keys()) < 2:
 | 
			
		||||
        return "<p>Not enough history to calculate Levenshtein metrics</p>"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # Protection against the algorithm getting stuck on huge documents
 | 
			
		||||
    k = list(watch.history.keys())
 | 
			
		||||
    if any(
 | 
			
		||||
            len(watch.get_history_snapshot(timestamp=k[idx])) > LEVENSHTEIN_MAX_LEN_FOR_EDIT_STATS
 | 
			
		||||
            for idx in (-1, -2)
 | 
			
		||||
            if len(k) >= abs(idx)
 | 
			
		||||
    ):
 | 
			
		||||
        return "<p>Snapshot too large for edit statistics, skipping.</p>"
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        lev_data = levenshtein_ratio_recent_history(watch)
 | 
			
		||||
        if not lev_data or not isinstance(lev_data, dict):
 | 
			
		||||
            return "<p>Unable to calculate Levenshtein metrics</p>"
 | 
			
		||||
            
 | 
			
		||||
        html = f"""
 | 
			
		||||
        <div class="levenshtein-stats">
 | 
			
		||||
            <h4>Levenshtein Text Similarity Details</h4>
 | 
			
		||||
            <table class="pure-table">
 | 
			
		||||
                <tbody>
 | 
			
		||||
                    <tr>
 | 
			
		||||
                        <td>Raw distance (edits needed)</td>
 | 
			
		||||
                        <td>{lev_data['distance']}</td>
 | 
			
		||||
                    </tr>
 | 
			
		||||
                    <tr>
 | 
			
		||||
                        <td>Similarity ratio</td>
 | 
			
		||||
                        <td>{lev_data['ratio']:.4f}</td>
 | 
			
		||||
                    </tr>
 | 
			
		||||
                    <tr>
 | 
			
		||||
                        <td>Percent similar</td>
 | 
			
		||||
                        <td>{lev_data['percent_similar']}%</td>
 | 
			
		||||
                    </tr>
 | 
			
		||||
                </tbody>
 | 
			
		||||
            </table>
 | 
			
		||||
            <p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
 | 
			
		||||
        </div>
 | 
			
		||||
        """
 | 
			
		||||
        return html
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
 | 
			
		||||
        return "<p>Error calculating Levenshtein metrics</p>"
 | 
			
		||||
        
 | 
			
		||||
							
								
								
									
										82
									
								
								changedetectionio/conditions/plugins/wordcount_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								changedetectionio/conditions/plugins/wordcount_plugin.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,82 @@
 | 
			
		||||
import pluggy
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
# Support both plugin systems
 | 
			
		||||
conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions")
 | 
			
		||||
global_hookimpl = pluggy.HookimplMarker("changedetectionio")
 | 
			
		||||
 | 
			
		||||
def count_words_in_history(watch, incoming_text=None):
 | 
			
		||||
    """Count words in snapshot text"""
 | 
			
		||||
    try:
 | 
			
		||||
        if incoming_text is not None:
 | 
			
		||||
            # When called from add_data with incoming text
 | 
			
		||||
            return len(incoming_text.split())
 | 
			
		||||
        elif watch.history.keys():
 | 
			
		||||
            # When called from UI extras to count latest snapshot
 | 
			
		||||
            latest_key = list(watch.history.keys())[-1]
 | 
			
		||||
            latest_content = watch.get_history_snapshot(latest_key)
 | 
			
		||||
            return len(latest_content.split())
 | 
			
		||||
        return 0
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.error(f"Error counting words: {str(e)}")
 | 
			
		||||
        return 0
 | 
			
		||||
 | 
			
		||||
# Implement condition plugin hooks
 | 
			
		||||
@conditions_hookimpl
 | 
			
		||||
def register_operators():
 | 
			
		||||
    # No custom operators needed
 | 
			
		||||
    return {}
 | 
			
		||||
 | 
			
		||||
@conditions_hookimpl
 | 
			
		||||
def register_operator_choices():
 | 
			
		||||
    # No custom operator choices needed
 | 
			
		||||
    return []
 | 
			
		||||
 | 
			
		||||
@conditions_hookimpl
 | 
			
		||||
def register_field_choices():
 | 
			
		||||
    # Add a field that will be available in conditions
 | 
			
		||||
    return [
 | 
			
		||||
        ("word_count", "Word count of content"),
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
@conditions_hookimpl
 | 
			
		||||
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
 | 
			
		||||
    """Add word count data for conditions"""
 | 
			
		||||
    result = {}
 | 
			
		||||
    watch = application_datastruct['watching'].get(current_watch_uuid)
 | 
			
		||||
    
 | 
			
		||||
    if watch and 'text' in ephemeral_data:
 | 
			
		||||
        word_count = count_words_in_history(watch, ephemeral_data['text'])
 | 
			
		||||
        result['word_count'] = word_count
 | 
			
		||||
    
 | 
			
		||||
    return result
 | 
			
		||||
 | 
			
		||||
def _generate_stats_html(watch):
 | 
			
		||||
    """Generate the HTML content for the stats tab"""
 | 
			
		||||
    word_count = count_words_in_history(watch)
 | 
			
		||||
    
 | 
			
		||||
    html = f"""
 | 
			
		||||
    <div class="word-count-stats">
 | 
			
		||||
        <h4>Content Analysis</h4>
 | 
			
		||||
        <table class="pure-table">
 | 
			
		||||
            <tbody>
 | 
			
		||||
                <tr>
 | 
			
		||||
                    <td>Word count (latest snapshot)</td>
 | 
			
		||||
                    <td>{word_count}</td>
 | 
			
		||||
                </tr>
 | 
			
		||||
            </tbody>
 | 
			
		||||
        </table>
 | 
			
		||||
        <p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p>
 | 
			
		||||
    </div>
 | 
			
		||||
    """
 | 
			
		||||
    return html
 | 
			
		||||
 | 
			
		||||
@conditions_hookimpl
 | 
			
		||||
def ui_edit_stats_extras(watch):
 | 
			
		||||
    """Add word count stats to the UI through conditions plugin system"""
 | 
			
		||||
    return _generate_stats_html(watch)
 | 
			
		||||
 | 
			
		||||
@global_hookimpl
 | 
			
		||||
def ui_edit_stats_extras(watch):
 | 
			
		||||
    """Add word count stats to the UI using the global plugin system"""
 | 
			
		||||
    return _generate_stats_html(watch)
 | 
			
		||||
@@ -28,6 +28,7 @@ from changedetectionio.content_fetchers.requests import fetcher as html_requests
 | 
			
		||||
import importlib.resources
 | 
			
		||||
XPATH_ELEMENT_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
 | 
			
		||||
INSTOCK_DATA_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
 | 
			
		||||
FAVICON_FETCHER_JS = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('favicon-fetcher.js').read_text(encoding='utf-8')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def available_fetchers():
 | 
			
		||||
 
 | 
			
		||||
@@ -48,6 +48,7 @@ class Fetcher():
 | 
			
		||||
    error = None
 | 
			
		||||
    fetcher_description = "No description"
 | 
			
		||||
    headers = {}
 | 
			
		||||
    favicon_blob = None
 | 
			
		||||
    instock_data = None
 | 
			
		||||
    instock_data_js = ""
 | 
			
		||||
    status_code = None
 | 
			
		||||
@@ -63,21 +64,35 @@ class Fetcher():
 | 
			
		||||
    # Time ONTOP of the system defined env minimum time
 | 
			
		||||
    render_extract_delay = 0
 | 
			
		||||
 | 
			
		||||
    def clear_content(self):
 | 
			
		||||
        """
 | 
			
		||||
        Explicitly clear all content from memory to free up heap space.
 | 
			
		||||
        Call this after content has been saved to disk.
 | 
			
		||||
        """
 | 
			
		||||
        self.content = None
 | 
			
		||||
        if hasattr(self, 'raw_content'):
 | 
			
		||||
            self.raw_content = None
 | 
			
		||||
        self.screenshot = None
 | 
			
		||||
        self.xpath_data = None
 | 
			
		||||
        # Keep headers and status_code as they're small
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
 | 
			
		||||
    def get_error(self):
 | 
			
		||||
        return self.error
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
 | 
			
		||||
    def run(self,
 | 
			
		||||
            url,
 | 
			
		||||
            timeout,
 | 
			
		||||
            request_headers,
 | 
			
		||||
            request_body,
 | 
			
		||||
            request_method,
 | 
			
		||||
            ignore_status_codes=False,
 | 
			
		||||
            current_include_filters=None,
 | 
			
		||||
            is_binary=False,
 | 
			
		||||
            empty_pages_are_a_change=False):
 | 
			
		||||
    async def run(self,
 | 
			
		||||
                  fetch_favicon=True,
 | 
			
		||||
                  current_include_filters=None,
 | 
			
		||||
                  empty_pages_are_a_change=False,
 | 
			
		||||
                  ignore_status_codes=False,
 | 
			
		||||
                  is_binary=False,
 | 
			
		||||
                  request_body=None,
 | 
			
		||||
                  request_headers=None,
 | 
			
		||||
                  request_method=None,
 | 
			
		||||
                  timeout=None,
 | 
			
		||||
                  url=None,
 | 
			
		||||
                  ):
 | 
			
		||||
        # Should set self.error, self.status_code and self.content
 | 
			
		||||
        pass
 | 
			
		||||
 | 
			
		||||
@@ -122,10 +137,10 @@ class Fetcher():
 | 
			
		||||
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    def iterate_browser_steps(self, start_url=None):
 | 
			
		||||
    async def iterate_browser_steps(self, start_url=None):
 | 
			
		||||
        from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
 | 
			
		||||
        from playwright._impl._errors import TimeoutError, Error
 | 
			
		||||
        from changedetectionio.safe_jinja import render as jinja_render
 | 
			
		||||
        from changedetectionio.jinja2_custom import render as jinja_render
 | 
			
		||||
        step_n = 0
 | 
			
		||||
 | 
			
		||||
        if self.browser_steps is not None and len(self.browser_steps):
 | 
			
		||||
@@ -136,8 +151,8 @@ class Fetcher():
 | 
			
		||||
            for step in valid_steps:
 | 
			
		||||
                step_n += 1
 | 
			
		||||
                logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
 | 
			
		||||
                self.screenshot_step("before-" + str(step_n))
 | 
			
		||||
                self.save_step_html("before-" + str(step_n))
 | 
			
		||||
                await self.screenshot_step("before-" + str(step_n))
 | 
			
		||||
                await self.save_step_html("before-" + str(step_n))
 | 
			
		||||
 | 
			
		||||
                try:
 | 
			
		||||
                    optional_value = step['optional_value']
 | 
			
		||||
@@ -148,11 +163,11 @@ class Fetcher():
 | 
			
		||||
                    if '{%' in step['selector'] or '{{' in step['selector']:
 | 
			
		||||
                        selector = jinja_render(template_str=step['selector'])
 | 
			
		||||
 | 
			
		||||
                    getattr(interface, "call_action")(action_name=step['operation'],
 | 
			
		||||
                    await getattr(interface, "call_action")(action_name=step['operation'],
 | 
			
		||||
                                                      selector=selector,
 | 
			
		||||
                                                      optional_value=optional_value)
 | 
			
		||||
                    self.screenshot_step(step_n)
 | 
			
		||||
                    self.save_step_html(step_n)
 | 
			
		||||
                    await self.screenshot_step(step_n)
 | 
			
		||||
                    await self.save_step_html(step_n)
 | 
			
		||||
                except (Error, TimeoutError) as e:
 | 
			
		||||
                    logger.debug(str(e))
 | 
			
		||||
                    # Stop processing here
 | 
			
		||||
 
 | 
			
		||||
@@ -5,19 +5,19 @@ from urllib.parse import urlparse
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
 | 
			
		||||
    SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS
 | 
			
		||||
    SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_MAX_TOTAL_HEIGHT, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, FAVICON_FETCHER_JS
 | 
			
		||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
 | 
			
		||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable
 | 
			
		||||
 | 
			
		||||
def capture_full_page(page):
 | 
			
		||||
async def capture_full_page_async(page):
 | 
			
		||||
    import os
 | 
			
		||||
    import time
 | 
			
		||||
    from multiprocessing import Process, Pipe
 | 
			
		||||
 | 
			
		||||
    start = time.time()
 | 
			
		||||
 | 
			
		||||
    page_height = page.evaluate("document.documentElement.scrollHeight")
 | 
			
		||||
    page_width = page.evaluate("document.documentElement.scrollWidth")
 | 
			
		||||
    page_height = await page.evaluate("document.documentElement.scrollHeight")
 | 
			
		||||
    page_width = await page.evaluate("document.documentElement.scrollWidth")
 | 
			
		||||
    original_viewport = page.viewport_size
 | 
			
		||||
 | 
			
		||||
    logger.debug(f"Playwright viewport size {page.viewport_size} page height {page_height} page width {page_width}")
 | 
			
		||||
@@ -32,23 +32,23 @@ def capture_full_page(page):
 | 
			
		||||
            step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size
 | 
			
		||||
        logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size")
 | 
			
		||||
        # Set viewport to a larger size to capture more content at once
 | 
			
		||||
        page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
 | 
			
		||||
        await page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size})
 | 
			
		||||
 | 
			
		||||
    # Capture screenshots in chunks up to the max total height
 | 
			
		||||
    while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
 | 
			
		||||
        page.request_gc()
 | 
			
		||||
        page.evaluate(f"window.scrollTo(0, {y})")
 | 
			
		||||
        page.request_gc()
 | 
			
		||||
        screenshot_chunks.append(page.screenshot(
 | 
			
		||||
        await page.request_gc()
 | 
			
		||||
        await page.evaluate(f"window.scrollTo(0, {y})")
 | 
			
		||||
        await page.request_gc()
 | 
			
		||||
        screenshot_chunks.append(await page.screenshot(
 | 
			
		||||
            type="jpeg",
 | 
			
		||||
            full_page=False,
 | 
			
		||||
            quality=int(os.getenv("SCREENSHOT_QUALITY", 72))
 | 
			
		||||
        ))
 | 
			
		||||
        y += step_size
 | 
			
		||||
        page.request_gc()
 | 
			
		||||
        await page.request_gc()
 | 
			
		||||
 | 
			
		||||
    # Restore original viewport size
 | 
			
		||||
    page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
 | 
			
		||||
    await page.set_viewport_size({'width': original_viewport['width'], 'height': original_viewport['height']})
 | 
			
		||||
 | 
			
		||||
    # If we have multiple chunks, stitch them together
 | 
			
		||||
    if len(screenshot_chunks) > 1:
 | 
			
		||||
@@ -73,7 +73,6 @@ def capture_full_page(page):
 | 
			
		||||
 | 
			
		||||
    return screenshot_chunks[0]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class fetcher(Fetcher):
 | 
			
		||||
    fetcher_description = "Playwright {}/Javascript".format(
 | 
			
		||||
        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
 | 
			
		||||
@@ -124,9 +123,9 @@ class fetcher(Fetcher):
 | 
			
		||||
                self.proxy['username'] = parsed.username
 | 
			
		||||
                self.proxy['password'] = parsed.password
 | 
			
		||||
 | 
			
		||||
    def screenshot_step(self, step_n=''):
 | 
			
		||||
    async def screenshot_step(self, step_n=''):
 | 
			
		||||
        super().screenshot_step(step_n=step_n)
 | 
			
		||||
        screenshot = capture_full_page(page=self.page)
 | 
			
		||||
        screenshot = await capture_full_page_async(page=self.page)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        if self.browser_steps_screenshot_path is not None:
 | 
			
		||||
@@ -135,45 +134,47 @@ class fetcher(Fetcher):
 | 
			
		||||
            with open(destination, 'wb') as f:
 | 
			
		||||
                f.write(screenshot)
 | 
			
		||||
 | 
			
		||||
    def save_step_html(self, step_n):
 | 
			
		||||
    async def save_step_html(self, step_n):
 | 
			
		||||
        super().save_step_html(step_n=step_n)
 | 
			
		||||
        content = self.page.content()
 | 
			
		||||
        content = await self.page.content()
 | 
			
		||||
        destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n))
 | 
			
		||||
        logger.debug(f"Saving step HTML to {destination}")
 | 
			
		||||
        with open(destination, 'w') as f:
 | 
			
		||||
            f.write(content)
 | 
			
		||||
 | 
			
		||||
    def run(self,
 | 
			
		||||
            url,
 | 
			
		||||
            timeout,
 | 
			
		||||
            request_headers,
 | 
			
		||||
            request_body,
 | 
			
		||||
            request_method,
 | 
			
		||||
            ignore_status_codes=False,
 | 
			
		||||
            current_include_filters=None,
 | 
			
		||||
            is_binary=False,
 | 
			
		||||
            empty_pages_are_a_change=False):
 | 
			
		||||
    async def run(self,
 | 
			
		||||
                  fetch_favicon=True,
 | 
			
		||||
                  current_include_filters=None,
 | 
			
		||||
                  empty_pages_are_a_change=False,
 | 
			
		||||
                  ignore_status_codes=False,
 | 
			
		||||
                  is_binary=False,
 | 
			
		||||
                  request_body=None,
 | 
			
		||||
                  request_headers=None,
 | 
			
		||||
                  request_method=None,
 | 
			
		||||
                  timeout=None,
 | 
			
		||||
                  url=None,
 | 
			
		||||
                  ):
 | 
			
		||||
 | 
			
		||||
        from playwright.sync_api import sync_playwright
 | 
			
		||||
        from playwright.async_api import async_playwright
 | 
			
		||||
        import playwright._impl._errors
 | 
			
		||||
        import time
 | 
			
		||||
        self.delete_browser_steps_screenshots()
 | 
			
		||||
        response = None
 | 
			
		||||
 | 
			
		||||
        with sync_playwright() as p:
 | 
			
		||||
        async with async_playwright() as p:
 | 
			
		||||
            browser_type = getattr(p, self.browser_type)
 | 
			
		||||
 | 
			
		||||
            # Seemed to cause a connection Exception even tho I can see it connect
 | 
			
		||||
            # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
 | 
			
		||||
            # 60,000 connection timeout only
 | 
			
		||||
            browser = browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000)
 | 
			
		||||
            browser = await browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000)
 | 
			
		||||
 | 
			
		||||
            # SOCKS5 with authentication is not supported (yet)
 | 
			
		||||
            # https://github.com/microsoft/playwright/issues/10567
 | 
			
		||||
 | 
			
		||||
            # Set user agent to prevent Cloudflare from blocking the browser
 | 
			
		||||
            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
 | 
			
		||||
            context = browser.new_context(
 | 
			
		||||
            context = await browser.new_context(
 | 
			
		||||
                accept_downloads=False,  # Should never be needed
 | 
			
		||||
                bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others
 | 
			
		||||
                extra_http_headers=request_headers,
 | 
			
		||||
@@ -183,41 +184,47 @@ class fetcher(Fetcher):
 | 
			
		||||
                user_agent=manage_user_agent(headers=request_headers),
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            self.page = context.new_page()
 | 
			
		||||
            self.page = await context.new_page()
 | 
			
		||||
 | 
			
		||||
            # Listen for all console events and handle errors
 | 
			
		||||
            self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
 | 
			
		||||
            self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
 | 
			
		||||
 | 
			
		||||
            # Re-use as much code from browser steps as possible so its the same
 | 
			
		||||
            from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
 | 
			
		||||
            browsersteps_interface = steppable_browser_interface(start_url=url)
 | 
			
		||||
            browsersteps_interface.page = self.page
 | 
			
		||||
 | 
			
		||||
            response = browsersteps_interface.action_goto_url(value=url)
 | 
			
		||||
            self.headers = response.all_headers()
 | 
			
		||||
            response = await browsersteps_interface.action_goto_url(value=url)
 | 
			
		||||
 | 
			
		||||
            if response is None:
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                await context.close()
 | 
			
		||||
                await browser.close()
 | 
			
		||||
                logger.debug("Content Fetcher > Response object from the browser communication was none")
 | 
			
		||||
                raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
 | 
			
		||||
            # In async_playwright, all_headers() returns a coroutine
 | 
			
		||||
            try:
 | 
			
		||||
                self.headers = await response.all_headers()
 | 
			
		||||
            except TypeError:
 | 
			
		||||
                # Fallback for sync version
 | 
			
		||||
                self.headers = response.all_headers()
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
                if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code):
 | 
			
		||||
                    browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
 | 
			
		||||
                    await browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None)
 | 
			
		||||
            except playwright._impl._errors.TimeoutError as e:
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                await context.close()
 | 
			
		||||
                await browser.close()
 | 
			
		||||
                # This can be ok, we will try to grab what we could retrieve
 | 
			
		||||
                pass
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}")
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                await context.close()
 | 
			
		||||
                await browser.close()
 | 
			
		||||
                raise PageUnloadable(url=url, status_code=None, message=str(e))
 | 
			
		||||
 | 
			
		||||
            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
 | 
			
		||||
            self.page.wait_for_timeout(extra_wait * 1000)
 | 
			
		||||
            await self.page.wait_for_timeout(extra_wait * 1000)
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
                self.status_code = response.status
 | 
			
		||||
@@ -225,50 +232,58 @@ class fetcher(Fetcher):
 | 
			
		||||
                # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962
 | 
			
		||||
                logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.")
 | 
			
		||||
                logger.critical(response)
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                await context.close()
 | 
			
		||||
                await browser.close()
 | 
			
		||||
                raise PageUnloadable(url=url, status_code=None, message=str(e))
 | 
			
		||||
 | 
			
		||||
            if fetch_favicon:
 | 
			
		||||
                try:
 | 
			
		||||
                    self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
 | 
			
		||||
                    await self.page.request_gc()
 | 
			
		||||
                except Exception as e:
 | 
			
		||||
                    logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
 | 
			
		||||
 | 
			
		||||
            if self.status_code != 200 and not ignore_status_codes:
 | 
			
		||||
                screenshot = capture_full_page(self.page)
 | 
			
		||||
                screenshot = await capture_full_page_async(self.page)
 | 
			
		||||
                raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
 | 
			
		||||
 | 
			
		||||
            if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
 | 
			
		||||
            if not empty_pages_are_a_change and len((await self.page.content()).strip()) == 0:
 | 
			
		||||
                logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                await context.close()
 | 
			
		||||
                await browser.close()
 | 
			
		||||
                raise EmptyReply(url=url, status_code=response.status)
 | 
			
		||||
 | 
			
		||||
            # Run Browser Steps here
 | 
			
		||||
            if self.browser_steps_get_valid_steps():
 | 
			
		||||
                self.iterate_browser_steps(start_url=url)
 | 
			
		||||
                await self.iterate_browser_steps(start_url=url)
 | 
			
		||||
 | 
			
		||||
            self.page.wait_for_timeout(extra_wait * 1000)
 | 
			
		||||
            await self.page.wait_for_timeout(extra_wait * 1000)
 | 
			
		||||
 | 
			
		||||
            now = time.time()
 | 
			
		||||
            # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
 | 
			
		||||
            if current_include_filters is not None:
 | 
			
		||||
                self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
 | 
			
		||||
                await self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters)))
 | 
			
		||||
            else:
 | 
			
		||||
                self.page.evaluate("var include_filters=''")
 | 
			
		||||
            self.page.request_gc()
 | 
			
		||||
                await self.page.evaluate("var include_filters=''")
 | 
			
		||||
            await self.page.request_gc()
 | 
			
		||||
 | 
			
		||||
            # request_gc before and after evaluate to free up memory
 | 
			
		||||
            # @todo browsersteps etc
 | 
			
		||||
            MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
 | 
			
		||||
            self.xpath_data = self.page.evaluate(XPATH_ELEMENT_JS, {
 | 
			
		||||
            self.xpath_data = await self.page.evaluate(XPATH_ELEMENT_JS, {
 | 
			
		||||
                "visualselector_xpath_selectors": visualselector_xpath_selectors,
 | 
			
		||||
                "max_height": MAX_TOTAL_HEIGHT
 | 
			
		||||
            })
 | 
			
		||||
            self.page.request_gc()
 | 
			
		||||
            await self.page.request_gc()
 | 
			
		||||
 | 
			
		||||
            self.instock_data = self.page.evaluate(INSTOCK_DATA_JS)
 | 
			
		||||
            self.page.request_gc()
 | 
			
		||||
            self.instock_data = await self.page.evaluate(INSTOCK_DATA_JS)
 | 
			
		||||
            await self.page.request_gc()
 | 
			
		||||
 | 
			
		||||
            self.content = self.page.content()
 | 
			
		||||
            self.page.request_gc()
 | 
			
		||||
            self.content = await self.page.content()
 | 
			
		||||
            await self.page.request_gc()
 | 
			
		||||
            logger.debug(f"Scrape xPath element data in browser done in {time.time() - now:.2f}s")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
            # Bug 3 in Playwright screenshot handling
 | 
			
		||||
            # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
 | 
			
		||||
            # JPEG is better here because the screenshots can be very very large
 | 
			
		||||
@@ -278,7 +293,7 @@ class fetcher(Fetcher):
 | 
			
		||||
            # acceptable screenshot quality here
 | 
			
		||||
            try:
 | 
			
		||||
                # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
 | 
			
		||||
                self.screenshot = capture_full_page(page=self.page)
 | 
			
		||||
                self.screenshot = await capture_full_page_async(page=self.page)
 | 
			
		||||
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                # It's likely the screenshot was too long/big and something crashed
 | 
			
		||||
@@ -286,30 +301,30 @@ class fetcher(Fetcher):
 | 
			
		||||
            finally:
 | 
			
		||||
                # Request garbage collection one more time before closing
 | 
			
		||||
                try:
 | 
			
		||||
                    self.page.request_gc()
 | 
			
		||||
                    await self.page.request_gc()
 | 
			
		||||
                except:
 | 
			
		||||
                    pass
 | 
			
		||||
                
 | 
			
		||||
                # Clean up resources properly
 | 
			
		||||
                try:
 | 
			
		||||
                    self.page.request_gc()
 | 
			
		||||
                    await self.page.request_gc()
 | 
			
		||||
                except:
 | 
			
		||||
                    pass
 | 
			
		||||
 | 
			
		||||
                try:
 | 
			
		||||
                    self.page.close()
 | 
			
		||||
                    await self.page.close()
 | 
			
		||||
                except:
 | 
			
		||||
                    pass
 | 
			
		||||
                self.page = None
 | 
			
		||||
 | 
			
		||||
                try:
 | 
			
		||||
                    context.close()
 | 
			
		||||
                    await context.close()
 | 
			
		||||
                except:
 | 
			
		||||
                    pass
 | 
			
		||||
                context = None
 | 
			
		||||
 | 
			
		||||
                try:
 | 
			
		||||
                    browser.close()
 | 
			
		||||
                    await browser.close()
 | 
			
		||||
                except:
 | 
			
		||||
                    pass
 | 
			
		||||
                browser = None
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ from loguru import logger
 | 
			
		||||
 | 
			
		||||
from changedetectionio.content_fetchers import SCREENSHOT_MAX_HEIGHT_DEFAULT, visualselector_xpath_selectors, \
 | 
			
		||||
    SCREENSHOT_SIZE_STITCH_THRESHOLD, SCREENSHOT_DEFAULT_QUALITY, XPATH_ELEMENT_JS, INSTOCK_DATA_JS, \
 | 
			
		||||
    SCREENSHOT_MAX_TOTAL_HEIGHT
 | 
			
		||||
    SCREENSHOT_MAX_TOTAL_HEIGHT, FAVICON_FETCHER_JS
 | 
			
		||||
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
 | 
			
		||||
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, \
 | 
			
		||||
    BrowserConnectError
 | 
			
		||||
@@ -51,7 +51,15 @@ async def capture_full_page(page):
 | 
			
		||||
        await page.setViewport({'width': page.viewport['width'], 'height': step_size})
 | 
			
		||||
 | 
			
		||||
    while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT):
 | 
			
		||||
        await page.evaluate(f"window.scrollTo(0, {y})")
 | 
			
		||||
        # better than scrollTo incase they override it in the page
 | 
			
		||||
        await page.evaluate(
 | 
			
		||||
            """(y) => {
 | 
			
		||||
                document.documentElement.scrollTop = y;
 | 
			
		||||
                document.body.scrollTop = y;
 | 
			
		||||
            }""",
 | 
			
		||||
            y
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        screenshot_chunks.append(await page.screenshot(type_='jpeg',
 | 
			
		||||
                                                       fullPage=False,
 | 
			
		||||
                                                       quality=int(os.getenv("SCREENSHOT_QUALITY", 72))))
 | 
			
		||||
@@ -137,19 +145,24 @@ class fetcher(Fetcher):
 | 
			
		||||
    #         f.write(content)
 | 
			
		||||
 | 
			
		||||
    async def fetch_page(self,
 | 
			
		||||
                         url,
 | 
			
		||||
                         timeout,
 | 
			
		||||
                         request_headers,
 | 
			
		||||
                         request_body,
 | 
			
		||||
                         request_method,
 | 
			
		||||
                         ignore_status_codes,
 | 
			
		||||
                         current_include_filters,
 | 
			
		||||
                         empty_pages_are_a_change,
 | 
			
		||||
                         fetch_favicon,
 | 
			
		||||
                         ignore_status_codes,
 | 
			
		||||
                         is_binary,
 | 
			
		||||
                         empty_pages_are_a_change
 | 
			
		||||
                         request_body,
 | 
			
		||||
                         request_headers,
 | 
			
		||||
                         request_method,
 | 
			
		||||
                         timeout,
 | 
			
		||||
                         url,
 | 
			
		||||
                         ):
 | 
			
		||||
 | 
			
		||||
        import re
 | 
			
		||||
        self.delete_browser_steps_screenshots()
 | 
			
		||||
        extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
 | 
			
		||||
 | 
			
		||||
        n = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
 | 
			
		||||
        extra_wait = min(n, 15)
 | 
			
		||||
 | 
			
		||||
        logger.debug(f"Extra wait set to {extra_wait}s, requested was {n}s.")
 | 
			
		||||
 | 
			
		||||
        from pyppeteer import Pyppeteer
 | 
			
		||||
        pyppeteer_instance = Pyppeteer()
 | 
			
		||||
@@ -165,12 +178,24 @@ class fetcher(Fetcher):
 | 
			
		||||
        except websockets.exceptions.InvalidURI:
 | 
			
		||||
            raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://")
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            raise BrowserConnectError(msg=f"Error connecting to the browser {str(e)}")
 | 
			
		||||
            raise BrowserConnectError(msg=f"Error connecting to the browser - Exception '{str(e)}'")
 | 
			
		||||
 | 
			
		||||
        # Better is to launch chrome with the URL as arg
 | 
			
		||||
        # non-headless - newPage() will launch an extra tab/window, .browser should already contain 1 page/tab
 | 
			
		||||
        # headless - ask a new page
 | 
			
		||||
        self.page = (pages := await browser.pages) and len(pages) or await browser.newPage()
 | 
			
		||||
        # more reliable is to just request a new page
 | 
			
		||||
        self.page = await browser.newPage()
 | 
			
		||||
        
 | 
			
		||||
        # Add console handler to capture console.log from favicon fetcher
 | 
			
		||||
        #self.page.on('console', lambda msg: logger.debug(f"Browser console [{msg.type}]: {msg.text}"))
 | 
			
		||||
 | 
			
		||||
        if '--window-size' in self.browser_connection_url:
 | 
			
		||||
            # Be sure the viewport is always the window-size, this is often not the same thing
 | 
			
		||||
            match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url)
 | 
			
		||||
            if match:
 | 
			
		||||
                logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}")
 | 
			
		||||
                await self.page.setViewport({
 | 
			
		||||
                    "width": int(match.group(1)),
 | 
			
		||||
                    "height": int(match.group(2))
 | 
			
		||||
                })
 | 
			
		||||
                logger.debug(f"Puppeteer viewport size {self.page.viewport}")
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            from pyppeteerstealth import inject_evasions_into_page
 | 
			
		||||
@@ -216,14 +241,35 @@ class fetcher(Fetcher):
 | 
			
		||||
        #            browsersteps_interface = steppable_browser_interface()
 | 
			
		||||
        #            browsersteps_interface.page = self.page
 | 
			
		||||
 | 
			
		||||
        response = await self.page.goto(url, waitUntil="load")
 | 
			
		||||
        async def handle_frame_navigation(event):
 | 
			
		||||
            logger.debug(f"Frame navigated: {event}")
 | 
			
		||||
            w = extra_wait - 2 if extra_wait > 4 else 2
 | 
			
		||||
            logger.debug(f"Waiting {w} seconds before calling Page.stopLoading...")
 | 
			
		||||
            await asyncio.sleep(w)
 | 
			
		||||
            logger.debug("Issuing stopLoading command...")
 | 
			
		||||
            await self.page._client.send('Page.stopLoading')
 | 
			
		||||
            logger.debug("stopLoading command sent!")
 | 
			
		||||
 | 
			
		||||
        self.page._client.on('Page.frameStartedNavigating', lambda event: asyncio.create_task(handle_frame_navigation(event)))
 | 
			
		||||
        self.page._client.on('Page.frameStartedLoading', lambda event: asyncio.create_task(handle_frame_navigation(event)))
 | 
			
		||||
        self.page._client.on('Page.frameStoppedLoading', lambda event: logger.debug(f"Frame stopped loading: {event}"))
 | 
			
		||||
 | 
			
		||||
        if response is None:
 | 
			
		||||
            await self.page.close()
 | 
			
		||||
            await browser.close()
 | 
			
		||||
            logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)")
 | 
			
		||||
            raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
        response = None
 | 
			
		||||
        attempt=0
 | 
			
		||||
        while not response:
 | 
			
		||||
            logger.debug(f"Attempting page fetch {url} attempt {attempt}")
 | 
			
		||||
            response = await self.page.goto(url)
 | 
			
		||||
            await asyncio.sleep(1 + extra_wait)
 | 
			
		||||
            if response:
 | 
			
		||||
                break
 | 
			
		||||
            if not response:
 | 
			
		||||
                logger.warning("Page did not fetch! trying again!")
 | 
			
		||||
            if response is None and attempt>=2:
 | 
			
		||||
                await self.page.close()
 | 
			
		||||
                await browser.close()
 | 
			
		||||
                logger.warning(f"Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content) exiting attmpt {attempt}")
 | 
			
		||||
                raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
            attempt+=1
 | 
			
		||||
 | 
			
		||||
        self.headers = response.headers
 | 
			
		||||
 | 
			
		||||
@@ -248,6 +294,12 @@ class fetcher(Fetcher):
 | 
			
		||||
            await browser.close()
 | 
			
		||||
            raise PageUnloadable(url=url, status_code=None, message=str(e))
 | 
			
		||||
 | 
			
		||||
        if fetch_favicon:
 | 
			
		||||
            try:
 | 
			
		||||
                self.favicon_blob = await self.page.evaluate(FAVICON_FETCHER_JS)
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.error(f"Error fetching FavIcon info {str(e)}, continuing.")
 | 
			
		||||
 | 
			
		||||
        if self.status_code != 200 and not ignore_status_codes:
 | 
			
		||||
            screenshot = await capture_full_page(page=self.page)
 | 
			
		||||
 | 
			
		||||
@@ -266,7 +318,6 @@ class fetcher(Fetcher):
 | 
			
		||||
        #            if self.browser_steps_get_valid_steps():
 | 
			
		||||
        #                self.iterate_browser_steps()
 | 
			
		||||
 | 
			
		||||
        await asyncio.sleep(1 + extra_wait)
 | 
			
		||||
 | 
			
		||||
        # So we can find an element on the page where its selector was entered manually (maybe not xPath etc)
 | 
			
		||||
        # Setup the xPath/VisualSelector scraper
 | 
			
		||||
@@ -300,25 +351,36 @@ class fetcher(Fetcher):
 | 
			
		||||
    async def main(self, **kwargs):
 | 
			
		||||
        await self.fetch_page(**kwargs)
 | 
			
		||||
 | 
			
		||||
    def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
 | 
			
		||||
            current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):
 | 
			
		||||
    async def run(self,
 | 
			
		||||
                  fetch_favicon=True,
 | 
			
		||||
                  current_include_filters=None,
 | 
			
		||||
                  empty_pages_are_a_change=False,
 | 
			
		||||
                  ignore_status_codes=False,
 | 
			
		||||
                  is_binary=False,
 | 
			
		||||
                  request_body=None,
 | 
			
		||||
                  request_headers=None,
 | 
			
		||||
                  request_method=None,
 | 
			
		||||
                  timeout=None,
 | 
			
		||||
                  url=None,
 | 
			
		||||
                  ):
 | 
			
		||||
 | 
			
		||||
        #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
 | 
			
		||||
        max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
 | 
			
		||||
        max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180))
 | 
			
		||||
 | 
			
		||||
        # This will work in 3.10 but not >= 3.11 because 3.11 wants tasks only
 | 
			
		||||
        # Now we run this properly in async context since we're called from async worker
 | 
			
		||||
        try:
 | 
			
		||||
            asyncio.run(asyncio.wait_for(self.main(
 | 
			
		||||
                url=url,
 | 
			
		||||
                timeout=timeout,
 | 
			
		||||
                request_headers=request_headers,
 | 
			
		||||
                request_body=request_body,
 | 
			
		||||
                request_method=request_method,
 | 
			
		||||
                ignore_status_codes=ignore_status_codes,
 | 
			
		||||
            await asyncio.wait_for(self.main(
 | 
			
		||||
                current_include_filters=current_include_filters,
 | 
			
		||||
                empty_pages_are_a_change=empty_pages_are_a_change,
 | 
			
		||||
                fetch_favicon=fetch_favicon,
 | 
			
		||||
                ignore_status_codes=ignore_status_codes,
 | 
			
		||||
                is_binary=is_binary,
 | 
			
		||||
                empty_pages_are_a_change=empty_pages_are_a_change
 | 
			
		||||
            ), timeout=max_time))
 | 
			
		||||
                request_body=request_body,
 | 
			
		||||
                request_headers=request_headers,
 | 
			
		||||
                request_method=request_method,
 | 
			
		||||
                timeout=timeout,
 | 
			
		||||
                url=url,
 | 
			
		||||
            ), timeout=max_time
 | 
			
		||||
            )
 | 
			
		||||
        except asyncio.TimeoutError:
 | 
			
		||||
            raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
 | 
			
		||||
 | 
			
		||||
            raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,7 @@
 | 
			
		||||
from loguru import logger
 | 
			
		||||
import hashlib
 | 
			
		||||
import os
 | 
			
		||||
import asyncio
 | 
			
		||||
from changedetectionio import strtobool
 | 
			
		||||
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
 | 
			
		||||
from changedetectionio.content_fetchers.base import Fetcher
 | 
			
		||||
@@ -15,7 +16,7 @@ class fetcher(Fetcher):
 | 
			
		||||
        self.proxy_override = proxy_override
 | 
			
		||||
        # browser_connection_url is none because its always 'launched locally'
 | 
			
		||||
 | 
			
		||||
    def run(self,
 | 
			
		||||
    def _run_sync(self,
 | 
			
		||||
            url,
 | 
			
		||||
            timeout,
 | 
			
		||||
            request_headers,
 | 
			
		||||
@@ -25,9 +26,11 @@ class fetcher(Fetcher):
 | 
			
		||||
            current_include_filters=None,
 | 
			
		||||
            is_binary=False,
 | 
			
		||||
            empty_pages_are_a_change=False):
 | 
			
		||||
        """Synchronous version of run - the original requests implementation"""
 | 
			
		||||
 | 
			
		||||
        import chardet
 | 
			
		||||
        import requests
 | 
			
		||||
        from requests.exceptions import ProxyError, ConnectionError, RequestException
 | 
			
		||||
 | 
			
		||||
        if self.browser_steps_get_valid_steps():
 | 
			
		||||
            raise BrowserStepsInUnsupportedFetcher(url=url)
 | 
			
		||||
@@ -35,7 +38,6 @@ class fetcher(Fetcher):
 | 
			
		||||
        proxies = {}
 | 
			
		||||
 | 
			
		||||
        # Allows override the proxy on a per-request basis
 | 
			
		||||
 | 
			
		||||
        # https://requests.readthedocs.io/en/latest/user/advanced/#socks
 | 
			
		||||
        # Should also work with `socks5://user:pass@host:port` type syntax.
 | 
			
		||||
 | 
			
		||||
@@ -49,17 +51,23 @@ class fetcher(Fetcher):
 | 
			
		||||
 | 
			
		||||
        session = requests.Session()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'):
 | 
			
		||||
            from requests_file import FileAdapter
 | 
			
		||||
            session.mount('file://', FileAdapter())
 | 
			
		||||
 | 
			
		||||
        r = session.request(method=request_method,
 | 
			
		||||
                            data=request_body.encode('utf-8') if type(request_body) is str else request_body,
 | 
			
		||||
                            url=url,
 | 
			
		||||
                            headers=request_headers,
 | 
			
		||||
                            timeout=timeout,
 | 
			
		||||
                            proxies=proxies,
 | 
			
		||||
                            verify=False)
 | 
			
		||||
        try:
 | 
			
		||||
            r = session.request(method=request_method,
 | 
			
		||||
                                data=request_body.encode('utf-8') if type(request_body) is str else request_body,
 | 
			
		||||
                                url=url,
 | 
			
		||||
                                headers=request_headers,
 | 
			
		||||
                                timeout=timeout,
 | 
			
		||||
                                proxies=proxies,
 | 
			
		||||
                                verify=False)
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            msg = str(e)
 | 
			
		||||
            if proxies and 'SOCKSHTTPSConnectionPool' in msg:
 | 
			
		||||
                msg = f"Proxy connection failed? {msg}"
 | 
			
		||||
            raise Exception(msg) from e
 | 
			
		||||
 | 
			
		||||
        # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
 | 
			
		||||
        # For example - some sites don't tell us it's utf-8, but return utf-8 content
 | 
			
		||||
@@ -94,9 +102,40 @@ class fetcher(Fetcher):
 | 
			
		||||
        else:
 | 
			
		||||
            self.content = r.text
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        self.raw_content = r.content
 | 
			
		||||
 | 
			
		||||
    async def run(self,
 | 
			
		||||
                  fetch_favicon=True,
 | 
			
		||||
                  current_include_filters=None,
 | 
			
		||||
                  empty_pages_are_a_change=False,
 | 
			
		||||
                  ignore_status_codes=False,
 | 
			
		||||
                  is_binary=False,
 | 
			
		||||
                  request_body=None,
 | 
			
		||||
                  request_headers=None,
 | 
			
		||||
                  request_method=None,
 | 
			
		||||
                  timeout=None,
 | 
			
		||||
                  url=None,
 | 
			
		||||
                  ):
 | 
			
		||||
        """Async wrapper that runs the synchronous requests code in a thread pool"""
 | 
			
		||||
        
 | 
			
		||||
        loop = asyncio.get_event_loop()
 | 
			
		||||
        
 | 
			
		||||
        # Run the synchronous _run_sync in a thread pool to avoid blocking the event loop
 | 
			
		||||
        await loop.run_in_executor(
 | 
			
		||||
            None,  # Use default ThreadPoolExecutor
 | 
			
		||||
            lambda: self._run_sync(
 | 
			
		||||
                url=url,
 | 
			
		||||
                timeout=timeout,
 | 
			
		||||
                request_headers=request_headers,
 | 
			
		||||
                request_body=request_body,
 | 
			
		||||
                request_method=request_method,
 | 
			
		||||
                ignore_status_codes=ignore_status_codes,
 | 
			
		||||
                current_include_filters=current_include_filters,
 | 
			
		||||
                is_binary=is_binary,
 | 
			
		||||
                empty_pages_are_a_change=empty_pages_are_a_change
 | 
			
		||||
            )
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def quit(self, watch=None):
 | 
			
		||||
 | 
			
		||||
        # In case they switched to `requests` fetcher from something else
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										101
									
								
								changedetectionio/content_fetchers/res/favicon-fetcher.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										101
									
								
								changedetectionio/content_fetchers/res/favicon-fetcher.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,101 @@
 | 
			
		||||
(async () => {
 | 
			
		||||
  // Define the function inside the IIFE for console testing
 | 
			
		||||
  window.getFaviconAsBlob = async function() {
 | 
			
		||||
    const links = Array.from(document.querySelectorAll(
 | 
			
		||||
      'link[rel~="apple-touch-icon"], link[rel~="icon"]'
 | 
			
		||||
    ));
 | 
			
		||||
 | 
			
		||||
    const icons = links.map(link => {
 | 
			
		||||
      const sizesStr = link.getAttribute('sizes');
 | 
			
		||||
      let size = 0;
 | 
			
		||||
      if (sizesStr) {
 | 
			
		||||
        const [w] = sizesStr.split('x').map(Number);
 | 
			
		||||
        if (!isNaN(w)) size = w;
 | 
			
		||||
      } else {
 | 
			
		||||
        size = 16;
 | 
			
		||||
      }
 | 
			
		||||
      return {
 | 
			
		||||
        size,
 | 
			
		||||
        rel: link.getAttribute('rel'),
 | 
			
		||||
        href: link.href,
 | 
			
		||||
        hasSizes: !!sizesStr
 | 
			
		||||
      };
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    // If no icons found, add fallback favicon.ico
 | 
			
		||||
    if (icons.length === 0) {
 | 
			
		||||
      icons.push({
 | 
			
		||||
        size: 16,
 | 
			
		||||
        rel: 'icon',
 | 
			
		||||
        href: '/favicon.ico',
 | 
			
		||||
        hasSizes: false
 | 
			
		||||
      });
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // sort preference: highest resolution first, then apple-touch-icon, then regular icons
 | 
			
		||||
    icons.sort((a, b) => {
 | 
			
		||||
      // First priority: actual size (highest first)
 | 
			
		||||
      if (a.size !== b.size) {
 | 
			
		||||
        return b.size - a.size;
 | 
			
		||||
      }
 | 
			
		||||
      
 | 
			
		||||
      // Second priority: apple-touch-icon over regular icon
 | 
			
		||||
      const isAppleA = /apple-touch-icon/.test(a.rel);
 | 
			
		||||
      const isAppleB = /apple-touch-icon/.test(b.rel);
 | 
			
		||||
      if (isAppleA && !isAppleB) return -1;
 | 
			
		||||
      if (!isAppleA && isAppleB) return 1;
 | 
			
		||||
      
 | 
			
		||||
      // Third priority: icons with no size attribute (fallback icons) last
 | 
			
		||||
      const hasNoSizeA = !a.hasSizes;
 | 
			
		||||
      const hasNoSizeB = !b.hasSizes;
 | 
			
		||||
      if (hasNoSizeA && !hasNoSizeB) return 1;
 | 
			
		||||
      if (!hasNoSizeA && hasNoSizeB) return -1;
 | 
			
		||||
      
 | 
			
		||||
      return 0;
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
    const timeoutMs = 2000;
 | 
			
		||||
 | 
			
		||||
    for (const icon of icons) {
 | 
			
		||||
      try {
 | 
			
		||||
        const controller = new AbortController();
 | 
			
		||||
        const timeout = setTimeout(() => controller.abort(), timeoutMs);
 | 
			
		||||
 | 
			
		||||
        const resp = await fetch(icon.href, {
 | 
			
		||||
          signal: controller.signal,
 | 
			
		||||
          redirect: 'follow'
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
        clearTimeout(timeout);
 | 
			
		||||
 | 
			
		||||
        if (!resp.ok) {
 | 
			
		||||
          continue;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        const blob = await resp.blob();
 | 
			
		||||
 | 
			
		||||
        // Convert blob to base64
 | 
			
		||||
        const reader = new FileReader();
 | 
			
		||||
        return await new Promise(resolve => {
 | 
			
		||||
          reader.onloadend = () => {
 | 
			
		||||
            resolve({
 | 
			
		||||
              url: icon.href,
 | 
			
		||||
              base64: reader.result.split(",")[1]
 | 
			
		||||
            });
 | 
			
		||||
          };
 | 
			
		||||
          reader.readAsDataURL(blob);
 | 
			
		||||
        });
 | 
			
		||||
 | 
			
		||||
      } catch (e) {
 | 
			
		||||
        continue;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    // nothing found
 | 
			
		||||
    return null;
 | 
			
		||||
  };
 | 
			
		||||
 | 
			
		||||
  // Auto-execute and return result for page.evaluate()
 | 
			
		||||
  return await window.getFaviconAsBlob();
 | 
			
		||||
})();
 | 
			
		||||
 | 
			
		||||
@@ -10,13 +10,16 @@ async () => {
 | 
			
		||||
            'article épuisé',
 | 
			
		||||
            'artikel zurzeit vergriffen',
 | 
			
		||||
            'as soon as stock is available',
 | 
			
		||||
            'aucune offre n\'est disponible',
 | 
			
		||||
            'ausverkauft', // sold out
 | 
			
		||||
            'available for back order',
 | 
			
		||||
            'awaiting stock',
 | 
			
		||||
            'back in stock soon',
 | 
			
		||||
            'back-order or out of stock',
 | 
			
		||||
            'backordered',
 | 
			
		||||
            'backorder',
 | 
			
		||||
            'benachrichtigt mich', // notify me
 | 
			
		||||
            'binnenkort leverbaar', // coming soon
 | 
			
		||||
            'brak na stanie',
 | 
			
		||||
            'brak w magazynie',
 | 
			
		||||
            'coming soon',
 | 
			
		||||
@@ -25,9 +28,8 @@ async () => {
 | 
			
		||||
            'dieser artikel ist bald wieder verfügbar',
 | 
			
		||||
            'dostępne wkrótce',
 | 
			
		||||
            'en rupture',
 | 
			
		||||
            'en rupture de stock',
 | 
			
		||||
            'épuisé',
 | 
			
		||||
            'esgotado',
 | 
			
		||||
            'in kürze lieferbar',
 | 
			
		||||
            'indisponible',
 | 
			
		||||
            'indisponível',
 | 
			
		||||
            'isn\'t in stock right now',
 | 
			
		||||
@@ -38,22 +40,26 @@ async () => {
 | 
			
		||||
            'mail me when available',
 | 
			
		||||
            'message if back in stock',
 | 
			
		||||
            'mevcut değil',
 | 
			
		||||
            'more on order',
 | 
			
		||||
            'nachricht bei',
 | 
			
		||||
            'nicht auf lager',
 | 
			
		||||
            'nicht lagernd',
 | 
			
		||||
            'nicht lieferbar',
 | 
			
		||||
            'nicht verfügbar',
 | 
			
		||||
            'nicht vorrätig',
 | 
			
		||||
            'nicht mehr lieferbar',
 | 
			
		||||
            'nicht zur verfügung',
 | 
			
		||||
            'nie znaleziono produktów',
 | 
			
		||||
            'niet beschikbaar',
 | 
			
		||||
            'niet leverbaar',
 | 
			
		||||
            'niet op voorraad',
 | 
			
		||||
            'no disponible',
 | 
			
		||||
            'non disponibile',
 | 
			
		||||
            'non disponible',
 | 
			
		||||
            'no featured offers available',
 | 
			
		||||
            'no longer available',
 | 
			
		||||
            'no longer in stock',
 | 
			
		||||
            'no tickets available',
 | 
			
		||||
            'non disponibile',
 | 
			
		||||
            'non disponible',
 | 
			
		||||
            'not available',
 | 
			
		||||
            'not currently available',
 | 
			
		||||
            'not in stock',
 | 
			
		||||
@@ -83,19 +89,22 @@ async () => {
 | 
			
		||||
            'tidak tersedia',
 | 
			
		||||
            'tijdelijk uitverkocht',
 | 
			
		||||
            'tiket tidak tersedia',
 | 
			
		||||
            'to subscribe to back in stock',
 | 
			
		||||
            'tükendi',
 | 
			
		||||
            'unavailable nearby',
 | 
			
		||||
            'unavailable tickets',
 | 
			
		||||
            'vergriffen',
 | 
			
		||||
            'vorbestellen',
 | 
			
		||||
            'vorbestellung ist bald möglich',
 | 
			
		||||
            'we don\'t currently have any',
 | 
			
		||||
            'we couldn\'t find any products that match',
 | 
			
		||||
            'we do not currently have an estimate of when this product will be back in stock.',
 | 
			
		||||
            'we don\'t currently have any',
 | 
			
		||||
            'we don\'t know when or if this item will be back in stock.',
 | 
			
		||||
            'we were not able to find a match',
 | 
			
		||||
            'when this arrives in stock',
 | 
			
		||||
            'when this item is available to order',
 | 
			
		||||
            'zur zeit nicht an lager',
 | 
			
		||||
            'épuisé',
 | 
			
		||||
            '品切れ',
 | 
			
		||||
            '已售',
 | 
			
		||||
            '已售完',
 | 
			
		||||
@@ -115,13 +124,26 @@ async () => {
 | 
			
		||||
            return text.toLowerCase().trim();
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig');
 | 
			
		||||
 | 
			
		||||
        const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock|arrives approximately)', 'ig');
 | 
			
		||||
        // The out-of-stock or in-stock-text is generally always above-the-fold
 | 
			
		||||
        // and often below-the-fold is a list of related products that may or may not contain trigger text
 | 
			
		||||
        // so it's good to filter to just the 'above the fold' elements
 | 
			
		||||
        // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist
 | 
			
		||||
 | 
			
		||||
        function elementIsInEyeBallRange(element) {
 | 
			
		||||
            // outside the 'fold' or some weird text in the heading area
 | 
			
		||||
            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
 | 
			
		||||
            // Note: theres also an automated test that places the 'out of stock' text fairly low down
 | 
			
		||||
            // Skip text that could be in the header area
 | 
			
		||||
            if (element.getBoundingClientRect().bottom + window.scrollY <= 300 ) {
 | 
			
		||||
                return false;
 | 
			
		||||
            }
 | 
			
		||||
            // Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in there
 | 
			
		||||
            if (element.getBoundingClientRect().bottom + window.scrollY >= 1300 ) {
 | 
			
		||||
                return false;
 | 
			
		||||
            }
 | 
			
		||||
            return true;
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
// @todo - if it's SVG or IMG, go into image diff mode
 | 
			
		||||
 | 
			
		||||
@@ -158,9 +180,7 @@ async () => {
 | 
			
		||||
        for (let i = elementsToScan.length - 1; i >= 0; i--) {
 | 
			
		||||
            const element = elementsToScan[i];
 | 
			
		||||
 | 
			
		||||
            // outside the 'fold' or some weird text in the heading area
 | 
			
		||||
            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
 | 
			
		||||
            if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) {
 | 
			
		||||
            if (!elementIsInEyeBallRange(element)) {
 | 
			
		||||
                continue
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
@@ -174,11 +194,11 @@ async () => {
 | 
			
		||||
            } catch (e) {
 | 
			
		||||
                console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e);
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
            if (elementText.length) {
 | 
			
		||||
                // try which ones could mean its in stock
 | 
			
		||||
                if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) {
 | 
			
		||||
                    console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`)
 | 
			
		||||
                    element.style.border = "2px solid green"; // highlight the element that was detected as in stock
 | 
			
		||||
                    return 'Possibly in stock';
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
@@ -187,10 +207,8 @@ async () => {
 | 
			
		||||
        // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK
 | 
			
		||||
        for (let i = elementsToScan.length - 1; i >= 0; i--) {
 | 
			
		||||
            const element = elementsToScan[i];
 | 
			
		||||
            // outside the 'fold' or some weird text in the heading area
 | 
			
		||||
            // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden
 | 
			
		||||
            // Note: theres also an automated test that places the 'out of stock' text fairly low down
 | 
			
		||||
            if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) {
 | 
			
		||||
 | 
			
		||||
            if (!elementIsInEyeBallRange(element)) {
 | 
			
		||||
                continue
 | 
			
		||||
            }
 | 
			
		||||
            elementText = "";
 | 
			
		||||
@@ -205,6 +223,7 @@ async () => {
 | 
			
		||||
                for (const outOfStockText of outOfStockTexts) {
 | 
			
		||||
                    if (elementText.includes(outOfStockText)) {
 | 
			
		||||
                        console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`)
 | 
			
		||||
                        element.style.border = "2px solid red"; // highlight the element that was detected as out of stock
 | 
			
		||||
                        return outOfStockText; // item is out of stock
 | 
			
		||||
                    }
 | 
			
		||||
                }
 | 
			
		||||
 
 | 
			
		||||
@@ -202,7 +202,6 @@ async (options) => {
 | 
			
		||||
        // Foreach filter, go and find it on the page and add it to the results so we can visualise it again
 | 
			
		||||
        for (const f of include_filters) {
 | 
			
		||||
            bbox = false;
 | 
			
		||||
            q = false;
 | 
			
		||||
 | 
			
		||||
            if (!f.length) {
 | 
			
		||||
                console.log("xpath_element_scraper: Empty filter, skipping");
 | 
			
		||||
@@ -255,7 +254,7 @@ async (options) => {
 | 
			
		||||
                            console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y)
 | 
			
		||||
                        } catch (e) {
 | 
			
		||||
                            console.log(e)
 | 
			
		||||
                            console.log("xpath_element_scraper: error looking up q.ownerElement")
 | 
			
		||||
                            console.log("xpath_element_scraper: error looking up node.ownerElement")
 | 
			
		||||
                        }
 | 
			
		||||
                    }
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -4,22 +4,20 @@ import time
 | 
			
		||||
from loguru import logger
 | 
			
		||||
from changedetectionio.content_fetchers.base import Fetcher
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class fetcher(Fetcher):
 | 
			
		||||
    if os.getenv("WEBDRIVER_URL"):
 | 
			
		||||
        fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
 | 
			
		||||
        fetcher_description = f"WebDriver Chrome/Javascript via \"{os.getenv('WEBDRIVER_URL', '')}\""
 | 
			
		||||
    else:
 | 
			
		||||
        fetcher_description = "WebDriver Chrome/Javascript"
 | 
			
		||||
 | 
			
		||||
    # Configs for Proxy setup
 | 
			
		||||
    # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
 | 
			
		||||
    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
 | 
			
		||||
                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
 | 
			
		||||
                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
 | 
			
		||||
    proxy = None
 | 
			
		||||
    proxy_url = None
 | 
			
		||||
 | 
			
		||||
    def __init__(self, proxy_override=None, custom_browser_connection_url=None):
 | 
			
		||||
        super().__init__()
 | 
			
		||||
        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
 | 
			
		||||
        from urllib.parse import urlparse
 | 
			
		||||
        from selenium.webdriver.common.proxy import Proxy
 | 
			
		||||
 | 
			
		||||
        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
 | 
			
		||||
        if not custom_browser_connection_url:
 | 
			
		||||
@@ -28,107 +26,118 @@ class fetcher(Fetcher):
 | 
			
		||||
            self.browser_connection_is_custom = True
 | 
			
		||||
            self.browser_connection_url = custom_browser_connection_url
 | 
			
		||||
 | 
			
		||||
        # If any proxy settings are enabled, then we should setup the proxy object
 | 
			
		||||
        proxy_args = {}
 | 
			
		||||
        for k in self.selenium_proxy_settings_mappings:
 | 
			
		||||
            v = os.getenv('webdriver_' + k, False)
 | 
			
		||||
            if v:
 | 
			
		||||
                proxy_args[k] = v.strip('"')
 | 
			
		||||
        ##### PROXY SETUP #####
 | 
			
		||||
 | 
			
		||||
        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
 | 
			
		||||
        if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
 | 
			
		||||
            proxy_args['httpProxy'] = self.system_http_proxy
 | 
			
		||||
        if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
 | 
			
		||||
            proxy_args['httpsProxy'] = self.system_https_proxy
 | 
			
		||||
 | 
			
		||||
        # Allows override the proxy on a per-request basis
 | 
			
		||||
        if proxy_override is not None:
 | 
			
		||||
            proxy_args['httpProxy'] = proxy_override
 | 
			
		||||
 | 
			
		||||
        if proxy_args:
 | 
			
		||||
            self.proxy = SeleniumProxy(raw=proxy_args)
 | 
			
		||||
 | 
			
		||||
    def run(self,
 | 
			
		||||
            url,
 | 
			
		||||
            timeout,
 | 
			
		||||
            request_headers,
 | 
			
		||||
            request_body,
 | 
			
		||||
            request_method,
 | 
			
		||||
            ignore_status_codes=False,
 | 
			
		||||
            current_include_filters=None,
 | 
			
		||||
            is_binary=False,
 | 
			
		||||
            empty_pages_are_a_change=False):
 | 
			
		||||
 | 
			
		||||
        from selenium import webdriver
 | 
			
		||||
        from selenium.webdriver.chrome.options import Options as ChromeOptions
 | 
			
		||||
        from selenium.common.exceptions import WebDriverException
 | 
			
		||||
        # request_body, request_method unused for now, until some magic in the future happens.
 | 
			
		||||
 | 
			
		||||
        options = ChromeOptions()
 | 
			
		||||
 | 
			
		||||
        # Load Chrome options from env
 | 
			
		||||
        CHROME_OPTIONS = [
 | 
			
		||||
            line.strip()
 | 
			
		||||
            for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
 | 
			
		||||
            if line.strip()
 | 
			
		||||
        proxy_sources = [
 | 
			
		||||
            self.system_http_proxy,
 | 
			
		||||
            self.system_https_proxy,
 | 
			
		||||
            os.getenv('webdriver_proxySocks'),
 | 
			
		||||
            os.getenv('webdriver_socksProxy'),
 | 
			
		||||
            os.getenv('webdriver_proxyHttp'),
 | 
			
		||||
            os.getenv('webdriver_httpProxy'),
 | 
			
		||||
            os.getenv('webdriver_proxyHttps'),
 | 
			
		||||
            os.getenv('webdriver_httpsProxy'),
 | 
			
		||||
            os.getenv('webdriver_sslProxy'),
 | 
			
		||||
            proxy_override,  # last one should override
 | 
			
		||||
        ]
 | 
			
		||||
        # The built in selenium proxy handling is super unreliable!!! so we just grab which ever proxy setting we can find and throw it in --proxy-server=
 | 
			
		||||
        for k in filter(None, proxy_sources):
 | 
			
		||||
            if not k:
 | 
			
		||||
                continue
 | 
			
		||||
            self.proxy_url = k.strip()
 | 
			
		||||
 | 
			
		||||
        for opt in CHROME_OPTIONS:
 | 
			
		||||
            options.add_argument(opt)
 | 
			
		||||
    async def run(self,
 | 
			
		||||
                  fetch_favicon=True,
 | 
			
		||||
                  current_include_filters=None,
 | 
			
		||||
                  empty_pages_are_a_change=False,
 | 
			
		||||
                  ignore_status_codes=False,
 | 
			
		||||
                  is_binary=False,
 | 
			
		||||
                  request_body=None,
 | 
			
		||||
                  request_headers=None,
 | 
			
		||||
                  request_method=None,
 | 
			
		||||
                  timeout=None,
 | 
			
		||||
                  url=None,
 | 
			
		||||
                  ):
 | 
			
		||||
 | 
			
		||||
        if self.proxy:
 | 
			
		||||
            options.proxy = self.proxy
 | 
			
		||||
        import asyncio
 | 
			
		||||
 | 
			
		||||
        self.driver = webdriver.Remote(
 | 
			
		||||
            command_executor=self.browser_connection_url,
 | 
			
		||||
            options=options)
 | 
			
		||||
        # Wrap the entire selenium operation in a thread executor
 | 
			
		||||
        def _run_sync():
 | 
			
		||||
            from selenium.webdriver.chrome.options import Options as ChromeOptions
 | 
			
		||||
            # request_body, request_method unused for now, until some magic in the future happens.
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            self.driver.get(url)
 | 
			
		||||
        except WebDriverException as e:
 | 
			
		||||
            # Be sure we close the session window
 | 
			
		||||
            self.quit()
 | 
			
		||||
            raise
 | 
			
		||||
            options = ChromeOptions()
 | 
			
		||||
 | 
			
		||||
        if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
 | 
			
		||||
            self.driver.set_window_size(1280, 1024)
 | 
			
		||||
            # Load Chrome options from env
 | 
			
		||||
            CHROME_OPTIONS = [
 | 
			
		||||
                line.strip()
 | 
			
		||||
                for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines()
 | 
			
		||||
                if line.strip()
 | 
			
		||||
            ]
 | 
			
		||||
 | 
			
		||||
        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
 | 
			
		||||
            for opt in CHROME_OPTIONS:
 | 
			
		||||
                options.add_argument(opt)
 | 
			
		||||
 | 
			
		||||
        if self.webdriver_js_execute_code is not None:
 | 
			
		||||
            self.driver.execute_script(self.webdriver_js_execute_code)
 | 
			
		||||
            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
 | 
			
		||||
            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
 | 
			
		||||
            # 1. proxy_config /Proxy(proxy_config) selenium object is REALLY unreliable
 | 
			
		||||
            # 2. selenium-wire cant be used because the websocket version conflicts with pypeteer-ng
 | 
			
		||||
            # 3. selenium only allows ONE runner at a time by default!
 | 
			
		||||
            # 4. driver must use quit() or it will continue to block/hold the selenium process!!
 | 
			
		||||
 | 
			
		||||
            if self.proxy_url:
 | 
			
		||||
                options.add_argument(f'--proxy-server={self.proxy_url}')
 | 
			
		||||
 | 
			
		||||
        # @todo - how to check this? is it possible?
 | 
			
		||||
        self.status_code = 200
 | 
			
		||||
        # @todo somehow we should try to get this working for WebDriver
 | 
			
		||||
        # raise EmptyReply(url=url, status_code=r.status_code)
 | 
			
		||||
 | 
			
		||||
        # @todo - dom wait loaded?
 | 
			
		||||
        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
 | 
			
		||||
        self.content = self.driver.page_source
 | 
			
		||||
        self.headers = {}
 | 
			
		||||
 | 
			
		||||
        self.screenshot = self.driver.get_screenshot_as_png()
 | 
			
		||||
 | 
			
		||||
    # Does the connection to the webdriver work? run a test connection.
 | 
			
		||||
    def is_ready(self):
 | 
			
		||||
        from selenium import webdriver
 | 
			
		||||
        from selenium.webdriver.chrome.options import Options as ChromeOptions
 | 
			
		||||
 | 
			
		||||
        self.driver = webdriver.Remote(
 | 
			
		||||
            command_executor=self.command_executor,
 | 
			
		||||
            options=ChromeOptions())
 | 
			
		||||
 | 
			
		||||
        # driver.quit() seems to cause better exceptions
 | 
			
		||||
        self.quit()
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    def quit(self, watch=None):
 | 
			
		||||
        if self.driver:
 | 
			
		||||
            from selenium.webdriver.remote.remote_connection import RemoteConnection
 | 
			
		||||
            from selenium.webdriver.remote.webdriver import WebDriver as RemoteWebDriver
 | 
			
		||||
            driver = None
 | 
			
		||||
            try:
 | 
			
		||||
                self.driver.quit()
 | 
			
		||||
                # Create the RemoteConnection and set timeout (e.g., 30 seconds)
 | 
			
		||||
                remote_connection = RemoteConnection(
 | 
			
		||||
                    self.browser_connection_url,
 | 
			
		||||
                )
 | 
			
		||||
                remote_connection.set_timeout(30)  # seconds
 | 
			
		||||
 | 
			
		||||
                # Now create the driver with the RemoteConnection
 | 
			
		||||
                driver = RemoteWebDriver(
 | 
			
		||||
                    command_executor=remote_connection,
 | 
			
		||||
                    options=options
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
                driver.set_page_load_timeout(int(os.getenv("WEBDRIVER_PAGELOAD_TIMEOUT", 45)))
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
 | 
			
		||||
                if driver:
 | 
			
		||||
                    driver.quit()
 | 
			
		||||
                raise e
 | 
			
		||||
 | 
			
		||||
            try:
 | 
			
		||||
                driver.get(url)
 | 
			
		||||
 | 
			
		||||
                if not "--window-size" in os.getenv("CHROME_OPTIONS", ""):
 | 
			
		||||
                    driver.set_window_size(1280, 1024)
 | 
			
		||||
 | 
			
		||||
                driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
 | 
			
		||||
 | 
			
		||||
                if self.webdriver_js_execute_code is not None:
 | 
			
		||||
                    driver.execute_script(self.webdriver_js_execute_code)
 | 
			
		||||
                    # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
 | 
			
		||||
                    driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
 | 
			
		||||
 | 
			
		||||
                # @todo - how to check this? is it possible?
 | 
			
		||||
                self.status_code = 200
 | 
			
		||||
                # @todo somehow we should try to get this working for WebDriver
 | 
			
		||||
                # raise EmptyReply(url=url, status_code=r.status_code)
 | 
			
		||||
 | 
			
		||||
                # @todo - dom wait loaded?
 | 
			
		||||
                import time
 | 
			
		||||
                time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
 | 
			
		||||
                self.content = driver.page_source
 | 
			
		||||
                self.headers = {}
 | 
			
		||||
                self.screenshot = driver.get_screenshot_as_png()
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                driver.quit()
 | 
			
		||||
                raise e
 | 
			
		||||
 | 
			
		||||
            driver.quit()
 | 
			
		||||
 | 
			
		||||
        # Run the selenium operations in a thread pool to avoid blocking the event loop
 | 
			
		||||
        loop = asyncio.get_event_loop()
 | 
			
		||||
        await loop.run_in_executor(None, _run_sync)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										535
									
								
								changedetectionio/custom_queue.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										535
									
								
								changedetectionio/custom_queue.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,535 @@
 | 
			
		||||
import queue
 | 
			
		||||
import asyncio
 | 
			
		||||
from blinker import signal
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NotificationQueue(queue.Queue):
 | 
			
		||||
    """
 | 
			
		||||
    Extended Queue that sends a 'notification_event' signal when notifications are added.
 | 
			
		||||
    
 | 
			
		||||
    This class extends the standard Queue and adds a signal emission after a notification
 | 
			
		||||
    is put into the queue. The signal includes the watch UUID if available.
 | 
			
		||||
    """
 | 
			
		||||
    
 | 
			
		||||
    def __init__(self, maxsize=0):
 | 
			
		||||
        super().__init__(maxsize)
 | 
			
		||||
        try:
 | 
			
		||||
            self.notification_event_signal = signal('notification_event')
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.critical(f"Exception creating notification_event signal: {e}")
 | 
			
		||||
 | 
			
		||||
    def put(self, item, block=True, timeout=None):
 | 
			
		||||
        # Call the parent's put method first
 | 
			
		||||
        super().put(item, block, timeout)
 | 
			
		||||
        
 | 
			
		||||
        # After putting the notification in the queue, emit signal with watch UUID
 | 
			
		||||
        try:
 | 
			
		||||
            if self.notification_event_signal and isinstance(item, dict):
 | 
			
		||||
                watch_uuid = item.get('uuid')
 | 
			
		||||
                if watch_uuid:
 | 
			
		||||
                    # Send the notification_event signal with the watch UUID
 | 
			
		||||
                    self.notification_event_signal.send(watch_uuid=watch_uuid)
 | 
			
		||||
                    logger.trace(f"NotificationQueue: Emitted notification_event signal for watch UUID {watch_uuid}")
 | 
			
		||||
                else:
 | 
			
		||||
                    # Send signal without UUID for system notifications
 | 
			
		||||
                    self.notification_event_signal.send()
 | 
			
		||||
                    logger.trace("NotificationQueue: Emitted notification_event signal for system notification")
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.error(f"Exception emitting notification_event signal: {e}")
 | 
			
		||||
 | 
			
		||||
class SignalPriorityQueue(queue.PriorityQueue):
 | 
			
		||||
    """
 | 
			
		||||
    Extended PriorityQueue that sends a signal when items with a UUID are added.
 | 
			
		||||
    
 | 
			
		||||
    This class extends the standard PriorityQueue and adds a signal emission
 | 
			
		||||
    after an item is put into the queue. If the item contains a UUID, the signal
 | 
			
		||||
    is sent with that UUID as a parameter.
 | 
			
		||||
    """
 | 
			
		||||
    
 | 
			
		||||
    def __init__(self, maxsize=0):
 | 
			
		||||
        super().__init__(maxsize)
 | 
			
		||||
        try:
 | 
			
		||||
            self.queue_length_signal = signal('queue_length')
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.critical(f"Exception: {e}")
 | 
			
		||||
 | 
			
		||||
    def put(self, item, block=True, timeout=None):
 | 
			
		||||
        # Call the parent's put method first
 | 
			
		||||
        super().put(item, block, timeout)
 | 
			
		||||
        
 | 
			
		||||
        # After putting the item in the queue, check if it has a UUID and emit signal
 | 
			
		||||
        if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
 | 
			
		||||
            uuid = item.item['uuid']
 | 
			
		||||
            # Get the signal and send it if it exists
 | 
			
		||||
            watch_check_update = signal('watch_check_update')
 | 
			
		||||
            if watch_check_update:
 | 
			
		||||
                # Send the watch_uuid parameter
 | 
			
		||||
                watch_check_update.send(watch_uuid=uuid)
 | 
			
		||||
        
 | 
			
		||||
        # Send queue_length signal with current queue size
 | 
			
		||||
        try:
 | 
			
		||||
 | 
			
		||||
            if self.queue_length_signal:
 | 
			
		||||
                self.queue_length_signal.send(length=self.qsize())
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.critical(f"Exception: {e}")
 | 
			
		||||
 | 
			
		||||
    def get(self, block=True, timeout=None):
 | 
			
		||||
        # Call the parent's get method first
 | 
			
		||||
        item = super().get(block, timeout)
 | 
			
		||||
        
 | 
			
		||||
        # Send queue_length signal with current queue size
 | 
			
		||||
        try:
 | 
			
		||||
            if self.queue_length_signal:
 | 
			
		||||
                self.queue_length_signal.send(length=self.qsize())
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.critical(f"Exception: {e}")
 | 
			
		||||
        return item
 | 
			
		||||
    
 | 
			
		||||
    def get_uuid_position(self, target_uuid):
 | 
			
		||||
        """
 | 
			
		||||
        Find the position of a watch UUID in the priority queue.
 | 
			
		||||
        Optimized for large queues - O(n) complexity instead of O(n log n).
 | 
			
		||||
        
 | 
			
		||||
        Args:
 | 
			
		||||
            target_uuid: The UUID to search for
 | 
			
		||||
            
 | 
			
		||||
        Returns:
 | 
			
		||||
            dict: Contains position info or None if not found
 | 
			
		||||
                - position: 0-based position in queue (0 = next to be processed)
 | 
			
		||||
                - total_items: total number of items in queue
 | 
			
		||||
                - priority: the priority value of the found item
 | 
			
		||||
        """
 | 
			
		||||
        with self.mutex:
 | 
			
		||||
            queue_list = list(self.queue)
 | 
			
		||||
            total_items = len(queue_list)
 | 
			
		||||
            
 | 
			
		||||
            if total_items == 0:
 | 
			
		||||
                return {
 | 
			
		||||
                    'position': None,
 | 
			
		||||
                    'total_items': 0,
 | 
			
		||||
                    'priority': None,
 | 
			
		||||
                    'found': False
 | 
			
		||||
                }
 | 
			
		||||
            
 | 
			
		||||
            # Find the target item and its priority first - O(n)
 | 
			
		||||
            target_item = None
 | 
			
		||||
            target_priority = None
 | 
			
		||||
            
 | 
			
		||||
            for item in queue_list:
 | 
			
		||||
                if (hasattr(item, 'item') and 
 | 
			
		||||
                    isinstance(item.item, dict) and 
 | 
			
		||||
                    item.item.get('uuid') == target_uuid):
 | 
			
		||||
                    target_item = item
 | 
			
		||||
                    target_priority = item.priority
 | 
			
		||||
                    break
 | 
			
		||||
            
 | 
			
		||||
            if target_item is None:
 | 
			
		||||
                return {
 | 
			
		||||
                    'position': None,
 | 
			
		||||
                    'total_items': total_items,
 | 
			
		||||
                    'priority': None,
 | 
			
		||||
                    'found': False
 | 
			
		||||
                }
 | 
			
		||||
            
 | 
			
		||||
            # Count how many items have higher priority (lower numbers) - O(n)
 | 
			
		||||
            position = 0
 | 
			
		||||
            for item in queue_list:
 | 
			
		||||
                # Items with lower priority numbers are processed first
 | 
			
		||||
                if item.priority < target_priority:
 | 
			
		||||
                    position += 1
 | 
			
		||||
                elif item.priority == target_priority and item != target_item:
 | 
			
		||||
                    # For same priority, count items that come before this one
 | 
			
		||||
                    # (Note: this is approximate since heap order isn't guaranteed for equal priorities)
 | 
			
		||||
                    position += 1
 | 
			
		||||
            
 | 
			
		||||
            return {
 | 
			
		||||
                'position': position,
 | 
			
		||||
                'total_items': total_items,
 | 
			
		||||
                'priority': target_priority,
 | 
			
		||||
                'found': True
 | 
			
		||||
            }
 | 
			
		||||
    
 | 
			
		||||
    def get_all_queued_uuids(self, limit=None, offset=0):
 | 
			
		||||
        """
 | 
			
		||||
        Get UUIDs currently in the queue with their positions.
 | 
			
		||||
        For large queues, use limit/offset for pagination.
 | 
			
		||||
        
 | 
			
		||||
        Args:
 | 
			
		||||
            limit: Maximum number of items to return (None = all)
 | 
			
		||||
            offset: Number of items to skip (for pagination)
 | 
			
		||||
        
 | 
			
		||||
        Returns:
 | 
			
		||||
            dict: Contains items and metadata
 | 
			
		||||
                - items: List of dicts with uuid, position, and priority
 | 
			
		||||
                - total_items: Total number of items in queue
 | 
			
		||||
                - returned_items: Number of items returned
 | 
			
		||||
                - has_more: Whether there are more items after this page
 | 
			
		||||
        """
 | 
			
		||||
        with self.mutex:
 | 
			
		||||
            queue_list = list(self.queue)
 | 
			
		||||
            total_items = len(queue_list)
 | 
			
		||||
            
 | 
			
		||||
            if total_items == 0:
 | 
			
		||||
                return {
 | 
			
		||||
                    'items': [],
 | 
			
		||||
                    'total_items': 0,
 | 
			
		||||
                    'returned_items': 0,
 | 
			
		||||
                    'has_more': False
 | 
			
		||||
                }
 | 
			
		||||
            
 | 
			
		||||
            # For very large queues, warn about performance
 | 
			
		||||
            if total_items > 1000 and limit is None:
 | 
			
		||||
                logger.warning(f"Getting all {total_items} queued items without limit - this may be slow")
 | 
			
		||||
            
 | 
			
		||||
            # Sort only if we need exact positions (expensive for large queues)
 | 
			
		||||
            if limit is not None and limit <= 100:
 | 
			
		||||
                # For small requests, we can afford to sort
 | 
			
		||||
                queue_items = sorted(queue_list)
 | 
			
		||||
                end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items)
 | 
			
		||||
                items_to_process = queue_items[offset:end_idx]
 | 
			
		||||
                
 | 
			
		||||
                result = []
 | 
			
		||||
                for position, item in enumerate(items_to_process, start=offset):
 | 
			
		||||
                    if (hasattr(item, 'item') and 
 | 
			
		||||
                        isinstance(item.item, dict) and 
 | 
			
		||||
                        'uuid' in item.item):
 | 
			
		||||
                        
 | 
			
		||||
                        result.append({
 | 
			
		||||
                            'uuid': item.item['uuid'],
 | 
			
		||||
                            'position': position,
 | 
			
		||||
                            'priority': item.priority
 | 
			
		||||
                        })
 | 
			
		||||
                
 | 
			
		||||
                return {
 | 
			
		||||
                    'items': result,
 | 
			
		||||
                    'total_items': total_items,
 | 
			
		||||
                    'returned_items': len(result),
 | 
			
		||||
                    'has_more': (offset + len(result)) < total_items
 | 
			
		||||
                }
 | 
			
		||||
            else:
 | 
			
		||||
                # For large requests, return items with approximate positions
 | 
			
		||||
                # This is much faster O(n) instead of O(n log n)
 | 
			
		||||
                result = []
 | 
			
		||||
                processed = 0
 | 
			
		||||
                skipped = 0
 | 
			
		||||
                
 | 
			
		||||
                for item in queue_list:
 | 
			
		||||
                    if (hasattr(item, 'item') and 
 | 
			
		||||
                        isinstance(item.item, dict) and 
 | 
			
		||||
                        'uuid' in item.item):
 | 
			
		||||
                        
 | 
			
		||||
                        if skipped < offset:
 | 
			
		||||
                            skipped += 1
 | 
			
		||||
                            continue
 | 
			
		||||
                        
 | 
			
		||||
                        if limit and processed >= limit:
 | 
			
		||||
                            break
 | 
			
		||||
                        
 | 
			
		||||
                        # Approximate position based on priority comparison
 | 
			
		||||
                        approx_position = sum(1 for other in queue_list if other.priority < item.priority)
 | 
			
		||||
                        
 | 
			
		||||
                        result.append({
 | 
			
		||||
                            'uuid': item.item['uuid'],
 | 
			
		||||
                            'position': approx_position,  # Approximate
 | 
			
		||||
                            'priority': item.priority
 | 
			
		||||
                        })
 | 
			
		||||
                        processed += 1
 | 
			
		||||
                
 | 
			
		||||
                return {
 | 
			
		||||
                    'items': result,
 | 
			
		||||
                    'total_items': total_items,
 | 
			
		||||
                    'returned_items': len(result),
 | 
			
		||||
                    'has_more': (offset + len(result)) < total_items,
 | 
			
		||||
                    'note': 'Positions are approximate for performance with large queues'
 | 
			
		||||
                }
 | 
			
		||||
    
 | 
			
		||||
    def get_queue_summary(self):
 | 
			
		||||
        """
 | 
			
		||||
        Get a quick summary of queue state without expensive operations.
 | 
			
		||||
        O(n) complexity - fast even for large queues.
 | 
			
		||||
        
 | 
			
		||||
        Returns:
 | 
			
		||||
            dict: Queue summary statistics
 | 
			
		||||
        """
 | 
			
		||||
        with self.mutex:
 | 
			
		||||
            queue_list = list(self.queue)
 | 
			
		||||
            total_items = len(queue_list)
 | 
			
		||||
            
 | 
			
		||||
            if total_items == 0:
 | 
			
		||||
                return {
 | 
			
		||||
                    'total_items': 0,
 | 
			
		||||
                    'priority_breakdown': {},
 | 
			
		||||
                    'immediate_items': 0,
 | 
			
		||||
                    'clone_items': 0,
 | 
			
		||||
                    'scheduled_items': 0
 | 
			
		||||
                }
 | 
			
		||||
            
 | 
			
		||||
            # Count items by priority type - O(n)
 | 
			
		||||
            immediate_items = 0  # priority 1
 | 
			
		||||
            clone_items = 0      # priority 5  
 | 
			
		||||
            scheduled_items = 0  # priority > 100 (timestamps)
 | 
			
		||||
            priority_counts = {}
 | 
			
		||||
            
 | 
			
		||||
            for item in queue_list:
 | 
			
		||||
                priority = item.priority
 | 
			
		||||
                priority_counts[priority] = priority_counts.get(priority, 0) + 1
 | 
			
		||||
                
 | 
			
		||||
                if priority == 1:
 | 
			
		||||
                    immediate_items += 1
 | 
			
		||||
                elif priority == 5:
 | 
			
		||||
                    clone_items += 1
 | 
			
		||||
                elif priority > 100:
 | 
			
		||||
                    scheduled_items += 1
 | 
			
		||||
            
 | 
			
		||||
            return {
 | 
			
		||||
                'total_items': total_items,
 | 
			
		||||
                'priority_breakdown': priority_counts,
 | 
			
		||||
                'immediate_items': immediate_items,
 | 
			
		||||
                'clone_items': clone_items,
 | 
			
		||||
                'scheduled_items': scheduled_items,
 | 
			
		||||
                'min_priority': min(priority_counts.keys()) if priority_counts else None,
 | 
			
		||||
                'max_priority': max(priority_counts.keys()) if priority_counts else None
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class AsyncSignalPriorityQueue(asyncio.PriorityQueue):
    """
    Async version of SignalPriorityQueue that sends signals when items are added/removed.

    This class extends asyncio.PriorityQueue and maintains the same signal behavior
    as the synchronous version for real-time UI updates.
    """

    def __init__(self, maxsize=0):
        super().__init__(maxsize)
        try:
            self.queue_length_signal = signal('queue_length')
        except Exception as e:
            logger.critical(f"Exception: {e}")
            # Fall back to None so later emits are skipped cleanly instead of
            # raising AttributeError on every put()/get().
            self.queue_length_signal = None

    def _notify_queue_length(self):
        """Best-effort emit of the queue_length signal with the current queue size."""
        try:
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())
        except Exception as e:
            logger.critical(f"Exception: {e}")

    async def put(self, item):
        """Put an item on the queue, then notify listeners (watch_check_update + queue_length)."""
        # Call the parent's put method first
        await super().put(item)

        # After putting the item in the queue, check if it has a UUID and emit signal.
        # Guarded so a failing signal receiver cannot make put() itself fail after
        # the item has already been queued.
        if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
            uuid = item.item['uuid']
            try:
                # Get the signal and send it if it exists
                watch_check_update = signal('watch_check_update')
                if watch_check_update:
                    # Send the watch_uuid parameter
                    watch_check_update.send(watch_uuid=uuid)
            except Exception as e:
                logger.critical(f"Exception: {e}")

        # Send queue_length signal with current queue size
        self._notify_queue_length()

    async def get(self):
        """Get the next item from the queue, then notify listeners of the new length."""
        # Call the parent's get method first
        item = await super().get()

        # Send queue_length signal with current queue size
        self._notify_queue_length()
        return item

    @property
    def queue(self):
        """
        Provide compatibility with sync PriorityQueue.queue access
        Returns the internal queue for template access
        """
        return self._queue if hasattr(self, '_queue') else []

    def get_uuid_position(self, target_uuid):
        """
        Find the position of a watch UUID in the async priority queue.
        Optimized for large queues - O(n) complexity instead of O(n log n).

        Args:
            target_uuid: The UUID to search for

        Returns:
            dict: Always a dict (check 'found'), containing:
                - position: 0-based position in queue (0 = next to be processed),
                  or None when not found / queue empty
                - total_items: total number of items in queue
                - priority: the priority value of the found item (None if not found)
                - found: True when the UUID is present in the queue
        """
        queue_list = list(self._queue)
        total_items = len(queue_list)

        if total_items == 0:
            return {
                'position': None,
                'total_items': 0,
                'priority': None,
                'found': False
            }

        # Find the target item and its priority first - O(n)
        target_item = None
        target_priority = None

        for item in queue_list:
            if (hasattr(item, 'item') and
                isinstance(item.item, dict) and
                item.item.get('uuid') == target_uuid):
                target_item = item
                target_priority = item.priority
                break

        if target_item is None:
            return {
                'position': None,
                'total_items': total_items,
                'priority': None,
                'found': False
            }

        # Count how many items have higher priority (lower numbers) - O(n).
        # NOTE: every other item sharing the target's priority is counted as
        # "ahead", so the reported position is an upper bound on ties.
        position = 0
        for item in queue_list:
            if item.priority < target_priority:
                position += 1
            elif item.priority == target_priority and item != target_item:
                position += 1

        return {
            'position': position,
            'total_items': total_items,
            'priority': target_priority,
            'found': True
        }

    def get_all_queued_uuids(self, limit=None, offset=0):
        """
        Get UUIDs currently in the async queue with their positions.
        For large queues, use limit/offset for pagination.

        Args:
            limit: Maximum number of items to return (None = all)
            offset: Number of items to skip (for pagination)

        Returns:
            dict: Contains items and metadata (same structure as sync version):
                - items: list of {'uuid', 'position', 'priority'} dicts
                - total_items: total queue length
                - returned_items: number of items in 'items'
                - has_more: whether more items exist past this page
        """
        queue_list = list(self._queue)
        total_items = len(queue_list)

        if total_items == 0:
            return {
                'items': [],
                'total_items': 0,
                'returned_items': 0,
                'has_more': False
            }

        # Same logic as sync version but without mutex
        if limit is not None and limit <= 100:
            # Small page: sort for exact positions (O(n log n))
            queue_items = sorted(queue_list)
            end_idx = min(offset + limit, len(queue_items)) if limit else len(queue_items)
            items_to_process = queue_items[offset:end_idx]

            result = []
            for position, item in enumerate(items_to_process, start=offset):
                if (hasattr(item, 'item') and
                    isinstance(item.item, dict) and
                    'uuid' in item.item):

                    result.append({
                        'uuid': item.item['uuid'],
                        'position': position,
                        'priority': item.priority
                    })

            return {
                'items': result,
                'total_items': total_items,
                'returned_items': len(result),
                'has_more': (offset + len(result)) < total_items
            }
        else:
            # Fast approximate positions for large queues
            result = []
            processed = 0
            skipped = 0

            for item in queue_list:
                if (hasattr(item, 'item') and
                    isinstance(item.item, dict) and
                    'uuid' in item.item):

                    if skipped < offset:
                        skipped += 1
                        continue

                    if limit and processed >= limit:
                        break

                    # Approximate: count strictly-higher-priority items only
                    approx_position = sum(1 for other in queue_list if other.priority < item.priority)

                    result.append({
                        'uuid': item.item['uuid'],
                        'position': approx_position,
                        'priority': item.priority
                    })
                    processed += 1

            return {
                'items': result,
                'total_items': total_items,
                'returned_items': len(result),
                'has_more': (offset + len(result)) < total_items,
                'note': 'Positions are approximate for performance with large queues'
            }

    def get_queue_summary(self):
        """
        Get a quick summary of async queue state.
        O(n) complexity - fast even for large queues.

        Returns:
            dict with total_items, priority_breakdown (priority -> count),
            immediate_items (priority 1), clone_items (priority 5),
            scheduled_items (priority > 100, i.e. timestamps), and
            min_priority/max_priority (None when the queue is empty).
        """
        queue_list = list(self._queue)
        total_items = len(queue_list)

        if total_items == 0:
            return {
                'total_items': 0,
                'priority_breakdown': {},
                'immediate_items': 0,
                'clone_items': 0,
                'scheduled_items': 0
            }

        immediate_items = 0
        clone_items = 0
        scheduled_items = 0
        priority_counts = {}

        for item in queue_list:
            priority = item.priority
            priority_counts[priority] = priority_counts.get(priority, 0) + 1

            if priority == 1:
                immediate_items += 1
            elif priority == 5:
                clone_items += 1
            elif priority > 100:
                scheduled_items += 1

        return {
            'total_items': total_items,
            'priority_breakdown': priority_counts,
            'immediate_items': immediate_items,
            'clone_items': clone_items,
            'scheduled_items': scheduled_items,
            'min_priority': min(priority_counts.keys()) if priority_counts else None,
            'max_priority': max(priority_counts.keys()) if priority_counts else None
        }
 | 
			
		||||
@@ -1,8 +1,32 @@
 | 
			
		||||
import difflib
 | 
			
		||||
from typing import List, Iterator, Union
 | 
			
		||||
 | 
			
		||||
REMOVED_STYLE = "background-color: #fadad7; color: #b30000;"
 | 
			
		||||
ADDED_STYLE = "background-color: #eaf2c2; color: #406619;"
 | 
			
		||||
# https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
 | 
			
		||||
#HTML_ADDED_STYLE = "background-color: #d2f7c2; color: #255d00;"
 | 
			
		||||
#HTML_CHANGED_INTO_STYLE = "background-color: #dafbe1; color: #116329;"
 | 
			
		||||
#HTML_CHANGED_STYLE = "background-color: #ffd6cc; color: #7a2000;"
 | 
			
		||||
#HTML_REMOVED_STYLE = "background-color: #ffebe9; color: #82071e;"
 | 
			
		||||
 | 
			
		||||
# @todo - In the future we can make this configurable
 | 
			
		||||
HTML_ADDED_STYLE = "background-color: #eaf2c2; color: #406619"
 | 
			
		||||
HTML_REMOVED_STYLE = "background-color: #fadad7; color: #b30000"
 | 
			
		||||
HTML_CHANGED_STYLE = HTML_REMOVED_STYLE
 | 
			
		||||
HTML_CHANGED_INTO_STYLE = HTML_ADDED_STYLE
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# These get set to html or telegram type or discord compatible or whatever in handler.py
 | 
			
		||||
# Something that cant get escaped to HTML by accident
 | 
			
		||||
REMOVED_PLACEMARKER_OPEN = '@removed_PLACEMARKER_OPEN'
 | 
			
		||||
REMOVED_PLACEMARKER_CLOSED = '@removed_PLACEMARKER_CLOSED'
 | 
			
		||||
 | 
			
		||||
ADDED_PLACEMARKER_OPEN = '@added_PLACEMARKER_OPEN'
 | 
			
		||||
ADDED_PLACEMARKER_CLOSED = '@added_PLACEMARKER_CLOSED'
 | 
			
		||||
 | 
			
		||||
CHANGED_PLACEMARKER_OPEN = '@changed_PLACEMARKER_OPEN'
 | 
			
		||||
CHANGED_PLACEMARKER_CLOSED = '@changed_PLACEMARKER_CLOSED'
 | 
			
		||||
 | 
			
		||||
CHANGED_INTO_PLACEMARKER_OPEN = '@changed_into_PLACEMARKER_OPEN'
 | 
			
		||||
CHANGED_INTO_PLACEMARKER_CLOSED = '@changed_into_PLACEMARKER_CLOSED'
 | 
			
		||||
 | 
			
		||||
def same_slicer(lst: List[str], start: int, end: int) -> List[str]:
 | 
			
		||||
    """Return a slice of the list, or a single element if start == end."""
 | 
			
		||||
@@ -15,8 +39,7 @@ def customSequenceMatcher(
 | 
			
		||||
    include_removed: bool = True,
 | 
			
		||||
    include_added: bool = True,
 | 
			
		||||
    include_replaced: bool = True,
 | 
			
		||||
    include_change_type_prefix: bool = True,
 | 
			
		||||
    html_colour: bool = False
 | 
			
		||||
    include_change_type_prefix: bool = True
 | 
			
		||||
) -> Iterator[List[str]]:
 | 
			
		||||
    """
 | 
			
		||||
    Compare two sequences and yield differences based on specified parameters.
 | 
			
		||||
@@ -29,8 +52,6 @@ def customSequenceMatcher(
 | 
			
		||||
        include_added (bool): Include added parts
 | 
			
		||||
        include_replaced (bool): Include replaced parts
 | 
			
		||||
        include_change_type_prefix (bool): Add prefixes to indicate change types
 | 
			
		||||
        html_colour (bool): Use HTML background colors for differences
 | 
			
		||||
 | 
			
		||||
    Yields:
 | 
			
		||||
        List[str]: Differences between sequences
 | 
			
		||||
    """
 | 
			
		||||
@@ -42,22 +63,22 @@ def customSequenceMatcher(
 | 
			
		||||
        if include_equal and tag == 'equal':
 | 
			
		||||
            yield before[alo:ahi]
 | 
			
		||||
        elif include_removed and tag == 'delete':
 | 
			
		||||
            if html_colour:
 | 
			
		||||
                yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)]
 | 
			
		||||
            if include_change_type_prefix:
 | 
			
		||||
                yield [f'{REMOVED_PLACEMARKER_OPEN}{line}{REMOVED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)]
 | 
			
		||||
            else:
 | 
			
		||||
                yield [f"(removed) {line}" for line in same_slicer(before, alo, ahi)] if include_change_type_prefix else same_slicer(before, alo, ahi)
 | 
			
		||||
                yield same_slicer(before, alo, ahi)
 | 
			
		||||
        elif include_replaced and tag == 'replace':
 | 
			
		||||
            if html_colour:
 | 
			
		||||
                yield [f'<span style="{REMOVED_STYLE}">{line}</span>' for line in same_slicer(before, alo, ahi)] + \
 | 
			
		||||
                      [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
 | 
			
		||||
            if include_change_type_prefix:
 | 
			
		||||
                yield [f'{CHANGED_PLACEMARKER_OPEN}{line}{CHANGED_PLACEMARKER_CLOSED}' for line in same_slicer(before, alo, ahi)] + \
 | 
			
		||||
                      [f'{CHANGED_INTO_PLACEMARKER_OPEN}{line}{CHANGED_INTO_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)]
 | 
			
		||||
            else:
 | 
			
		||||
                yield [f"(changed) {line}" for line in same_slicer(before, alo, ahi)] + \
 | 
			
		||||
                      [f"(into) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi)
 | 
			
		||||
                yield same_slicer(before, alo, ahi) + same_slicer(after, blo, bhi)
 | 
			
		||||
        elif include_added and tag == 'insert':
 | 
			
		||||
            if html_colour:
 | 
			
		||||
                yield [f'<span style="{ADDED_STYLE}">{line}</span>' for line in same_slicer(after, blo, bhi)]
 | 
			
		||||
            if include_change_type_prefix:
 | 
			
		||||
                yield [f'{ADDED_PLACEMARKER_OPEN}{line}{ADDED_PLACEMARKER_CLOSED}' for line in same_slicer(after, blo, bhi)]
 | 
			
		||||
            else:
 | 
			
		||||
                yield [f"(added) {line}" for line in same_slicer(after, blo, bhi)] if include_change_type_prefix else same_slicer(after, blo, bhi)
 | 
			
		||||
                yield same_slicer(after, blo, bhi)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def render_diff(
 | 
			
		||||
    previous_version_file_contents: str,
 | 
			
		||||
@@ -68,8 +89,7 @@ def render_diff(
 | 
			
		||||
    include_replaced: bool = True,
 | 
			
		||||
    line_feed_sep: str = "\n",
 | 
			
		||||
    include_change_type_prefix: bool = True,
 | 
			
		||||
    patch_format: bool = False,
 | 
			
		||||
    html_colour: bool = False
 | 
			
		||||
    patch_format: bool = False
 | 
			
		||||
) -> str:
 | 
			
		||||
    """
 | 
			
		||||
    Render the difference between two file contents.
 | 
			
		||||
@@ -84,8 +104,6 @@ def render_diff(
 | 
			
		||||
        line_feed_sep (str): Separator for lines in output
 | 
			
		||||
        include_change_type_prefix (bool): Add prefixes to indicate change types
 | 
			
		||||
        patch_format (bool): Use patch format for output
 | 
			
		||||
        html_colour (bool): Use HTML background colors for differences
 | 
			
		||||
 | 
			
		||||
    Returns:
 | 
			
		||||
        str: Rendered difference
 | 
			
		||||
    """
 | 
			
		||||
@@ -103,8 +121,7 @@ def render_diff(
 | 
			
		||||
        include_removed=include_removed,
 | 
			
		||||
        include_added=include_added,
 | 
			
		||||
        include_replaced=include_replaced,
 | 
			
		||||
        include_change_type_prefix=include_change_type_prefix,
 | 
			
		||||
        html_colour=html_colour
 | 
			
		||||
        include_change_type_prefix=include_change_type_prefix
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    def flatten(lst: List[Union[str, List[str]]]) -> str:
 | 
			
		||||
 
 | 
			
		||||
@@ -4,49 +4,53 @@ import flask_login
 | 
			
		||||
import locale
 | 
			
		||||
import os
 | 
			
		||||
import queue
 | 
			
		||||
import sys
 | 
			
		||||
import threading
 | 
			
		||||
import time
 | 
			
		||||
import timeago
 | 
			
		||||
from blinker import signal
 | 
			
		||||
 | 
			
		||||
from changedetectionio.strtobool import strtobool
 | 
			
		||||
from threading import Event
 | 
			
		||||
from changedetectionio.queue_handlers import RecheckPriorityQueue, NotificationQueue
 | 
			
		||||
from changedetectionio import worker_handler
 | 
			
		||||
 | 
			
		||||
from flask import (
 | 
			
		||||
    Flask,
 | 
			
		||||
    abort,
 | 
			
		||||
    flash,
 | 
			
		||||
    make_response,
 | 
			
		||||
    redirect,
 | 
			
		||||
    render_template,
 | 
			
		||||
    request,
 | 
			
		||||
    send_from_directory,
 | 
			
		||||
    session,
 | 
			
		||||
    url_for,
 | 
			
		||||
)
 | 
			
		||||
from flask_compress import Compress as FlaskCompress
 | 
			
		||||
from flask_login import current_user
 | 
			
		||||
from flask_paginate import Pagination, get_page_parameter
 | 
			
		||||
from flask_restful import abort, Api
 | 
			
		||||
from flask_cors import CORS
 | 
			
		||||
 | 
			
		||||
# Create specific signals for application events
 | 
			
		||||
# Make this a global singleton to avoid multiple signal objects
 | 
			
		||||
watch_check_update = signal('watch_check_update', doc='Signal sent when a watch check is completed')
 | 
			
		||||
from flask_wtf import CSRFProtect
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
from changedetectionio import __version__
 | 
			
		||||
from changedetectionio import queuedWatchMetaData
 | 
			
		||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications
 | 
			
		||||
from changedetectionio.api import Watch, WatchHistory, WatchSingleHistory, CreateWatch, Import, SystemInfo, Tag, Tags, Notifications, WatchFavicon
 | 
			
		||||
from changedetectionio.api.Search import Search
 | 
			
		||||
from .time_handler import is_within_schedule
 | 
			
		||||
 | 
			
		||||
datastore = None
 | 
			
		||||
 | 
			
		||||
# Local
 | 
			
		||||
running_update_threads = []
 | 
			
		||||
ticker_thread = None
 | 
			
		||||
 | 
			
		||||
extra_stylesheets = []
 | 
			
		||||
 | 
			
		||||
update_q = queue.PriorityQueue()
 | 
			
		||||
notification_q = queue.Queue()
 | 
			
		||||
# Use bulletproof janus-based queues for sync/async reliability  
 | 
			
		||||
update_q = RecheckPriorityQueue()
 | 
			
		||||
notification_q = NotificationQueue()
 | 
			
		||||
MAX_QUEUE_SIZE = 2000
 | 
			
		||||
 | 
			
		||||
app = Flask(__name__,
 | 
			
		||||
@@ -54,6 +58,9 @@ app = Flask(__name__,
 | 
			
		||||
            static_folder="static",
 | 
			
		||||
            template_folder="templates")
 | 
			
		||||
 | 
			
		||||
# Will be initialized in changedetection_app
 | 
			
		||||
socketio_server = None
 | 
			
		||||
 | 
			
		||||
# Enable CORS, especially useful for the Chrome extension to operate from anywhere
 | 
			
		||||
CORS(app)
 | 
			
		||||
 | 
			
		||||
@@ -91,7 +98,7 @@ watch_api = Api(app, decorators=[csrf.exempt])
 | 
			
		||||
def init_app_secret(datastore_path):
 | 
			
		||||
    secret = ""
 | 
			
		||||
 | 
			
		||||
    path = "{}/secret.txt".format(datastore_path)
 | 
			
		||||
    path = os.path.join(datastore_path, "secret.txt")
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        with open(path, "r") as f:
 | 
			
		||||
@@ -115,6 +122,23 @@ def get_darkmode_state():
 | 
			
		||||
def get_css_version():
 | 
			
		||||
    return __version__
 | 
			
		||||
 | 
			
		||||
@app.template_global()
 | 
			
		||||
def get_socketio_path():
 | 
			
		||||
    """Generate the correct Socket.IO path prefix for the client"""
 | 
			
		||||
    # If behind a proxy with a sub-path, we need to respect that path
 | 
			
		||||
    prefix = ""
 | 
			
		||||
    if os.getenv('USE_X_SETTINGS') and 'X-Forwarded-Prefix' in request.headers:
 | 
			
		||||
        prefix = request.headers['X-Forwarded-Prefix']
 | 
			
		||||
 | 
			
		||||
    # Socket.IO will be available at {prefix}/socket.io/
 | 
			
		||||
    return prefix
 | 
			
		||||
 | 
			
		||||
@app.template_global('is_safe_valid_url')
 | 
			
		||||
def _is_safe_valid_url(test_url):
 | 
			
		||||
    from .validate_url import is_safe_valid_url
 | 
			
		||||
    return is_safe_valid_url(test_url)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@app.template_filter('format_number_locale')
 | 
			
		||||
def _jinja2_filter_format_number_locale(value: float) -> str:
 | 
			
		||||
    "Formats for example 4000.10 to the local locale default of 4,000.10"
 | 
			
		||||
@@ -125,10 +149,32 @@ def _jinja2_filter_format_number_locale(value: float) -> str:
 | 
			
		||||
 | 
			
		||||
@app.template_global('is_checking_now')
 | 
			
		||||
def _watch_is_checking_now(watch_obj, format="%Y-%m-%d %H:%M:%S"):
 | 
			
		||||
    # Worker thread tells us which UUID it is currently processing.
 | 
			
		||||
    for t in running_update_threads:
 | 
			
		||||
        if t.current_uuid == watch_obj['uuid']:
 | 
			
		||||
            return True
 | 
			
		||||
    return worker_handler.is_watch_running(watch_obj['uuid'])
 | 
			
		||||
 | 
			
		||||
@app.template_global('get_watch_queue_position')
 | 
			
		||||
def _get_watch_queue_position(watch_obj):
 | 
			
		||||
    """Get the position of a watch in the queue"""
 | 
			
		||||
    uuid = watch_obj['uuid']
 | 
			
		||||
    return update_q.get_uuid_position(uuid)
 | 
			
		||||
 | 
			
		||||
@app.template_global('get_current_worker_count')
 | 
			
		||||
def _get_current_worker_count():
 | 
			
		||||
    """Get the current number of operational workers"""
 | 
			
		||||
    return worker_handler.get_worker_count()
 | 
			
		||||
 | 
			
		||||
@app.template_global('get_worker_status_info')
 | 
			
		||||
def _get_worker_status_info():
 | 
			
		||||
    """Get detailed worker status information for display"""
 | 
			
		||||
    status = worker_handler.get_worker_status()
 | 
			
		||||
    running_uuids = worker_handler.get_running_uuids()
 | 
			
		||||
    
 | 
			
		||||
    return {
 | 
			
		||||
        'count': status['worker_count'],
 | 
			
		||||
        'type': status['worker_type'],
 | 
			
		||||
        'active_workers': len(running_uuids),
 | 
			
		||||
        'processing_watches': running_uuids,
 | 
			
		||||
        'loop_running': status.get('async_loop_running', None)
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# We use the whole watch object from the store/JSON so we can see if there's some related status in terms of a thread
 | 
			
		||||
@@ -215,12 +261,15 @@ class User(flask_login.UserMixin):
 | 
			
		||||
def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
    logger.trace("TRACE log is enabled")
 | 
			
		||||
 | 
			
		||||
    global datastore
 | 
			
		||||
    global datastore, socketio_server
 | 
			
		||||
    datastore = datastore_o
 | 
			
		||||
 | 
			
		||||
    # so far just for read-only via tests, but this will be moved eventually to be the main source
 | 
			
		||||
    # (instead of the global var)
 | 
			
		||||
    app.config['DATASTORE'] = datastore_o
 | 
			
		||||
    
 | 
			
		||||
    # Store the signal in the app config to ensure it's accessible everywhere
 | 
			
		||||
    app.config['watch_check_update_SIGNAL'] = watch_check_update
 | 
			
		||||
 | 
			
		||||
    login_manager = flask_login.LoginManager(app)
 | 
			
		||||
    login_manager.login_view = 'login'
 | 
			
		||||
@@ -248,6 +297,9 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
            # RSS access with token is allowed
 | 
			
		||||
            elif request.endpoint and 'rss.feed' in request.endpoint:
 | 
			
		||||
                return None
 | 
			
		||||
            # Socket.IO routes - need separate handling
 | 
			
		||||
            elif request.path.startswith('/socket.io/'):
 | 
			
		||||
                return None
 | 
			
		||||
            # API routes - use their own auth mechanism (@auth.check_token)
 | 
			
		||||
            elif request.path.startswith('/api/'):
 | 
			
		||||
                return None
 | 
			
		||||
@@ -258,7 +310,9 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
    watch_api.add_resource(WatchSingleHistory,
 | 
			
		||||
                           '/api/v1/watch/<string:uuid>/history/<string:timestamp>',
 | 
			
		||||
                           resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
 | 
			
		||||
 | 
			
		||||
    watch_api.add_resource(WatchFavicon,
 | 
			
		||||
                           '/api/v1/watch/<string:uuid>/favicon',
 | 
			
		||||
                           resource_class_kwargs={'datastore': datastore})
 | 
			
		||||
    watch_api.add_resource(WatchHistory,
 | 
			
		||||
                           '/api/v1/watch/<string:uuid>/history',
 | 
			
		||||
                           resource_class_kwargs={'datastore': datastore})
 | 
			
		||||
@@ -280,7 +334,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
                           resource_class_kwargs={'datastore': datastore})
 | 
			
		||||
 | 
			
		||||
    watch_api.add_resource(Tag, '/api/v1/tag', '/api/v1/tag/<string:uuid>',
 | 
			
		||||
                           resource_class_kwargs={'datastore': datastore})
 | 
			
		||||
                           resource_class_kwargs={'datastore': datastore, 'update_q': update_q})
 | 
			
		||||
                           
 | 
			
		||||
    watch_api.add_resource(Search, '/api/v1/search',
 | 
			
		||||
                           resource_class_kwargs={'datastore': datastore})
 | 
			
		||||
@@ -333,7 +387,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
            # We would sometimes get login loop errors on sites hosted in sub-paths
 | 
			
		||||
 | 
			
		||||
            # note for the future:
 | 
			
		||||
            #            if not is_safe_url(next):
 | 
			
		||||
            #            if not is_safe_valid_url(next):
 | 
			
		||||
            #                return flask.abort(400)
 | 
			
		||||
            return redirect(url_for('watchlist.index'))
 | 
			
		||||
 | 
			
		||||
@@ -378,6 +432,32 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
            except FileNotFoundError:
 | 
			
		||||
                abort(404)
 | 
			
		||||
 | 
			
		||||
        if group == 'favicon':
 | 
			
		||||
            # Could be sensitive, follow password requirements
 | 
			
		||||
            if datastore.data['settings']['application']['password'] and not flask_login.current_user.is_authenticated:
 | 
			
		||||
                abort(403)
 | 
			
		||||
            # Get the watch object
 | 
			
		||||
            watch = datastore.data['watching'].get(filename)
 | 
			
		||||
            if not watch:
 | 
			
		||||
                abort(404)
 | 
			
		||||
 | 
			
		||||
            favicon_filename = watch.get_favicon_filename()
 | 
			
		||||
            if favicon_filename:
 | 
			
		||||
                try:
 | 
			
		||||
                    import magic
 | 
			
		||||
                    mime = magic.from_file(
 | 
			
		||||
                        os.path.join(watch.watch_data_dir, favicon_filename),
 | 
			
		||||
                        mime=True
 | 
			
		||||
                    )
 | 
			
		||||
                except ImportError:
 | 
			
		||||
                    # Fallback, no python-magic
 | 
			
		||||
                    import mimetypes
 | 
			
		||||
                    mime, encoding = mimetypes.guess_type(favicon_filename)
 | 
			
		||||
 | 
			
		||||
                response = make_response(send_from_directory(watch.watch_data_dir, favicon_filename))
 | 
			
		||||
                response.headers['Content-type'] = mime
 | 
			
		||||
                response.headers['Cache-Control'] = 'max-age=300, must-revalidate'  # Cache for 5 minutes, then revalidate
 | 
			
		||||
                return response
 | 
			
		||||
 | 
			
		||||
        if group == 'visual_selector_data':
 | 
			
		||||
            # Could be sensitive, follow password requirements
 | 
			
		||||
@@ -444,11 +524,22 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
 | 
			
		||||
    # watchlist UI buttons etc
 | 
			
		||||
    import changedetectionio.blueprint.ui as ui
 | 
			
		||||
    app.register_blueprint(ui.construct_blueprint(datastore, update_q, running_update_threads, queuedWatchMetaData))
 | 
			
		||||
    app.register_blueprint(ui.construct_blueprint(datastore, update_q, worker_handler, queuedWatchMetaData, watch_check_update))
 | 
			
		||||
 | 
			
		||||
    import changedetectionio.blueprint.watchlist as watchlist
 | 
			
		||||
    app.register_blueprint(watchlist.construct_blueprint(datastore=datastore, update_q=update_q, queuedWatchMetaData=queuedWatchMetaData), url_prefix='')
 | 
			
		||||
    
 | 
			
		||||
 | 
			
		||||
    # Initialize Socket.IO server conditionally based on settings
 | 
			
		||||
    socket_io_enabled = datastore.data['settings']['application']['ui'].get('socket_io_enabled', True)
 | 
			
		||||
    if socket_io_enabled:
 | 
			
		||||
        from changedetectionio.realtime.socket_server import init_socketio
 | 
			
		||||
        global socketio_server
 | 
			
		||||
        socketio_server = init_socketio(app, datastore)
 | 
			
		||||
        logger.info("Socket.IO server initialized")
 | 
			
		||||
    else:
 | 
			
		||||
        logger.info("Socket.IO server disabled via settings")
 | 
			
		||||
        socketio_server = None
 | 
			
		||||
 | 
			
		||||
    # Memory cleanup endpoint
 | 
			
		||||
    @app.route('/gc-cleanup', methods=['GET'])
 | 
			
		||||
    @login_optionally_required
 | 
			
		||||
@@ -459,14 +550,95 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
        result = memory_cleanup(app)
 | 
			
		||||
        return jsonify({"status": "success", "message": "Memory cleanup completed", "result": result})
 | 
			
		||||
 | 
			
		||||
    # Worker health check endpoint
 | 
			
		||||
    @app.route('/worker-health', methods=['GET'])
 | 
			
		||||
    @login_optionally_required
 | 
			
		||||
    def worker_health():
 | 
			
		||||
        from flask import jsonify
 | 
			
		||||
        
 | 
			
		||||
        expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
 | 
			
		||||
        
 | 
			
		||||
        # Get basic status
 | 
			
		||||
        status = worker_handler.get_worker_status()
 | 
			
		||||
        
 | 
			
		||||
        # Perform health check
 | 
			
		||||
        health_result = worker_handler.check_worker_health(
 | 
			
		||||
            expected_count=expected_workers,
 | 
			
		||||
            update_q=update_q,
 | 
			
		||||
            notification_q=notification_q,
 | 
			
		||||
            app=app,
 | 
			
		||||
            datastore=datastore
 | 
			
		||||
        )
 | 
			
		||||
        
 | 
			
		||||
        return jsonify({
 | 
			
		||||
            "status": "success",
 | 
			
		||||
            "worker_status": status,
 | 
			
		||||
            "health_check": health_result,
 | 
			
		||||
            "expected_workers": expected_workers
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
    # Queue status endpoint
 | 
			
		||||
    @app.route('/queue-status', methods=['GET'])
 | 
			
		||||
    @login_optionally_required
 | 
			
		||||
    def queue_status():
 | 
			
		||||
        from flask import jsonify, request
 | 
			
		||||
        
 | 
			
		||||
        # Get specific UUID position if requested
 | 
			
		||||
        target_uuid = request.args.get('uuid')
 | 
			
		||||
        
 | 
			
		||||
        if target_uuid:
 | 
			
		||||
            position_info = update_q.get_uuid_position(target_uuid)
 | 
			
		||||
            return jsonify({
 | 
			
		||||
                "status": "success",
 | 
			
		||||
                "uuid": target_uuid,
 | 
			
		||||
                "queue_position": position_info
 | 
			
		||||
            })
 | 
			
		||||
        else:
 | 
			
		||||
            # Get pagination parameters
 | 
			
		||||
            limit = request.args.get('limit', type=int)
 | 
			
		||||
            offset = request.args.get('offset', type=int, default=0)
 | 
			
		||||
            summary_only = request.args.get('summary', type=bool, default=False)
 | 
			
		||||
            
 | 
			
		||||
            if summary_only:
 | 
			
		||||
                # Fast summary for large queues
 | 
			
		||||
                summary = update_q.get_queue_summary()
 | 
			
		||||
                return jsonify({
 | 
			
		||||
                    "status": "success",
 | 
			
		||||
                    "queue_summary": summary
 | 
			
		||||
                })
 | 
			
		||||
            else:
 | 
			
		||||
                # Get queued items with pagination support
 | 
			
		||||
                if limit is None:
 | 
			
		||||
                    # Default limit for large queues to prevent performance issues
 | 
			
		||||
                    queue_size = update_q.qsize()
 | 
			
		||||
                    if queue_size > 100:
 | 
			
		||||
                        limit = 50
 | 
			
		||||
                        logger.warning(f"Large queue ({queue_size} items) detected, limiting to {limit} items. Use ?limit=N for more.")
 | 
			
		||||
                
 | 
			
		||||
                all_queued = update_q.get_all_queued_uuids(limit=limit, offset=offset)
 | 
			
		||||
                return jsonify({
 | 
			
		||||
                    "status": "success",
 | 
			
		||||
                    "queue_size": update_q.qsize(),
 | 
			
		||||
                    "queued_data": all_queued
 | 
			
		||||
                })
 | 
			
		||||
 | 
			
		||||
    # Start the async workers during app initialization
 | 
			
		||||
    # Can be overridden by ENV or use the default settings
 | 
			
		||||
    n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
 | 
			
		||||
    logger.info(f"Starting {n_workers} workers during app initialization")
 | 
			
		||||
    worker_handler.start_workers(n_workers, update_q, notification_q, app, datastore)
 | 
			
		||||
 | 
			
		||||
    # @todo handle ctrl break
 | 
			
		||||
    ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
 | 
			
		||||
    threading.Thread(target=notification_runner).start()
 | 
			
		||||
 | 
			
		||||
    in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ
 | 
			
		||||
    # Check for new release version, but not when running in test/build or pytest
 | 
			
		||||
    if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')):
 | 
			
		||||
    if not os.getenv("GITHUB_REF", False) and not strtobool(os.getenv('DISABLE_VERSION_CHECK', 'no')) and not in_pytest:
 | 
			
		||||
        threading.Thread(target=check_for_new_version).start()
 | 
			
		||||
 | 
			
		||||
    # Return the Flask app - the Socket.IO will be attached to it but initialized separately
 | 
			
		||||
    # This avoids circular dependencies
 | 
			
		||||
    return app
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -502,73 +674,87 @@ def notification_runner():
 | 
			
		||||
    global notification_debug_log
 | 
			
		||||
    from datetime import datetime
 | 
			
		||||
    import json
 | 
			
		||||
    while not app.config.exit.is_set():
 | 
			
		||||
        try:
 | 
			
		||||
            # At the moment only one thread runs (single runner)
 | 
			
		||||
            n_object = notification_q.get(block=False)
 | 
			
		||||
        except queue.Empty:
 | 
			
		||||
            time.sleep(1)
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
 | 
			
		||||
            now = datetime.now()
 | 
			
		||||
            sent_obj = None
 | 
			
		||||
 | 
			
		||||
    with app.app_context():
 | 
			
		||||
        while not app.config.exit.is_set():
 | 
			
		||||
            try:
 | 
			
		||||
                from changedetectionio.notification.handler import process_notification
 | 
			
		||||
                # At the moment only one thread runs (single runner)
 | 
			
		||||
                n_object = notification_q.get(block=False)
 | 
			
		||||
            except queue.Empty:
 | 
			
		||||
                time.sleep(1)
 | 
			
		||||
 | 
			
		||||
                # Fallback to system config if not set
 | 
			
		||||
                if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
 | 
			
		||||
                    n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
 | 
			
		||||
            else:
 | 
			
		||||
 | 
			
		||||
                if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'):
 | 
			
		||||
                    n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
 | 
			
		||||
                now = datetime.now()
 | 
			
		||||
                sent_obj = None
 | 
			
		||||
 | 
			
		||||
                if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'):
 | 
			
		||||
                    n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
 | 
			
		||||
                if n_object.get('notification_urls', {}):
 | 
			
		||||
                    sent_obj = process_notification(n_object, datastore)
 | 
			
		||||
                try:
 | 
			
		||||
                    from changedetectionio.notification.handler import process_notification
 | 
			
		||||
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.error(f"Watch URL: {n_object['watch_url']}  Error {str(e)}")
 | 
			
		||||
                    # Fallback to system config if not set
 | 
			
		||||
                    if not n_object.get('notification_body') and datastore.data['settings']['application'].get('notification_body'):
 | 
			
		||||
                        n_object['notification_body'] = datastore.data['settings']['application'].get('notification_body')
 | 
			
		||||
 | 
			
		||||
                # UUID wont be present when we submit a 'test' from the global settings
 | 
			
		||||
                if 'uuid' in n_object:
 | 
			
		||||
                    datastore.update_watch(uuid=n_object['uuid'],
 | 
			
		||||
                                           update_obj={'last_notification_error': "Notification error detected, goto notification log."})
 | 
			
		||||
                    if not n_object.get('notification_title') and datastore.data['settings']['application'].get('notification_title'):
 | 
			
		||||
                        n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title')
 | 
			
		||||
 | 
			
		||||
                    if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'):
 | 
			
		||||
                        n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format')
 | 
			
		||||
                    if n_object.get('notification_urls', {}):
 | 
			
		||||
                        sent_obj = process_notification(n_object, datastore)
 | 
			
		||||
 | 
			
		||||
                except Exception as e:
 | 
			
		||||
                    logger.error(f"Watch URL: {n_object['watch_url']}  Error {str(e)}")
 | 
			
		||||
 | 
			
		||||
                    # UUID wont be present when we submit a 'test' from the global settings
 | 
			
		||||
                    if 'uuid' in n_object:
 | 
			
		||||
                        datastore.update_watch(uuid=n_object['uuid'],
 | 
			
		||||
                                               update_obj={'last_notification_error': "Notification error detected, goto notification log."})
 | 
			
		||||
 | 
			
		||||
                    log_lines = str(e).splitlines()
 | 
			
		||||
                    notification_debug_log += log_lines
 | 
			
		||||
 | 
			
		||||
                    with app.app_context():
 | 
			
		||||
                        app.config['watch_check_update_SIGNAL'].send(app_context=app, watch_uuid=n_object.get('uuid'))
 | 
			
		||||
 | 
			
		||||
                # Process notifications
 | 
			
		||||
                notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
 | 
			
		||||
                # Trim the log length
 | 
			
		||||
                notification_debug_log = notification_debug_log[-100:]
 | 
			
		||||
 | 
			
		||||
                log_lines = str(e).splitlines()
 | 
			
		||||
                notification_debug_log += log_lines
 | 
			
		||||
 | 
			
		||||
            # Process notifications
 | 
			
		||||
            notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
 | 
			
		||||
            # Trim the log length
 | 
			
		||||
            notification_debug_log = notification_debug_log[-100:]
 | 
			
		||||
 | 
			
		||||
# Threaded runner, look for new watches to feed into the Queue.
 | 
			
		||||
def ticker_thread_check_time_launch_checks():
 | 
			
		||||
    import random
 | 
			
		||||
    from changedetectionio import update_worker
 | 
			
		||||
    proxy_last_called_time = {}
 | 
			
		||||
    last_health_check = 0
 | 
			
		||||
 | 
			
		||||
    recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
 | 
			
		||||
    logger.debug(f"System env MINIMUM_SECONDS_RECHECK_TIME {recheck_time_minimum_seconds}")
 | 
			
		||||
 | 
			
		||||
    # Spin up Workers that do the fetching
 | 
			
		||||
    # Can be overriden by ENV or use the default settings
 | 
			
		||||
    n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
 | 
			
		||||
    for _ in range(n_workers):
 | 
			
		||||
        new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
 | 
			
		||||
        running_update_threads.append(new_worker)
 | 
			
		||||
        new_worker.start()
 | 
			
		||||
    # Workers are now started during app initialization, not here
 | 
			
		||||
 | 
			
		||||
    while not app.config.exit.is_set():
 | 
			
		||||
 | 
			
		||||
        # Periodic worker health check (every 60 seconds)
 | 
			
		||||
        now = time.time()
 | 
			
		||||
        if now - last_health_check > 60:
 | 
			
		||||
            expected_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
 | 
			
		||||
            health_result = worker_handler.check_worker_health(
 | 
			
		||||
                expected_count=expected_workers,
 | 
			
		||||
                update_q=update_q,
 | 
			
		||||
                notification_q=notification_q,
 | 
			
		||||
                app=app,
 | 
			
		||||
                datastore=datastore
 | 
			
		||||
            )
 | 
			
		||||
            
 | 
			
		||||
            if health_result['status'] != 'healthy':
 | 
			
		||||
                logger.warning(f"Worker health check: {health_result['message']}")
 | 
			
		||||
                
 | 
			
		||||
            last_health_check = now
 | 
			
		||||
 | 
			
		||||
        # Get a list of watches by UUID that are currently fetching data
 | 
			
		||||
        running_uuids = []
 | 
			
		||||
        for t in running_update_threads:
 | 
			
		||||
            if t.current_uuid:
 | 
			
		||||
                running_uuids.append(t.current_uuid)
 | 
			
		||||
        running_uuids = worker_handler.get_running_uuids()
 | 
			
		||||
 | 
			
		||||
        # Re #232 - Deepcopy the data incase it changes while we're iterating through it all
 | 
			
		||||
        watch_uuid_list = []
 | 
			
		||||
@@ -614,7 +800,7 @@ def ticker_thread_check_time_launch_checks():
 | 
			
		||||
            else:
 | 
			
		||||
                time_schedule_limit = watch.get('time_schedule_limit')
 | 
			
		||||
                logger.trace(f"{uuid} Time scheduler - Using watch settings (not global settings)")
 | 
			
		||||
            tz_name = datastore.data['settings']['application'].get('timezone', 'UTC')
 | 
			
		||||
            tz_name = datastore.data['settings']['application'].get('scheduler_timezone_default', os.getenv('TZ', 'UTC').strip())
 | 
			
		||||
 | 
			
		||||
            if time_schedule_limit and time_schedule_limit.get('enabled'):
 | 
			
		||||
                try:
 | 
			
		||||
@@ -663,16 +849,22 @@ def ticker_thread_check_time_launch_checks():
 | 
			
		||||
 | 
			
		||||
                    # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
 | 
			
		||||
                    priority = int(time.time())
 | 
			
		||||
                    logger.debug(
 | 
			
		||||
                        f"> Queued watch UUID {uuid} "
 | 
			
		||||
                        f"last checked at {watch['last_checked']} "
 | 
			
		||||
                        f"queued at {now:0.2f} priority {priority} "
 | 
			
		||||
                        f"jitter {watch.jitter_seconds:0.2f}s, "
 | 
			
		||||
                        f"{now - watch['last_checked']:0.2f}s since last checked")
 | 
			
		||||
 | 
			
		||||
                    # Into the queue with you
 | 
			
		||||
                    update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid}))
 | 
			
		||||
 | 
			
		||||
                    queued_successfully = worker_handler.queue_item_async_safe(update_q,
 | 
			
		||||
                                                                               queuedWatchMetaData.PrioritizedItem(priority=priority,
 | 
			
		||||
                                                                                                                   item={'uuid': uuid})
 | 
			
		||||
                                                                               )
 | 
			
		||||
                    if queued_successfully:
 | 
			
		||||
                        logger.debug(
 | 
			
		||||
                            f"> Queued watch UUID {uuid} "
 | 
			
		||||
                            f"last checked at {watch['last_checked']} "
 | 
			
		||||
                            f"queued at {now:0.2f} priority {priority} "
 | 
			
		||||
                            f"jitter {watch.jitter_seconds:0.2f}s, "
 | 
			
		||||
                            f"{now - watch['last_checked']:0.2f}s since last checked")
 | 
			
		||||
                    else:
 | 
			
		||||
                        logger.critical(f"CRITICAL: Failed to queue watch UUID {uuid} in ticker thread!")
 | 
			
		||||
                        
 | 
			
		||||
                    # Reset for next time
 | 
			
		||||
                    watch.jitter_seconds = 0
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -5,6 +5,7 @@ from wtforms.widgets.core import TimeInput
 | 
			
		||||
 | 
			
		||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
 | 
			
		||||
from changedetectionio.conditions.form import ConditionFormRow
 | 
			
		||||
from changedetectionio.notification_service import NotificationContextData
 | 
			
		||||
from changedetectionio.strtobool import strtobool
 | 
			
		||||
 | 
			
		||||
from wtforms import (
 | 
			
		||||
@@ -23,11 +24,11 @@ from wtforms import (
 | 
			
		||||
)
 | 
			
		||||
from flask_wtf.file import FileField, FileAllowed
 | 
			
		||||
from wtforms.fields import FieldList
 | 
			
		||||
from wtforms.utils import unset_value
 | 
			
		||||
 | 
			
		||||
from wtforms.validators import ValidationError
 | 
			
		||||
 | 
			
		||||
from validators.url import url as url_validator
 | 
			
		||||
 | 
			
		||||
from changedetectionio.widgets import TernaryNoneBooleanField
 | 
			
		||||
 | 
			
		||||
# default
 | 
			
		||||
# each select <option data-enabled="enabled-0-0"
 | 
			
		||||
@@ -54,6 +55,8 @@ valid_method = {
 | 
			
		||||
 | 
			
		||||
default_method = 'GET'
 | 
			
		||||
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
 | 
			
		||||
REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
 | 
			
		||||
REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT='At least one time interval (weeks, days, hours, minutes, or seconds) must be specified when not using global settings.'
 | 
			
		||||
 | 
			
		||||
class StringListField(StringField):
 | 
			
		||||
    widget = widgets.TextArea()
 | 
			
		||||
@@ -210,6 +213,35 @@ class ScheduleLimitForm(Form):
 | 
			
		||||
        self.sunday.form.enabled.label.text = "Sunday"
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def validate_time_between_check_has_values(form):
 | 
			
		||||
    """
 | 
			
		||||
    Custom validation function for TimeBetweenCheckForm.
 | 
			
		||||
    Returns True if at least one time interval field has a value > 0.
 | 
			
		||||
    """
 | 
			
		||||
    res = any([
 | 
			
		||||
        form.weeks.data and int(form.weeks.data) > 0,
 | 
			
		||||
        form.days.data and int(form.days.data) > 0,
 | 
			
		||||
        form.hours.data and int(form.hours.data) > 0,
 | 
			
		||||
        form.minutes.data and int(form.minutes.data) > 0,
 | 
			
		||||
        form.seconds.data and int(form.seconds.data) > 0
 | 
			
		||||
    ])
 | 
			
		||||
 | 
			
		||||
    return res
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RequiredTimeInterval(object):
 | 
			
		||||
    """
 | 
			
		||||
    WTForms validator that ensures at least one time interval field has a value > 0.
 | 
			
		||||
    Use this with FormField(TimeBetweenCheckForm, validators=[RequiredTimeInterval()]).
 | 
			
		||||
    """
 | 
			
		||||
    def __init__(self, message=None):
 | 
			
		||||
        self.message = message or 'At least one time interval (weeks, days, hours, minutes, or seconds) must be specified.'
 | 
			
		||||
 | 
			
		||||
    def __call__(self, form, field):
 | 
			
		||||
        if not validate_time_between_check_has_values(field.form):
 | 
			
		||||
            raise ValidationError(self.message)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class TimeBetweenCheckForm(Form):
 | 
			
		||||
    weeks = IntegerField('Weeks', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
 | 
			
		||||
    days = IntegerField('Days', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
 | 
			
		||||
@@ -218,33 +250,160 @@ class TimeBetweenCheckForm(Form):
 | 
			
		||||
    seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")])
 | 
			
		||||
    # @todo add total seconds minimum validatior = minimum_seconds_recheck_time
 | 
			
		||||
 | 
			
		||||
    def __init__(self, formdata=None, obj=None, prefix="", data=None, meta=None, **kwargs):
 | 
			
		||||
        super().__init__(formdata, obj, prefix, data, meta, **kwargs)
 | 
			
		||||
        self.require_at_least_one = kwargs.get('require_at_least_one', False)
 | 
			
		||||
        self.require_at_least_one_message = kwargs.get('require_at_least_one_message', REQUIRE_ATLEAST_ONE_TIME_PART_MESSAGE_DEFAULT)
 | 
			
		||||
 | 
			
		||||
    def validate(self, **kwargs):
 | 
			
		||||
        """Custom validation that can optionally require at least one time interval."""
 | 
			
		||||
        # Run normal field validation first
 | 
			
		||||
        if not super().validate(**kwargs):
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
        # Apply optional "at least one" validation
 | 
			
		||||
        if self.require_at_least_one:
 | 
			
		||||
            if not validate_time_between_check_has_values(self):
 | 
			
		||||
                # Add error to the form's general errors (not field-specific)
 | 
			
		||||
                if not hasattr(self, '_formdata_errors'):
 | 
			
		||||
                    self._formdata_errors = []
 | 
			
		||||
                self._formdata_errors.append(self.require_at_least_one_message)
 | 
			
		||||
                return False
 | 
			
		||||
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EnhancedFormField(FormField):
 | 
			
		||||
    """
 | 
			
		||||
    An enhanced FormField that supports conditional validation with top-level error messages.
 | 
			
		||||
    Adds a 'top_errors' property for validation errors at the FormField level.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def __init__(self, form_class, label=None, validators=None, separator="-",
 | 
			
		||||
                 conditional_field=None, conditional_message=None, conditional_test_function=None, **kwargs):
 | 
			
		||||
        """
 | 
			
		||||
        Initialize EnhancedFormField with optional conditional validation.
 | 
			
		||||
 | 
			
		||||
        :param conditional_field: Name of the field this FormField depends on (e.g. 'time_between_check_use_default')
 | 
			
		||||
        :param conditional_message: Error message to show when validation fails
 | 
			
		||||
        :param conditional_test_function: Custom function to test if FormField has valid values.
 | 
			
		||||
                                        Should take self.form as parameter and return True if valid.
 | 
			
		||||
        """
 | 
			
		||||
        super().__init__(form_class, label, validators, separator, **kwargs)
 | 
			
		||||
        self.top_errors = []
 | 
			
		||||
        self.conditional_field = conditional_field
 | 
			
		||||
        self.conditional_message = conditional_message or "At least one field must have a value when not using defaults."
 | 
			
		||||
        self.conditional_test_function = conditional_test_function
 | 
			
		||||
 | 
			
		||||
    def validate(self, form, extra_validators=()):
 | 
			
		||||
        """
 | 
			
		||||
        Custom validation that supports conditional logic and stores top-level errors.
 | 
			
		||||
        """
 | 
			
		||||
        self.top_errors = []
 | 
			
		||||
 | 
			
		||||
        # First run the normal FormField validation
 | 
			
		||||
        base_valid = super().validate(form, extra_validators)
 | 
			
		||||
 | 
			
		||||
        # Apply conditional validation if configured
 | 
			
		||||
        if self.conditional_field and hasattr(form, self.conditional_field):
 | 
			
		||||
            conditional_field_obj = getattr(form, self.conditional_field)
 | 
			
		||||
 | 
			
		||||
            # If the conditional field is False/unchecked, check if this FormField has any values
 | 
			
		||||
            if not conditional_field_obj.data:
 | 
			
		||||
                # Use custom test function if provided, otherwise use generic fallback
 | 
			
		||||
                if self.conditional_test_function:
 | 
			
		||||
                    has_any_value = self.conditional_test_function(self.form)
 | 
			
		||||
                else:
 | 
			
		||||
                    # Generic fallback - check if any field has truthy data
 | 
			
		||||
                    has_any_value = any(field.data for field in self.form if hasattr(field, 'data') and field.data)
 | 
			
		||||
 | 
			
		||||
                if not has_any_value:
 | 
			
		||||
                    self.top_errors.append(self.conditional_message)
 | 
			
		||||
                    base_valid = False
 | 
			
		||||
 | 
			
		||||
        return base_valid
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RequiredFormField(FormField):
 | 
			
		||||
    """
 | 
			
		||||
    A FormField that passes require_at_least_one=True to TimeBetweenCheckForm.
 | 
			
		||||
    Use this when you want the sub-form to always require at least one value.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def __init__(self, form_class, label=None, validators=None, separator="-", **kwargs):
 | 
			
		||||
        super().__init__(form_class, label, validators, separator, **kwargs)
 | 
			
		||||
 | 
			
		||||
    def process(self, formdata, data=unset_value, extra_filters=None):
 | 
			
		||||
        if extra_filters:
 | 
			
		||||
            raise TypeError(
 | 
			
		||||
                "FormField cannot take filters, as the encapsulated"
 | 
			
		||||
                "data is not mutable."
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        if data is unset_value:
 | 
			
		||||
            try:
 | 
			
		||||
                data = self.default()
 | 
			
		||||
            except TypeError:
 | 
			
		||||
                data = self.default
 | 
			
		||||
            self._obj = data
 | 
			
		||||
 | 
			
		||||
        self.object_data = data
 | 
			
		||||
 | 
			
		||||
        prefix = self.name + self.separator
 | 
			
		||||
        # Pass require_at_least_one=True to the sub-form
 | 
			
		||||
        if isinstance(data, dict):
 | 
			
		||||
            self.form = self.form_class(formdata=formdata, prefix=prefix, require_at_least_one=True, **data)
 | 
			
		||||
        else:
 | 
			
		||||
            self.form = self.form_class(formdata=formdata, obj=data, prefix=prefix, require_at_least_one=True)
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def errors(self):
 | 
			
		||||
        """Include sub-form validation errors"""
 | 
			
		||||
        form_errors = self.form.errors
 | 
			
		||||
        # Add any general form errors to a special 'form' key
 | 
			
		||||
        if hasattr(self.form, '_formdata_errors') and self.form._formdata_errors:
 | 
			
		||||
            form_errors = dict(form_errors)  # Make a copy
 | 
			
		||||
            form_errors['form'] = self.form._formdata_errors
 | 
			
		||||
        return form_errors
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Separated by  key:value
 | 
			
		||||
class StringDictKeyValue(StringField):
 | 
			
		||||
    widget = widgets.TextArea()
 | 
			
		||||
 | 
			
		||||
    def _value(self):
 | 
			
		||||
        if self.data:
 | 
			
		||||
            output = u''
 | 
			
		||||
            for k in self.data.keys():
 | 
			
		||||
                output += "{}: {}\r\n".format(k, self.data[k])
 | 
			
		||||
 | 
			
		||||
            output = ''
 | 
			
		||||
            for k, v in self.data.items():
 | 
			
		||||
                output += f"{k}: {v}\r\n"
 | 
			
		||||
            return output
 | 
			
		||||
        else:
 | 
			
		||||
            return u''
 | 
			
		||||
            return ''
 | 
			
		||||
 | 
			
		||||
    # incoming
 | 
			
		||||
    # incoming data processing + validation
 | 
			
		||||
    def process_formdata(self, valuelist):
 | 
			
		||||
        self.data = {}
 | 
			
		||||
        errors = []
 | 
			
		||||
        if valuelist:
 | 
			
		||||
            self.data = {}
 | 
			
		||||
            # Remove empty strings
 | 
			
		||||
            cleaned = list(filter(None, valuelist[0].split("\n")))
 | 
			
		||||
            for s in cleaned:
 | 
			
		||||
                parts = s.strip().split(':', 1)
 | 
			
		||||
                if len(parts) == 2:
 | 
			
		||||
                    self.data.update({parts[0].strip(): parts[1].strip()})
 | 
			
		||||
            # Remove empty strings (blank lines)
 | 
			
		||||
            cleaned = [line.strip() for line in valuelist[0].split("\n") if line.strip()]
 | 
			
		||||
            for idx, s in enumerate(cleaned, start=1):
 | 
			
		||||
                if ':' not in s:
 | 
			
		||||
                    errors.append(f"Line {idx} is missing a ':' separator.")
 | 
			
		||||
                    continue
 | 
			
		||||
                parts = s.split(':', 1)
 | 
			
		||||
                key = parts[0].strip()
 | 
			
		||||
                value = parts[1].strip()
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            self.data = {}
 | 
			
		||||
                if not key:
 | 
			
		||||
                    errors.append(f"Line {idx} has an empty key.")
 | 
			
		||||
                if not value:
 | 
			
		||||
                    errors.append(f"Line {idx} has an empty value.")
 | 
			
		||||
 | 
			
		||||
                self.data[key] = value
 | 
			
		||||
 | 
			
		||||
        if errors:
 | 
			
		||||
            raise ValidationError("Invalid input:\n" + "\n".join(errors))
 | 
			
		||||
 | 
			
		||||
class ValidateContentFetcherIsReady(object):
 | 
			
		||||
    """
 | 
			
		||||
@@ -308,11 +467,16 @@ class ValidateAppRiseServers(object):
 | 
			
		||||
        import apprise
 | 
			
		||||
        from .notification.apprise_plugin.assets import apprise_asset
 | 
			
		||||
        from .notification.apprise_plugin.custom_handlers import apprise_http_custom_handler  # noqa: F401
 | 
			
		||||
        from changedetectionio.jinja2_custom import render as jinja_render
 | 
			
		||||
 | 
			
		||||
        apobj = apprise.Apprise(asset=apprise_asset)
 | 
			
		||||
 | 
			
		||||
        for server_url in field.data:
 | 
			
		||||
            url = server_url.strip()
 | 
			
		||||
            generic_notification_context_data = NotificationContextData()
 | 
			
		||||
            # Make sure something is atleast in all those regular token fields
 | 
			
		||||
            generic_notification_context_data.set_random_for_validation()
 | 
			
		||||
 | 
			
		||||
            url = jinja_render(template_str=server_url.strip(), **generic_notification_context_data).strip()
 | 
			
		||||
            if url.startswith("#"):
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
@@ -326,9 +490,8 @@ class ValidateJinja2Template(object):
 | 
			
		||||
    """
 | 
			
		||||
    def __call__(self, form, field):
 | 
			
		||||
        from changedetectionio import notification
 | 
			
		||||
 | 
			
		||||
        from changedetectionio.jinja2_custom import create_jinja_env
 | 
			
		||||
        from jinja2 import BaseLoader, TemplateSyntaxError, UndefinedError
 | 
			
		||||
        from jinja2.sandbox import ImmutableSandboxedEnvironment
 | 
			
		||||
        from jinja2.meta import find_undeclared_variables
 | 
			
		||||
        import jinja2.exceptions
 | 
			
		||||
 | 
			
		||||
@@ -336,9 +499,11 @@ class ValidateJinja2Template(object):
 | 
			
		||||
        joined_data = ' '.join(map(str, field.data)) if isinstance(field.data, list) else f"{field.data}"
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            jinja2_env = ImmutableSandboxedEnvironment(loader=BaseLoader)
 | 
			
		||||
            jinja2_env.globals.update(notification.valid_tokens)
 | 
			
		||||
            # Extra validation tokens provided on the form_class(... extra_tokens={}) setup
 | 
			
		||||
            # Use the shared helper to create a properly configured environment
 | 
			
		||||
            jinja2_env = create_jinja_env(loader=BaseLoader)
 | 
			
		||||
 | 
			
		||||
            # Add notification tokens for validation
 | 
			
		||||
            jinja2_env.globals.update(NotificationContextData())
 | 
			
		||||
            if hasattr(field, 'extra_notification_tokens'):
 | 
			
		||||
                jinja2_env.globals.update(field.extra_notification_tokens)
 | 
			
		||||
 | 
			
		||||
@@ -350,6 +515,7 @@ class ValidateJinja2Template(object):
 | 
			
		||||
        except jinja2.exceptions.SecurityError as e:
 | 
			
		||||
            raise ValidationError(f"This is not a valid Jinja2 template: {e}") from e
 | 
			
		||||
 | 
			
		||||
        # Check for undeclared variables
 | 
			
		||||
        ast = jinja2_env.parse(joined_data)
 | 
			
		||||
        undefined = ", ".join(find_undeclared_variables(ast))
 | 
			
		||||
        if undefined:
 | 
			
		||||
@@ -372,19 +538,23 @@ class validateURL(object):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def validate_url(test_url):
 | 
			
		||||
    # If hosts that only contain alphanumerics are allowed ("localhost" for example)
 | 
			
		||||
    try:
 | 
			
		||||
        url_validator(test_url, simple_host=allow_simplehost)
 | 
			
		||||
    except validators.ValidationError:
 | 
			
		||||
        #@todo check for xss
 | 
			
		||||
        message = f"'{test_url}' is not a valid URL."
 | 
			
		||||
    from changedetectionio.validate_url import is_safe_valid_url
 | 
			
		||||
    if not is_safe_valid_url(test_url):
 | 
			
		||||
        # This should be wtforms.validators.
 | 
			
		||||
        raise ValidationError(message)
 | 
			
		||||
        raise ValidationError('Watch protocol is not permitted or invalid URL format')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ValidateSinglePythonRegexString(object):
 | 
			
		||||
    def __init__(self, message=None):
 | 
			
		||||
        self.message = message
 | 
			
		||||
 | 
			
		||||
    def __call__(self, form, field):
 | 
			
		||||
        try:
 | 
			
		||||
            re.compile(field.data)
 | 
			
		||||
        except re.error:
 | 
			
		||||
            message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
 | 
			
		||||
            raise ValidationError(message % (field.data))
 | 
			
		||||
 | 
			
		||||
    from .model.Watch import is_safe_url
 | 
			
		||||
    if not is_safe_url(test_url):
 | 
			
		||||
        # This should be wtforms.validators.
 | 
			
		||||
        raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format')
 | 
			
		||||
 | 
			
		||||
class ValidateListRegex(object):
 | 
			
		||||
    """
 | 
			
		||||
@@ -404,6 +574,7 @@ class ValidateListRegex(object):
 | 
			
		||||
                    message = field.gettext('RegEx \'%s\' is not a valid regular expression.')
 | 
			
		||||
                    raise ValidationError(message % (line))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ValidateCSSJSONXPATHInput(object):
 | 
			
		||||
    """
 | 
			
		||||
    Filter validation
 | 
			
		||||
@@ -503,6 +674,51 @@ class ValidateCSSJSONXPATHInput(object):
 | 
			
		||||
                except:
 | 
			
		||||
                    raise ValidationError("A system-error occurred when validating your jq expression")
 | 
			
		||||
 | 
			
		||||
class ValidateSimpleURL:
 | 
			
		||||
    """Validate that the value can be parsed by urllib.parse.urlparse() and has a scheme/netloc."""
 | 
			
		||||
    def __init__(self, message=None):
 | 
			
		||||
        self.message = message or "Invalid URL."
 | 
			
		||||
 | 
			
		||||
    def __call__(self, form, field):
 | 
			
		||||
        data = (field.data or "").strip()
 | 
			
		||||
        if not data:
 | 
			
		||||
            return  # empty is OK — pair with validators.Optional()
 | 
			
		||||
        from urllib.parse import urlparse
 | 
			
		||||
 | 
			
		||||
        parsed = urlparse(data)
 | 
			
		||||
        if not parsed.scheme or not parsed.netloc:
 | 
			
		||||
            raise ValidationError(self.message)
 | 
			
		||||
 | 
			
		||||
class ValidateStartsWithRegex(object):
 | 
			
		||||
    def __init__(self, regex, *, flags=0, message=None, allow_empty=True, split_lines=True):
 | 
			
		||||
        # compile with given flags (we’ll pass re.IGNORECASE below)
 | 
			
		||||
        self.pattern = re.compile(regex, flags) if isinstance(regex, str) else regex
 | 
			
		||||
        self.message = message
 | 
			
		||||
        self.allow_empty = allow_empty
 | 
			
		||||
        self.split_lines = split_lines
 | 
			
		||||
 | 
			
		||||
    def __call__(self, form, field):
 | 
			
		||||
        data = field.data
 | 
			
		||||
        if not data:
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
        # normalize into list of lines
 | 
			
		||||
        if isinstance(data, str) and self.split_lines:
 | 
			
		||||
            lines = data.splitlines()
 | 
			
		||||
        elif isinstance(data, (list, tuple)):
 | 
			
		||||
            lines = data
 | 
			
		||||
        else:
 | 
			
		||||
            lines = [data]
 | 
			
		||||
 | 
			
		||||
        for line in lines:
 | 
			
		||||
            stripped = line.strip()
 | 
			
		||||
            if not stripped:
 | 
			
		||||
                if self.allow_empty:
 | 
			
		||||
                    continue
 | 
			
		||||
                raise ValidationError(self.message or "Empty value not allowed.")
 | 
			
		||||
            if not self.pattern.match(stripped):
 | 
			
		||||
                raise ValidationError(self.message or "Invalid value.")
 | 
			
		||||
 | 
			
		||||
class quickWatchForm(Form):
 | 
			
		||||
    from . import processors
 | 
			
		||||
 | 
			
		||||
@@ -513,7 +729,6 @@ class quickWatchForm(Form):
 | 
			
		||||
    edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Common to a single watch and the global settings
 | 
			
		||||
class commonSettingsForm(Form):
 | 
			
		||||
    from . import processors
 | 
			
		||||
@@ -524,16 +739,23 @@ class commonSettingsForm(Form):
 | 
			
		||||
        self.notification_title.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
 | 
			
		||||
        self.notification_urls.extra_notification_tokens = kwargs.get('extra_notification_tokens', {})
 | 
			
		||||
 | 
			
		||||
    extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False)
 | 
			
		||||
    fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
 | 
			
		||||
    notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()])
 | 
			
		||||
    notification_format = SelectField('Notification format', choices=valid_notification_formats.keys())
 | 
			
		||||
    notification_format = SelectField('Notification format', choices=list(valid_notification_formats.items()))
 | 
			
		||||
    notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()])
 | 
			
		||||
    notification_urls = StringListField('Notification URL List', validators=[validators.Optional(), ValidateAppRiseServers(), ValidateJinja2Template()])
 | 
			
		||||
    processor = RadioField( label=u"Processor - What do you want to achieve?", choices=processors.available_processors(), default="text_json_diff")
 | 
			
		||||
    timezone = StringField("Timezone for watch schedule", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
 | 
			
		||||
    scheduler_timezone_default = StringField("Default timezone for watch check scheduler", render_kw={"list": "timezones"}, validators=[validateTimeZoneName()])
 | 
			
		||||
    webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, message="Should contain one or more seconds")])
 | 
			
		||||
 | 
			
		||||
# Not true anymore but keep the validate_ hook for future use, we convert color tags
 | 
			
		||||
#    def validate_notification_urls(self, field):
 | 
			
		||||
#        """Validate that HTML Color format is not used with Telegram"""
 | 
			
		||||
#        if self.notification_format.data == 'HTML Color' and field.data:
 | 
			
		||||
#            for url in field.data:
 | 
			
		||||
#                if url and ('tgram://' in url or 'discord://' in url or 'discord.com/api/webhooks' in url):
 | 
			
		||||
#                    raise ValidationError('HTML Color format is not supported by Telegram and Discord. Please choose another Notification Format (Plain Text, HTML, or Markdown to HTML).')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class importForm(Form):
 | 
			
		||||
    from . import processors
 | 
			
		||||
@@ -558,11 +780,16 @@ class processor_text_json_diff_form(commonSettingsForm):
 | 
			
		||||
    url = fields.URLField('URL', validators=[validateURL()])
 | 
			
		||||
    tags = StringTagUUID('Group tag', [validators.Optional()], default='')
 | 
			
		||||
 | 
			
		||||
    time_between_check = FormField(TimeBetweenCheckForm)
 | 
			
		||||
    time_between_check = EnhancedFormField(
 | 
			
		||||
        TimeBetweenCheckForm,
 | 
			
		||||
        conditional_field='time_between_check_use_default',
 | 
			
		||||
        conditional_message=REQUIRE_ATLEAST_ONE_TIME_PART_WHEN_NOT_GLOBAL_DEFAULT,
 | 
			
		||||
        conditional_test_function=validate_time_between_check_has_values
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    time_schedule_limit = FormField(ScheduleLimitForm)
 | 
			
		||||
 | 
			
		||||
    time_between_check_use_default = BooleanField('Use global settings for time between check', default=False)
 | 
			
		||||
    time_between_check_use_default = BooleanField('Use global settings for time between check and scheduler.', default=False)
 | 
			
		||||
 | 
			
		||||
    include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='')
 | 
			
		||||
 | 
			
		||||
@@ -580,6 +807,7 @@ class processor_text_json_diff_form(commonSettingsForm):
 | 
			
		||||
    check_unique_lines = BooleanField('Only trigger when unique lines appear in all history', default=False)
 | 
			
		||||
    remove_duplicate_lines = BooleanField('Remove duplicate lines of text', default=False)
 | 
			
		||||
    sort_text_alphabetically =  BooleanField('Sort text alphabetically', default=False)
 | 
			
		||||
    strip_ignored_lines = TernaryNoneBooleanField('Strip ignored lines', default=None)
 | 
			
		||||
    trim_text_whitespace = BooleanField('Trim whitespace before and after text', default=False)
 | 
			
		||||
 | 
			
		||||
    filter_text_added = BooleanField('Added lines', default=True)
 | 
			
		||||
@@ -592,18 +820,18 @@ class processor_text_json_diff_form(commonSettingsForm):
 | 
			
		||||
    text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])
 | 
			
		||||
    webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])
 | 
			
		||||
 | 
			
		||||
    save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
 | 
			
		||||
    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
 | 
			
		||||
 | 
			
		||||
    proxy = RadioField('Proxy')
 | 
			
		||||
    # filter_failure_notification_send @todo make ternary
 | 
			
		||||
    filter_failure_notification_send = BooleanField(
 | 
			
		||||
        'Send a notification when the filter can no longer be found on the page', default=False)
 | 
			
		||||
 | 
			
		||||
    notification_muted = BooleanField('Notifications Muted / Off', default=False)
 | 
			
		||||
    notification_muted = TernaryNoneBooleanField('Notifications', default=None, yes_text="Muted", no_text="On")
 | 
			
		||||
    notification_screenshot = BooleanField('Attach screenshot to notification (where possible)', default=False)
 | 
			
		||||
 | 
			
		||||
    conditions_match_logic = RadioField(u'Match', choices=[('ALL', 'Match all of the following'),('ANY', 'Match any of the following')], default='ALL')
 | 
			
		||||
    conditions = FieldList(FormField(ConditionFormRow), min_entries=1)  # Add rule logic here
 | 
			
		||||
 | 
			
		||||
    use_page_title_in_list = TernaryNoneBooleanField('Use page <title> in list', default=None)
 | 
			
		||||
 | 
			
		||||
    def extra_tab_content(self):
 | 
			
		||||
        return None
 | 
			
		||||
@@ -615,7 +843,7 @@ class processor_text_json_diff_form(commonSettingsForm):
 | 
			
		||||
        if not super().validate():
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
        from changedetectionio.safe_jinja import render as jinja_render
 | 
			
		||||
        from changedetectionio.jinja2_custom import render as jinja_render
 | 
			
		||||
        result = True
 | 
			
		||||
 | 
			
		||||
        # Fail form validation when a body is set for a GET
 | 
			
		||||
@@ -678,23 +906,36 @@ class processor_text_json_diff_form(commonSettingsForm):
 | 
			
		||||
    ):
 | 
			
		||||
        super().__init__(formdata, obj, prefix, data, meta, **kwargs)
 | 
			
		||||
        if kwargs and kwargs.get('default_system_settings'):
 | 
			
		||||
            default_tz = kwargs.get('default_system_settings').get('application', {}).get('timezone')
 | 
			
		||||
            default_tz = kwargs.get('default_system_settings').get('application', {}).get('scheduler_timezone_default')
 | 
			
		||||
            if default_tz:
 | 
			
		||||
                self.time_schedule_limit.form.timezone.render_kw['placeholder'] = default_tz
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SingleExtraProxy(Form):
 | 
			
		||||
 | 
			
		||||
    # maybe better to set some <script>var..
 | 
			
		||||
    proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
 | 
			
		||||
    proxy_url = StringField('Proxy URL', [validators.Optional()], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
 | 
			
		||||
    # @todo do the validation here instead
 | 
			
		||||
    proxy_url = StringField('Proxy URL', [
 | 
			
		||||
        validators.Optional(),
 | 
			
		||||
        ValidateStartsWithRegex(
 | 
			
		||||
            regex=r'^(https?|socks5)://',  # ✅ main pattern
 | 
			
		||||
            flags=re.IGNORECASE,  # ✅ makes it case-insensitive
 | 
			
		||||
            message='Proxy URLs must start with http://, https:// or socks5://',
 | 
			
		||||
        ),
 | 
			
		||||
        ValidateSimpleURL()
 | 
			
		||||
    ], render_kw={"placeholder": "socks5:// or regular proxy http://user:pass@...:3128", "size":50})
 | 
			
		||||
 | 
			
		||||
class SingleExtraBrowser(Form):
 | 
			
		||||
    browser_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"})
 | 
			
		||||
    browser_connection_url = StringField('Browser connection URL', [validators.Optional()], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
 | 
			
		||||
    # @todo do the validation here instead
 | 
			
		||||
    browser_connection_url = StringField('Browser connection URL', [
 | 
			
		||||
        validators.Optional(),
 | 
			
		||||
        ValidateStartsWithRegex(
 | 
			
		||||
            regex=r'^(wss?|ws)://',
 | 
			
		||||
            flags=re.IGNORECASE,
 | 
			
		||||
            message='Browser URLs must start with wss:// or ws://'
 | 
			
		||||
        ),
 | 
			
		||||
        ValidateSimpleURL()
 | 
			
		||||
    ], render_kw={"placeholder": "wss://brightdata... wss://oxylabs etc", "size":50})
 | 
			
		||||
 | 
			
		||||
class DefaultUAInputForm(Form):
 | 
			
		||||
    html_requests = StringField('Plaintext requests', validators=[validators.Optional()], render_kw={"placeholder": "<default>"})
 | 
			
		||||
@@ -703,12 +944,23 @@ class DefaultUAInputForm(Form):
 | 
			
		||||
 | 
			
		||||
# datastore.data['settings']['requests']..
 | 
			
		||||
class globalSettingsRequestForm(Form):
 | 
			
		||||
    time_between_check = FormField(TimeBetweenCheckForm)
 | 
			
		||||
    time_between_check = RequiredFormField(TimeBetweenCheckForm)
 | 
			
		||||
    time_schedule_limit = FormField(ScheduleLimitForm)
 | 
			
		||||
    proxy = RadioField('Proxy')
 | 
			
		||||
    proxy = RadioField('Default proxy')
 | 
			
		||||
    jitter_seconds = IntegerField('Random jitter seconds ± check',
 | 
			
		||||
                                  render_kw={"style": "width: 5em;"},
 | 
			
		||||
                                  validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
 | 
			
		||||
    
 | 
			
		||||
    workers = IntegerField('Number of fetch workers',
 | 
			
		||||
                          render_kw={"style": "width: 5em;"},
 | 
			
		||||
                          validators=[validators.NumberRange(min=1, max=50,
 | 
			
		||||
                                                             message="Should be between 1 and 50")])
 | 
			
		||||
 | 
			
		||||
    timeout = IntegerField('Requests timeout in seconds',
 | 
			
		||||
                           render_kw={"style": "width: 5em;"},
 | 
			
		||||
                           validators=[validators.NumberRange(min=1, max=999,
 | 
			
		||||
                                                              message="Should be between 1 and 999")])
 | 
			
		||||
 | 
			
		||||
    extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5)
 | 
			
		||||
    extra_browsers = FieldList(FormField(SingleExtraBrowser), min_entries=5)
 | 
			
		||||
 | 
			
		||||
@@ -722,7 +974,10 @@ class globalSettingsRequestForm(Form):
 | 
			
		||||
                    return False
 | 
			
		||||
 | 
			
		||||
class globalSettingsApplicationUIForm(Form):
 | 
			
		||||
    open_diff_in_new_tab = BooleanField('Open diff page in a new tab', default=True, validators=[validators.Optional()])
 | 
			
		||||
    open_diff_in_new_tab = BooleanField("Open 'History' page in a new tab", default=True, validators=[validators.Optional()])
 | 
			
		||||
    socket_io_enabled = BooleanField('Realtime UI Updates Enabled', default=True, validators=[validators.Optional()])
 | 
			
		||||
    favicons_enabled = BooleanField('Favicons Enabled', default=True, validators=[validators.Optional()])
 | 
			
		||||
    use_page_title_in_list = BooleanField('Use page <title> in watch overview list') #BooleanField=True
 | 
			
		||||
 | 
			
		||||
# datastore.data['settings']['application']..
 | 
			
		||||
class globalSettingsApplicationForm(commonSettingsForm):
 | 
			
		||||
@@ -747,9 +1002,14 @@ class globalSettingsApplicationForm(commonSettingsForm):
 | 
			
		||||
 | 
			
		||||
    removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
 | 
			
		||||
    render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
 | 
			
		||||
    shared_diff_access = BooleanField('Allow access to view diff page when password is enabled', default=False, validators=[validators.Optional()])
 | 
			
		||||
    shared_diff_access = BooleanField('Allow anonymous access to watch history page when password is enabled', default=False, validators=[validators.Optional()])
 | 
			
		||||
    strip_ignored_lines = BooleanField('Strip ignored lines')
 | 
			
		||||
    rss_hide_muted_watches = BooleanField('Hide muted watches from RSS feed', default=True,
 | 
			
		||||
                                      validators=[validators.Optional()])
 | 
			
		||||
 | 
			
		||||
    rss_reader_mode = BooleanField('RSS reader mode ', default=False,
 | 
			
		||||
                                      validators=[validators.Optional()])
 | 
			
		||||
 | 
			
		||||
    filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
 | 
			
		||||
                                                                  render_kw={"style": "width: 5em;"},
 | 
			
		||||
                                                                  validators=[validators.NumberRange(min=0,
 | 
			
		||||
@@ -769,9 +1029,9 @@ class globalSettingsForm(Form):
 | 
			
		||||
 | 
			
		||||
    requests = FormField(globalSettingsRequestForm)
 | 
			
		||||
    application = FormField(globalSettingsApplicationForm)
 | 
			
		||||
    save_button = SubmitField('Save', render_kw={"class": "pure-button button-small pure-button-primary"})
 | 
			
		||||
    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class extractDataForm(Form):
 | 
			
		||||
    extract_regex = StringField('RegEx to extract', validators=[validators.Length(min=1, message="Needs a RegEx")])
 | 
			
		||||
    extract_regex = StringField('RegEx to extract', validators=[validators.DataRequired(), ValidateSinglePythonRegexString()])
 | 
			
		||||
    extract_submit_button = SubmitField('Extract as CSV', render_kw={"class": "pure-button pure-button-primary"})
 | 
			
		||||
 
 | 
			
		||||
@@ -1,6 +1,8 @@
 | 
			
		||||
from functools import lru_cache
 | 
			
		||||
 | 
			
		||||
from loguru import logger
 | 
			
		||||
from lxml import etree
 | 
			
		||||
from typing import List
 | 
			
		||||
import html
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
@@ -9,6 +11,10 @@ TEXT_FILTER_LIST_LINE_SUFFIX = "<br>"
 | 
			
		||||
TRANSLATE_WHITESPACE_TABLE = str.maketrans('', '', '\r\n\t ')
 | 
			
		||||
PERL_STYLE_REGEX = r'^/(.*?)/([a-z]*)?$'
 | 
			
		||||
 | 
			
		||||
TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
 | 
			
		||||
META_CS  = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
 | 
			
		||||
META_CT  = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
 | 
			
		||||
 | 
			
		||||
# 'price' , 'lowPrice', 'highPrice' are usually under here
 | 
			
		||||
# All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
 | 
			
		||||
LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
 | 
			
		||||
@@ -17,9 +23,9 @@ class JSONNotFound(ValueError):
 | 
			
		||||
    def __init__(self, msg):
 | 
			
		||||
        ValueError.__init__(self, msg)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Doesn't look like python supports forward slash auto enclosure in re.findall
 | 
			
		||||
# So convert it to inline flag "(?i)foobar" type configuration
 | 
			
		||||
@lru_cache(maxsize=100)
 | 
			
		||||
def perl_style_slash_enclosed_regex_to_options(regex):
 | 
			
		||||
 | 
			
		||||
    res = re.search(PERL_STYLE_REGEX, regex, re.IGNORECASE)
 | 
			
		||||
@@ -52,13 +58,17 @@ def include_filters(include_filters, html_content, append_pretty_line_formatting
 | 
			
		||||
 | 
			
		||||
    return html_block
 | 
			
		||||
 | 
			
		||||
def subtractive_css_selector(css_selector, html_content):
 | 
			
		||||
def subtractive_css_selector(css_selector, content):
 | 
			
		||||
    from bs4 import BeautifulSoup
 | 
			
		||||
    soup = BeautifulSoup(html_content, "html.parser")
 | 
			
		||||
    soup = BeautifulSoup(content, "html.parser")
 | 
			
		||||
 | 
			
		||||
    # So that the elements dont shift their index, build a list of elements here which will be pointers to their place in the DOM
 | 
			
		||||
    elements_to_remove = soup.select(css_selector)
 | 
			
		||||
 | 
			
		||||
    if not elements_to_remove:
 | 
			
		||||
        # Better to return the original that rebuild with BeautifulSoup
 | 
			
		||||
        return content
 | 
			
		||||
 | 
			
		||||
    # Then, remove them in a separate loop
 | 
			
		||||
    for item in elements_to_remove:
 | 
			
		||||
        item.decompose()
 | 
			
		||||
@@ -66,6 +76,7 @@ def subtractive_css_selector(css_selector, html_content):
 | 
			
		||||
    return str(soup)
 | 
			
		||||
 | 
			
		||||
def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
 | 
			
		||||
    from lxml import etree
 | 
			
		||||
    # Parse the HTML content using lxml
 | 
			
		||||
    html_tree = etree.HTML(html_content)
 | 
			
		||||
 | 
			
		||||
@@ -77,6 +88,10 @@ def subtractive_xpath_selector(selectors: List[str], html_content: str) -> str:
 | 
			
		||||
        # Collect elements for each selector
 | 
			
		||||
        elements_to_remove.extend(html_tree.xpath(selector))
 | 
			
		||||
 | 
			
		||||
    # If no elements were found, return the original HTML content
 | 
			
		||||
    if not elements_to_remove:
 | 
			
		||||
        return html_content
 | 
			
		||||
 | 
			
		||||
    # Then, remove them in a separate loop
 | 
			
		||||
    for element in elements_to_remove:
 | 
			
		||||
        if element.getparent() is not None:  # Ensure the element has a parent before removing
 | 
			
		||||
@@ -94,7 +109,7 @@ def element_removal(selectors: List[str], html_content):
 | 
			
		||||
    xpath_selectors = []
 | 
			
		||||
 | 
			
		||||
    for selector in selectors:
 | 
			
		||||
        if selector.startswith(('xpath:', 'xpath1:', '//')):
 | 
			
		||||
        if selector.strip().startswith(('xpath:', 'xpath1:', '//')):
 | 
			
		||||
            # Handle XPath selectors separately
 | 
			
		||||
            xpath_selector = selector.removeprefix('xpath:').removeprefix('xpath1:')
 | 
			
		||||
            xpath_selectors.append(xpath_selector)
 | 
			
		||||
@@ -171,8 +186,21 @@ def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False
 | 
			
		||||
    tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
 | 
			
		||||
    html_block = ""
 | 
			
		||||
 | 
			
		||||
    r = elementpath.select(tree, xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}, parser=XPath3Parser)
 | 
			
		||||
    #@note: //title/text() wont work where <title>CDATA..
 | 
			
		||||
    # Build namespace map for XPath queries
 | 
			
		||||
    namespaces = {'re': 'http://exslt.org/regular-expressions'}
 | 
			
		||||
 | 
			
		||||
    # Handle default namespace in documents (common in RSS/Atom feeds, but can occur in any XML)
 | 
			
		||||
    # XPath spec: unprefixed element names have no namespace, not the default namespace
 | 
			
		||||
    # Solution: Register the default namespace with empty string prefix in elementpath
 | 
			
		||||
    # This is primarily for RSS/Atom feeds but works for any XML with default namespace
 | 
			
		||||
    if hasattr(tree, 'nsmap') and tree.nsmap and None in tree.nsmap:
 | 
			
		||||
        # Register the default namespace with empty string prefix for elementpath
 | 
			
		||||
        # This allows //title to match elements in the default namespace
 | 
			
		||||
        namespaces[''] = tree.nsmap[None]
 | 
			
		||||
 | 
			
		||||
    r = elementpath.select(tree, xpath_filter.strip(), namespaces=namespaces, parser=XPath3Parser)
 | 
			
		||||
    #@note: //title/text() now works with default namespaces (fixed by registering '' prefix)
 | 
			
		||||
    #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
 | 
			
		||||
 | 
			
		||||
    if type(r) != list:
 | 
			
		||||
        r = [r]
 | 
			
		||||
@@ -207,8 +235,19 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals
 | 
			
		||||
    tree = html.fromstring(bytes(html_content, encoding='utf-8'), parser=parser)
 | 
			
		||||
    html_block = ""
 | 
			
		||||
 | 
			
		||||
    r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
 | 
			
		||||
    #@note: //title/text() wont work where <title>CDATA..
 | 
			
		||||
    # Build namespace map for XPath queries
 | 
			
		||||
    namespaces = {'re': 'http://exslt.org/regular-expressions'}
 | 
			
		||||
 | 
			
		||||
    # NOTE: lxml's native xpath() does NOT support empty string prefix for default namespace
 | 
			
		||||
    # For documents with default namespace (RSS/Atom feeds), users must use:
 | 
			
		||||
    #   - local-name(): //*[local-name()='title']/text()
 | 
			
		||||
    #   - Or use xpath_filter (not xpath1_filter) which supports default namespaces
 | 
			
		||||
    # XPath spec: unprefixed element names have no namespace, not the default namespace
 | 
			
		||||
 | 
			
		||||
    r = tree.xpath(xpath_filter.strip(), namespaces=namespaces)
 | 
			
		||||
    #@note: xpath1 (lxml) does NOT automatically handle default namespaces
 | 
			
		||||
    #@note: Use //*[local-name()='element'] or switch to xpath_filter for default namespace support
 | 
			
		||||
    #@note: //title/text() wont work where <title>CDATA.. (use cdata_in_document_to_text first)
 | 
			
		||||
 | 
			
		||||
    for element in r:
 | 
			
		||||
        # When there's more than 1 match, then add the suffix to separate each line
 | 
			
		||||
@@ -289,70 +328,92 @@ def _get_stripped_text_from_json_match(match):
 | 
			
		||||
 | 
			
		||||
    return stripped_text_from_html
 | 
			
		||||
 | 
			
		||||
def extract_json_blob_from_html(content, ensure_is_ldjson_info_type, json_filter):
 | 
			
		||||
    from bs4 import BeautifulSoup
 | 
			
		||||
    stripped_text_from_html = ''
 | 
			
		||||
 | 
			
		||||
    # Foreach <script json></script> blob.. just return the first that matches json_filter
 | 
			
		||||
    # As a last resort, try to parse the whole <body>
 | 
			
		||||
    soup = BeautifulSoup(content, 'html.parser')
 | 
			
		||||
 | 
			
		||||
    if ensure_is_ldjson_info_type:
 | 
			
		||||
        bs_result = soup.find_all('script', {"type": "application/ld+json"})
 | 
			
		||||
    else:
 | 
			
		||||
        bs_result = soup.find_all('script')
 | 
			
		||||
    bs_result += soup.find_all('body')
 | 
			
		||||
 | 
			
		||||
    bs_jsons = []
 | 
			
		||||
 | 
			
		||||
    for result in bs_result:
 | 
			
		||||
        # result.text is how bs4 magically strips JSON from the body
 | 
			
		||||
        content_start = result.text.lstrip("\ufeff").strip()[:100] if result.text else ''
 | 
			
		||||
        # Skip empty tags, and things that dont even look like JSON
 | 
			
		||||
        if not result.text or not (content_start[0] == '{' or content_start[0] == '['):
 | 
			
		||||
            continue
 | 
			
		||||
        try:
 | 
			
		||||
            json_data = json.loads(result.text)
 | 
			
		||||
            bs_jsons.append(json_data)
 | 
			
		||||
        except json.JSONDecodeError:
 | 
			
		||||
            # Skip objects which cannot be parsed
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
    if not bs_jsons:
 | 
			
		||||
        raise JSONNotFound("No parsable JSON found in this document")
 | 
			
		||||
 | 
			
		||||
    for json_data in bs_jsons:
 | 
			
		||||
        stripped_text_from_html = _parse_json(json_data, json_filter)
 | 
			
		||||
 | 
			
		||||
        if ensure_is_ldjson_info_type:
 | 
			
		||||
            # Could sometimes be list, string or something else random
 | 
			
		||||
            if isinstance(json_data, dict):
 | 
			
		||||
                # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
 | 
			
		||||
                # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
 | 
			
		||||
                # @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
 | 
			
		||||
                # LD_JSON auto-extract also requires some content PLUS the ldjson to be present
 | 
			
		||||
                # 1833 - could be either str or dict, should not be anything else
 | 
			
		||||
 | 
			
		||||
                t = json_data.get('@type')
 | 
			
		||||
                if t and stripped_text_from_html:
 | 
			
		||||
 | 
			
		||||
                    if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
 | 
			
		||||
                        break
 | 
			
		||||
                    # The non-standard part, some have a list
 | 
			
		||||
                    elif isinstance(t, list):
 | 
			
		||||
                        if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
 | 
			
		||||
                            break
 | 
			
		||||
 | 
			
		||||
        elif stripped_text_from_html:
 | 
			
		||||
            break
 | 
			
		||||
 | 
			
		||||
    return stripped_text_from_html
 | 
			
		||||
 | 
			
		||||
# content - json
 | 
			
		||||
# json_filter - ie json:$..price
 | 
			
		||||
# ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector)
 | 
			
		||||
def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None):
 | 
			
		||||
    from bs4 import BeautifulSoup
 | 
			
		||||
 | 
			
		||||
    stripped_text_from_html = False
 | 
			
		||||
# https://github.com/dgtlmoon/changedetection.io/pull/2041#issuecomment-1848397161w
 | 
			
		||||
    # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded within HTML tags
 | 
			
		||||
    try:
 | 
			
		||||
        # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
 | 
			
		||||
        stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff") ), json_filter)
 | 
			
		||||
    except json.JSONDecodeError as e:
 | 
			
		||||
        logger.warning(str(e))
 | 
			
		||||
 | 
			
		||||
        # Foreach <script json></script> blob.. just return the first that matches json_filter
 | 
			
		||||
        # As a last resort, try to parse the whole <body>
 | 
			
		||||
        soup = BeautifulSoup(content, 'html.parser')
 | 
			
		||||
    # Looks like clean JSON, dont bother extracting from HTML
 | 
			
		||||
 | 
			
		||||
        if ensure_is_ldjson_info_type:
 | 
			
		||||
            bs_result = soup.findAll('script', {"type": "application/ld+json"})
 | 
			
		||||
        else:
 | 
			
		||||
            bs_result = soup.findAll('script')
 | 
			
		||||
        bs_result += soup.findAll('body')
 | 
			
		||||
    content_start = content.lstrip("\ufeff").strip()[:100]
 | 
			
		||||
 | 
			
		||||
        bs_jsons = []
 | 
			
		||||
        for result in bs_result:
 | 
			
		||||
            # Skip empty tags, and things that dont even look like JSON
 | 
			
		||||
            if not result.text or '{' not in result.text:
 | 
			
		||||
                continue
 | 
			
		||||
            try:
 | 
			
		||||
                json_data = json.loads(result.text)
 | 
			
		||||
                bs_jsons.append(json_data)
 | 
			
		||||
            except json.JSONDecodeError:
 | 
			
		||||
                # Skip objects which cannot be parsed
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
        if not bs_jsons:
 | 
			
		||||
            raise JSONNotFound("No parsable JSON found in this document")
 | 
			
		||||
        
 | 
			
		||||
        for json_data in bs_jsons:
 | 
			
		||||
            stripped_text_from_html = _parse_json(json_data, json_filter)
 | 
			
		||||
 | 
			
		||||
            if ensure_is_ldjson_info_type:
 | 
			
		||||
                # Could sometimes be list, string or something else random
 | 
			
		||||
                if isinstance(json_data, dict):
 | 
			
		||||
                    # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search
 | 
			
		||||
                    # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part)
 | 
			
		||||
                    # @type could also be a list although non-standard ("@type": ["Product", "SubType"],)
 | 
			
		||||
                    # LD_JSON auto-extract also requires some content PLUS the ldjson to be present
 | 
			
		||||
                    # 1833 - could be either str or dict, should not be anything else
 | 
			
		||||
 | 
			
		||||
                    t = json_data.get('@type')
 | 
			
		||||
                    if t and stripped_text_from_html:
 | 
			
		||||
 | 
			
		||||
                        if isinstance(t, str) and t.lower() == ensure_is_ldjson_info_type.lower():
 | 
			
		||||
                            break
 | 
			
		||||
                        # The non-standard part, some have a list
 | 
			
		||||
                        elif isinstance(t, list):
 | 
			
		||||
                            if ensure_is_ldjson_info_type.lower() in [x.lower().strip() for x in t]:
 | 
			
		||||
                                break
 | 
			
		||||
 | 
			
		||||
            elif stripped_text_from_html:
 | 
			
		||||
                break
 | 
			
		||||
    if content_start[0] == '{' or content_start[0] == '[':
 | 
			
		||||
        try:
 | 
			
		||||
            # .lstrip("\ufeff") strings ByteOrderMark from UTF8 and still lets the UTF work
 | 
			
		||||
            stripped_text_from_html = _parse_json(json.loads(content.lstrip("\ufeff")), json_filter)
 | 
			
		||||
        except json.JSONDecodeError as e:
 | 
			
		||||
            logger.warning(f"Error processing JSON {content[:20]}...{str(e)})")
 | 
			
		||||
    else:
 | 
			
		||||
        # Probably something else, go fish inside for it
 | 
			
		||||
        try:
 | 
			
		||||
            stripped_text_from_html = extract_json_blob_from_html(content=content,
 | 
			
		||||
                                                                  ensure_is_ldjson_info_type=ensure_is_ldjson_info_type,
 | 
			
		||||
                                                                  json_filter=json_filter                                                                  )
 | 
			
		||||
        except json.JSONDecodeError as e:
 | 
			
		||||
            logger.warning(f"Error processing JSON while extracting JSON from HTML blob {content[:20]}...{str(e)})")
 | 
			
		||||
 | 
			
		||||
    if not stripped_text_from_html:
 | 
			
		||||
        # Re 265 - Just return an empty string when filter not found
 | 
			
		||||
@@ -372,6 +433,9 @@ def strip_ignore_text(content, wordlist, mode="content"):
 | 
			
		||||
    ignored_lines = []
 | 
			
		||||
 | 
			
		||||
    for k in wordlist:
 | 
			
		||||
        # Skip empty strings to avoid matching everything
 | 
			
		||||
        if not k or not k.strip():
 | 
			
		||||
            continue
 | 
			
		||||
        # Is it a regex?
 | 
			
		||||
        res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
 | 
			
		||||
        if res:
 | 
			
		||||
@@ -436,55 +500,27 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False
 | 
			
		||||
    return re.sub(pattern, repl, html_content)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def html_to_text_sub_worker(conn, html_content: str, render_anchor_tag_content=False, is_rss=False):
 | 
			
		||||
# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False, timeout=10) -> str:
 | 
			
		||||
    from inscriptis import get_text
 | 
			
		||||
    from inscriptis.model.config import ParserConfig
 | 
			
		||||
 | 
			
		||||
    """Converts html string to a string with just the text. If ignoring
 | 
			
		||||
    rendering anchor tag content is enable, anchor tag content are also
 | 
			
		||||
    included in the text
 | 
			
		||||
 | 
			
		||||
    :param html_content: string with html content
 | 
			
		||||
    :param render_anchor_tag_content: boolean flag indicating whether to extract
 | 
			
		||||
    hyperlinks (the anchor tag content) together with text. This refers to the
 | 
			
		||||
    'href' inside 'a' tags.
 | 
			
		||||
    Anchor tag content is rendered in the following manner:
 | 
			
		||||
    '[ text ](anchor tag content)'
 | 
			
		||||
    :return: extracted text from the HTML
 | 
			
		||||
    """
 | 
			
		||||
    #  if anchor tag content flag is set to True define a config for
 | 
			
		||||
    #  extracting this content
 | 
			
		||||
    if render_anchor_tag_content:
 | 
			
		||||
        parser_config = ParserConfig(
 | 
			
		||||
            annotation_rules={"a": ["hyperlink"]},
 | 
			
		||||
            display_links=True
 | 
			
		||||
        )
 | 
			
		||||
    # otherwise set config to None/default
 | 
			
		||||
    else:
 | 
			
		||||
        parser_config = None
 | 
			
		||||
 | 
			
		||||
    # RSS Mode - Inscriptis will treat `title` as something else.
 | 
			
		||||
    # Make it as a regular block display element (//item/title)
 | 
			
		||||
    # This is a bit of a hack - the real way it to use XSLT to convert it to HTML #1874
 | 
			
		||||
    if is_rss:
 | 
			
		||||
        html_content = re.sub(r'<title([\s>])', r'<h1\1', html_content)
 | 
			
		||||
        html_content = re.sub(r'</title>', r'</h1>', html_content)
 | 
			
		||||
 | 
			
		||||
    text_content = get_text(html_content, config=parser_config)
 | 
			
		||||
    conn.send(text_content)
 | 
			
		||||
    conn.close()
 | 
			
		||||
 | 
			
		||||
# NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON
 | 
			
		||||
def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False):
 | 
			
		||||
    from multiprocessing import Process, Pipe
 | 
			
		||||
 | 
			
		||||
    parent_conn, child_conn = Pipe()
 | 
			
		||||
    p = Process(target=html_to_text_sub_worker, args=(child_conn, html_content, render_anchor_tag_content, is_rss))
 | 
			
		||||
    p.start()
 | 
			
		||||
    text = parent_conn.recv()
 | 
			
		||||
    p.join()
 | 
			
		||||
    return text
 | 
			
		||||
    return text_content
 | 
			
		||||
 | 
			
		||||
# Does LD+JSON exist with a @type=='product' and a .price set anywhere?
 | 
			
		||||
def has_ldjson_product_info(content):
 | 
			
		||||
@@ -538,3 +574,43 @@ def get_triggered_text(content, trigger_text):
 | 
			
		||||
        i += 1
 | 
			
		||||
 | 
			
		||||
    return triggered_text
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def extract_title(data: bytes | str, sniff_bytes: int = 2048, scan_chars: int = 8192) -> str | None:
 | 
			
		||||
    try:
 | 
			
		||||
        # Only decode/process the prefix we need for title extraction
 | 
			
		||||
        match data:
 | 
			
		||||
            case bytes() if data.startswith((b"\xff\xfe", b"\xfe\xff")):
 | 
			
		||||
                prefix = data[:scan_chars * 2].decode("utf-16", errors="replace")
 | 
			
		||||
            case bytes() if data.startswith((b"\xff\xfe\x00\x00", b"\x00\x00\xfe\xff")):
 | 
			
		||||
                prefix = data[:scan_chars * 4].decode("utf-32", errors="replace")
 | 
			
		||||
            case bytes():
 | 
			
		||||
                try:
 | 
			
		||||
                    prefix = data[:scan_chars].decode("utf-8")
 | 
			
		||||
                except UnicodeDecodeError:
 | 
			
		||||
                    try:
 | 
			
		||||
                        head = data[:sniff_bytes].decode("ascii", errors="ignore")
 | 
			
		||||
                        if m := (META_CS.search(head) or META_CT.search(head)):
 | 
			
		||||
                            enc = m.group(1).lower()
 | 
			
		||||
                        else:
 | 
			
		||||
                            enc = "cp1252"
 | 
			
		||||
                        prefix = data[:scan_chars * 2].decode(enc, errors="replace")
 | 
			
		||||
                    except Exception as e:
 | 
			
		||||
                        logger.error(f"Title extraction encoding detection failed: {e}")
 | 
			
		||||
                        return None
 | 
			
		||||
            case str():
 | 
			
		||||
                prefix = data[:scan_chars] if len(data) > scan_chars else data
 | 
			
		||||
            case _:
 | 
			
		||||
                logger.error(f"Title extraction received unsupported data type: {type(data)}")
 | 
			
		||||
                return None
 | 
			
		||||
 | 
			
		||||
        # Search only in the prefix
 | 
			
		||||
        if m := TITLE_RE.search(prefix):
 | 
			
		||||
            title = html.unescape(" ".join(m.group(1).split())).strip()
 | 
			
		||||
            # Some safe limit
 | 
			
		||||
            return title[:2000]
 | 
			
		||||
        return None
 | 
			
		||||
        
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.error(f"Title extraction failed: {e}")
 | 
			
		||||
        return None
 | 
			
		||||
							
								
								
									
										22
									
								
								changedetectionio/jinja2_custom/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										22
									
								
								changedetectionio/jinja2_custom/__init__.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,22 @@
 | 
			
		||||
"""
 | 
			
		||||
Jinja2 custom extensions and safe rendering utilities.
 | 
			
		||||
"""
 | 
			
		||||
from .extensions.TimeExtension import TimeExtension
 | 
			
		||||
from .safe_jinja import (
 | 
			
		||||
    render,
 | 
			
		||||
    render_fully_escaped,
 | 
			
		||||
    create_jinja_env,
 | 
			
		||||
    JINJA2_MAX_RETURN_PAYLOAD_SIZE,
 | 
			
		||||
    DEFAULT_JINJA2_EXTENSIONS,
 | 
			
		||||
)
 | 
			
		||||
from .plugins.regex import regex_replace
 | 
			
		||||
 | 
			
		||||
__all__ = [
 | 
			
		||||
    'TimeExtension',
 | 
			
		||||
    'render',
 | 
			
		||||
    'render_fully_escaped',
 | 
			
		||||
    'create_jinja_env',
 | 
			
		||||
    'JINJA2_MAX_RETURN_PAYLOAD_SIZE',
 | 
			
		||||
    'DEFAULT_JINJA2_EXTENSIONS',
 | 
			
		||||
    'regex_replace',
 | 
			
		||||
]
 | 
			
		||||
							
								
								
									
										221
									
								
								changedetectionio/jinja2_custom/extensions/TimeExtension.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										221
									
								
								changedetectionio/jinja2_custom/extensions/TimeExtension.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,221 @@
 | 
			
		||||
"""
 | 
			
		||||
Jinja2 TimeExtension - Custom date/time handling for templates.
 | 
			
		||||
 | 
			
		||||
This extension provides the {% now %} tag for Jinja2 templates, offering timezone-aware
 | 
			
		||||
date/time formatting with support for time offsets.
 | 
			
		||||
 | 
			
		||||
Why This Extension Exists:
 | 
			
		||||
    The Arrow library has a now() function (arrow.now()), but Jinja2 templates cannot
 | 
			
		||||
    directly call Python functions - they need extensions or filters to expose functionality.
 | 
			
		||||
 | 
			
		||||
    This TimeExtension serves as a Jinja2-to-Arrow bridge that:
 | 
			
		||||
 | 
			
		||||
    1. Makes Arrow accessible in templates - Jinja2 requires registering functions/tags
 | 
			
		||||
       through extensions. You cannot use arrow.now() directly in a template.
 | 
			
		||||
 | 
			
		||||
    2. Provides template-friendly syntax - Instead of complex Python code, you get clean tags:
 | 
			
		||||
       {% now 'UTC' %}
 | 
			
		||||
       {% now 'UTC' + 'hours=2' %}
 | 
			
		||||
       {% now 'Europe/London', '%Y-%m-%d' %}
 | 
			
		||||
 | 
			
		||||
    3. Adds convenience features on top of Arrow:
 | 
			
		||||
       - Default timezone from environment variable (TZ) or config
 | 
			
		||||
       - Default datetime format configuration
 | 
			
		||||
       - Offset syntax parsing: 'hours=2,minutes=30' → shift(hours=2, minutes=30)
 | 
			
		||||
       - Empty string timezone support to use configured defaults
 | 
			
		||||
 | 
			
		||||
    4. Maintains security - Works within Jinja2's sandboxed environment so users
 | 
			
		||||
       cannot access arbitrary Python code or objects.
 | 
			
		||||
 | 
			
		||||
    Essentially, this is a Jinja2 wrapper around arrow.now() and arrow.shift() that
 | 
			
		||||
    provides user-friendly template syntax while maintaining security.
 | 
			
		||||
 | 
			
		||||
Basic Usage:
 | 
			
		||||
    {% now 'UTC' %}
 | 
			
		||||
    # Output: Wed, 09 Dec 2015 23:33:01
 | 
			
		||||
 | 
			
		||||
Custom Format:
 | 
			
		||||
    {% now 'UTC', '%Y-%m-%d %H:%M:%S' %}
 | 
			
		||||
    # Output: 2015-12-09 23:33:01
 | 
			
		||||
 | 
			
		||||
Timezone Support:
 | 
			
		||||
    {% now 'America/New_York' %}
 | 
			
		||||
    {% now 'Europe/London' %}
 | 
			
		||||
    {% now '' %}  # Uses default timezone from environment.default_timezone
 | 
			
		||||
 | 
			
		||||
Time Offsets (Addition):
 | 
			
		||||
    {% now 'UTC' + 'hours=2' %}
 | 
			
		||||
    {% now 'UTC' + 'hours=2,minutes=30' %}
 | 
			
		||||
    {% now 'UTC' + 'days=1,hours=2,minutes=15,seconds=10' %}
 | 
			
		||||
 | 
			
		||||
Time Offsets (Subtraction):
 | 
			
		||||
    {% now 'UTC' - 'minutes=11' %}
 | 
			
		||||
    {% now 'UTC' - 'days=2,minutes=33,seconds=1' %}
 | 
			
		||||
 | 
			
		||||
Time Offsets with Custom Format:
 | 
			
		||||
    {% now 'UTC' + 'hours=2', '%Y-%m-%d %H:%M:%S' %}
 | 
			
		||||
    # Output: 2015-12-10 01:33:01
 | 
			
		||||
 | 
			
		||||
Weekday Support (for finding next/previous weekday):
 | 
			
		||||
    {% now 'UTC' + 'weekday=0' %}  # Next Monday (0=Monday, 6=Sunday)
 | 
			
		||||
    {% now 'UTC' + 'weekday=4' %}  # Next Friday
 | 
			
		||||
 | 
			
		||||
Configuration:
 | 
			
		||||
    - Default timezone: Set via TZ environment variable or override environment.default_timezone
 | 
			
		||||
    - Default format: '%a, %d %b %Y %H:%M:%S' (can be overridden via environment.datetime_format)
 | 
			
		||||
 | 
			
		||||
Environment Customization:
 | 
			
		||||
    from changedetectionio.jinja2_custom import create_jinja_env
 | 
			
		||||
 | 
			
		||||
    jinja2_env = create_jinja_env()
 | 
			
		||||
    jinja2_env.default_timezone = 'America/New_York'  # Override default timezone
 | 
			
		||||
    jinja2_env.datetime_format = '%Y-%m-%d %H:%M'      # Override default format
 | 
			
		||||
 | 
			
		||||
Supported Offset Parameters:
 | 
			
		||||
    - years, months, weeks, days
 | 
			
		||||
    - hours, minutes, seconds, microseconds
 | 
			
		||||
    - weekday (0=Monday through 6=Sunday, must be integer)
 | 
			
		||||
 | 
			
		||||
Note:
 | 
			
		||||
    This extension uses the Arrow library for timezone-aware datetime handling.
 | 
			
		||||
    All timezone names should be valid IANA timezone identifiers (e.g., 'America/New_York').
 | 
			
		||||
"""
 | 
			
		||||
import arrow
 | 
			
		||||
 | 
			
		||||
from jinja2 import nodes
 | 
			
		||||
from jinja2.ext import Extension
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
class TimeExtension(Extension):
 | 
			
		||||
    """
 | 
			
		||||
    Jinja2 Extension providing the {% now %} tag for timezone-aware date/time rendering.
 | 
			
		||||
 | 
			
		||||
    This extension adds two attributes to the Jinja2 environment:
 | 
			
		||||
    - datetime_format: Default strftime format string (default: '%a, %d %b %Y %H:%M:%S')
 | 
			
		||||
    - default_timezone: Default timezone for rendering (default: TZ env var or 'UTC')
 | 
			
		||||
 | 
			
		||||
    Both can be overridden after environment creation by setting the attributes directly.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    tags = {'now'}
 | 
			
		||||
 | 
			
		||||
    def __init__(self, environment):
 | 
			
		||||
        """Jinja2 Extension constructor."""
 | 
			
		||||
        super().__init__(environment)
 | 
			
		||||
 | 
			
		||||
        environment.extend(
 | 
			
		||||
            datetime_format='%a, %d %b %Y %H:%M:%S',
 | 
			
		||||
            default_timezone=os.getenv('TZ', 'UTC').strip()
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
    def _datetime(self, timezone, operator, offset, datetime_format):
 | 
			
		||||
        """
 | 
			
		||||
        Get current datetime with time offset applied.
 | 
			
		||||
 | 
			
		||||
        Args:
 | 
			
		||||
            timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
 | 
			
		||||
            operator: '+' for addition or '-' for subtraction
 | 
			
		||||
            offset: Comma-separated offset parameters (e.g., 'hours=2,minutes=30')
 | 
			
		||||
            datetime_format: strftime format string or None to use environment default
 | 
			
		||||
 | 
			
		||||
        Returns:
 | 
			
		||||
            Formatted datetime string with offset applied
 | 
			
		||||
 | 
			
		||||
        Example:
 | 
			
		||||
            _datetime('UTC', '+', 'hours=2,minutes=30', '%Y-%m-%d %H:%M:%S')
 | 
			
		||||
            # Returns current time + 2.5 hours
 | 
			
		||||
        """
 | 
			
		||||
        # Use default timezone if none specified
 | 
			
		||||
        if not timezone or timezone == '':
 | 
			
		||||
            timezone = self.environment.default_timezone
 | 
			
		||||
 | 
			
		||||
        d = arrow.now(timezone)
 | 
			
		||||
 | 
			
		||||
        # parse shift params from offset and include operator
 | 
			
		||||
        shift_params = {}
 | 
			
		||||
        for param in offset.split(','):
 | 
			
		||||
            interval, value = param.split('=')
 | 
			
		||||
            shift_params[interval.strip()] = float(operator + value.strip())
 | 
			
		||||
 | 
			
		||||
        # Fix weekday parameter can not be float
 | 
			
		||||
        if 'weekday' in shift_params:
 | 
			
		||||
            shift_params['weekday'] = int(shift_params['weekday'])
 | 
			
		||||
 | 
			
		||||
        d = d.shift(**shift_params)
 | 
			
		||||
 | 
			
		||||
        if datetime_format is None:
 | 
			
		||||
            datetime_format = self.environment.datetime_format
 | 
			
		||||
        return d.strftime(datetime_format)
 | 
			
		||||
 | 
			
		||||
    def _now(self, timezone, datetime_format):
 | 
			
		||||
        """
 | 
			
		||||
        Get current datetime without any offset.
 | 
			
		||||
 | 
			
		||||
        Args:
 | 
			
		||||
            timezone: IANA timezone identifier (e.g., 'UTC', 'America/New_York') or empty string for default
 | 
			
		||||
            datetime_format: strftime format string or None to use environment default
 | 
			
		||||
 | 
			
		||||
        Returns:
 | 
			
		||||
            Formatted datetime string for current time
 | 
			
		||||
 | 
			
		||||
        Example:
 | 
			
		||||
            _now('America/New_York', '%Y-%m-%d %H:%M:%S')
 | 
			
		||||
            # Returns current time in New York timezone
 | 
			
		||||
        """
 | 
			
		||||
        # Use default timezone if none specified
 | 
			
		||||
        if not timezone or timezone == '':
 | 
			
		||||
            timezone = self.environment.default_timezone
 | 
			
		||||
 | 
			
		||||
        if datetime_format is None:
 | 
			
		||||
            datetime_format = self.environment.datetime_format
 | 
			
		||||
        return arrow.now(timezone).strftime(datetime_format)
 | 
			
		||||
 | 
			
		||||
    def parse(self, parser):
 | 
			
		||||
        """
 | 
			
		||||
        Parse the {% now %} tag and generate appropriate AST nodes.
 | 
			
		||||
 | 
			
		||||
        This method is called by Jinja2 when it encounters a {% now %} tag.
 | 
			
		||||
        It parses the tag syntax and determines whether to call _now() or _datetime()
 | 
			
		||||
        based on whether offset operations (+ or -) are present.
 | 
			
		||||
 | 
			
		||||
        Supported syntax:
 | 
			
		||||
            {% now 'timezone' %}                              -> calls _now()
 | 
			
		||||
            {% now 'timezone', 'format' %}                    -> calls _now()
 | 
			
		||||
            {% now 'timezone' + 'offset' %}                   -> calls _datetime()
 | 
			
		||||
            {% now 'timezone' + 'offset', 'format' %}         -> calls _datetime()
 | 
			
		||||
            {% now 'timezone' - 'offset', 'format' %}         -> calls _datetime()
 | 
			
		||||
 | 
			
		||||
        Args:
 | 
			
		||||
            parser: Jinja2 parser instance
 | 
			
		||||
 | 
			
		||||
        Returns:
 | 
			
		||||
            nodes.Output: AST output node containing the formatted datetime string
 | 
			
		||||
        """
 | 
			
		||||
        lineno = next(parser.stream).lineno
 | 
			
		||||
 | 
			
		||||
        node = parser.parse_expression()
 | 
			
		||||
 | 
			
		||||
        if parser.stream.skip_if('comma'):
 | 
			
		||||
            datetime_format = parser.parse_expression()
 | 
			
		||||
        else:
 | 
			
		||||
            datetime_format = nodes.Const(None)
 | 
			
		||||
 | 
			
		||||
        if isinstance(node, nodes.Add):
 | 
			
		||||
            call_method = self.call_method(
 | 
			
		||||
                '_datetime',
 | 
			
		||||
                [node.left, nodes.Const('+'), node.right, datetime_format],
 | 
			
		||||
                lineno=lineno,
 | 
			
		||||
            )
 | 
			
		||||
        elif isinstance(node, nodes.Sub):
 | 
			
		||||
            call_method = self.call_method(
 | 
			
		||||
                '_datetime',
 | 
			
		||||
                [node.left, nodes.Const('-'), node.right, datetime_format],
 | 
			
		||||
                lineno=lineno,
 | 
			
		||||
            )
 | 
			
		||||
        else:
 | 
			
		||||
            call_method = self.call_method(
 | 
			
		||||
                '_now',
 | 
			
		||||
                [node, datetime_format],
 | 
			
		||||
                lineno=lineno,
 | 
			
		||||
            )
 | 
			
		||||
        return nodes.Output([call_method], lineno=lineno)
 | 
			
		||||
							
								
								
									
										6
									
								
								changedetectionio/jinja2_custom/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										6
									
								
								changedetectionio/jinja2_custom/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,6 @@
 | 
			
		||||
"""
 | 
			
		||||
Jinja2 custom filter plugins for changedetection.io
 | 
			
		||||
"""
 | 
			
		||||
from .regex import regex_replace
 | 
			
		||||
 | 
			
		||||
__all__ = ['regex_replace']
 | 
			
		||||
							
								
								
									
										98
									
								
								changedetectionio/jinja2_custom/plugins/regex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								changedetectionio/jinja2_custom/plugins/regex.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,98 @@
 | 
			
		||||
"""
 | 
			
		||||
Regex filter plugin for Jinja2 templates.
 | 
			
		||||
 | 
			
		||||
Provides regex_replace filter for pattern-based string replacements in templates.
 | 
			
		||||
"""
 | 
			
		||||
import re
 | 
			
		||||
import signal
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def regex_replace(value: str, pattern: str, replacement: str = '', count: int = 0) -> str:
 | 
			
		||||
    """
 | 
			
		||||
    Replace occurrences of a regex pattern in a string.
 | 
			
		||||
 | 
			
		||||
    Security: Protected against ReDoS (Regular Expression Denial of Service) attacks:
 | 
			
		||||
    - Limits input value size to prevent excessive processing
 | 
			
		||||
    - Uses timeout mechanism to prevent runaway regex operations
 | 
			
		||||
    - Validates pattern complexity to prevent catastrophic backtracking
 | 
			
		||||
 | 
			
		||||
    Args:
 | 
			
		||||
        value: The input string to perform replacements on
 | 
			
		||||
        pattern: The regex pattern to search for
 | 
			
		||||
        replacement: The replacement string (default: '')
 | 
			
		||||
        count: Maximum number of replacements (0 = replace all, default: 0)
 | 
			
		||||
 | 
			
		||||
    Returns:
 | 
			
		||||
        String with replacements applied, or original value on error
 | 
			
		||||
 | 
			
		||||
    Example:
 | 
			
		||||
        {{ "hello world" | regex_replace("world", "universe") }}
 | 
			
		||||
        {{ diff | regex_replace("<td>([^<]+)</td><td>([^<]+)</td>", "Label1: \\1\\nLabel2: \\2") }}
 | 
			
		||||
 | 
			
		||||
    Security limits:
 | 
			
		||||
        - Maximum input size: 10MB
 | 
			
		||||
        - Maximum pattern length: 500 characters
 | 
			
		||||
        - Operation timeout: 10 seconds
 | 
			
		||||
        - Dangerous nested quantifier patterns are rejected
 | 
			
		||||
    """
 | 
			
		||||
    # Security limits
 | 
			
		||||
    MAX_INPUT_SIZE = 1024 * 1024 * 10 # 10MB max input size
 | 
			
		||||
    MAX_PATTERN_LENGTH = 500  # Maximum regex pattern length
 | 
			
		||||
    REGEX_TIMEOUT_SECONDS = 10  # Maximum time for regex operation
 | 
			
		||||
 | 
			
		||||
    # Validate input sizes
 | 
			
		||||
    value_str = str(value)
 | 
			
		||||
    if len(value_str) > MAX_INPUT_SIZE:
 | 
			
		||||
        logger.warning(f"regex_replace: Input too large ({len(value_str)} bytes), truncating")
 | 
			
		||||
        value_str = value_str[:MAX_INPUT_SIZE]
 | 
			
		||||
 | 
			
		||||
    if len(pattern) > MAX_PATTERN_LENGTH:
 | 
			
		||||
        logger.warning(f"regex_replace: Pattern too long ({len(pattern)} chars), rejecting")
 | 
			
		||||
        return value_str
 | 
			
		||||
 | 
			
		||||
    # Check for potentially dangerous patterns (basic checks)
 | 
			
		||||
    # Nested quantifiers like (a+)+ can cause catastrophic backtracking
 | 
			
		||||
    dangerous_patterns = [
 | 
			
		||||
        r'\([^)]*\+[^)]*\)\+',  # (x+)+
 | 
			
		||||
        r'\([^)]*\*[^)]*\)\+',  # (x*)+
 | 
			
		||||
        r'\([^)]*\+[^)]*\)\*',  # (x+)*
 | 
			
		||||
        r'\([^)]*\*[^)]*\)\*',  # (x*)*
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    for dangerous in dangerous_patterns:
 | 
			
		||||
        if re.search(dangerous, pattern):
 | 
			
		||||
            logger.warning(f"regex_replace: Potentially dangerous pattern detected: {pattern}")
 | 
			
		||||
            return value_str
 | 
			
		||||
 | 
			
		||||
    def timeout_handler(signum, frame):
 | 
			
		||||
        raise TimeoutError("Regex operation timed out")
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        # Set up timeout for regex operation (Unix-like systems only)
 | 
			
		||||
        # This prevents ReDoS attacks
 | 
			
		||||
        old_handler = None
 | 
			
		||||
        if hasattr(signal, 'SIGALRM'):
 | 
			
		||||
            old_handler = signal.signal(signal.SIGALRM, timeout_handler)
 | 
			
		||||
            signal.alarm(REGEX_TIMEOUT_SECONDS)
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            result = re.sub(pattern, replacement, value_str, count=count)
 | 
			
		||||
        finally:
 | 
			
		||||
            # Cancel the alarm
 | 
			
		||||
            if hasattr(signal, 'SIGALRM'):
 | 
			
		||||
                signal.alarm(0)
 | 
			
		||||
                if old_handler is not None:
 | 
			
		||||
                    signal.signal(signal.SIGALRM, old_handler)
 | 
			
		||||
 | 
			
		||||
        return result
 | 
			
		||||
 | 
			
		||||
    except TimeoutError:
 | 
			
		||||
        logger.error(f"regex_replace: Regex operation timed out - possible ReDoS attack. Pattern: {pattern}")
 | 
			
		||||
        return value_str
 | 
			
		||||
    except re.error as e:
 | 
			
		||||
        logger.warning(f"regex_replace: Invalid regex pattern: {e}")
 | 
			
		||||
        return value_str
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.error(f"regex_replace: Unexpected error: {e}")
 | 
			
		||||
        return value_str
 | 
			
		||||
							
								
								
									
										58
									
								
								changedetectionio/jinja2_custom/safe_jinja.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								changedetectionio/jinja2_custom/safe_jinja.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,58 @@
 | 
			
		||||
"""
 | 
			
		||||
Safe Jinja2 render with max payload sizes
 | 
			
		||||
 | 
			
		||||
See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import jinja2.sandbox
 | 
			
		||||
import typing as t
 | 
			
		||||
import os
 | 
			
		||||
from .extensions.TimeExtension import TimeExtension
 | 
			
		||||
from .plugins import regex_replace
 | 
			
		||||
 | 
			
		||||
JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))
 | 
			
		||||
 | 
			
		||||
# Default extensions - can be overridden in create_jinja_env()
 | 
			
		||||
DEFAULT_JINJA2_EXTENSIONS = [TimeExtension]
 | 
			
		||||
 | 
			
		||||
def create_jinja_env(extensions=None, **kwargs) -> jinja2.sandbox.ImmutableSandboxedEnvironment:
 | 
			
		||||
    """
 | 
			
		||||
    Create a sandboxed Jinja2 environment with our custom extensions and default timezone.
 | 
			
		||||
 | 
			
		||||
    Args:
 | 
			
		||||
        extensions: List of extension classes to use (defaults to DEFAULT_JINJA2_EXTENSIONS)
 | 
			
		||||
        **kwargs: Additional arguments to pass to ImmutableSandboxedEnvironment
 | 
			
		||||
 | 
			
		||||
    Returns:
 | 
			
		||||
        Configured Jinja2 environment
 | 
			
		||||
    """
 | 
			
		||||
    if extensions is None:
 | 
			
		||||
        extensions = DEFAULT_JINJA2_EXTENSIONS
 | 
			
		||||
 | 
			
		||||
    jinja2_env = jinja2.sandbox.ImmutableSandboxedEnvironment(
 | 
			
		||||
        extensions=extensions,
 | 
			
		||||
        **kwargs
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    # Get default timezone from environment variable
 | 
			
		||||
    default_timezone = os.getenv('TZ', 'UTC').strip()
 | 
			
		||||
    jinja2_env.default_timezone = default_timezone
 | 
			
		||||
 | 
			
		||||
    # Register custom filters
 | 
			
		||||
    jinja2_env.filters['regex_replace'] = regex_replace
 | 
			
		||||
 | 
			
		||||
    return jinja2_env
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# This is used for notifications etc, so actually it's OK to send custom HTML such as <a href> etc, but it should limit what data is available.
 | 
			
		||||
# (Which also limits available functions that could be called)
 | 
			
		||||
def render(template_str, **args: t.Any) -> str:
 | 
			
		||||
    jinja2_env = create_jinja_env()
 | 
			
		||||
    output = jinja2_env.from_string(template_str).render(args)
 | 
			
		||||
    return output[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
 | 
			
		||||
 | 
			
		||||
def render_fully_escaped(content):
 | 
			
		||||
    env = jinja2.sandbox.ImmutableSandboxedEnvironment(autoescape=True)
 | 
			
		||||
    template = env.from_string("{{ some_html|e }}")
 | 
			
		||||
    return template.render(some_html=content)
 | 
			
		||||
 | 
			
		||||
@@ -1,4 +1,5 @@
 | 
			
		||||
from os import getenv
 | 
			
		||||
from copy import deepcopy
 | 
			
		||||
 | 
			
		||||
from changedetectionio.blueprint.rss import RSS_FORMAT_TYPES
 | 
			
		||||
 | 
			
		||||
@@ -39,12 +40,12 @@ class model(dict):
 | 
			
		||||
                    'api_access_token_enabled': True,
 | 
			
		||||
                    'base_url' : None,
 | 
			
		||||
                    'empty_pages_are_a_change': False,
 | 
			
		||||
                    'extract_title_as_title': False,
 | 
			
		||||
                    'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
 | 
			
		||||
                    'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
 | 
			
		||||
                    'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
 | 
			
		||||
                    'global_subtractive_selectors': [],
 | 
			
		||||
                    'ignore_whitespace': True,
 | 
			
		||||
                    'ignore_status_codes': False, #@todo implement, as ternary.
 | 
			
		||||
                    'notification_body': default_notification_body,
 | 
			
		||||
                    'notification_format': default_notification_format,
 | 
			
		||||
                    'notification_title': default_notification_title,
 | 
			
		||||
@@ -55,13 +56,18 @@ class model(dict):
 | 
			
		||||
                    'rss_access_token': None,
 | 
			
		||||
                    'rss_content_format': RSS_FORMAT_TYPES[0][0],
 | 
			
		||||
                    'rss_hide_muted_watches': True,
 | 
			
		||||
                    'rss_reader_mode': False,
 | 
			
		||||
                    'scheduler_timezone_default': None,  # Default IANA timezone name
 | 
			
		||||
                    'schema_version' : 0,
 | 
			
		||||
                    'shared_diff_access': False,
 | 
			
		||||
                    'webdriver_delay': None , # Extra delay in seconds before extracting text
 | 
			
		||||
                    'strip_ignored_lines': False,
 | 
			
		||||
                    'tags': {}, #@todo use Tag.model initialisers
 | 
			
		||||
                    'timezone': None, # Default IANA timezone name
 | 
			
		||||
                    'webdriver_delay': None , # Extra delay in seconds before extracting text
 | 
			
		||||
                    'ui': {
 | 
			
		||||
                        'use_page_title_in_list': True,
 | 
			
		||||
                        'open_diff_in_new_tab': True,
 | 
			
		||||
                        'socket_io_enabled': True,
 | 
			
		||||
                        'favicons_enabled': True
 | 
			
		||||
                    },
 | 
			
		||||
                }
 | 
			
		||||
            }
 | 
			
		||||
@@ -69,7 +75,8 @@ class model(dict):
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *arg, **kw):
 | 
			
		||||
        super(model, self).__init__(*arg, **kw)
 | 
			
		||||
        self.update(self.base_config)
 | 
			
		||||
        # CRITICAL: deepcopy to avoid sharing mutable objects between instances
 | 
			
		||||
        self.update(deepcopy(self.base_config))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def parse_headers_from_text_file(filepath):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,37 +1,24 @@
 | 
			
		||||
from blinker import signal
 | 
			
		||||
from changedetectionio.validate_url import is_safe_valid_url
 | 
			
		||||
 | 
			
		||||
from changedetectionio.strtobool import strtobool
 | 
			
		||||
from changedetectionio.safe_jinja import render as jinja_render
 | 
			
		||||
from changedetectionio.jinja2_custom import render as jinja_render
 | 
			
		||||
from . import watch_base
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
from pathlib import Path
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
from .. import jinja2_custom as safe_jinja
 | 
			
		||||
from ..diff import ADDED_PLACEMARKER_OPEN
 | 
			
		||||
from ..html_tools import TRANSLATE_WHITESPACE_TABLE
 | 
			
		||||
 | 
			
		||||
# Allowable protocols, protects against javascript: etc
 | 
			
		||||
# file:// is further checked by ALLOW_FILE_URI
 | 
			
		||||
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
 | 
			
		||||
FAVICON_RESAVE_THRESHOLD_SECONDS=86400
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 3))
 | 
			
		||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def is_safe_url(test_url):
 | 
			
		||||
    # See https://github.com/dgtlmoon/changedetection.io/issues/1358
 | 
			
		||||
 | 
			
		||||
    # Remove 'source:' prefix so we dont get 'source:javascript:' etc
 | 
			
		||||
    # 'source:' is a valid way to tell us to return the source
 | 
			
		||||
 | 
			
		||||
    r = re.compile(re.escape('source:'), re.IGNORECASE)
 | 
			
		||||
    test_url = r.sub('', test_url)
 | 
			
		||||
 | 
			
		||||
    pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE)
 | 
			
		||||
    if not pattern.match(test_url.strip()):
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    return True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class model(watch_base):
 | 
			
		||||
    __newest_history_key = None
 | 
			
		||||
    __history_n = 0
 | 
			
		||||
@@ -41,6 +28,7 @@ class model(watch_base):
 | 
			
		||||
        self.__datastore_path = kw.get('datastore_path')
 | 
			
		||||
        if kw.get('datastore_path'):
 | 
			
		||||
            del kw['datastore_path']
 | 
			
		||||
            
 | 
			
		||||
        super(model, self).__init__(*arg, **kw)
 | 
			
		||||
        if kw.get('default'):
 | 
			
		||||
            self.update(kw['default'])
 | 
			
		||||
@@ -60,6 +48,10 @@ class model(watch_base):
 | 
			
		||||
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def has_unviewed(self):
 | 
			
		||||
        return int(self.newest_history_key) > int(self['last_viewed']) and self.__history_n >= 2
 | 
			
		||||
 | 
			
		||||
    def ensure_data_dir_exists(self):
 | 
			
		||||
        if not os.path.isdir(self.watch_data_dir):
 | 
			
		||||
            logger.debug(f"> Creating data dir {self.watch_data_dir}")
 | 
			
		||||
@@ -69,7 +61,7 @@ class model(watch_base):
 | 
			
		||||
    def link(self):
 | 
			
		||||
 | 
			
		||||
        url = self.get('url', '')
 | 
			
		||||
        if not is_safe_url(url):
 | 
			
		||||
        if not is_safe_valid_url(url):
 | 
			
		||||
            return 'DISABLED'
 | 
			
		||||
 | 
			
		||||
        ready_url = url
 | 
			
		||||
@@ -79,9 +71,8 @@ class model(watch_base):
 | 
			
		||||
                ready_url = jinja_render(template_str=url)
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logger.critical(f"Invalid URL template for: '{url}' - {str(e)}")
 | 
			
		||||
                from flask import (
 | 
			
		||||
                    flash, Markup, url_for
 | 
			
		||||
                )
 | 
			
		||||
                from flask import flash, url_for
 | 
			
		||||
                from markupsafe import Markup
 | 
			
		||||
                message = Markup('<a href="{}#general">The URL {} is invalid and cannot be used, click to edit</a>'.format(
 | 
			
		||||
                    url_for('ui.ui_edit.edit_page', uuid=self.get('uuid')), self.get('url', '')))
 | 
			
		||||
                flash(message, 'error')
 | 
			
		||||
@@ -91,10 +82,17 @@ class model(watch_base):
 | 
			
		||||
            ready_url=ready_url.replace('source:', '')
 | 
			
		||||
 | 
			
		||||
        # Also double check it after any Jinja2 formatting just incase
 | 
			
		||||
        if not is_safe_url(ready_url):
 | 
			
		||||
        if not is_safe_valid_url(ready_url):
 | 
			
		||||
            return 'DISABLED'
 | 
			
		||||
        return ready_url
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def domain_only_from_link(self):
 | 
			
		||||
        from urllib.parse import urlparse
 | 
			
		||||
        parsed = urlparse(self.link)
 | 
			
		||||
        domain = parsed.hostname
 | 
			
		||||
        return domain
 | 
			
		||||
 | 
			
		||||
    def clear_watch(self):
 | 
			
		||||
        import pathlib
 | 
			
		||||
 | 
			
		||||
@@ -120,6 +118,10 @@ class model(watch_base):
 | 
			
		||||
            'remote_server_reply': None,
 | 
			
		||||
            'track_ldjson_price_data': None
 | 
			
		||||
        })
 | 
			
		||||
        watch_check_update = signal('watch_check_update')
 | 
			
		||||
        if watch_check_update:
 | 
			
		||||
            watch_check_update.send(watch_uuid=self.get('uuid'))
 | 
			
		||||
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
@@ -148,8 +150,8 @@ class model(watch_base):
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def label(self):
 | 
			
		||||
        # Used for sorting
 | 
			
		||||
        return self.get('title') if self.get('title') else self.get('url')
 | 
			
		||||
        # Used for sorting, display, etc
 | 
			
		||||
        return self.get('title') or self.get('page_title') or self.link
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def last_changed(self):
 | 
			
		||||
@@ -401,6 +403,154 @@ class model(watch_base):
 | 
			
		||||
        # False is not an option for AppRise, must be type None
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    def favicon_is_expired(self):
 | 
			
		||||
        favicon_fname = self.get_favicon_filename()
 | 
			
		||||
        import glob
 | 
			
		||||
        import time
 | 
			
		||||
 | 
			
		||||
        if not favicon_fname:
 | 
			
		||||
            return True
 | 
			
		||||
        try:
 | 
			
		||||
            fname = next(iter(glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))), None)
 | 
			
		||||
            logger.trace(f"Favicon file maybe found at {fname}")
 | 
			
		||||
            if os.path.isfile(fname):
 | 
			
		||||
                file_age = int(time.time() - os.path.getmtime(fname))
 | 
			
		||||
                logger.trace(f"Favicon file age is {file_age}s")
 | 
			
		||||
                if file_age < FAVICON_RESAVE_THRESHOLD_SECONDS:
 | 
			
		||||
                    return False
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.critical(f"Exception checking Favicon age {str(e)}")
 | 
			
		||||
            return True
 | 
			
		||||
 | 
			
		||||
        # Also in the case that the file didnt exist
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    def bump_favicon(self, url, favicon_base_64: str) -> None:
 | 
			
		||||
        from urllib.parse import urlparse
 | 
			
		||||
        import base64
 | 
			
		||||
        import binascii
 | 
			
		||||
        decoded = None
 | 
			
		||||
 | 
			
		||||
        if url:
 | 
			
		||||
            try:
 | 
			
		||||
                parsed = urlparse(url)
 | 
			
		||||
                filename = os.path.basename(parsed.path)
 | 
			
		||||
                (base, extension) = filename.lower().strip().rsplit('.', 1)
 | 
			
		||||
            except ValueError:
 | 
			
		||||
                logger.error(f"UUID: {self.get('uuid')} Cant work out file extension from '{url}'")
 | 
			
		||||
                return None
 | 
			
		||||
        else:
 | 
			
		||||
            # Assume favicon.ico
 | 
			
		||||
            base = "favicon"
 | 
			
		||||
            extension = "ico"
 | 
			
		||||
 | 
			
		||||
        fname = os.path.join(self.watch_data_dir, f"favicon.{extension}")
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            # validate=True makes sure the string only contains valid base64 chars
 | 
			
		||||
            decoded = base64.b64decode(favicon_base_64, validate=True)
 | 
			
		||||
        except (binascii.Error, ValueError) as e:
 | 
			
		||||
            logger.warning(f"UUID: {self.get('uuid')} FavIcon save data (Base64) corrupt? {str(e)}")
 | 
			
		||||
        else:
 | 
			
		||||
            if decoded:
 | 
			
		||||
                try:
 | 
			
		||||
                    with open(fname, 'wb') as f:
 | 
			
		||||
                        f.write(decoded)
 | 
			
		||||
                    # A signal that could trigger the socket server to update the browser also
 | 
			
		||||
                    watch_check_update = signal('watch_favicon_bump')
 | 
			
		||||
                    if watch_check_update:
 | 
			
		||||
                        watch_check_update.send(watch_uuid=self.get('uuid'))
 | 
			
		||||
 | 
			
		||||
                except Exception as e:
 | 
			
		||||
                    logger.warning(f"UUID: {self.get('uuid')} error saving FavIcon to {fname} - {str(e)}")
 | 
			
		||||
 | 
			
		||||
        # @todo - Store some checksum and only write when its different
 | 
			
		||||
        logger.debug(f"UUID: {self.get('uuid')} updated favicon to at {fname}")
 | 
			
		||||
 | 
			
		||||
    def get_favicon_filename(self) -> str | None:
 | 
			
		||||
        """
 | 
			
		||||
        Find any favicon.* file in the current working directory
 | 
			
		||||
        and return the contents of the newest one.
 | 
			
		||||
 | 
			
		||||
        Returns:
 | 
			
		||||
            bytes: Contents of the newest favicon file, or None if not found.
 | 
			
		||||
        """
 | 
			
		||||
        import glob
 | 
			
		||||
 | 
			
		||||
        # Search for all favicon.* files
 | 
			
		||||
        files = glob.glob(os.path.join(self.watch_data_dir, "favicon.*"))
 | 
			
		||||
 | 
			
		||||
        if not files:
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        # Find the newest by modification time
 | 
			
		||||
        newest_file = max(files, key=os.path.getmtime)
 | 
			
		||||
        return os.path.basename(newest_file)
 | 
			
		||||
 | 
			
		||||
    def get_screenshot_as_thumbnail(self, max_age=3200):
 | 
			
		||||
        """Return path to a square thumbnail of the most recent screenshot.
 | 
			
		||||
 | 
			
		||||
        Creates a 150x150 pixel thumbnail from the top portion of the screenshot.
 | 
			
		||||
 | 
			
		||||
        Args:
 | 
			
		||||
            max_age: Maximum age in seconds before recreating thumbnail
 | 
			
		||||
 | 
			
		||||
        Returns:
 | 
			
		||||
            Path to thumbnail or None if no screenshot exists
 | 
			
		||||
        """
 | 
			
		||||
        import os
 | 
			
		||||
        import time
 | 
			
		||||
 | 
			
		||||
        thumbnail_path = os.path.join(self.watch_data_dir, "thumbnail.jpeg")
 | 
			
		||||
        top_trim = 500  # Pixels from top of screenshot to use
 | 
			
		||||
 | 
			
		||||
        screenshot_path = self.get_screenshot()
 | 
			
		||||
        if not screenshot_path:
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
        # Reuse thumbnail if it's fresh and screenshot hasn't changed
 | 
			
		||||
        if os.path.isfile(thumbnail_path):
 | 
			
		||||
            thumbnail_mtime = os.path.getmtime(thumbnail_path)
 | 
			
		||||
            screenshot_mtime = os.path.getmtime(screenshot_path)
 | 
			
		||||
 | 
			
		||||
            if screenshot_mtime <= thumbnail_mtime and time.time() - thumbnail_mtime < max_age:
 | 
			
		||||
                return thumbnail_path
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            from PIL import Image
 | 
			
		||||
 | 
			
		||||
            with Image.open(screenshot_path) as img:
 | 
			
		||||
                # Crop top portion first (full width, top_trim height)
 | 
			
		||||
                top_crop_height = min(top_trim, img.height)
 | 
			
		||||
                img = img.crop((0, 0, img.width, top_crop_height))
 | 
			
		||||
 | 
			
		||||
                # Create a smaller intermediate image (to reduce memory usage)
 | 
			
		||||
                aspect = img.width / img.height
 | 
			
		||||
                interim_width = min(top_trim, img.width)
 | 
			
		||||
                interim_height = int(interim_width / aspect) if aspect > 0 else top_trim
 | 
			
		||||
                img = img.resize((interim_width, interim_height), Image.NEAREST)
 | 
			
		||||
 | 
			
		||||
                # Convert to RGB if needed
 | 
			
		||||
                if img.mode != 'RGB':
 | 
			
		||||
                    img = img.convert('RGB')
 | 
			
		||||
 | 
			
		||||
                # Crop to square from top center
 | 
			
		||||
                square_size = min(img.width, img.height)
 | 
			
		||||
                left = (img.width - square_size) // 2
 | 
			
		||||
                img = img.crop((left, 0, left + square_size, square_size))
 | 
			
		||||
 | 
			
		||||
                # Final resize to exact thumbnail size with better filter
 | 
			
		||||
                img = img.resize((350, 350), Image.BILINEAR)
 | 
			
		||||
 | 
			
		||||
                # Save with optimized settings
 | 
			
		||||
                img.save(thumbnail_path, "JPEG", quality=75, optimize=True)
 | 
			
		||||
 | 
			
		||||
            return thumbnail_path
 | 
			
		||||
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.error(f"Error creating thumbnail for {self.get('uuid')}: {str(e)}")
 | 
			
		||||
            return None
 | 
			
		||||
 | 
			
		||||
    def __get_file_ctime(self, filename):
 | 
			
		||||
        fname = os.path.join(self.watch_data_dir, filename)
 | 
			
		||||
        if os.path.isfile(fname):
 | 
			
		||||
@@ -494,7 +644,7 @@ class model(watch_base):
 | 
			
		||||
                    if res:
 | 
			
		||||
                        if not csv_writer:
 | 
			
		||||
                            # A file on the disk can be transferred much faster via flask than a string reply
 | 
			
		||||
                            csv_output_filename = 'report.csv'
 | 
			
		||||
                            csv_output_filename = f"report-{self.get('uuid')}.csv"
 | 
			
		||||
                            f = open(os.path.join(self.watch_data_dir, csv_output_filename), 'w')
 | 
			
		||||
                            # @todo some headers in the future
 | 
			
		||||
                            #fieldnames = ['Epoch seconds', 'Date']
 | 
			
		||||
@@ -648,3 +798,44 @@ class model(watch_base):
 | 
			
		||||
            if step_n:
 | 
			
		||||
                available.append(step_n.group(1))
 | 
			
		||||
        return available
 | 
			
		||||
 | 
			
		||||
    def compile_error_texts(self, has_proxies=None):
 | 
			
		||||
        """Compile error texts for this watch.
 | 
			
		||||
        Accepts has_proxies parameter to ensure it works even outside app context"""
 | 
			
		||||
        from flask import url_for
 | 
			
		||||
        from markupsafe import Markup
 | 
			
		||||
 | 
			
		||||
        output = []  # Initialize as list since we're using append
 | 
			
		||||
        last_error = self.get('last_error','')
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            url_for('settings.settings_page')
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            has_app_context = False
 | 
			
		||||
        else:
 | 
			
		||||
            has_app_context = True
 | 
			
		||||
 | 
			
		||||
        # has app+request context, we can use url_for()
 | 
			
		||||
        if has_app_context:
 | 
			
		||||
            if last_error:
 | 
			
		||||
                if '403' in last_error:
 | 
			
		||||
                    if has_proxies:
 | 
			
		||||
                        output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try other proxies/location</a> '")))
 | 
			
		||||
                    else:
 | 
			
		||||
                        output.append(str(Markup(f"{last_error} - <a href=\"{url_for('settings.settings_page', uuid=self.get('uuid'))}\">Try adding external proxies/locations</a> '")))
 | 
			
		||||
                else:
 | 
			
		||||
                    output.append(str(Markup(last_error)))
 | 
			
		||||
 | 
			
		||||
            if self.get('last_notification_error'):
 | 
			
		||||
                output.append(str(Markup(f"<div class=\"notification-error\"><a href=\"{url_for('settings.notification_logs')}\">{ self.get('last_notification_error') }</a></div>")))
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            # Lo_Fi version - no app context, cant rely on Jinja2 Markup
 | 
			
		||||
            if last_error:
 | 
			
		||||
                output.append(safe_jinja.render_fully_escaped(last_error))
 | 
			
		||||
            if self.get('last_notification_error'):
 | 
			
		||||
                output.append(safe_jinja.render_fully_escaped(self.get('last_notification_error')))
 | 
			
		||||
 | 
			
		||||
        res = "\n".join(output)
 | 
			
		||||
        return res
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,8 @@ import os
 | 
			
		||||
import uuid
 | 
			
		||||
 | 
			
		||||
from changedetectionio import strtobool
 | 
			
		||||
default_notification_format_for_watch = 'System default'
 | 
			
		||||
USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH = 'System default'
 | 
			
		||||
CONDITIONS_MATCH_LOGIC_DEFAULT = 'ALL'
 | 
			
		||||
 | 
			
		||||
class watch_base(dict):
 | 
			
		||||
 | 
			
		||||
@@ -15,13 +16,14 @@ class watch_base(dict):
 | 
			
		||||
            'body': None,
 | 
			
		||||
            'browser_steps': [],
 | 
			
		||||
            'browser_steps_last_error_step': None,
 | 
			
		||||
            'conditions' : {},
 | 
			
		||||
            'conditions_match_logic': CONDITIONS_MATCH_LOGIC_DEFAULT,
 | 
			
		||||
            'check_count': 0,
 | 
			
		||||
            'check_unique_lines': False,  # On change-detected, compare against all history if its something new
 | 
			
		||||
            'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine.
 | 
			
		||||
            'content-type': None,
 | 
			
		||||
            'date_created': None,
 | 
			
		||||
            'extract_text': [],  # Extract text by regex after filters
 | 
			
		||||
            'extract_title_as_title': False,
 | 
			
		||||
            'fetch_backend': 'system',  # plaintext, playwright etc
 | 
			
		||||
            'fetch_time': 0.0,
 | 
			
		||||
            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
 | 
			
		||||
@@ -32,19 +34,22 @@ class watch_base(dict):
 | 
			
		||||
            'has_ldjson_price_data': None,
 | 
			
		||||
            'headers': {},  # Extra headers to send
 | 
			
		||||
            'ignore_text': [],  # List of text to ignore when calculating the comparison checksum
 | 
			
		||||
            'ignore_status_codes': None,
 | 
			
		||||
            'in_stock_only': True,  # Only trigger change on going to instock from out-of-stock
 | 
			
		||||
            'include_filters': [],
 | 
			
		||||
            'last_checked': 0,
 | 
			
		||||
            'last_error': False,
 | 
			
		||||
            'last_notification_error': None,
 | 
			
		||||
            'last_viewed': 0,  # history key value of the last viewed via the [diff] link
 | 
			
		||||
            'method': 'GET',
 | 
			
		||||
            'notification_alert_count': 0,
 | 
			
		||||
            'notification_body': None,
 | 
			
		||||
            'notification_format': default_notification_format_for_watch,
 | 
			
		||||
            'notification_format': USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
 | 
			
		||||
            'notification_muted': False,
 | 
			
		||||
            'notification_screenshot': False,  # Include the latest screenshot if available and supported by the apprise URL
 | 
			
		||||
            'notification_title': None,
 | 
			
		||||
            'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise)
 | 
			
		||||
            'page_title': None, # <title> from the page
 | 
			
		||||
            'paused': False,
 | 
			
		||||
            'previous_md5': False,
 | 
			
		||||
            'previous_md5_before_filters': False,  # Used for skipping changedetection entirely
 | 
			
		||||
@@ -53,6 +58,7 @@ class watch_base(dict):
 | 
			
		||||
            'proxy': None,  # Preferred proxy connection
 | 
			
		||||
            'remote_server_reply': None,  # From 'server' reply header
 | 
			
		||||
            'sort_text_alphabetically': False,
 | 
			
		||||
            'strip_ignored_lines': None,
 | 
			
		||||
            'subtractive_selectors': [],
 | 
			
		||||
            'tag': '',  # Old system of text name for a tag, to be removed
 | 
			
		||||
            'tags': [],  # list of UUIDs to App.Tags
 | 
			
		||||
@@ -118,12 +124,13 @@ class watch_base(dict):
 | 
			
		||||
                    }
 | 
			
		||||
                },
 | 
			
		||||
            },
 | 
			
		||||
            'title': None,
 | 
			
		||||
            'title': None, # An arbitrary field that overrides 'page_title'
 | 
			
		||||
            'track_ldjson_price_data': None,
 | 
			
		||||
            'trim_text_whitespace': False,
 | 
			
		||||
            'remove_duplicate_lines': False,
 | 
			
		||||
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
 | 
			
		||||
            'url': '',
 | 
			
		||||
            'use_page_title_in_list': None, # None = use system settings
 | 
			
		||||
            'uuid': str(uuid.uuid4()),
 | 
			
		||||
            'webdriver_delay': None,
 | 
			
		||||
            'webdriver_js_execute_code': None,  # Run before change-detection
 | 
			
		||||
 
 | 
			
		||||
@@ -1,35 +1,16 @@
 | 
			
		||||
from changedetectionio.model import default_notification_format_for_watch
 | 
			
		||||
from changedetectionio.model import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
 | 
			
		||||
 | 
			
		||||
ult_notification_format_for_watch = 'System default'
 | 
			
		||||
default_notification_format = 'HTML Color'
 | 
			
		||||
default_notification_format = 'htmlcolor'
 | 
			
		||||
default_notification_body = '{{watch_url}} had a change.\n---\n{{diff}}\n---\n'
 | 
			
		||||
default_notification_title = 'ChangeDetection.io Notification - {{watch_url}}'
 | 
			
		||||
 | 
			
		||||
# The values (markdown etc) are from apprise NotifyFormat,
 | 
			
		||||
# But to avoid importing the whole heavy module just use the same strings here.
 | 
			
		||||
valid_notification_formats = {
 | 
			
		||||
    'Text': 'text',
 | 
			
		||||
    'Markdown': 'markdown',
 | 
			
		||||
    'HTML': 'html',
 | 
			
		||||
    'HTML Color': 'htmlcolor',
 | 
			
		||||
    'text': 'Plain Text',
 | 
			
		||||
    'html': 'HTML',
 | 
			
		||||
    'htmlcolor': 'HTML Color',
 | 
			
		||||
    'markdown': 'Markdown to HTML',
 | 
			
		||||
    # Used only for editing a watch (not for global)
 | 
			
		||||
    default_notification_format_for_watch: default_notification_format_for_watch
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
valid_tokens = {
 | 
			
		||||
    'base_url': '',
 | 
			
		||||
    'current_snapshot': '',
 | 
			
		||||
    'diff': '',
 | 
			
		||||
    'diff_added': '',
 | 
			
		||||
    'diff_full': '',
 | 
			
		||||
    'diff_patch': '',
 | 
			
		||||
    'diff_removed': '',
 | 
			
		||||
    'diff_url': '',
 | 
			
		||||
    'preview_url': '',
 | 
			
		||||
    'triggered_text': '',
 | 
			
		||||
    'watch_tag': '',
 | 
			
		||||
    'watch_title': '',
 | 
			
		||||
    'watch_url': '',
 | 
			
		||||
    'watch_uuid': '',
 | 
			
		||||
    USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH: USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
 | 
			
		||||
}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,61 @@
 | 
			
		||||
"""
 | 
			
		||||
Custom Apprise HTTP Handlers with format= Parameter Support
 | 
			
		||||
 | 
			
		||||
IMPORTANT: This module works around a limitation in Apprise's @notify decorator.
 | 
			
		||||
 | 
			
		||||
THE PROBLEM:
 | 
			
		||||
-------------
 | 
			
		||||
When using Apprise's @notify decorator to create custom notification handlers, the
 | 
			
		||||
decorator creates a CustomNotifyPlugin that uses parse_url(..., simple=True) to parse
 | 
			
		||||
URLs. This simple parsing mode does NOT extract the format= query parameter from the URL
 | 
			
		||||
and set it as a top-level parameter that NotifyBase.__init__ can use to set notify_format.
 | 
			
		||||
 | 
			
		||||
As a result:
 | 
			
		||||
1. URL: post://example.com/webhook?format=html
 | 
			
		||||
2. Apprise parses this and sees format=html in qsd (query string dictionary)
 | 
			
		||||
3. But it does NOT extract it and pass it to NotifyBase.__init__
 | 
			
		||||
4. NotifyBase defaults to notify_format=TEXT
 | 
			
		||||
5. When you call apobj.notify(body="<html>...", body_format="html"):
 | 
			
		||||
   - Apprise sees: input format = html, output format (notify_format) = text
 | 
			
		||||
   - Apprise calls convert_between("html", "text", body)
 | 
			
		||||
   - This strips all HTML tags, leaving only plain text
 | 
			
		||||
6. Your custom handler receives stripped plain text instead of HTML
 | 
			
		||||
 | 
			
		||||
THE SOLUTION:
 | 
			
		||||
-------------
 | 
			
		||||
Instead of using the @notify decorator directly, we:
 | 
			
		||||
1. Manually register custom plugins using plugins.N_MGR.add()
 | 
			
		||||
2. Create a CustomHTTPHandler class that extends CustomNotifyPlugin
 | 
			
		||||
3. Override __init__ to extract format= from qsd and set it as kwargs['format']
 | 
			
		||||
4. Call NotifyBase.__init__ which properly sets notify_format from kwargs['format']
 | 
			
		||||
5. Set up _default_args like CustomNotifyPlugin does for compatibility
 | 
			
		||||
 | 
			
		||||
This ensures that when format=html is in the URL:
 | 
			
		||||
- notify_format is set to HTML
 | 
			
		||||
- Apprise sees: input format = html, output format = html
 | 
			
		||||
- No conversion happens (convert_between returns content unchanged)
 | 
			
		||||
- Your custom handler receives the original HTML intact
 | 
			
		||||
 | 
			
		||||
TESTING:
 | 
			
		||||
--------
 | 
			
		||||
To verify this works:
 | 
			
		||||
>>> apobj = apprise.Apprise()
 | 
			
		||||
>>> apobj.add('post://localhost:5005/test?format=html')
 | 
			
		||||
>>> for server in apobj:
 | 
			
		||||
...     print(server.notify_format)  # Should print: html (not text)
 | 
			
		||||
>>> apobj.notify(body='<span>Test</span>', body_format='html')
 | 
			
		||||
# Your handler should receive '<span>Test</span>' not 'Test'
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
from urllib.parse import unquote_plus
 | 
			
		||||
 | 
			
		||||
import requests
 | 
			
		||||
from apprise.decorators import notify
 | 
			
		||||
from apprise.utils.parse import parse_url as apprise_parse_url
 | 
			
		||||
from apprise import plugins
 | 
			
		||||
from apprise.decorators.base import CustomNotifyPlugin
 | 
			
		||||
from apprise.utils.parse import parse_url as apprise_parse_url, url_assembly
 | 
			
		||||
from apprise.utils.logic import dict_full_update
 | 
			
		||||
from loguru import logger
 | 
			
		||||
from requests.structures import CaseInsensitiveDict
 | 
			
		||||
 | 
			
		||||
@@ -12,13 +63,66 @@ SUPPORTED_HTTP_METHODS = {"get", "post", "put", "delete", "patch", "head"}
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def notify_supported_methods(func):
 | 
			
		||||
    """Register custom HTTP method handlers that properly support format= parameter."""
 | 
			
		||||
    for method in SUPPORTED_HTTP_METHODS:
 | 
			
		||||
        func = notify(on=method)(func)
 | 
			
		||||
        # Add support for https, for each supported http method
 | 
			
		||||
        func = notify(on=f"{method}s")(func)
 | 
			
		||||
        _register_http_handler(method, func)
 | 
			
		||||
        _register_http_handler(f"{method}s", func)
 | 
			
		||||
    return func
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _register_http_handler(schema, send_func):
 | 
			
		||||
    """Register a custom HTTP handler that extracts format= from URL query parameters."""
 | 
			
		||||
 | 
			
		||||
    # Parse base URL
 | 
			
		||||
    base_url = f"{schema}://"
 | 
			
		||||
    base_args = apprise_parse_url(base_url, default_schema=schema, verify_host=False, simple=True)
 | 
			
		||||
 | 
			
		||||
    class CustomHTTPHandler(CustomNotifyPlugin):
 | 
			
		||||
        secure_protocol = schema
 | 
			
		||||
        service_name = f"Custom HTTP - {schema.upper()}"
 | 
			
		||||
        _base_args = base_args
 | 
			
		||||
 | 
			
		||||
        def __init__(self, **kwargs):
 | 
			
		||||
            # Extract format from qsd and set it as a top-level kwarg
 | 
			
		||||
            # This allows NotifyBase.__init__ to properly set notify_format
 | 
			
		||||
            if 'qsd' in kwargs and 'format' in kwargs['qsd']:
 | 
			
		||||
                kwargs['format'] = kwargs['qsd']['format']
 | 
			
		||||
 | 
			
		||||
            # Call NotifyBase.__init__ (skip CustomNotifyPlugin.__init__)
 | 
			
		||||
            super(CustomNotifyPlugin, self).__init__(**kwargs)
 | 
			
		||||
 | 
			
		||||
            # Set up _default_args like CustomNotifyPlugin does
 | 
			
		||||
            self._default_args = {}
 | 
			
		||||
            kwargs.pop("secure", None)
 | 
			
		||||
            dict_full_update(self._default_args, self._base_args)
 | 
			
		||||
            dict_full_update(self._default_args, kwargs)
 | 
			
		||||
            self._default_args["url"] = url_assembly(**self._default_args)
 | 
			
		||||
 | 
			
		||||
        __send = staticmethod(send_func)
 | 
			
		||||
 | 
			
		||||
        def send(self, body, title="", notify_type="info", *args, **kwargs):
 | 
			
		||||
            """Call the custom send function."""
 | 
			
		||||
            try:
 | 
			
		||||
                result = self.__send(
 | 
			
		||||
                    body, title, notify_type,
 | 
			
		||||
                    *args,
 | 
			
		||||
                    meta=self._default_args,
 | 
			
		||||
                    **kwargs
 | 
			
		||||
                )
 | 
			
		||||
                return True if result is None else bool(result)
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                self.logger.warning(f"Exception in custom HTTP handler: {e}")
 | 
			
		||||
                return False
 | 
			
		||||
 | 
			
		||||
    # Register the plugin
 | 
			
		||||
    plugins.N_MGR.add(
 | 
			
		||||
        plugin=CustomHTTPHandler,
 | 
			
		||||
        schemas=schema,
 | 
			
		||||
        send_func=send_func,
 | 
			
		||||
        url=base_url,
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def _get_auth(parsed_url: dict) -> str | tuple[str, str]:
 | 
			
		||||
    user: str | None = parsed_url.get("user")
 | 
			
		||||
    password: str | None = parsed_url.get("password")
 | 
			
		||||
@@ -70,9 +174,12 @@ def apprise_http_custom_handler(
 | 
			
		||||
    title: str,
 | 
			
		||||
    notify_type: str,
 | 
			
		||||
    meta: dict,
 | 
			
		||||
    body_format: str = None,
 | 
			
		||||
    *args,
 | 
			
		||||
    **kwargs,
 | 
			
		||||
) -> bool:
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    url: str = meta.get("url")
 | 
			
		||||
    schema: str = meta.get("schema")
 | 
			
		||||
    method: str = re.sub(r"s$", "", schema).upper()
 | 
			
		||||
@@ -88,25 +195,16 @@ def apprise_http_custom_handler(
 | 
			
		||||
 | 
			
		||||
    url = re.sub(rf"^{schema}", "https" if schema.endswith("s") else "http", parsed_url.get("url"))
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        response = requests.request(
 | 
			
		||||
            method=method,
 | 
			
		||||
            url=url,
 | 
			
		||||
            auth=auth,
 | 
			
		||||
            headers=headers,
 | 
			
		||||
            params=params,
 | 
			
		||||
            data=body.encode("utf-8") if isinstance(body, str) else body,
 | 
			
		||||
        )
 | 
			
		||||
    response = requests.request(
 | 
			
		||||
        method=method,
 | 
			
		||||
        url=url,
 | 
			
		||||
        auth=auth,
 | 
			
		||||
        headers=headers,
 | 
			
		||||
        params=params,
 | 
			
		||||
        data=body.encode("utf-8") if isinstance(body, str) else body,
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
        response.raise_for_status()
 | 
			
		||||
    response.raise_for_status()
 | 
			
		||||
 | 
			
		||||
        logger.info(f"Successfully sent custom notification to {url}")
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    except requests.RequestException as e:
 | 
			
		||||
        logger.error(f"Remote host error while sending custom notification to {url}: {e}")
 | 
			
		||||
        return False
 | 
			
		||||
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        logger.error(f"Unexpected error occurred while sending custom notification to {url}: {e}")
 | 
			
		||||
        return False
 | 
			
		||||
    logger.info(f"Successfully sent custom notification to {url}")
 | 
			
		||||
    return True
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										286
									
								
								changedetectionio/notification/apprise_plugin/discord.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										286
									
								
								changedetectionio/notification/apprise_plugin/discord.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,286 @@
 | 
			
		||||
"""
 | 
			
		||||
Custom Discord plugin for changedetection.io
 | 
			
		||||
Extends Apprise's Discord plugin to support custom colored embeds for removed/added content
 | 
			
		||||
"""
 | 
			
		||||
from apprise.plugins.discord import NotifyDiscord
 | 
			
		||||
from apprise.decorators import notify
 | 
			
		||||
from apprise.common import NotifyFormat
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
# Import placeholders from changedetection's diff module
 | 
			
		||||
from ...diff import (
 | 
			
		||||
    REMOVED_PLACEMARKER_OPEN,
 | 
			
		||||
    REMOVED_PLACEMARKER_CLOSED,
 | 
			
		||||
    ADDED_PLACEMARKER_OPEN,
 | 
			
		||||
    ADDED_PLACEMARKER_CLOSED,
 | 
			
		||||
    CHANGED_PLACEMARKER_OPEN,
 | 
			
		||||
    CHANGED_PLACEMARKER_CLOSED,
 | 
			
		||||
    CHANGED_INTO_PLACEMARKER_OPEN,
 | 
			
		||||
    CHANGED_INTO_PLACEMARKER_CLOSED,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
# Discord embed sidebar colors for different change types
 | 
			
		||||
DISCORD_COLOR_UNCHANGED = 8421504   # Gray (#808080)
 | 
			
		||||
DISCORD_COLOR_REMOVED = 16711680    # Red (#FF0000)
 | 
			
		||||
DISCORD_COLOR_ADDED = 65280         # Green (#00FF00)
 | 
			
		||||
DISCORD_COLOR_CHANGED = 16753920    # Orange (#FFA500)
 | 
			
		||||
DISCORD_COLOR_CHANGED_INTO = 3447003  # Blue (#5865F2 - Discord blue)
 | 
			
		||||
DISCORD_COLOR_WARNING = 16776960    # Yellow (#FFFF00)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class NotifyDiscordCustom(NotifyDiscord):
 | 
			
		||||
    """
 | 
			
		||||
    Custom Discord notification handler that supports multiple colored embeds
 | 
			
		||||
    for showing removed (red) and added (green) content separately.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    def send(self, body, title="", notify_type=None, attach=None, **kwargs):
 | 
			
		||||
        """
 | 
			
		||||
        Override send method to create custom embeds with red/green colors
 | 
			
		||||
        for removed/added content when placeholders are present.
 | 
			
		||||
        """
 | 
			
		||||
 | 
			
		||||
        # Check if body contains our diff placeholders
 | 
			
		||||
        has_removed = REMOVED_PLACEMARKER_OPEN in body
 | 
			
		||||
        has_added = ADDED_PLACEMARKER_OPEN in body
 | 
			
		||||
        has_changed = CHANGED_PLACEMARKER_OPEN in body
 | 
			
		||||
        has_changed_into = CHANGED_INTO_PLACEMARKER_OPEN in body
 | 
			
		||||
 | 
			
		||||
        # If we have diff placeholders and we're in markdown/html format, create custom embeds
 | 
			
		||||
        if (has_removed or has_added or has_changed or has_changed_into) and self.notify_format in (NotifyFormat.MARKDOWN, NotifyFormat.HTML):
 | 
			
		||||
            return self._send_with_colored_embeds(body, title, notify_type, attach, **kwargs)
 | 
			
		||||
 | 
			
		||||
        # Otherwise, use the parent class's default behavior
 | 
			
		||||
        return super().send(body, title, notify_type, attach, **kwargs)
 | 
			
		||||
 | 
			
		||||
    def _send_with_colored_embeds(self, body, title, notify_type, attach, **kwargs):
 | 
			
		||||
        """
 | 
			
		||||
        Send Discord message with embeds in the original diff order.
 | 
			
		||||
        Preserves the sequence: unchanged -> removed -> added -> unchanged, etc.
 | 
			
		||||
        """
 | 
			
		||||
        from datetime import datetime, timezone
 | 
			
		||||
 | 
			
		||||
        payload = {
 | 
			
		||||
            "tts": self.tts,
 | 
			
		||||
            "wait": self.tts is False,
 | 
			
		||||
        }
 | 
			
		||||
 | 
			
		||||
        if self.flags:
 | 
			
		||||
            payload["flags"] = self.flags
 | 
			
		||||
 | 
			
		||||
        # Acquire image_url
 | 
			
		||||
        image_url = self.image_url(notify_type)
 | 
			
		||||
 | 
			
		||||
        if self.avatar and (image_url or self.avatar_url):
 | 
			
		||||
            payload["avatar_url"] = self.avatar_url if self.avatar_url else image_url
 | 
			
		||||
 | 
			
		||||
        if self.user:
 | 
			
		||||
            payload["username"] = self.user
 | 
			
		||||
 | 
			
		||||
        # Associate our thread_id with our message
 | 
			
		||||
        params = {"thread_id": self.thread_id} if self.thread_id else None
 | 
			
		||||
 | 
			
		||||
        # Build embeds array preserving order
 | 
			
		||||
        embeds = []
 | 
			
		||||
 | 
			
		||||
        # Add title as plain bold text in message content (not an embed)
 | 
			
		||||
        if title:
 | 
			
		||||
            payload["content"] = f"**{title}**"
 | 
			
		||||
 | 
			
		||||
        # Parse the body into ordered chunks
 | 
			
		||||
        chunks = self._parse_body_into_chunks(body)
 | 
			
		||||
 | 
			
		||||
        # Discord limits:
 | 
			
		||||
        # - Max 10 embeds per message
 | 
			
		||||
        # - Max 6000 characters total across all embeds
 | 
			
		||||
        # - Max 4096 characters per embed description
 | 
			
		||||
        max_embeds = 10
 | 
			
		||||
        max_total_chars = 6000
 | 
			
		||||
        max_embed_description = 4096
 | 
			
		||||
 | 
			
		||||
        # All 10 embed slots are available for content
 | 
			
		||||
        max_content_embeds = max_embeds
 | 
			
		||||
 | 
			
		||||
        # Start character count
 | 
			
		||||
        total_chars = 0
 | 
			
		||||
 | 
			
		||||
        # Create embeds from chunks in order (no titles, just color coding)
 | 
			
		||||
        for chunk_type, content in chunks:
 | 
			
		||||
            if not content.strip():
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            # Truncate individual embed description if needed
 | 
			
		||||
            if len(content) > max_embed_description:
 | 
			
		||||
                content = content[:max_embed_description - 3] + "..."
 | 
			
		||||
 | 
			
		||||
            # Check if we're approaching the embed count limit
 | 
			
		||||
            # We need room for the warning embed, so stop at max_content_embeds - 1
 | 
			
		||||
            current_content_embeds = len(embeds)
 | 
			
		||||
            if current_content_embeds >= max_content_embeds - 1:
 | 
			
		||||
                # Add a truncation notice (this will be the 10th embed)
 | 
			
		||||
                embeds.append({
 | 
			
		||||
                    "description": "⚠️ Content truncated (Discord 10 embed limit reached) - Tip: Select 'Plain Text' or 'HTML' format for longer diffs",
 | 
			
		||||
                    "color": DISCORD_COLOR_WARNING,
 | 
			
		||||
                })
 | 
			
		||||
                break
 | 
			
		||||
 | 
			
		||||
            # Check if adding this embed would exceed total character limit
 | 
			
		||||
            if total_chars + len(content) > max_total_chars:
 | 
			
		||||
                # Add a truncation notice
 | 
			
		||||
                remaining_chars = max_total_chars - total_chars
 | 
			
		||||
                if remaining_chars > 100:
 | 
			
		||||
                    # Add partial content if we have room
 | 
			
		||||
                    truncated_content = content[:remaining_chars - 100] + "..."
 | 
			
		||||
                    embeds.append({
 | 
			
		||||
                        "description": truncated_content,
 | 
			
		||||
                        "color": (DISCORD_COLOR_UNCHANGED if chunk_type == "unchanged"
 | 
			
		||||
                                 else DISCORD_COLOR_REMOVED if chunk_type == "removed"
 | 
			
		||||
                                 else DISCORD_COLOR_ADDED),
 | 
			
		||||
                    })
 | 
			
		||||
                embeds.append({
 | 
			
		||||
                    "description": "⚠️ Content truncated (Discord 6000 char limit reached)\nTip: Select 'Plain Text' or 'HTML' format for longer diffs",
 | 
			
		||||
                    "color": DISCORD_COLOR_WARNING,
 | 
			
		||||
                })
 | 
			
		||||
                break
 | 
			
		||||
 | 
			
		||||
            if chunk_type == "unchanged":
 | 
			
		||||
                embeds.append({
 | 
			
		||||
                    "description": content,
 | 
			
		||||
                    "color": DISCORD_COLOR_UNCHANGED,
 | 
			
		||||
                })
 | 
			
		||||
            elif chunk_type == "removed":
 | 
			
		||||
                embeds.append({
 | 
			
		||||
                    "description": content,
 | 
			
		||||
                    "color": DISCORD_COLOR_REMOVED,
 | 
			
		||||
                })
 | 
			
		||||
            elif chunk_type == "added":
 | 
			
		||||
                embeds.append({
 | 
			
		||||
                    "description": content,
 | 
			
		||||
                    "color": DISCORD_COLOR_ADDED,
 | 
			
		||||
                })
 | 
			
		||||
            elif chunk_type == "changed":
 | 
			
		||||
                # Changed (old value) - use orange to distinguish from pure removal
 | 
			
		||||
                embeds.append({
 | 
			
		||||
                    "description": content,
 | 
			
		||||
                    "color": DISCORD_COLOR_CHANGED,
 | 
			
		||||
                })
 | 
			
		||||
            elif chunk_type == "changed_into":
 | 
			
		||||
                # Changed into (new value) - use blue to distinguish from pure addition
 | 
			
		||||
                embeds.append({
 | 
			
		||||
                    "description": content,
 | 
			
		||||
                    "color": DISCORD_COLOR_CHANGED_INTO,
 | 
			
		||||
                })
 | 
			
		||||
 | 
			
		||||
            total_chars += len(content)
 | 
			
		||||
 | 
			
		||||
        if embeds:
 | 
			
		||||
            payload["embeds"] = embeds
 | 
			
		||||
 | 
			
		||||
        # Send the payload using parent's _send method
 | 
			
		||||
        if not self._send(payload, params=params):
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
        # Handle attachments if present
 | 
			
		||||
        if attach and self.attachment_support:
 | 
			
		||||
            payload.update({
 | 
			
		||||
                "tts": False,
 | 
			
		||||
                "wait": True,
 | 
			
		||||
            })
 | 
			
		||||
            payload.pop("embeds", None)
 | 
			
		||||
            payload.pop("content", None)
 | 
			
		||||
            payload.pop("allow_mentions", None)
 | 
			
		||||
 | 
			
		||||
            for attachment in attach:
 | 
			
		||||
                self.logger.info(f"Posting Discord Attachment {attachment.name}")
 | 
			
		||||
                if not self._send(payload, params=params, attach=attachment):
 | 
			
		||||
                    return False
 | 
			
		||||
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
    def _parse_body_into_chunks(self, body):
 | 
			
		||||
        """
 | 
			
		||||
        Parse the body into ordered chunks of (type, content) tuples.
 | 
			
		||||
        Types: "unchanged", "removed", "added", "changed", "changed_into"
 | 
			
		||||
        Preserves the original order of the diff.
 | 
			
		||||
        """
 | 
			
		||||
        chunks = []
 | 
			
		||||
        position = 0
 | 
			
		||||
 | 
			
		||||
        while position < len(body):
 | 
			
		||||
            # Find the next marker
 | 
			
		||||
            next_removed = body.find(REMOVED_PLACEMARKER_OPEN, position)
 | 
			
		||||
            next_added = body.find(ADDED_PLACEMARKER_OPEN, position)
 | 
			
		||||
            next_changed = body.find(CHANGED_PLACEMARKER_OPEN, position)
 | 
			
		||||
            next_changed_into = body.find(CHANGED_INTO_PLACEMARKER_OPEN, position)
 | 
			
		||||
 | 
			
		||||
            # Determine which marker comes first
 | 
			
		||||
            if next_removed == -1 and next_added == -1 and next_changed == -1 and next_changed_into == -1:
 | 
			
		||||
                # No more markers, rest is unchanged
 | 
			
		||||
                if position < len(body):
 | 
			
		||||
                    chunks.append(("unchanged", body[position:]))
 | 
			
		||||
                break
 | 
			
		||||
 | 
			
		||||
            # Find the earliest marker
 | 
			
		||||
            next_marker_pos = None
 | 
			
		||||
            next_marker_type = None
 | 
			
		||||
 | 
			
		||||
            # Compare all marker positions to find the earliest
 | 
			
		||||
            markers = []
 | 
			
		||||
            if next_removed != -1:
 | 
			
		||||
                markers.append((next_removed, "removed"))
 | 
			
		||||
            if next_added != -1:
 | 
			
		||||
                markers.append((next_added, "added"))
 | 
			
		||||
            if next_changed != -1:
 | 
			
		||||
                markers.append((next_changed, "changed"))
 | 
			
		||||
            if next_changed_into != -1:
 | 
			
		||||
                markers.append((next_changed_into, "changed_into"))
 | 
			
		||||
 | 
			
		||||
            if markers:
 | 
			
		||||
                next_marker_pos, next_marker_type = min(markers, key=lambda x: x[0])
 | 
			
		||||
 | 
			
		||||
            # Add unchanged content before the marker
 | 
			
		||||
            if next_marker_pos > position:
 | 
			
		||||
                chunks.append(("unchanged", body[position:next_marker_pos]))
 | 
			
		||||
 | 
			
		||||
            # Find the closing marker
 | 
			
		||||
            if next_marker_type == "removed":
 | 
			
		||||
                open_marker = REMOVED_PLACEMARKER_OPEN
 | 
			
		||||
                close_marker = REMOVED_PLACEMARKER_CLOSED
 | 
			
		||||
            elif next_marker_type == "added":
 | 
			
		||||
                open_marker = ADDED_PLACEMARKER_OPEN
 | 
			
		||||
                close_marker = ADDED_PLACEMARKER_CLOSED
 | 
			
		||||
            elif next_marker_type == "changed":
 | 
			
		||||
                open_marker = CHANGED_PLACEMARKER_OPEN
 | 
			
		||||
                close_marker = CHANGED_PLACEMARKER_CLOSED
 | 
			
		||||
            else:  # changed_into
 | 
			
		||||
                open_marker = CHANGED_INTO_PLACEMARKER_OPEN
 | 
			
		||||
                close_marker = CHANGED_INTO_PLACEMARKER_CLOSED
 | 
			
		||||
 | 
			
		||||
            close_pos = body.find(close_marker, next_marker_pos)
 | 
			
		||||
 | 
			
		||||
            if close_pos == -1:
 | 
			
		||||
                # No closing marker, take rest as this type
 | 
			
		||||
                content = body[next_marker_pos + len(open_marker):]
 | 
			
		||||
                chunks.append((next_marker_type, content))
 | 
			
		||||
                break
 | 
			
		||||
            else:
 | 
			
		||||
                # Extract content between markers
 | 
			
		||||
                content = body[next_marker_pos + len(open_marker):close_pos]
 | 
			
		||||
                chunks.append((next_marker_type, content))
 | 
			
		||||
                position = close_pos + len(close_marker)
 | 
			
		||||
 | 
			
		||||
        return chunks
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Register the custom Discord handler with Apprise
 | 
			
		||||
# This will override the built-in discord:// handler
 | 
			
		||||
@notify(on="discord")
 | 
			
		||||
def discord_custom_wrapper(body, title, notify_type, meta, body_format=None, *args, **kwargs):
 | 
			
		||||
    """
 | 
			
		||||
    Wrapper function to make the custom Discord handler work with Apprise's decorator system.
 | 
			
		||||
    Note: This decorator approach may not work for overriding built-in plugins.
 | 
			
		||||
    The class-based approach above is the proper way to extend NotifyDiscord.
 | 
			
		||||
    """
 | 
			
		||||
    logger.info("Custom Discord handler called")
 | 
			
		||||
    # This is here for potential future use with decorator-based registration
 | 
			
		||||
    return True
 | 
			
		||||
							
								
								
									
										42
									
								
								changedetectionio/notification/email_helpers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								changedetectionio/notification/email_helpers.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,42 @@
 | 
			
		||||
def as_monospaced_html_email(content: str, title: str) -> str:
 | 
			
		||||
    """
 | 
			
		||||
    Wraps `content` in a minimal, email-safe HTML template
 | 
			
		||||
    that forces monospace rendering across Gmail, Hotmail, Apple Mail, etc.
 | 
			
		||||
 | 
			
		||||
    Args:
 | 
			
		||||
        content: The body text (plain text or HTML-like).
 | 
			
		||||
        title: The title plaintext
 | 
			
		||||
    Returns:
 | 
			
		||||
        A complete HTML document string suitable for sending as an email body.
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    # All line feed types should be removed and then this function should only be fed <br>'s
 | 
			
		||||
    # Then it works with our <pre> styling without double linefeeds
 | 
			
		||||
    content = content.translate(str.maketrans('', '', '\r\n'))
 | 
			
		||||
 | 
			
		||||
    if title:
 | 
			
		||||
        import html
 | 
			
		||||
        title = html.escape(title)
 | 
			
		||||
    else:
 | 
			
		||||
        title = ''
 | 
			
		||||
    # 2. Full email-safe HTML
 | 
			
		||||
    html_email = f"""<!DOCTYPE html>
 | 
			
		||||
<html lang="en">
 | 
			
		||||
<head>
 | 
			
		||||
  <meta charset="UTF-8">
 | 
			
		||||
  <meta name="x-apple-disable-message-reformatting">
 | 
			
		||||
  <meta name="viewport" content="width=device-width, initial-scale=1.0">
 | 
			
		||||
  <!--[if mso]>
 | 
			
		||||
    <style>
 | 
			
		||||
      body, div, pre, td {{ font-family: "Courier New", Courier, monospace !important; }}
 | 
			
		||||
    </style>
 | 
			
		||||
  <![endif]-->
 | 
			
		||||
  <title>{title}</title>
 | 
			
		||||
</head>
 | 
			
		||||
<body style="-webkit-text-size-adjust:100%;-ms-text-size-adjust:100%;">
 | 
			
		||||
  <pre role="article" aria-roledescription="email" lang="en"
 | 
			
		||||
       style="font-family: monospace, 'Courier New', Courier; font-size: 0.9rem;
 | 
			
		||||
              white-space: pre-wrap; word-break: break-word;">{content}</pre>
 | 
			
		||||
</body>
 | 
			
		||||
</html>"""
 | 
			
		||||
    return html_email
 | 
			
		||||
@@ -1,32 +1,313 @@
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
import apprise
 | 
			
		||||
from apprise import NotifyFormat
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
from urllib.parse import urlparse
 | 
			
		||||
from .apprise_plugin.assets import apprise_asset, APPRISE_AVATAR_URL
 | 
			
		||||
from .email_helpers import as_monospaced_html_email
 | 
			
		||||
from ..diff import HTML_REMOVED_STYLE, REMOVED_PLACEMARKER_OPEN, REMOVED_PLACEMARKER_CLOSED, ADDED_PLACEMARKER_OPEN, HTML_ADDED_STYLE, \
 | 
			
		||||
    ADDED_PLACEMARKER_CLOSED, CHANGED_INTO_PLACEMARKER_OPEN, CHANGED_INTO_PLACEMARKER_CLOSED, CHANGED_PLACEMARKER_OPEN, \
 | 
			
		||||
    CHANGED_PLACEMARKER_CLOSED, HTML_CHANGED_STYLE, HTML_CHANGED_INTO_STYLE
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
from ..notification_service import NotificationContextData
 | 
			
		||||
 | 
			
		||||
newline_re = re.compile(r'\r\n|\r|\n')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def process_notification(n_object, datastore):
 | 
			
		||||
    from changedetectionio.safe_jinja import render as jinja_render
 | 
			
		||||
    from . import default_notification_format_for_watch, default_notification_format, valid_notification_formats
 | 
			
		||||
def markup_text_links_to_html(body):
 | 
			
		||||
    """
 | 
			
		||||
    Convert plaintext to HTML with clickable links.
 | 
			
		||||
    Uses Jinja2's escape and Markup for XSS safety.
 | 
			
		||||
    """
 | 
			
		||||
    from linkify_it import LinkifyIt
 | 
			
		||||
    from markupsafe import Markup, escape
 | 
			
		||||
 | 
			
		||||
    linkify = LinkifyIt()
 | 
			
		||||
 | 
			
		||||
    # Match URLs in the ORIGINAL text (before escaping)
 | 
			
		||||
    matches = linkify.match(body)
 | 
			
		||||
 | 
			
		||||
    if not matches:
 | 
			
		||||
        # No URLs, just escape everything
 | 
			
		||||
        return Markup(escape(body))
 | 
			
		||||
 | 
			
		||||
    result = []
 | 
			
		||||
    last_index = 0
 | 
			
		||||
 | 
			
		||||
    # Process each URL match
 | 
			
		||||
    for match in matches:
 | 
			
		||||
        # Add escaped text before the URL
 | 
			
		||||
        if match.index > last_index:
 | 
			
		||||
            text_part = body[last_index:match.index]
 | 
			
		||||
            result.append(escape(text_part))
 | 
			
		||||
 | 
			
		||||
        # Add the link with escaped URL (both in href and display)
 | 
			
		||||
        url = match.url
 | 
			
		||||
        result.append(Markup(f'<a href="{escape(url)}">{escape(url)}</a>'))
 | 
			
		||||
 | 
			
		||||
        last_index = match.last_index
 | 
			
		||||
 | 
			
		||||
    # Add remaining escaped text
 | 
			
		||||
    if last_index < len(body):
 | 
			
		||||
        result.append(escape(body[last_index:]))
 | 
			
		||||
 | 
			
		||||
    # Join all parts
 | 
			
		||||
    return str(Markup(''.join(str(part) for part in result)))
 | 
			
		||||
 | 
			
		||||
def notification_format_align_with_apprise(n_format : str):
 | 
			
		||||
    """
 | 
			
		||||
    Correctly align changedetection's formats with apprise's formats
 | 
			
		||||
    Probably these are the same - but good to be sure.
 | 
			
		||||
    These set the expected OUTPUT format type
 | 
			
		||||
    :param n_format:
 | 
			
		||||
    :return:
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    if n_format.startswith('html'):
 | 
			
		||||
        # Apprise only knows 'html' not 'htmlcolor' etc, which shouldnt matter here
 | 
			
		||||
        n_format = NotifyFormat.HTML.value
 | 
			
		||||
    elif n_format.startswith('markdown'):
 | 
			
		||||
        # probably the same but just to be safe
 | 
			
		||||
        n_format = NotifyFormat.MARKDOWN.value
 | 
			
		||||
    elif n_format.startswith('text'):
 | 
			
		||||
        # probably the same but just to be safe
 | 
			
		||||
        n_format = NotifyFormat.TEXT.value
 | 
			
		||||
    else:
 | 
			
		||||
        n_format = NotifyFormat.TEXT.value
 | 
			
		||||
 | 
			
		||||
    return n_format
 | 
			
		||||
 | 
			
		||||
def apply_discord_markdown_to_body(n_body):
 | 
			
		||||
    """
 | 
			
		||||
    Discord does not support <del> but it supports non-standard ~~strikethrough~~
 | 
			
		||||
    :param n_body:
 | 
			
		||||
    :return:
 | 
			
		||||
    """
 | 
			
		||||
    import re
 | 
			
		||||
    # Define the mapping between your placeholders and markdown markers
 | 
			
		||||
    replacements = [
 | 
			
		||||
        (REMOVED_PLACEMARKER_OPEN, '~~', REMOVED_PLACEMARKER_CLOSED, '~~'),
 | 
			
		||||
        (ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
 | 
			
		||||
        (CHANGED_PLACEMARKER_OPEN, '~~', CHANGED_PLACEMARKER_CLOSED, '~~'),
 | 
			
		||||
        (CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
 | 
			
		||||
    ]
 | 
			
		||||
    # So that the markdown gets added without any whitespace following it which would break it
 | 
			
		||||
    for open_tag, open_md, close_tag, close_md in replacements:
 | 
			
		||||
        # Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag
 | 
			
		||||
        pattern = re.compile(
 | 
			
		||||
            re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag),
 | 
			
		||||
            flags=re.DOTALL
 | 
			
		||||
        )
 | 
			
		||||
        n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
 | 
			
		||||
    return n_body
 | 
			
		||||
 | 
			
		||||
def apply_standard_markdown_to_body(n_body):
 | 
			
		||||
    """
 | 
			
		||||
    Apprise does not support ~~strikethrough~~ but it will convert <del> to HTML strikethrough.
 | 
			
		||||
    :param n_body:
 | 
			
		||||
    :return:
 | 
			
		||||
    """
 | 
			
		||||
    import re
 | 
			
		||||
    # Define the mapping between your placeholders and markdown markers
 | 
			
		||||
    replacements = [
 | 
			
		||||
        (REMOVED_PLACEMARKER_OPEN, '<del>', REMOVED_PLACEMARKER_CLOSED, '</del>'),
 | 
			
		||||
        (ADDED_PLACEMARKER_OPEN, '**', ADDED_PLACEMARKER_CLOSED, '**'),
 | 
			
		||||
        (CHANGED_PLACEMARKER_OPEN, '<del>', CHANGED_PLACEMARKER_CLOSED, '</del>'),
 | 
			
		||||
        (CHANGED_INTO_PLACEMARKER_OPEN, '**', CHANGED_INTO_PLACEMARKER_CLOSED, '**'),
 | 
			
		||||
    ]
 | 
			
		||||
 | 
			
		||||
    # So that the markdown gets added without any whitespace following it which would break it
 | 
			
		||||
    for open_tag, open_md, close_tag, close_md in replacements:
 | 
			
		||||
        # Regex: match opening tag, optional whitespace, capture the content, optional whitespace, then closing tag
 | 
			
		||||
        pattern = re.compile(
 | 
			
		||||
            re.escape(open_tag) + r'(\s*)(.*?)?(\s*)' + re.escape(close_tag),
 | 
			
		||||
            flags=re.DOTALL
 | 
			
		||||
        )
 | 
			
		||||
        n_body = pattern.sub(lambda m: f"{m.group(1)}{open_md}{m.group(2)}{close_md}{m.group(3)}", n_body)
 | 
			
		||||
    return n_body
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def replace_placemarkers_in_text(text, url, requested_output_format):
 | 
			
		||||
    """
 | 
			
		||||
    Replace diff placemarkers in text based on the URL service type and requested output format.
 | 
			
		||||
    Used for both notification title and body to ensure consistent placeholder replacement.
 | 
			
		||||
 | 
			
		||||
    :param text: The text to process
 | 
			
		||||
    :param url: The notification URL (to detect service type)
 | 
			
		||||
    :param requested_output_format: The output format (html, htmlcolor, markdown, text, etc.)
 | 
			
		||||
    :return: Processed text with placemarkers replaced
 | 
			
		||||
    """
 | 
			
		||||
    if not text:
 | 
			
		||||
        return text
 | 
			
		||||
 | 
			
		||||
    if url.startswith('tgram://'):
 | 
			
		||||
        # Telegram only supports a limited subset of HTML
 | 
			
		||||
        # Use strikethrough for removed content, bold for added content
 | 
			
		||||
        text = text.replace(REMOVED_PLACEMARKER_OPEN, '<s>')
 | 
			
		||||
        text = text.replace(REMOVED_PLACEMARKER_CLOSED, '</s>')
 | 
			
		||||
        text = text.replace(ADDED_PLACEMARKER_OPEN, '<b>')
 | 
			
		||||
        text = text.replace(ADDED_PLACEMARKER_CLOSED, '</b>')
 | 
			
		||||
        # Handle changed/replaced lines (old → new)
 | 
			
		||||
        text = text.replace(CHANGED_PLACEMARKER_OPEN, '<s>')
 | 
			
		||||
        text = text.replace(CHANGED_PLACEMARKER_CLOSED, '</s>')
 | 
			
		||||
        text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, '<b>')
 | 
			
		||||
        text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, '</b>')
 | 
			
		||||
    elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
 | 
			
		||||
          or url.startswith('https://discord.com/api')) and requested_output_format == 'html':
 | 
			
		||||
        # Discord doesn't support HTML, use Discord markdown
 | 
			
		||||
        text = apply_discord_markdown_to_body(n_body=text)
 | 
			
		||||
    elif requested_output_format == 'htmlcolor':
 | 
			
		||||
        # https://github.com/dgtlmoon/changedetection.io/issues/821#issuecomment-1241837050
 | 
			
		||||
        text = text.replace(REMOVED_PLACEMARKER_OPEN, f'<span style="{HTML_REMOVED_STYLE}" role="deletion" aria-label="Removed text" title="Removed text">')
 | 
			
		||||
        text = text.replace(REMOVED_PLACEMARKER_CLOSED, f'</span>')
 | 
			
		||||
        text = text.replace(ADDED_PLACEMARKER_OPEN, f'<span style="{HTML_ADDED_STYLE}" role="insertion" aria-label="Added text" title="Added text">')
 | 
			
		||||
        text = text.replace(ADDED_PLACEMARKER_CLOSED, f'</span>')
 | 
			
		||||
        # Handle changed/replaced lines (old → new)
 | 
			
		||||
        text = text.replace(CHANGED_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_STYLE}" role="note" aria-label="Changed text" title="Changed text">')
 | 
			
		||||
        text = text.replace(CHANGED_PLACEMARKER_CLOSED, f'</span>')
 | 
			
		||||
        text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'<span style="{HTML_CHANGED_INTO_STYLE}" role="note" aria-label="Changed into" title="Changed into">')
 | 
			
		||||
        text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'</span>')
 | 
			
		||||
    elif requested_output_format == 'markdown':
 | 
			
		||||
        # Markdown to HTML - Apprise will convert this to HTML
 | 
			
		||||
        text = apply_standard_markdown_to_body(n_body=text)
 | 
			
		||||
    else:
 | 
			
		||||
        # plaintext, html, and default - use simple text markers
 | 
			
		||||
        text = text.replace(REMOVED_PLACEMARKER_OPEN, '(removed) ')
 | 
			
		||||
        text = text.replace(REMOVED_PLACEMARKER_CLOSED, '')
 | 
			
		||||
        text = text.replace(ADDED_PLACEMARKER_OPEN, '(added) ')
 | 
			
		||||
        text = text.replace(ADDED_PLACEMARKER_CLOSED, '')
 | 
			
		||||
        text = text.replace(CHANGED_PLACEMARKER_OPEN, f'(changed) ')
 | 
			
		||||
        text = text.replace(CHANGED_PLACEMARKER_CLOSED, f'')
 | 
			
		||||
        text = text.replace(CHANGED_INTO_PLACEMARKER_OPEN, f'(into) ')
 | 
			
		||||
        text = text.replace(CHANGED_INTO_PLACEMARKER_CLOSED, f'')
 | 
			
		||||
 | 
			
		||||
    return text
 | 
			
		||||
 | 
			
		||||
def apply_service_tweaks(url, n_body, n_title, requested_output_format):
 | 
			
		||||
 | 
			
		||||
    # Re 323 - Limit discord length to their 2000 char limit total or it wont send.
 | 
			
		||||
    # Because different notifications may require different pre-processing, run each sequentially :(
 | 
			
		||||
    # 2000 bytes minus -
 | 
			
		||||
    #     200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers
 | 
			
		||||
    #     Length of URL - Incase they specify a longer custom avatar_url
 | 
			
		||||
 | 
			
		||||
    if not n_body or not n_body.strip():
 | 
			
		||||
        return url, n_body, n_title
 | 
			
		||||
 | 
			
		||||
    # Normalize URL scheme to lowercase to prevent case-sensitivity issues
 | 
			
		||||
    # e.g., "Discord://webhook" -> "discord://webhook", "TGRAM://bot123" -> "tgram://bot123"
 | 
			
		||||
    scheme_separator_pos = url.find('://')
 | 
			
		||||
    if scheme_separator_pos > 0:
 | 
			
		||||
        url = url[:scheme_separator_pos].lower() + url[scheme_separator_pos:]
 | 
			
		||||
 | 
			
		||||
    # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
 | 
			
		||||
    parsed = urlparse(url)
 | 
			
		||||
    k = '?' if not parsed.query else '&'
 | 
			
		||||
    if url and not 'avatar_url' in url \
 | 
			
		||||
            and not url.startswith('mail') \
 | 
			
		||||
            and not url.startswith('post') \
 | 
			
		||||
            and not url.startswith('get') \
 | 
			
		||||
            and not url.startswith('delete') \
 | 
			
		||||
            and not url.startswith('put'):
 | 
			
		||||
        url += k + f"avatar_url={APPRISE_AVATAR_URL}"
 | 
			
		||||
 | 
			
		||||
    # Replace placemarkers in title first (this was the missing piece causing the bug)
 | 
			
		||||
    # Titles are ALWAYS plain text across all notification services (Discord embeds, Slack attachments,
 | 
			
		||||
    # email Subject headers, etc.), so we always use 'text' format for title placemarker replacement
 | 
			
		||||
    # Looking over apprise library it seems that all plugins only expect plain-text.
 | 
			
		||||
    n_title = replace_placemarkers_in_text(n_title, url, 'text')
 | 
			
		||||
 | 
			
		||||
    if url.startswith('tgram://'):
 | 
			
		||||
        # Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
 | 
			
		||||
        # re https://github.com/dgtlmoon/changedetection.io/issues/555
 | 
			
		||||
        # @todo re-use an existing library we have already imported to strip all non-allowed tags
 | 
			
		||||
        n_body = n_body.replace('<br>', '\n')
 | 
			
		||||
        n_body = n_body.replace('</br>', '\n')
 | 
			
		||||
        n_body = newline_re.sub('\n', n_body)
 | 
			
		||||
 | 
			
		||||
        # Replace placemarkers for body
 | 
			
		||||
        n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
 | 
			
		||||
 | 
			
		||||
        # real limit is 4096, but minus some for extra metadata
 | 
			
		||||
        payload_max_size = 3600
 | 
			
		||||
        body_limit = max(0, payload_max_size - len(n_title))
 | 
			
		||||
        n_title = n_title[0:payload_max_size]
 | 
			
		||||
        n_body = n_body[0:body_limit]
 | 
			
		||||
 | 
			
		||||
    elif (url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks')
 | 
			
		||||
          or url.startswith('https://discord.com/api'))\
 | 
			
		||||
            and 'html' in requested_output_format:
 | 
			
		||||
        # Discord doesn't support HTML, replace <br> with newlines
 | 
			
		||||
        n_body = n_body.strip().replace('<br>', '\n')
 | 
			
		||||
        n_body = n_body.replace('</br>', '\n')
 | 
			
		||||
        n_body = newline_re.sub('\n', n_body)
 | 
			
		||||
 | 
			
		||||
        # Don't replace placeholders or truncate here - let the custom Discord plugin handle it
 | 
			
		||||
        # The plugin will use embeds (6000 char limit across all embeds) if placeholders are present,
 | 
			
		||||
        # or plain content (2000 char limit) otherwise
 | 
			
		||||
 | 
			
		||||
        # Only do placeholder replacement if NOT using htmlcolor (which triggers embeds in custom plugin)
 | 
			
		||||
        if requested_output_format == 'html':
 | 
			
		||||
            # No diff placeholders, use Discord markdown for any other formatting
 | 
			
		||||
            # Use Discord markdown: strikethrough for removed, bold for added
 | 
			
		||||
            n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
 | 
			
		||||
 | 
			
		||||
            # Apply 2000 char limit for plain content
 | 
			
		||||
            payload_max_size = 1700
 | 
			
		||||
            body_limit = max(0, payload_max_size - len(n_title))
 | 
			
		||||
            n_title = n_title[0:payload_max_size]
 | 
			
		||||
            n_body = n_body[0:body_limit]
 | 
			
		||||
        # else: our custom Discord plugin will convert any placeholders left over into embeds with color bars
 | 
			
		||||
 | 
			
		||||
    # Is not discord/tgram and they want htmlcolor
 | 
			
		||||
    elif requested_output_format == 'htmlcolor':
 | 
			
		||||
        n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
 | 
			
		||||
        n_body = newline_re.sub('<br>\n', n_body)
 | 
			
		||||
    elif requested_output_format == 'html':
 | 
			
		||||
        n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
 | 
			
		||||
        n_body = newline_re.sub('<br>\n', n_body)
 | 
			
		||||
    elif requested_output_format == 'markdown':
 | 
			
		||||
        # Markdown to HTML - Apprise will convert this to HTML
 | 
			
		||||
        n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
 | 
			
		||||
 | 
			
		||||
    else: #plaintext etc default
 | 
			
		||||
        n_body = replace_placemarkers_in_text(n_body, url, requested_output_format)
 | 
			
		||||
 | 
			
		||||
    return url, n_body, n_title
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def process_notification(n_object: NotificationContextData, datastore):
 | 
			
		||||
    from changedetectionio.jinja2_custom import render as jinja_render
 | 
			
		||||
    from . import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH, default_notification_format, valid_notification_formats
 | 
			
		||||
    # be sure its registered
 | 
			
		||||
    from .apprise_plugin.custom_handlers import apprise_http_custom_handler
 | 
			
		||||
    # Register custom Discord plugin
 | 
			
		||||
    from .apprise_plugin.discord import NotifyDiscordCustom
 | 
			
		||||
 | 
			
		||||
    if not isinstance(n_object, NotificationContextData):
 | 
			
		||||
        raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
 | 
			
		||||
 | 
			
		||||
    now = time.time()
 | 
			
		||||
    if n_object.get('notification_timestamp'):
 | 
			
		||||
        logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s")
 | 
			
		||||
 | 
			
		||||
    # Insert variables into the notification content
 | 
			
		||||
    notification_parameters = create_notification_parameters(n_object, datastore)
 | 
			
		||||
 | 
			
		||||
    n_format = valid_notification_formats.get(
 | 
			
		||||
        n_object.get('notification_format', default_notification_format),
 | 
			
		||||
        valid_notification_formats[default_notification_format],
 | 
			
		||||
    )
 | 
			
		||||
    requested_output_format = n_object.get('notification_format', default_notification_format)
 | 
			
		||||
    logger.debug(f"Requested notification output format: '{requested_output_format}'")
 | 
			
		||||
 | 
			
		||||
    # If we arrived with 'System default' then look it up
 | 
			
		||||
    if n_format == default_notification_format_for_watch and datastore.data['settings']['application'].get('notification_format') != default_notification_format_for_watch:
 | 
			
		||||
    if requested_output_format == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
 | 
			
		||||
        # Initially text or whatever
 | 
			
		||||
        n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format])
 | 
			
		||||
        requested_output_format = datastore.data['settings']['application'].get('notification_format', default_notification_format)
 | 
			
		||||
 | 
			
		||||
    requested_output_format_original = requested_output_format
 | 
			
		||||
 | 
			
		||||
    # Now clean it up so it fits perfectly with apprise
 | 
			
		||||
    requested_output_format = notification_format_align_with_apprise(n_format=requested_output_format)
 | 
			
		||||
 | 
			
		||||
    logger.trace(f"Complete notification body including Jinja and placeholders calculated in  {time.time() - now:.2f}s")
 | 
			
		||||
 | 
			
		||||
@@ -41,99 +322,107 @@ def process_notification(n_object, datastore):
 | 
			
		||||
 | 
			
		||||
    apobj = apprise.Apprise(debug=True, asset=apprise_asset)
 | 
			
		||||
 | 
			
		||||
    # Override Apprise's built-in Discord plugin with our custom one
 | 
			
		||||
    # This allows us to use colored embeds for diff content
 | 
			
		||||
    # First remove the built-in discord plugin, then add our custom one
 | 
			
		||||
    apprise.plugins.N_MGR.remove('discord')
 | 
			
		||||
    apprise.plugins.N_MGR.add(NotifyDiscordCustom, schemas='discord')
 | 
			
		||||
 | 
			
		||||
    if not n_object.get('notification_urls'):
 | 
			
		||||
        return None
 | 
			
		||||
 | 
			
		||||
    with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
 | 
			
		||||
    with (apprise.LogCapture(level=apprise.logging.DEBUG) as logs):
 | 
			
		||||
        for url in n_object['notification_urls']:
 | 
			
		||||
 | 
			
		||||
            # Get the notification body from datastore
 | 
			
		||||
            n_body = jinja_render(template_str=n_object.get('notification_body', ''), **notification_parameters)
 | 
			
		||||
            if n_object.get('notification_format', '').startswith('HTML'):
 | 
			
		||||
                n_body = n_body.replace("\n", '<br>')
 | 
			
		||||
 | 
			
		||||
            n_title = jinja_render(template_str=n_object.get('notification_title', ''), **notification_parameters)
 | 
			
		||||
 | 
			
		||||
            if n_object.get('markup_text_links_to_html_links'):
 | 
			
		||||
                n_body = markup_text_links_to_html(body=n_body)
 | 
			
		||||
 | 
			
		||||
            url = url.strip()
 | 
			
		||||
            if url.startswith('#'):
 | 
			
		||||
                logger.trace(f"Skipping commented out notification URL - {url}")
 | 
			
		||||
            if not url or url.startswith('#'):
 | 
			
		||||
                logger.debug(f"Skipping commented out or empty notification URL - '{url}'")
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            if not url:
 | 
			
		||||
                logger.warning(f"Process Notification: skipping empty notification URL.")
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            logger.info(f">> Process Notification: AppRise notifying {url}")
 | 
			
		||||
            logger.info(f">> Process Notification: AppRise start notifying '{url}'")
 | 
			
		||||
            url = jinja_render(template_str=url, **notification_parameters)
 | 
			
		||||
 | 
			
		||||
            # Re 323 - Limit discord length to their 2000 char limit total or it wont send.
 | 
			
		||||
            # Because different notifications may require different pre-processing, run each sequentially :(
 | 
			
		||||
            # 2000 bytes minus -
 | 
			
		||||
            #     200 bytes for the overhead of the _entire_ json payload, 200 bytes for {tts, wait, content} etc headers
 | 
			
		||||
            #     Length of URL - Incase they specify a longer custom avatar_url
 | 
			
		||||
            # If it's a plaintext document, and they want HTML type email/alerts, so it needs to be escaped
 | 
			
		||||
            watch_mime_type = n_object.get('watch_mime_type')
 | 
			
		||||
            if watch_mime_type and 'text/' in watch_mime_type.lower() and not 'html' in watch_mime_type.lower():
 | 
			
		||||
                if 'html' in requested_output_format:
 | 
			
		||||
                    from markupsafe import escape
 | 
			
		||||
                    n_body = str(escape(n_body))
 | 
			
		||||
 | 
			
		||||
            # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
 | 
			
		||||
            k = '?' if not '?' in url else '&'
 | 
			
		||||
            if not 'avatar_url' in url \
 | 
			
		||||
                    and not url.startswith('mail') \
 | 
			
		||||
                    and not url.startswith('post') \
 | 
			
		||||
                    and not url.startswith('get') \
 | 
			
		||||
                    and not url.startswith('delete') \
 | 
			
		||||
                    and not url.startswith('put'):
 | 
			
		||||
                url += k + f"avatar_url={APPRISE_AVATAR_URL}"
 | 
			
		||||
            if 'html' in requested_output_format:
 | 
			
		||||
                # Since the n_body is always some kind of text from the 'diff' engine, attempt to preserve whitespaces that get sent to the HTML output
 | 
			
		||||
                # But only where its more than 1 consecutive whitespace, otherwise "and this" becomes "and this" etc which is too much.
 | 
			
		||||
                n_body = n_body.replace('  ', '  ')
 | 
			
		||||
 | 
			
		||||
            if url.startswith('tgram://'):
 | 
			
		||||
                # Telegram only supports a limit subset of HTML, remove the '<br>' we place in.
 | 
			
		||||
                # re https://github.com/dgtlmoon/changedetection.io/issues/555
 | 
			
		||||
                # @todo re-use an existing library we have already imported to strip all non-allowed tags
 | 
			
		||||
                n_body = n_body.replace('<br>', '\n')
 | 
			
		||||
                n_body = n_body.replace('</br>', '\n')
 | 
			
		||||
                # real limit is 4096, but minus some for extra metadata
 | 
			
		||||
                payload_max_size = 3600
 | 
			
		||||
                body_limit = max(0, payload_max_size - len(n_title))
 | 
			
		||||
                n_title = n_title[0:payload_max_size]
 | 
			
		||||
                n_body = n_body[0:body_limit]
 | 
			
		||||
            (url, n_body, n_title) = apply_service_tweaks(url=url, n_body=n_body, n_title=n_title, requested_output_format=requested_output_format_original)
 | 
			
		||||
 | 
			
		||||
            elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith(
 | 
			
		||||
                    'https://discord.com/api'):
 | 
			
		||||
                # real limit is 2000, but minus some for extra metadata
 | 
			
		||||
                payload_max_size = 1700
 | 
			
		||||
                body_limit = max(0, payload_max_size - len(n_title))
 | 
			
		||||
                n_title = n_title[0:payload_max_size]
 | 
			
		||||
                n_body = n_body[0:body_limit]
 | 
			
		||||
            apprise_input_format = "NO-THANKS-WE-WILL-MANAGE-ALL-OF-THIS"
 | 
			
		||||
 | 
			
		||||
            elif url.startswith('mailto'):
 | 
			
		||||
                # Apprise will default to HTML, so we need to override it
 | 
			
		||||
                # So that whats' generated in n_body is in line with what is going to be sent.
 | 
			
		||||
                # https://github.com/caronc/apprise/issues/633#issuecomment-1191449321
 | 
			
		||||
                if not 'format=' in url and (n_format == 'Text' or n_format == 'Markdown'):
 | 
			
		||||
                    prefix = '?' if not '?' in url else '&'
 | 
			
		||||
                    # Apprise format is lowercase text https://github.com/caronc/apprise/issues/633
 | 
			
		||||
                    n_format = n_format.lower()
 | 
			
		||||
                    url = f"{url}{prefix}format={n_format}"
 | 
			
		||||
                # If n_format == HTML, then apprise email should default to text/html and we should be sending HTML only
 | 
			
		||||
            if not 'format=' in url:
 | 
			
		||||
                parsed_url = urlparse(url)
 | 
			
		||||
                prefix_add_to_url = '?' if not parsed_url.query else '&'
 | 
			
		||||
 | 
			
		||||
                # THIS IS THE TRICK HOW TO DISABLE APPRISE DOING WEIRD AUTO-CONVERSION WITH BREAKING BR TAGS ETC
 | 
			
		||||
                if 'html' in requested_output_format:
 | 
			
		||||
                    url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}"
 | 
			
		||||
                    apprise_input_format = NotifyFormat.HTML.value
 | 
			
		||||
                elif 'text' in requested_output_format:
 | 
			
		||||
                    url = f"{url}{prefix_add_to_url}format={NotifyFormat.TEXT.value}"
 | 
			
		||||
                    apprise_input_format = NotifyFormat.TEXT.value
 | 
			
		||||
 | 
			
		||||
                elif requested_output_format == NotifyFormat.MARKDOWN.value:
 | 
			
		||||
                    # Convert markdown to HTML ourselves since not all plugins do this
 | 
			
		||||
                    from apprise.conversion import markdown_to_html
 | 
			
		||||
                    # Make sure there are paragraph breaks around horizontal rules
 | 
			
		||||
                    n_body = n_body.replace('---', '\n\n---\n\n')
 | 
			
		||||
                    n_body = markdown_to_html(n_body)
 | 
			
		||||
                    url = f"{url}{prefix_add_to_url}format={NotifyFormat.HTML.value}"
 | 
			
		||||
                    requested_output_format = NotifyFormat.HTML.value
 | 
			
		||||
                    apprise_input_format = NotifyFormat.HTML.value  # Changed from MARKDOWN to HTML
 | 
			
		||||
 | 
			
		||||
            else:
 | 
			
		||||
                # ?format was IN the apprise URL, they are kind of on their own here, we will try our best
 | 
			
		||||
                if 'format=html' in url:
 | 
			
		||||
                    n_body = newline_re.sub('<br>\r\n', n_body)
 | 
			
		||||
                    # This will also prevent apprise from doing conversion
 | 
			
		||||
                    apprise_input_format = NotifyFormat.HTML.value
 | 
			
		||||
                    requested_output_format = NotifyFormat.HTML.value
 | 
			
		||||
                elif 'format=text' in url:
 | 
			
		||||
                    apprise_input_format = NotifyFormat.TEXT.value
 | 
			
		||||
                    requested_output_format = NotifyFormat.TEXT.value
 | 
			
		||||
 | 
			
		||||
            apobj.add(url)
 | 
			
		||||
 | 
			
		||||
            sent_objs.append({'title': n_title,
 | 
			
		||||
                              'body': n_body,
 | 
			
		||||
                              'url': url,
 | 
			
		||||
                              'body_format': n_format})
 | 
			
		||||
                              'url': url})
 | 
			
		||||
            apobj.add(url)
 | 
			
		||||
 | 
			
		||||
            # Since the output is always based on the plaintext of the 'diff' engine, wrap it nicely.
 | 
			
		||||
            # It should always be similar to the 'history' part of the UI.
 | 
			
		||||
            if url.startswith('mail') and 'html' in requested_output_format:
 | 
			
		||||
                if not '<pre' in n_body and not '<body' in n_body: # No custom HTML-ish body was setup already
 | 
			
		||||
                    n_body = as_monospaced_html_email(content=n_body, title=n_title)
 | 
			
		||||
 | 
			
		||||
        # Blast off the notifications tht are set in .add()
 | 
			
		||||
        apobj.notify(
 | 
			
		||||
            title=n_title,
 | 
			
		||||
            body=n_body,
 | 
			
		||||
            body_format=n_format,
 | 
			
		||||
            # `body_format` Tell apprise what format the INPUT is in, specify a wrong/bad type and it will force skip conversion in apprise
 | 
			
		||||
            # &format= in URL Tell apprise what format the OUTPUT should be in (it can convert between)
 | 
			
		||||
            body_format=apprise_input_format,
 | 
			
		||||
            # False is not an option for AppRise, must be type None
 | 
			
		||||
            attach=n_object.get('screenshot', None)
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        # Returns empty string if nothing found, multi-line string otherwise
 | 
			
		||||
        log_value = logs.getvalue()
 | 
			
		||||
 | 
			
		||||
        if log_value and 'WARNING' in log_value or 'ERROR' in log_value:
 | 
			
		||||
        if log_value and ('WARNING' in log_value or 'ERROR' in log_value):
 | 
			
		||||
            logger.critical(log_value)
 | 
			
		||||
            raise Exception(log_value)
 | 
			
		||||
 | 
			
		||||
@@ -143,17 +432,15 @@ def process_notification(n_object, datastore):
 | 
			
		||||
 | 
			
		||||
# Notification title + body content parameters get created here.
 | 
			
		||||
# ( Where we prepare the tokens in the notification to be replaced with actual values )
 | 
			
		||||
def create_notification_parameters(n_object, datastore):
 | 
			
		||||
    from copy import deepcopy
 | 
			
		||||
    from . import valid_tokens
 | 
			
		||||
def create_notification_parameters(n_object: NotificationContextData, datastore):
 | 
			
		||||
    if not isinstance(n_object, NotificationContextData):
 | 
			
		||||
        raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")
 | 
			
		||||
 | 
			
		||||
    # in the case we send a test notification from the main settings, there is no UUID.
 | 
			
		||||
    uuid = n_object['uuid'] if 'uuid' in n_object else ''
 | 
			
		||||
 | 
			
		||||
    if uuid:
 | 
			
		||||
        watch_title = datastore.data['watching'][uuid].get('title', '')
 | 
			
		||||
    watch = datastore.data['watching'].get(n_object['uuid'])
 | 
			
		||||
    if watch:
 | 
			
		||||
        watch_title = datastore.data['watching'][n_object['uuid']].label
 | 
			
		||||
        tag_list = []
 | 
			
		||||
        tags = datastore.get_all_tags_for_watch(uuid)
 | 
			
		||||
        tags = datastore.get_all_tags_for_watch(n_object['uuid'])
 | 
			
		||||
        if tags:
 | 
			
		||||
            for tag_uuid, tag in tags.items():
 | 
			
		||||
                tag_list.append(tag.get('title'))
 | 
			
		||||
@@ -168,14 +455,10 @@ def create_notification_parameters(n_object, datastore):
 | 
			
		||||
 | 
			
		||||
    watch_url = n_object['watch_url']
 | 
			
		||||
 | 
			
		||||
    diff_url = "{}/diff/{}".format(base_url, uuid)
 | 
			
		||||
    preview_url = "{}/preview/{}".format(base_url, uuid)
 | 
			
		||||
    diff_url = "{}/diff/{}".format(base_url, n_object['uuid'])
 | 
			
		||||
    preview_url = "{}/preview/{}".format(base_url, n_object['uuid'])
 | 
			
		||||
 | 
			
		||||
    # Not sure deepcopy is needed here, but why not
 | 
			
		||||
    tokens = deepcopy(valid_tokens)
 | 
			
		||||
 | 
			
		||||
    # Valid_tokens also used as a field validator
 | 
			
		||||
    tokens.update(
 | 
			
		||||
    n_object.update(
 | 
			
		||||
        {
 | 
			
		||||
            'base_url': base_url,
 | 
			
		||||
            'diff_url': diff_url,
 | 
			
		||||
@@ -183,13 +466,10 @@ def create_notification_parameters(n_object, datastore):
 | 
			
		||||
            'watch_tag': watch_tag if watch_tag is not None else '',
 | 
			
		||||
            'watch_title': watch_title if watch_title is not None else '',
 | 
			
		||||
            'watch_url': watch_url,
 | 
			
		||||
            'watch_uuid': uuid,
 | 
			
		||||
            'watch_uuid': n_object['uuid'],
 | 
			
		||||
        })
 | 
			
		||||
 | 
			
		||||
    # n_object will contain diff, diff_added etc etc
 | 
			
		||||
    tokens.update(n_object)
 | 
			
		||||
    if watch:
 | 
			
		||||
        n_object.update(datastore.data['watching'].get(n_object['uuid']).extra_notification_token_values())
 | 
			
		||||
 | 
			
		||||
    if uuid:
 | 
			
		||||
        tokens.update(datastore.data['watching'].get(uuid).extra_notification_token_values())
 | 
			
		||||
 | 
			
		||||
    return tokens
 | 
			
		||||
    return n_object
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										346
									
								
								changedetectionio/notification_service.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										346
									
								
								changedetectionio/notification_service.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,346 @@
 | 
			
		||||
#!/usr/bin/env python3
 | 
			
		||||
 | 
			
		||||
"""
 | 
			
		||||
Notification Service Module
 | 
			
		||||
Extracted from update_worker.py to provide standalone notification functionality
 | 
			
		||||
for both sync and async workers
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from loguru import logger
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from changedetectionio.notification import default_notification_format, valid_notification_formats
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# What is passed around as notification context, also used as the complete list of valid {{ tokens }}
class NotificationContextData(dict):
    """Dict subclass that carries everything one notification needs.

    The keys seeded in ``__init__`` double as the authoritative list of
    ``{{ token }}`` placeholders that may appear in notification titles and
    bodies, so templates always find every key present (possibly ``None``).

    ``notification_format`` values are validated against
    ``valid_notification_formats`` both at construction time and on direct
    item assignment (see ``__setitem__``).
    """

    def __init__(self, initial_data=None, **kwargs):
        # Seed every known token with a placeholder/None default.
        super().__init__({
            'base_url': None,
            'current_snapshot': None,
            'diff': None,
            'diff_clean': None,
            'diff_added': None,
            'diff_added_clean': None,
            'diff_full': None,
            'diff_full_clean': None,
            'diff_patch': None,
            'diff_removed': None,
            'diff_removed_clean': None,
            'diff_url': None,
            'markup_text_links_to_html_links': False, # If automatic conversion of plaintext to HTML should happen
            'notification_timestamp': time.time(),
            'preview_url': None,
            'screenshot': None,
            'triggered_text': None,
            'uuid': 'XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX',  # Converted to 'watch_uuid' in create_notification_parameters
            'watch_mime_type': None,
            'watch_tag': None,
            'watch_title': None,
            'watch_url': 'https://WATCH-PLACE-HOLDER/',
        })

        # Apply any initial data passed in
        # NOTE(review): dict.update() does NOT route through the overridden
        # __setitem__ below, so values merged here skip the per-key format
        # check; the explicit n_format validation at the end of __init__
        # covers the construction path — confirm callers rely on that.
        self.update({'watch_uuid': self.get('uuid')})
        if initial_data:
            self.update(initial_data)

        # Apply any keyword arguments
        if kwargs:
            self.update(kwargs)

        # Validate the merged notification_format (if one was supplied).
        n_format = self.get('notification_format')
        if n_format and not valid_notification_formats.get(n_format):
            raise ValueError(f'Invalid notification format: "{n_format}"')

    def set_random_for_validation(self):
        import random, string
        # NOTE: the string below is a no-op expression, not a real docstring,
        # because it follows the import statement; kept as-is.
        """Randomly fills all dict keys with random strings (for validation/testing). 
        So we can test the output in the notification body
        """
        # Overwrite every token (except identifiers) with a recognisable
        # random marker so rendered output can be checked end-to-end.
        for key in self.keys():
            if key in ['uuid', 'time', 'watch_uuid']:
                continue
            rand_str = 'RANDOM-PLACEHOLDER-'+''.join(random.choices(string.ascii_letters + string.digits, k=12))
            self[key] = rand_str

    def __setitem__(self, key, value):
        # Guard direct assignments of 'notification_format'; the
        # 'RANDOM-PLACEHOLDER-' prefix is exempt so set_random_for_validation()
        # can fill the key without tripping the check.
        if key == 'notification_format' and isinstance(value, str) and not value.startswith('RANDOM-PLACEHOLDER-'):
            if not valid_notification_formats.get(value):
                raise ValueError(f'Invalid notification format: "{value}"')

        super().__setitem__(key, value)
 | 
			
		||||
 | 
			
		||||
def set_basic_notification_vars(snapshot_contents, current_snapshot, prev_snapshot, watch, triggered_text):
    """Assemble the baseline notification token values for one change event.

    Renders every diff flavour (added / removed / full / patch, each with and
    without change-type prefixes) between *prev_snapshot* and
    *current_snapshot*, then merges in per-watch metadata and any extra
    tokens the watch model exposes.

    Returns a plain dict suitable for NotificationContextData.update().
    """
    now = time.time()
    from changedetectionio import diff

    def _render(**render_options):
        # Every variant diffs the same two snapshots; only the flags change.
        return diff.render_diff(prev_snapshot, current_snapshot, **render_options)

    n_object = {
        'current_snapshot': snapshot_contents,
        'diff': _render(),
        'diff_clean': _render(include_change_type_prefix=False),
        'diff_added': _render(include_removed=False),
        'diff_added_clean': _render(include_removed=False, include_change_type_prefix=False),
        'diff_full': _render(include_equal=True),
        'diff_full_clean': _render(include_equal=True, include_change_type_prefix=False),
        'diff_patch': _render(patch_format=True),
        'diff_removed': _render(include_added=False),
        'diff_removed_clean': _render(include_added=False, include_change_type_prefix=False),
        # Screenshot attachment is opt-in per watch.
        'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None,
        'triggered_text': triggered_text,
        'uuid': watch.get('uuid') if watch else None,
        'watch_url': watch.get('url') if watch else None,
        'watch_uuid': watch.get('uuid') if watch else None,
        'watch_mime_type': watch.get('content-type')
    }

    # The \n's in the content from the above will get converted to <br> etc depending on the notification format

    if watch:
        # Watch models can contribute additional template tokens.
        n_object.update(watch.extra_notification_token_values())

    logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time() - now:.3f}s")
    return n_object
 | 
			
		||||
 | 
			
		||||
class NotificationService:
    """
    Standalone notification service that handles all notification functionality
    previously embedded in the update_worker class.

    Resolves notification settings in cascading priority (watch > tag >
    global), renders the notification context and pushes the resulting
    NotificationContextData onto the shared notification queue.
    """

    def __init__(self, datastore, notification_q):
        # Shared application datastore (watch configs, tags, global settings).
        self.datastore = datastore
        # Queue consumed by the notification worker; receives NotificationContextData dicts.
        self.notification_q = notification_q

    def queue_notification_for_watch(self, n_object: NotificationContextData, watch):
        """
        Queue a notification for a watch with full diff rendering and template variables.

        Fills `n_object` with snapshot/diff tokens (via set_basic_notification_vars)
        and puts it on the notification queue. Raises TypeError if `n_object`
        is not a NotificationContextData.
        """
        from changedetectionio.notification import USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH

        if not isinstance(n_object, NotificationContextData):
            raise TypeError(f"Expected NotificationContextData, got {type(n_object)}")

        dates = []
        trigger_text = ''

        if watch:
            watch_history = watch.history
            dates = list(watch_history.keys())
            trigger_text = watch.get('trigger_text', [])

        # Add text that was triggered
        if len(dates):
            snapshot_contents = watch.get_history_snapshot(dates[-1])
        else:
            snapshot_contents = "No snapshot/history available, the watch should fetch atleast once."

        # If we ended up here with "System default", resolve it to the concrete
        # application-wide format before rendering.
        if n_object.get('notification_format') == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
            n_object['notification_format'] = self.datastore.data['settings']['application'].get('notification_format')

        # Collect any watch 'trigger_text' lines that matched the latest snapshot.
        triggered_text = ''
        if len(trigger_text):
            from . import html_tools
            triggered_text = html_tools.get_triggered_text(content=snapshot_contents, trigger_text=trigger_text)
            if triggered_text:
                triggered_text = '\n'.join(triggered_text)

        # Could be called as a 'test notification' with only 1 snapshot available
        # — in that case use fixed example snapshots so a diff can still be rendered.
        prev_snapshot = "Example text: example test\nExample text: change detection is cool\nExample text: some more examples\n"
        current_snapshot = "Example text: example test\nExample text: change detection is fantastic\nExample text: even more examples\nExample text: a lot more examples"

        if len(dates) > 1:
            prev_snapshot = watch.get_history_snapshot(dates[-2])
            current_snapshot = watch.get_history_snapshot(dates[-1])

        n_object.update(set_basic_notification_vars(snapshot_contents=snapshot_contents,
                                                    current_snapshot=current_snapshot,
                                                    prev_snapshot=prev_snapshot,
                                                    watch=watch,
                                                    triggered_text=triggered_text))

        logger.debug("Queued notification for sending")
        self.notification_q.put(n_object)

    def _check_cascading_vars(self, var_name, watch):
        """
        Check notification variables in cascading priority:
        Individual watch settings > Tag settings > Global settings.

        Returns the first non-empty, non-muted value found, falling back to
        built-in defaults for 'notification_format' / 'notification_body' /
        'notification_title', else None.
        """
        from changedetectionio.notification import (
            USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH,
            default_notification_body,
            default_notification_title
        )

        # Would be better if this was some kind of Object where Watch can reference the parent datastore etc
        v = watch.get(var_name)
        if v and not watch.get('notification_muted'):
            # A watch set to "system default" format resolves to the global format.
            if var_name == 'notification_format' and v == USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH:
                return self.datastore.data['settings']['application'].get('notification_format')

            return v

        # Next priority: any non-muted tag attached to the watch.
        tags = self.datastore.get_all_tags_for_watch(uuid=watch.get('uuid'))
        if tags:
            for tag_uuid, tag in tags.items():
                v = tag.get(var_name)
                if v and not tag.get('notification_muted'):
                    return v

        # Then the global application setting.
        if self.datastore.data['settings']['application'].get(var_name):
            return self.datastore.data['settings']['application'].get(var_name)

        # Otherwise could be defaults
        if var_name == 'notification_format':
            return USE_SYSTEM_DEFAULT_NOTIFICATION_FORMAT_FOR_WATCH
        if var_name == 'notification_body':
            return default_notification_body
        if var_name == 'notification_title':
            return default_notification_title

        return None

    def send_content_changed_notification(self, watch_uuid):
        """
        Send notification when content changes are detected.

        Returns True if a notification was queued, False/None otherwise.
        Raises ValueError when the watch history collapsed to a single entry
        (non-unique timestamps).
        """
        n_object = NotificationContextData()
        watch = self.datastore.data['watching'].get(watch_uuid)
        if not watch:
            return

        watch_history = watch.history
        dates = list(watch_history.keys())
        # Theoretically it's possible that this could be just 1 long,
        # - In the case that the timestamp key was not unique
        if len(dates) == 1:
            raise ValueError(
                "History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?"
            )

        # Should be a better parent getter in the model object

        # Prefer - Individual watch settings > Tag settings >  Global settings (in that order)
        n_object['notification_urls'] = self._check_cascading_vars('notification_urls', watch)
        n_object['notification_title'] = self._check_cascading_vars('notification_title', watch)
        n_object['notification_body'] = self._check_cascading_vars('notification_body', watch)
        n_object['notification_format'] = self._check_cascading_vars('notification_format', watch)

        # (Individual watch) Only prepare to notify if the rules above matched
        queued = False
        if n_object and n_object.get('notification_urls'):
            queued = True

            # Track how many alerts this watch has fired.
            count = watch.get('notification_alert_count', 0) + 1
            self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count})

            self.queue_notification_for_watch(n_object=n_object, watch=watch)

        return queued

    def send_filter_failure_notification(self, watch_uuid):
        """
        Send notification when CSS/XPath filters fail consecutively.

        The body keeps {{ }} Jinja2 tokens (written as {{{{ }}}} in the
        f-string) so they are substituted later in the notification pipeline.
        """
        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')
        watch = self.datastore.data['watching'].get(watch_uuid)
        if not watch:
            return

        filter_list = ", ".join(watch['include_filters'])
        # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed
        body = f"""Hello,

Your configured CSS/xPath filters of '{filter_list}' for {{{{watch_url}}}} did not appear on the page after {threshold} attempts.

It's possible the page changed layout and the filter needs updating ( Try the 'Visual Selector' tab )

Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}

Thanks - Your omniscient changedetection.io installation.
"""

        n_object = NotificationContextData({
            'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
            'notification_body': body,
            'notification_format': self._check_cascading_vars('notification_format', watch),
        })
        # Plaintext body: let the pipeline turn bare links into <a> tags for HTML formats.
        n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')

        # Watch-level URLs win over the global application URLs.
        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']

        elif len(self.datastore.data['settings']['application']['notification_urls']):
            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']

        # Only prepare to notify if the rules above matched
        if 'notification_urls' in n_object:
            n_object.update({
                'watch_url': watch['url'],
                'uuid': watch_uuid,
                'screenshot': None
            })
            self.notification_q.put(n_object)
            logger.debug(f"Sent filter not found notification for {watch_uuid}")
        else:
            logger.debug(f"NOT sending filter not found notification for {watch_uuid} - no notification URLs")

    def send_step_failure_notification(self, watch_uuid, step_n):
        """
        Send notification when browser steps fail consecutively.

        `step_n` is zero-based; the human-readable position in the message is
        `step_n + 1`.
        """
        watch = self.datastore.data['watching'].get(watch_uuid, False)
        if not watch:
            return
        threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts')

        step = step_n + 1
        # @todo - This could be a markdown template on the disk, apprise will convert the markdown to HTML+Plaintext parts in the email, and then 'markup_text_links_to_html_links' is not needed

        # {{{{ }}}} because this will be Jinja2 {{ }} tokens
        body = f"""Hello,
        
Your configured browser step at position {step} for the web page watch {{{{watch_url}}}} did not appear on the page after {threshold} attempts, did the page change layout?

The element may have moved and needs editing, or does it need a delay added?

Edit link: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}

Thanks - Your omniscient changedetection.io installation.
"""

        n_object = NotificationContextData({
            'notification_title': f"Changedetection.io - Alert - Browser step at position {step} could not be run",
            'notification_body': body,
            'notification_format': self._check_cascading_vars('notification_format', watch),
        })
        # Plaintext body: let the pipeline turn bare links into <a> tags for HTML formats.
        n_object['markup_text_links_to_html_links'] = n_object.get('notification_format').startswith('html')

        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']

        elif len(self.datastore.data['settings']['application']['notification_urls']):
            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']

        # Only prepare to notify if the rules above matched
        if 'notification_urls' in n_object:
            n_object.update({
                'watch_url': watch['url'],
                'uuid': watch_uuid
            })
            self.notification_q.put(n_object)
            # NOTE(review): this is a success-path message logged at ERROR level
            # (the sibling method logs at DEBUG) — confirm whether intentional.
            logger.error(f"Sent step not found notification for {watch_uuid}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Convenience functions for creating notification service instances
def create_notification_service(datastore, notification_q):
    """Factory helper: build a NotificationService bound to the given
    datastore and notification queue."""
    service = NotificationService(datastore, notification_q)
    return service
 | 
			
		||||
							
								
								
									
										82
									
								
								changedetectionio/pluggy_interface.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								changedetectionio/pluggy_interface.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,82 @@
 | 
			
		||||
import pluggy
 | 
			
		||||
import os
 | 
			
		||||
import importlib
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
# Global plugin namespace for changedetection.io
# All hookspecs/hookimpls and setuptools entry points must use this name.
PLUGIN_NAMESPACE = "changedetectionio"

# Decorators used to declare hook specifications (here) and hook
# implementations (in plugin modules) within the namespace above.
hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE)
hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ChangeDetectionSpec:
    """Hook specifications for extending changedetection.io functionality.

    Methods here are pluggy signature declarations only (never called
    directly); plugins provide implementations via the @hookimpl marker.
    """

    @hookspec
    def ui_edit_stats_extras(watch):
        """Return HTML content to add to the stats tab in the edit view.
        
        Args:
            watch: The watch object being edited
            
        Returns:
            str: HTML content to be inserted in the stats tab
        """
        pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Set up Plugin Manager — the single shared pluggy manager for this process.
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)

# Register hookspecs so implementations can be validated against them.
plugin_manager.add_hookspecs(ChangeDetectionSpec)
 | 
			
		||||
 | 
			
		||||
# Load plugins from subdirectories
def load_plugins_from_directories():
    """Scan the bundled plugin directories and register each module with pluggy.

    Only top-level ``*.py`` files (excluding ``__init__.py``) are considered.
    A plugin that fails to import is reported and skipped — plugin errors
    must never abort application startup.

    Fixes over the original:
    - errors go to stderr instead of stdout (``sys`` was already imported);
    - directory entries are sorted so registration order is deterministic
      (``os.listdir`` order is filesystem-dependent).
    """
    # Dictionary of directories to scan for plugins:
    # package sub-directory name -> absolute path of its plugins folder.
    plugin_dirs = {
        'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'),
        # Add more plugin directories here as needed
    }

    for dir_name, dir_path in plugin_dirs.items():
        if not os.path.exists(dir_path):
            continue

        # Get all Python files (excluding __init__.py), in a stable order.
        for filename in sorted(os.listdir(dir_path)):
            if filename.endswith(".py") and filename != "__init__.py":
                module_name = filename[:-3]  # Remove .py extension
                module_path = f"changedetectionio.{dir_name}.plugins.{module_name}"

                try:
                    module = importlib.import_module(module_path)
                    # Register the plugin with pluggy
                    plugin_manager.register(module, module_name)
                except (ImportError, AttributeError) as e:
                    # Report on stderr so the message is not lost in normal output.
                    print(f"Error loading plugin {module_name}: {e}", file=sys.stderr)
 | 
			
		||||
 | 
			
		||||
# Load plugins bundled with the package (executed at import time).
load_plugins_from_directories()

# Discover installed plugins from external packages (if any) that declare
# a setuptools entry point under the PLUGIN_NAMESPACE group.
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
 | 
			
		||||
 | 
			
		||||
# Helper function to collect UI stats extras from all plugins
def collect_ui_edit_stats_extras(watch):
    """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
    # Fan out to every registered implementation of the hook; pluggy returns
    # one result per implementing plugin.
    hook_results = plugin_manager.hook.ui_edit_stats_extras(watch=watch)

    # Keep only non-empty fragments.
    fragments = [fragment for fragment in (hook_results or []) if fragment]

    # Joined with newlines; empty string when no plugin contributed anything.
    return "\n".join(fragments) if fragments else ""
 | 
			
		||||
@@ -27,7 +27,7 @@ class difference_detection_processor():
 | 
			
		||||
        # Generic fetcher that should be extended (requests, playwright etc)
 | 
			
		||||
        self.fetcher = Fetcher()
 | 
			
		||||
 | 
			
		||||
    def call_browser(self, preferred_proxy_id=None):
 | 
			
		||||
    async def call_browser(self, preferred_proxy_id=None):
 | 
			
		||||
 | 
			
		||||
        from requests.structures import CaseInsensitiveDict
 | 
			
		||||
 | 
			
		||||
@@ -89,7 +89,9 @@ class difference_detection_processor():
 | 
			
		||||
                proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
 | 
			
		||||
                logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
 | 
			
		||||
            else:
 | 
			
		||||
                logger.debug(f"Skipping adding proxy data when custom Browser endpoint is specified. ")
 | 
			
		||||
                logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
 | 
			
		||||
 | 
			
		||||
        logger.debug(f"Using proxy '{proxy_url}' for {self.watch['uuid']}")
 | 
			
		||||
 | 
			
		||||
        # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
 | 
			
		||||
        # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)
 | 
			
		||||
@@ -102,7 +104,7 @@ class difference_detection_processor():
 | 
			
		||||
            self.fetcher.browser_steps_screenshot_path = os.path.join(self.datastore.datastore_path, self.watch.get('uuid'))
 | 
			
		||||
 | 
			
		||||
        # Tweak the base config with the per-watch ones
 | 
			
		||||
        from changedetectionio.safe_jinja import render as jinja_render
 | 
			
		||||
        from changedetectionio.jinja2_custom import render as jinja_render
 | 
			
		||||
        request_headers = CaseInsensitiveDict()
 | 
			
		||||
 | 
			
		||||
        ua = self.datastore.data['settings']['requests'].get('default_ua')
 | 
			
		||||
@@ -146,17 +148,19 @@ class difference_detection_processor():
 | 
			
		||||
 | 
			
		||||
        # And here we go! call the right browser with browser-specific settings
 | 
			
		||||
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
 | 
			
		||||
 | 
			
		||||
        self.fetcher.run(url=url,
 | 
			
		||||
                         timeout=timeout,
 | 
			
		||||
                         request_headers=request_headers,
 | 
			
		||||
                         request_body=request_body,
 | 
			
		||||
                         request_method=request_method,
 | 
			
		||||
                         ignore_status_codes=ignore_status_codes,
 | 
			
		||||
                         current_include_filters=self.watch.get('include_filters'),
 | 
			
		||||
                         is_binary=is_binary,
 | 
			
		||||
                         empty_pages_are_a_change=empty_pages_are_a_change
 | 
			
		||||
                         )
 | 
			
		||||
        # All fetchers are now async
 | 
			
		||||
        await self.fetcher.run(
 | 
			
		||||
            current_include_filters=self.watch.get('include_filters'),
 | 
			
		||||
            empty_pages_are_a_change=empty_pages_are_a_change,
 | 
			
		||||
            fetch_favicon=self.watch.favicon_is_expired(),
 | 
			
		||||
            ignore_status_codes=ignore_status_codes,
 | 
			
		||||
            is_binary=is_binary,
 | 
			
		||||
            request_body=request_body,
 | 
			
		||||
            request_headers=request_headers,
 | 
			
		||||
            request_method=request_method,
 | 
			
		||||
            timeout=timeout,
 | 
			
		||||
            url=url,
 | 
			
		||||
       )
 | 
			
		||||
 | 
			
		||||
        #@todo .quit here could go on close object, so we can run JS if change-detected
 | 
			
		||||
        self.fetcher.quit(watch=self.watch)
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										133
									
								
								changedetectionio/processors/magic.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										133
									
								
								changedetectionio/processors/magic.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,133 @@
 | 
			
		||||
"""
 | 
			
		||||
Content Type Detection and Stream Classification
 | 
			
		||||
 | 
			
		||||
This module provides intelligent content-type detection for changedetection.io.
 | 
			
		||||
It addresses the common problem where HTTP Content-Type headers are missing, incorrect,
 | 
			
		||||
or too generic, which would otherwise cause the wrong processor to be used.
 | 
			
		||||
 | 
			
		||||
The guess_stream_type class combines:
 | 
			
		||||
1. HTTP Content-Type headers (when available and reliable)
 | 
			
		||||
2. Python-magic library for MIME detection (analyzing actual file content)
 | 
			
		||||
3. Content-based pattern matching for text formats (HTML tags, XML declarations, etc.)
 | 
			
		||||
 | 
			
		||||
This multi-layered approach ensures accurate detection of RSS feeds, JSON, HTML, PDF,
 | 
			
		||||
plain text, CSV, YAML, and XML formats - even when servers provide misleading headers.
 | 
			
		||||
 | 
			
		||||
Used by: processors/text_json_diff/processor.py and other content processors
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
# When to apply the 'cdata to real HTML' hack
 | 
			
		||||
RSS_XML_CONTENT_TYPES = [
 | 
			
		||||
    "application/rss+xml",
 | 
			
		||||
    "application/rdf+xml",
 | 
			
		||||
    "application/atom+xml",
 | 
			
		||||
    "text/rss+xml",  # rare, non-standard
 | 
			
		||||
    "application/x-rss+xml",  # legacy (older feed software)
 | 
			
		||||
    "application/x-atom+xml",  # legacy (older Atom)
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
# JSON Content-types
 | 
			
		||||
JSON_CONTENT_TYPES = [
 | 
			
		||||
    "application/activity+json",
 | 
			
		||||
    "application/feed+json",
 | 
			
		||||
    "application/json",
 | 
			
		||||
    "application/ld+json",
 | 
			
		||||
    "application/vnd.api+json",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Generic XML Content-types (non-RSS/Atom)
 | 
			
		||||
XML_CONTENT_TYPES = [
 | 
			
		||||
    "text/xml",
 | 
			
		||||
    "application/xml",
 | 
			
		||||
]
 | 
			
		||||
 | 
			
		||||
HTML_PATTERNS = ['<!doctype html', '<html', '<head', '<body', '<script', '<iframe', '<div']
 | 
			
		||||
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
class guess_stream_type():
 | 
			
		||||
    is_pdf = False
 | 
			
		||||
    is_json = False
 | 
			
		||||
    is_html = False
 | 
			
		||||
    is_plaintext = False
 | 
			
		||||
    is_rss = False
 | 
			
		||||
    is_csv = False
 | 
			
		||||
    is_xml = False  # Generic XML, not RSS/Atom
 | 
			
		||||
    is_yaml = False
 | 
			
		||||
 | 
			
		||||
    def __init__(self, http_content_header, content):
 | 
			
		||||
        import re
 | 
			
		||||
        magic_content_header = http_content_header
 | 
			
		||||
        test_content = content[:200].lower().strip()
 | 
			
		||||
 | 
			
		||||
        # Remove whitespace between < and tag name for robust detection (handles '< html', '<\nhtml', etc.)
 | 
			
		||||
        test_content_normalized = re.sub(r'<\s+', '<', test_content)
 | 
			
		||||
 | 
			
		||||
        # Use puremagic for lightweight MIME detection (saves ~14MB vs python-magic)
 | 
			
		||||
        magic_result = None
 | 
			
		||||
        try:
 | 
			
		||||
            import puremagic
 | 
			
		||||
 | 
			
		||||
            # puremagic needs bytes, so encode if we have a string
 | 
			
		||||
            content_bytes = content[:200].encode('utf-8') if isinstance(content, str) else content[:200]
 | 
			
		||||
 | 
			
		||||
            # puremagic returns a list of PureMagic objects with confidence scores
 | 
			
		||||
            detections = puremagic.magic_string(content_bytes)
 | 
			
		||||
            if detections:
 | 
			
		||||
                # Get the highest confidence detection
 | 
			
		||||
                mime = detections[0].mime_type
 | 
			
		||||
                logger.debug(f"Guessing mime type, original content_type '{http_content_header}', mime type detected '{mime}'")
 | 
			
		||||
                if mime and "/" in mime:
 | 
			
		||||
                    magic_result = mime
 | 
			
		||||
                    # Ignore generic/fallback mime types
 | 
			
		||||
                    if mime in ['application/octet-stream', 'application/x-empty', 'binary']:
 | 
			
		||||
                        logger.debug(f"Ignoring generic mime type '{mime}' from puremagic library")
 | 
			
		||||
                    # Trust puremagic for non-text types immediately
 | 
			
		||||
                    elif mime not in ['text/html', 'text/plain']:
 | 
			
		||||
                        magic_content_header = mime
 | 
			
		||||
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logger.warning(f"Error getting a more precise mime type from 'puremagic' library ({str(e)}), using content-based detection")
 | 
			
		||||
 | 
			
		||||
        # Content-based detection (most reliable for text formats)
 | 
			
		||||
        # Check for HTML patterns first - if found, override magic's text/plain
 | 
			
		||||
        has_html_patterns = any(p in test_content_normalized for p in HTML_PATTERNS)
 | 
			
		||||
 | 
			
		||||
        # Always trust headers first
 | 
			
		||||
        if 'text/plain' in http_content_header:
 | 
			
		||||
            self.is_plaintext = True
 | 
			
		||||
        if any(s in http_content_header for s in RSS_XML_CONTENT_TYPES):
 | 
			
		||||
            self.is_rss = True
 | 
			
		||||
        elif any(s in http_content_header for s in JSON_CONTENT_TYPES):
 | 
			
		||||
            self.is_json = True
 | 
			
		||||
        elif 'pdf' in magic_content_header:
 | 
			
		||||
            self.is_pdf = True
 | 
			
		||||
        elif has_html_patterns or http_content_header == 'text/html':
 | 
			
		||||
            self.is_html = True
 | 
			
		||||
        elif any(s in magic_content_header for s in JSON_CONTENT_TYPES):
 | 
			
		||||
            self.is_json = True
 | 
			
		||||
        # magic will call a rss document 'xml'
 | 
			
		||||
        # Rarely do endpoints give the right header, usually just text/xml, so we check also for <rss
 | 
			
		||||
        # This also triggers the automatic CDATA text parser so the RSS goes back a nice content list
 | 
			
		||||
        elif '<rss' in test_content_normalized or '<feed' in test_content_normalized or any(s in magic_content_header for s in RSS_XML_CONTENT_TYPES) or '<rdf:' in test_content_normalized:
 | 
			
		||||
            self.is_rss = True
 | 
			
		||||
        elif any(s in http_content_header for s in XML_CONTENT_TYPES):
 | 
			
		||||
            # Only mark as generic XML if not already detected as RSS
 | 
			
		||||
            if not self.is_rss:
 | 
			
		||||
                self.is_xml = True
 | 
			
		||||
        elif test_content_normalized.startswith('<?xml') or any(s in magic_content_header for s in XML_CONTENT_TYPES):
 | 
			
		||||
            # Generic XML that's not RSS/Atom (RSS/Atom checked above)
 | 
			
		||||
            self.is_xml = True
 | 
			
		||||
        elif '%pdf-1' in test_content:
 | 
			
		||||
            self.is_pdf = True
 | 
			
		||||
        elif http_content_header.startswith('text/'):
 | 
			
		||||
            self.is_plaintext = True
 | 
			
		||||
        # Only trust magic for 'text' if no other patterns matched
 | 
			
		||||
        elif 'text' in magic_content_header:
 | 
			
		||||
            self.is_plaintext = True
 | 
			
		||||
        # If magic says text/plain and we found no HTML patterns, trust it
 | 
			
		||||
        elif magic_result == 'text/plain':
 | 
			
		||||
            self.is_plaintext = True
 | 
			
		||||
            logger.debug(f"Trusting magic's text/plain result (no HTML patterns detected)")
 | 
			
		||||
 | 
			
		||||
@@ -7,7 +7,7 @@ import urllib3
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 | 
			
		||||
name = 'Re-stock & Price detection for single product pages'
 | 
			
		||||
name = 'Re-stock & Price detection for pages with a SINGLE product'
 | 
			
		||||
description = 'Detects if the product goes back to in-stock'
 | 
			
		||||
 | 
			
		||||
class UnableToExtractRestockData(Exception):
 | 
			
		||||
@@ -79,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock:
 | 
			
		||||
    # First phase, dead simple scanning of anything that looks useful
 | 
			
		||||
    value = Restock()
 | 
			
		||||
    if data:
 | 
			
		||||
        logger.debug(f"Using jsonpath to find price/availability/etc")
 | 
			
		||||
        logger.debug("Using jsonpath to find price/availability/etc")
 | 
			
		||||
        price_parse = parse('$..(price|Price)')
 | 
			
		||||
        pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
 | 
			
		||||
        availability_parse = parse('$..(availability|Availability)')
 | 
			
		||||
@@ -110,7 +110,7 @@ def get_itemprop_availability(html_content) -> Restock:
 | 
			
		||||
 | 
			
		||||
        # Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:)
 | 
			
		||||
        if not value.get('price') or value.get('availability'):
 | 
			
		||||
            logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..")
 | 
			
		||||
            logger.debug("Alternatively digging through OpenGraph properties for restock/price info..")
 | 
			
		||||
            jsonpath_expr = parse('$..properties')
 | 
			
		||||
 | 
			
		||||
            for match in jsonpath_expr.find(data):
 | 
			
		||||
 
 | 
			
		||||
@@ -15,7 +15,7 @@ def _task(watch, update_handler):
 | 
			
		||||
    except FilterNotFoundInResponse as e:
 | 
			
		||||
        text_after_filter = f"Filter not found in HTML: {str(e)}"
 | 
			
		||||
    except ReplyWithContentButNoText as e:
 | 
			
		||||
        text_after_filter = f"Filter found but no text (empty result)"
 | 
			
		||||
        text_after_filter = "Filter found but no text (empty result)"
 | 
			
		||||
    except Exception as e:
 | 
			
		||||
        text_after_filter = f"Error: {str(e)}"
 | 
			
		||||
 | 
			
		||||
@@ -32,7 +32,7 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
 | 
			
		||||
    '''Used by @app.route("/edit/<string:uuid>/preview-rendered", methods=['POST'])'''
 | 
			
		||||
    from changedetectionio import forms, html_tools
 | 
			
		||||
    from changedetectionio.model.Watch import model as watch_model
 | 
			
		||||
    from concurrent.futures import ProcessPoolExecutor
 | 
			
		||||
    from concurrent.futures import ThreadPoolExecutor
 | 
			
		||||
    from copy import deepcopy
 | 
			
		||||
    from flask import request
 | 
			
		||||
    import brotli
 | 
			
		||||
@@ -76,13 +76,16 @@ def prepare_filter_prevew(datastore, watch_uuid, form_data):
 | 
			
		||||
            update_handler.fetcher.headers['content-type'] = tmp_watch.get('content-type')
 | 
			
		||||
 | 
			
		||||
            # Process our watch with filters and the HTML from disk, and also a blank watch with no filters but also with the same HTML from disk
 | 
			
		||||
            # Do this as a parallel process because it could take some time
 | 
			
		||||
            with ProcessPoolExecutor(max_workers=2) as executor:
 | 
			
		||||
                future1 = executor.submit(_task, tmp_watch, update_handler)
 | 
			
		||||
                future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
 | 
			
		||||
            # Do this as parallel threads (not processes) to avoid pickle issues with Lock objects
 | 
			
		||||
            try:
 | 
			
		||||
                with ThreadPoolExecutor(max_workers=2) as executor:
 | 
			
		||||
                    future1 = executor.submit(_task, tmp_watch, update_handler)
 | 
			
		||||
                    future2 = executor.submit(_task, blank_watch_no_filters, update_handler)
 | 
			
		||||
 | 
			
		||||
                text_after_filter = future1.result()
 | 
			
		||||
                text_before_filter = future2.result()
 | 
			
		||||
                    text_after_filter = future1.result()
 | 
			
		||||
                    text_before_filter = future2.result()
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                x=1
 | 
			
		||||
 | 
			
		||||
    try:
 | 
			
		||||
        trigger_line_numbers = html_tools.strip_ignore_text(content=text_after_filter,
 | 
			
		||||
 
 | 
			
		||||
@@ -7,18 +7,24 @@ import re
 | 
			
		||||
import urllib3
 | 
			
		||||
 | 
			
		||||
from changedetectionio.conditions import execute_ruleset_against_all_plugins
 | 
			
		||||
from changedetectionio.diff import ADDED_PLACEMARKER_OPEN
 | 
			
		||||
from changedetectionio.processors import difference_detection_processor
 | 
			
		||||
from changedetectionio.html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text, TRANSLATE_WHITESPACE_TABLE
 | 
			
		||||
from changedetectionio import html_tools, content_fetchers
 | 
			
		||||
from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT
 | 
			
		||||
from loguru import logger
 | 
			
		||||
 | 
			
		||||
from changedetectionio.processors.magic import guess_stream_type
 | 
			
		||||
 | 
			
		||||
urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 | 
			
		||||
 | 
			
		||||
name = 'Webpage Text/HTML, JSON and PDF changes'
 | 
			
		||||
description = 'Detects all text changes where possible'
 | 
			
		||||
 | 
			
		||||
json_filter_prefixes = ['json:', 'jq:', 'jqraw:']
 | 
			
		||||
JSON_FILTER_PREFIXES = ['json:', 'jq:', 'jqraw:']
 | 
			
		||||
 | 
			
		||||
# Assume it's this type if the server says nothing on content-type
 | 
			
		||||
DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER = 'text/html'
 | 
			
		||||
 | 
			
		||||
class FilterNotFoundInResponse(ValueError):
 | 
			
		||||
    def __init__(self, msg, screenshot=None, xpath_data=None):
 | 
			
		||||
@@ -32,356 +38,560 @@ class PDFToHTMLToolNotFound(ValueError):
 | 
			
		||||
        ValueError.__init__(self, msg)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class FilterConfig:
 | 
			
		||||
    """Consolidates all filter and rule configurations from watch, tags, and global settings."""
 | 
			
		||||
 | 
			
		||||
    def __init__(self, watch, datastore):
 | 
			
		||||
        self.watch = watch
 | 
			
		||||
        self.datastore = datastore
 | 
			
		||||
        self.watch_uuid = watch.get('uuid')
 | 
			
		||||
        # Cache computed properties to avoid repeated list operations
 | 
			
		||||
        self._include_filters_cache = None
 | 
			
		||||
        self._subtractive_selectors_cache = None
 | 
			
		||||
 | 
			
		||||
    def _get_merged_rules(self, attr, include_global=False):
 | 
			
		||||
        """Merge rules from watch, tags, and optionally global settings."""
 | 
			
		||||
        watch_rules = self.watch.get(attr, [])
 | 
			
		||||
        tag_rules = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr=attr)
 | 
			
		||||
        rules = list(dict.fromkeys(watch_rules + tag_rules))
 | 
			
		||||
 | 
			
		||||
        if include_global:
 | 
			
		||||
            global_rules = self.datastore.data['settings']['application'].get(f'global_{attr}', [])
 | 
			
		||||
            rules = list(dict.fromkeys(rules + global_rules))
 | 
			
		||||
 | 
			
		||||
        return rules
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def include_filters(self):
 | 
			
		||||
        if self._include_filters_cache is None:
 | 
			
		||||
            filters = self._get_merged_rules('include_filters')
 | 
			
		||||
            # Inject LD+JSON price tracker rule if enabled
 | 
			
		||||
            if self.watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
 | 
			
		||||
                filters += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
 | 
			
		||||
            self._include_filters_cache = filters
 | 
			
		||||
        return self._include_filters_cache
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def subtractive_selectors(self):
 | 
			
		||||
        if self._subtractive_selectors_cache is None:
 | 
			
		||||
            watch_selectors = self.watch.get("subtractive_selectors", [])
 | 
			
		||||
            tag_selectors = self.datastore.get_tag_overrides_for_watch(uuid=self.watch_uuid, attr='subtractive_selectors')
 | 
			
		||||
            global_selectors = self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
 | 
			
		||||
            self._subtractive_selectors_cache = [*tag_selectors, *watch_selectors, *global_selectors]
 | 
			
		||||
        return self._subtractive_selectors_cache
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def extract_text(self):
 | 
			
		||||
        return self._get_merged_rules('extract_text')
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def ignore_text(self):
 | 
			
		||||
        return self._get_merged_rules('ignore_text', include_global=True)
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def trigger_text(self):
 | 
			
		||||
        return self._get_merged_rules('trigger_text')
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def text_should_not_be_present(self):
 | 
			
		||||
        return self._get_merged_rules('text_should_not_be_present')
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def has_include_filters(self):
 | 
			
		||||
        return bool(self.include_filters) and bool(self.include_filters[0].strip())
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def has_include_json_filters(self):
 | 
			
		||||
        return any(f.strip().startswith(prefix) for f in self.include_filters for prefix in JSON_FILTER_PREFIXES)
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def has_subtractive_selectors(self):
 | 
			
		||||
        return bool(self.subtractive_selectors) and bool(self.subtractive_selectors[0].strip())
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ContentTransformer:
 | 
			
		||||
    """Handles text transformations like trimming, sorting, and deduplication."""
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def trim_whitespace(text):
 | 
			
		||||
        """Remove leading/trailing whitespace from each line."""
 | 
			
		||||
        # Use generator expression to avoid building intermediate list
 | 
			
		||||
        return '\n'.join(line.strip() for line in text.replace("\n\n", "\n").splitlines())
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def remove_duplicate_lines(text):
 | 
			
		||||
        """Remove duplicate lines while preserving order."""
 | 
			
		||||
        return '\n'.join(dict.fromkeys(line for line in text.replace("\n\n", "\n").splitlines()))
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def sort_alphabetically(text):
 | 
			
		||||
        """Sort lines alphabetically (case-insensitive)."""
 | 
			
		||||
        # Remove double line feeds before sorting
 | 
			
		||||
        text = text.replace("\n\n", "\n")
 | 
			
		||||
        return '\n'.join(sorted(text.splitlines(), key=lambda x: x.lower()))
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def extract_by_regex(text, regex_patterns):
 | 
			
		||||
        """Extract text matching regex patterns."""
 | 
			
		||||
        # Use list of strings instead of concatenating lists repeatedly (avoids O(n²) behavior)
 | 
			
		||||
        regex_matched_output = []
 | 
			
		||||
 | 
			
		||||
        for s_re in regex_patterns:
 | 
			
		||||
            # Check if it's perl-style regex /.../
 | 
			
		||||
            if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
 | 
			
		||||
                regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
 | 
			
		||||
                result = re.findall(regex, text)
 | 
			
		||||
 | 
			
		||||
                for match in result:
 | 
			
		||||
                    if type(match) is tuple:
 | 
			
		||||
                        regex_matched_output.extend(match)
 | 
			
		||||
                        regex_matched_output.append('\n')
 | 
			
		||||
                    else:
 | 
			
		||||
                        regex_matched_output.append(match)
 | 
			
		||||
                        regex_matched_output.append('\n')
 | 
			
		||||
            else:
 | 
			
		||||
                # Plain text search (case-insensitive)
 | 
			
		||||
                r = re.compile(re.escape(s_re), re.IGNORECASE)
 | 
			
		||||
                res = r.findall(text)
 | 
			
		||||
                if res:
 | 
			
		||||
                    for match in res:
 | 
			
		||||
                        regex_matched_output.append(match)
 | 
			
		||||
                        regex_matched_output.append('\n')
 | 
			
		||||
 | 
			
		||||
        return ''.join(regex_matched_output) if regex_matched_output else ''
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RuleEngine:
 | 
			
		||||
    """Evaluates blocking rules (triggers, conditions, text_should_not_be_present)."""
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def evaluate_trigger_text(content, trigger_patterns):
 | 
			
		||||
        """
 | 
			
		||||
        Check if trigger text is present. If trigger_text is configured,
 | 
			
		||||
        content is blocked UNLESS the trigger is found.
 | 
			
		||||
        Returns True if blocked, False if allowed.
 | 
			
		||||
        """
 | 
			
		||||
        if not trigger_patterns:
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
        # Assume blocked if trigger_text is configured
 | 
			
		||||
        result = html_tools.strip_ignore_text(
 | 
			
		||||
            content=str(content),
 | 
			
		||||
            wordlist=trigger_patterns,
 | 
			
		||||
            mode="line numbers"
 | 
			
		||||
        )
 | 
			
		||||
        # Unblock if trigger was found
 | 
			
		||||
        return not bool(result)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def evaluate_text_should_not_be_present(content, patterns):
 | 
			
		||||
        """
 | 
			
		||||
        Check if forbidden text is present. If found, block the change.
 | 
			
		||||
        Returns True if blocked, False if allowed.
 | 
			
		||||
        """
 | 
			
		||||
        if not patterns:
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
        result = html_tools.strip_ignore_text(
 | 
			
		||||
            content=str(content),
 | 
			
		||||
            wordlist=patterns,
 | 
			
		||||
            mode="line numbers"
 | 
			
		||||
        )
 | 
			
		||||
        # Block if forbidden text was found
 | 
			
		||||
        return bool(result)
 | 
			
		||||
 | 
			
		||||
    @staticmethod
 | 
			
		||||
    def evaluate_conditions(watch, datastore, content):
 | 
			
		||||
        """
 | 
			
		||||
        Evaluate custom conditions ruleset.
 | 
			
		||||
        Returns True if blocked, False if allowed.
 | 
			
		||||
        """
 | 
			
		||||
        if not watch.get('conditions') or not watch.get('conditions_match_logic'):
 | 
			
		||||
            return False
 | 
			
		||||
 | 
			
		||||
        conditions_result = execute_ruleset_against_all_plugins(
 | 
			
		||||
            current_watch_uuid=watch.get('uuid'),
 | 
			
		||||
            application_datastruct=datastore.data,
 | 
			
		||||
            ephemeral_data={'text': content}
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
        # Block if conditions not met
 | 
			
		||||
        return not conditions_result.get('result')
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ContentProcessor:
 | 
			
		||||
    """Handles content preprocessing, filtering, and extraction."""
 | 
			
		||||
 | 
			
		||||
    def __init__(self, fetcher, watch, filter_config, datastore):
 | 
			
		||||
        self.fetcher = fetcher
 | 
			
		||||
        self.watch = watch
 | 
			
		||||
        self.filter_config = filter_config
 | 
			
		||||
        self.datastore = datastore
 | 
			
		||||
 | 
			
		||||
    def preprocess_rss(self, content):
 | 
			
		||||
        """
 | 
			
		||||
        Convert CDATA/comments in RSS to usable text.
 | 
			
		||||
 | 
			
		||||
        Supports two RSS processing modes:
 | 
			
		||||
        - 'default': Inline CDATA replacement (original behavior)
 | 
			
		||||
        - 'formatted': Format RSS items with title, link, guid, pubDate, and description (CDATA unmarked)
 | 
			
		||||
        """
 | 
			
		||||
        from changedetectionio import rss_tools
 | 
			
		||||
        rss_mode = self.datastore.data["settings"]["application"].get("rss_reader_mode")
 | 
			
		||||
        if rss_mode:
 | 
			
		||||
            # Format RSS items nicely with CDATA content unmarked and converted to text
 | 
			
		||||
            return rss_tools.format_rss_items(content)
 | 
			
		||||
        else:
 | 
			
		||||
            # Default: Original inline CDATA replacement
 | 
			
		||||
            return cdata_in_document_to_text(html_content=content)
 | 
			
		||||
 | 
			
		||||
    def preprocess_pdf(self, raw_content):
 | 
			
		||||
        """Convert PDF to HTML using external tool."""
 | 
			
		||||
        from shutil import which
 | 
			
		||||
        tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
 | 
			
		||||
        if not which(tool):
 | 
			
		||||
            raise PDFToHTMLToolNotFound(
 | 
			
		||||
                f"Command-line `{tool}` tool was not found in system PATH, was it installed?"
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        import subprocess
 | 
			
		||||
        proc = subprocess.Popen(
 | 
			
		||||
            [tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
 | 
			
		||||
            stdout=subprocess.PIPE,
 | 
			
		||||
            stdin=subprocess.PIPE
 | 
			
		||||
        )
 | 
			
		||||
        proc.stdin.write(raw_content)
 | 
			
		||||
        proc.stdin.close()
 | 
			
		||||
        html_content = proc.stdout.read().decode('utf-8')
 | 
			
		||||
        proc.wait(timeout=60)
 | 
			
		||||
 | 
			
		||||
        # Add metadata for change detection
 | 
			
		||||
        metadata = (
 | 
			
		||||
            f"<p>Added by changedetection.io: Document checksum - "
 | 
			
		||||
            f"{hashlib.md5(raw_content).hexdigest().upper()} "
 | 
			
		||||
            f"Original file size - {len(raw_content)} bytes</p>"
 | 
			
		||||
        )
 | 
			
		||||
        return html_content.replace('</body>', metadata + '</body>')
 | 
			
		||||
 | 
			
		||||
    def preprocess_json(self, raw_content):
 | 
			
		||||
        """Format and sort JSON content."""
 | 
			
		||||
        # Then we re-format it, else it does have filters (later on) which will reformat it anyway
 | 
			
		||||
        content = html_tools.extract_json_as_string(content=raw_content, json_filter="json:$")
 | 
			
		||||
 | 
			
		||||
        # Sort JSON to avoid false alerts from reordering
 | 
			
		||||
        try:
 | 
			
		||||
            content = json.dumps(json.loads(content), sort_keys=True, indent=4)
 | 
			
		||||
        except Exception:
 | 
			
		||||
            # Might be malformed JSON, continue anyway
 | 
			
		||||
            pass
 | 
			
		||||
 | 
			
		||||
        return content
 | 
			
		||||
 | 
			
		||||
    def apply_include_filters(self, content, stream_content_type):
 | 
			
		||||
        """Apply CSS, XPath, or JSON filters to extract specific content."""
 | 
			
		||||
        filtered_content = ""
 | 
			
		||||
 | 
			
		||||
        for filter_rule in self.filter_config.include_filters:
 | 
			
		||||
            # XPath filters
 | 
			
		||||
            if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
 | 
			
		||||
                filtered_content += html_tools.xpath_filter(
 | 
			
		||||
                    xpath_filter=filter_rule.replace('xpath:', ''),
 | 
			
		||||
                    html_content=content,
 | 
			
		||||
                    append_pretty_line_formatting=not self.watch.is_source_type_url,
 | 
			
		||||
                    is_rss=stream_content_type.is_rss
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
            # XPath1 filters (first match only)
 | 
			
		||||
            elif filter_rule.startswith('xpath1:'):
 | 
			
		||||
                filtered_content += html_tools.xpath1_filter(
 | 
			
		||||
                    xpath_filter=filter_rule.replace('xpath1:', ''),
 | 
			
		||||
                    html_content=content,
 | 
			
		||||
                    append_pretty_line_formatting=not self.watch.is_source_type_url,
 | 
			
		||||
                    is_rss=stream_content_type.is_rss
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
            # JSON filters
 | 
			
		||||
            elif any(filter_rule.startswith(prefix) for prefix in JSON_FILTER_PREFIXES):
 | 
			
		||||
                filtered_content += html_tools.extract_json_as_string(
 | 
			
		||||
                    content=content,
 | 
			
		||||
                    json_filter=filter_rule
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
            # CSS selectors, default fallback
 | 
			
		||||
            else:
 | 
			
		||||
                filtered_content += html_tools.include_filters(
 | 
			
		||||
                    include_filters=filter_rule,
 | 
			
		||||
                    html_content=content,
 | 
			
		||||
                    append_pretty_line_formatting=not self.watch.is_source_type_url
 | 
			
		||||
                )
 | 
			
		||||
 | 
			
		||||
        # Raise error if filter returned nothing
 | 
			
		||||
        if not filtered_content.strip():
 | 
			
		||||
            raise FilterNotFoundInResponse(
 | 
			
		||||
                msg=self.filter_config.include_filters,
 | 
			
		||||
                screenshot=self.fetcher.screenshot,
 | 
			
		||||
                xpath_data=self.fetcher.xpath_data
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        return filtered_content
 | 
			
		||||
 | 
			
		||||
    def apply_subtractive_selectors(self, content):
 | 
			
		||||
        """Remove elements matching subtractive selectors."""
 | 
			
		||||
        return html_tools.element_removal(self.filter_config.subtractive_selectors, content)
 | 
			
		||||
 | 
			
		||||
    def extract_text_from_html(self, html_content, stream_content_type):
 | 
			
		||||
        """Convert HTML to plain text."""
 | 
			
		||||
        do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
 | 
			
		||||
        return html_tools.html_to_text(
 | 
			
		||||
            html_content=html_content,
 | 
			
		||||
            render_anchor_tag_content=do_anchor,
 | 
			
		||||
            is_rss=stream_content_type.is_rss
 | 
			
		||||
        )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class ChecksumCalculator:
    """Helper for computing content checksums, optionally whitespace-insensitive."""

    @staticmethod
    def calculate(text, ignore_whitespace=False):
        """Return the hex MD5 digest of *text*.

        When ignore_whitespace is True, whitespace characters are removed
        (via TRANSLATE_WHITESPACE_TABLE) before hashing, so purely cosmetic
        whitespace changes yield an unchanged checksum.
        """
        source = text.translate(TRANSLATE_WHITESPACE_TABLE) if ignore_whitespace else text
        return hashlib.md5(source.encode('utf-8')).hexdigest()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Some common stuff here that can be moved to a base class
 | 
			
		||||
# (set_proxy_from_list)
 | 
			
		||||
class perform_site_check(difference_detection_processor):
 | 
			
		||||
 | 
			
		||||
    def run_changedetection(self, watch):
 | 
			
		||||
        changed_detected = False
 | 
			
		||||
        html_content = ""
 | 
			
		||||
        screenshot = False  # as bytes
 | 
			
		||||
        stripped_text_from_html = ""
 | 
			
		||||
 | 
			
		||||
        if not watch:
 | 
			
		||||
            raise Exception("Watch no longer exists.")
 | 
			
		||||
 | 
			
		||||
        # Initialize components
 | 
			
		||||
        filter_config = FilterConfig(watch, self.datastore)
 | 
			
		||||
        content_processor = ContentProcessor(self.fetcher, watch, filter_config, self.datastore)
 | 
			
		||||
        transformer = ContentTransformer()
 | 
			
		||||
        rule_engine = RuleEngine()
 | 
			
		||||
 | 
			
		||||
        # Get content type and stream info
 | 
			
		||||
        ctype_header = self.fetcher.get_all_headers().get('content-type', DEFAULT_WHEN_NO_CONTENT_TYPE_HEADER).lower()
 | 
			
		||||
        stream_content_type = guess_stream_type(http_content_header=ctype_header, content=self.fetcher.content)
 | 
			
		||||
 | 
			
		||||
        # Unset any existing notification error
 | 
			
		||||
        update_obj = {'last_notification_error': False, 'last_error': False}
 | 
			
		||||
 | 
			
		||||
        url = watch.link
 | 
			
		||||
 | 
			
		||||
        self.screenshot = self.fetcher.screenshot
 | 
			
		||||
        self.xpath_data = self.fetcher.xpath_data
 | 
			
		||||
 | 
			
		||||
        # Track the content type
 | 
			
		||||
        update_obj['content_type'] = self.fetcher.get_all_headers().get('content-type', '').lower()
 | 
			
		||||
 | 
			
		||||
        # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
 | 
			
		||||
        # Saves a lot of CPU
 | 
			
		||||
        # Track the content type and checksum before filters
 | 
			
		||||
        update_obj['content_type'] = ctype_header
 | 
			
		||||
        update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest()
 | 
			
		||||
 | 
			
		||||
        # Fetching complete, now filters
 | 
			
		||||
        # === CONTENT PREPROCESSING ===
 | 
			
		||||
        # Avoid creating unnecessary intermediate string copies by reassigning only when needed
 | 
			
		||||
        content = self.fetcher.content
 | 
			
		||||
 | 
			
		||||
        # @note: I feel like the following should be in a more obvious chain system
 | 
			
		||||
        #  - Check filter text
 | 
			
		||||
        #  - Is the checksum different?
 | 
			
		||||
        #  - Do we convert to JSON?
 | 
			
		||||
        # https://stackoverflow.com/questions/41817578/basic-method-chaining ?
 | 
			
		||||
        # return content().textfilter().jsonextract().checksumcompare() ?
 | 
			
		||||
        # RSS preprocessing
 | 
			
		||||
        if stream_content_type.is_rss:
 | 
			
		||||
            content = content_processor.preprocess_rss(content)
 | 
			
		||||
            if self.datastore.data["settings"]["application"].get("rss_reader_mode"):
 | 
			
		||||
                # Now just becomes regular HTML that can have xpath/CSS applied (first of the set etc)
 | 
			
		||||
                stream_content_type.is_rss = False
 | 
			
		||||
                stream_content_type.is_html = True
 | 
			
		||||
                self.fetcher.content = content
 | 
			
		||||
 | 
			
		||||
        is_json = 'application/json' in self.fetcher.get_all_headers().get('content-type', '').lower()
 | 
			
		||||
        is_html = not is_json
 | 
			
		||||
        is_rss = False
 | 
			
		||||
        # PDF preprocessing
 | 
			
		||||
        if watch.is_pdf or stream_content_type.is_pdf:
 | 
			
		||||
            content = content_processor.preprocess_pdf(raw_content=self.fetcher.raw_content)
 | 
			
		||||
            stream_content_type.is_html = True
 | 
			
		||||
 | 
			
		||||
        ctype_header = self.fetcher.get_all_headers().get('content-type', '').lower()
 | 
			
		||||
        # Go into RSS preprocess for converting CDATA/comment to usable text
 | 
			
		||||
        if any(substring in ctype_header for substring in ['application/xml', 'application/rss', 'text/xml']):
 | 
			
		||||
            if '<rss' in self.fetcher.content[:100].lower():
 | 
			
		||||
                self.fetcher.content = cdata_in_document_to_text(html_content=self.fetcher.content)
 | 
			
		||||
                is_rss = True
 | 
			
		||||
        # JSON - Always reformat it nicely for consistency.
 | 
			
		||||
 | 
			
		||||
        # source: support, basically treat it as plaintext
 | 
			
		||||
        if stream_content_type.is_json:
 | 
			
		||||
            if not filter_config.has_include_json_filters:
 | 
			
		||||
                content = content_processor.preprocess_json(raw_content=content)
 | 
			
		||||
        #else, otherwise it gets sorted/formatted in the filter stage anyway
 | 
			
		||||
 | 
			
		||||
        # HTML obfuscation workarounds
 | 
			
		||||
        if stream_content_type.is_html:
 | 
			
		||||
            content = html_tools.workarounds_for_obfuscations(content)
 | 
			
		||||
 | 
			
		||||
        # Check for LD+JSON price data (for HTML content)
 | 
			
		||||
        if stream_content_type.is_html:
 | 
			
		||||
            update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(content)
 | 
			
		||||
 | 
			
		||||
        # === FILTER APPLICATION ===
 | 
			
		||||
        # Start with content reference, avoid copy until modification
 | 
			
		||||
        html_content = content
 | 
			
		||||
 | 
			
		||||
        # Apply include filters (CSS, XPath, JSON)
 | 
			
		||||
        # Except for plaintext (incase they tried to confuse the system, it will HTML escape
 | 
			
		||||
        #if not stream_content_type.is_plaintext:
 | 
			
		||||
        if filter_config.has_include_filters:
 | 
			
		||||
            html_content = content_processor.apply_include_filters(content, stream_content_type)
 | 
			
		||||
 | 
			
		||||
        # Apply subtractive selectors
 | 
			
		||||
        if filter_config.has_subtractive_selectors:
 | 
			
		||||
            html_content = content_processor.apply_subtractive_selectors(html_content)
 | 
			
		||||
 | 
			
		||||
        # === TEXT EXTRACTION ===
 | 
			
		||||
        if watch.is_source_type_url:
 | 
			
		||||
            is_html = False
 | 
			
		||||
            is_json = False
 | 
			
		||||
 | 
			
		||||
        inline_pdf = self.fetcher.get_all_headers().get('content-disposition', '') and '%PDF-1' in self.fetcher.content[:10]
 | 
			
		||||
        if watch.is_pdf or 'application/pdf' in self.fetcher.get_all_headers().get('content-type', '').lower() or inline_pdf:
 | 
			
		||||
            from shutil import which
 | 
			
		||||
            tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml")
 | 
			
		||||
            if not which(tool):
 | 
			
		||||
                raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool))
 | 
			
		||||
 | 
			
		||||
            import subprocess
 | 
			
		||||
            proc = subprocess.Popen(
 | 
			
		||||
                [tool, '-stdout', '-', '-s', 'out.pdf', '-i'],
 | 
			
		||||
                stdout=subprocess.PIPE,
 | 
			
		||||
                stdin=subprocess.PIPE)
 | 
			
		||||
            proc.stdin.write(self.fetcher.raw_content)
 | 
			
		||||
            proc.stdin.close()
 | 
			
		||||
            self.fetcher.content = proc.stdout.read().decode('utf-8')
 | 
			
		||||
            proc.wait(timeout=60)
 | 
			
		||||
 | 
			
		||||
            # Add a little metadata so we know if the file changes (like if an image changes, but the text is the same
 | 
			
		||||
            # @todo may cause problems with non-UTF8?
 | 
			
		||||
            metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format(
 | 
			
		||||
                hashlib.md5(self.fetcher.raw_content).hexdigest().upper(),
 | 
			
		||||
                len(self.fetcher.content))
 | 
			
		||||
 | 
			
		||||
            self.fetcher.content = self.fetcher.content.replace('</body>', metadata + '</body>')
 | 
			
		||||
 | 
			
		||||
        # Better would be if Watch.model could access the global data also
 | 
			
		||||
        # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__
 | 
			
		||||
        # https://realpython.com/inherit-python-dict/ instead of doing it procedurely
 | 
			
		||||
        include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='include_filters')
 | 
			
		||||
 | 
			
		||||
        # 1845 - remove duplicated filters in both group and watch include filter
 | 
			
		||||
        include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags))
 | 
			
		||||
 | 
			
		||||
        subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='subtractive_selectors'),
 | 
			
		||||
                                 *watch.get("subtractive_selectors", []),
 | 
			
		||||
                                 *self.datastore.data["settings"]["application"].get("global_subtractive_selectors", [])
 | 
			
		||||
                                 ]
 | 
			
		||||
 | 
			
		||||
        # Inject a virtual LD+JSON price tracker rule
 | 
			
		||||
        if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT:
 | 
			
		||||
            include_filters_rule += html_tools.LD_JSON_PRODUCT_OFFER_SELECTORS
 | 
			
		||||
 | 
			
		||||
        has_filter_rule = len(include_filters_rule) and len(include_filters_rule[0].strip())
 | 
			
		||||
        has_subtractive_selectors = len(subtractive_selectors) and len(subtractive_selectors[0].strip())
 | 
			
		||||
 | 
			
		||||
        if is_json and not has_filter_rule:
 | 
			
		||||
            include_filters_rule.append("json:$")
 | 
			
		||||
            has_filter_rule = True
 | 
			
		||||
 | 
			
		||||
        if is_json:
 | 
			
		||||
            # Sort the JSON so we dont get false alerts when the content is just re-ordered
 | 
			
		||||
            try:
 | 
			
		||||
                self.fetcher.content = json.dumps(json.loads(self.fetcher.content), sort_keys=True)
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                # Might have just been a snippet, or otherwise bad JSON, continue
 | 
			
		||||
                pass
 | 
			
		||||
 | 
			
		||||
        if has_filter_rule:
 | 
			
		||||
            for filter in include_filters_rule:
 | 
			
		||||
                if any(prefix in filter for prefix in json_filter_prefixes):
 | 
			
		||||
                    stripped_text_from_html += html_tools.extract_json_as_string(content=self.fetcher.content, json_filter=filter)
 | 
			
		||||
                    is_html = False
 | 
			
		||||
 | 
			
		||||
        if is_html or watch.is_source_type_url:
 | 
			
		||||
 | 
			
		||||
            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
 | 
			
		||||
            self.fetcher.content = html_tools.workarounds_for_obfuscations(self.fetcher.content)
 | 
			
		||||
            html_content = self.fetcher.content
 | 
			
		||||
 | 
			
		||||
            # If not JSON,  and if it's not text/plain..
 | 
			
		||||
            if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower():
 | 
			
		||||
                # Don't run get_text or xpath/css filters on plaintext
 | 
			
		||||
                stripped_text_from_html = html_content
 | 
			
		||||
            # For source URLs, keep raw content
 | 
			
		||||
            stripped_text = html_content
 | 
			
		||||
        elif stream_content_type.is_plaintext:
 | 
			
		||||
            # For plaintext, keep as-is without HTML-to-text conversion
 | 
			
		||||
            stripped_text = html_content
 | 
			
		||||
        else:
 | 
			
		||||
            # Extract text from HTML/RSS content (not generic XML)
 | 
			
		||||
            if stream_content_type.is_html or stream_content_type.is_rss:
 | 
			
		||||
                stripped_text = content_processor.extract_text_from_html(html_content, stream_content_type)
 | 
			
		||||
            else:
 | 
			
		||||
                # Does it have some ld+json price data? used for easier monitoring
 | 
			
		||||
                update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(self.fetcher.content)
 | 
			
		||||
 | 
			
		||||
                # Then we assume HTML
 | 
			
		||||
                if has_filter_rule:
 | 
			
		||||
                    html_content = ""
 | 
			
		||||
 | 
			
		||||
                    for filter_rule in include_filters_rule:
 | 
			
		||||
                        # For HTML/XML we offer xpath as an option, just start a regular xPath "/.."
 | 
			
		||||
                        if filter_rule[0] == '/' or filter_rule.startswith('xpath:'):
 | 
			
		||||
                            html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''),
 | 
			
		||||
                                                                    html_content=self.fetcher.content,
 | 
			
		||||
                                                                    append_pretty_line_formatting=not watch.is_source_type_url,
 | 
			
		||||
                                                                    is_rss=is_rss)
 | 
			
		||||
 | 
			
		||||
                        elif filter_rule.startswith('xpath1:'):
 | 
			
		||||
                            html_content += html_tools.xpath1_filter(xpath_filter=filter_rule.replace('xpath1:', ''),
 | 
			
		||||
                                                                     html_content=self.fetcher.content,
 | 
			
		||||
                                                                     append_pretty_line_formatting=not watch.is_source_type_url,
 | 
			
		||||
                                                                     is_rss=is_rss)
 | 
			
		||||
                        else:
 | 
			
		||||
                            html_content += html_tools.include_filters(include_filters=filter_rule,
 | 
			
		||||
                                                                       html_content=self.fetcher.content,
 | 
			
		||||
                                                                       append_pretty_line_formatting=not watch.is_source_type_url)
 | 
			
		||||
 | 
			
		||||
                    if not html_content.strip():
 | 
			
		||||
                        raise FilterNotFoundInResponse(msg=include_filters_rule, screenshot=self.fetcher.screenshot, xpath_data=self.fetcher.xpath_data)
 | 
			
		||||
 | 
			
		||||
                if has_subtractive_selectors:
 | 
			
		||||
                    html_content = html_tools.element_removal(subtractive_selectors, html_content)
 | 
			
		||||
 | 
			
		||||
                if watch.is_source_type_url:
 | 
			
		||||
                    stripped_text_from_html = html_content
 | 
			
		||||
                else:
 | 
			
		||||
                    # extract text
 | 
			
		||||
                    do_anchor = self.datastore.data["settings"]["application"].get("render_anchor_tag_content", False)
 | 
			
		||||
                    stripped_text_from_html = html_tools.html_to_text(html_content=html_content,
 | 
			
		||||
                                                                      render_anchor_tag_content=do_anchor,
 | 
			
		||||
                                                                      is_rss=is_rss)  # 1874 activate the <title workaround hack
 | 
			
		||||
                stripped_text = html_content
 | 
			
		||||
 | 
			
		||||
        # === TEXT TRANSFORMATIONS ===
 | 
			
		||||
        if watch.get('trim_text_whitespace'):
 | 
			
		||||
            stripped_text_from_html = '\n'.join(line.strip() for line in stripped_text_from_html.replace("\n\n", "\n").splitlines())
 | 
			
		||||
            stripped_text = transformer.trim_whitespace(stripped_text)
 | 
			
		||||
 | 
			
		||||
        # Re #340 - return the content before the 'ignore text' was applied
 | 
			
		||||
        # Also used to calculate/show what was removed
 | 
			
		||||
        text_content_before_ignored_filter = stripped_text_from_html
 | 
			
		||||
 | 
			
		||||
        # @todo whitespace coming from missing rtrim()?
 | 
			
		||||
        # stripped_text_from_html could be based on their preferences, replace the processed text with only that which they want to know about.
 | 
			
		||||
        # Rewrite's the processing text based on only what diff result they want to see
 | 
			
		||||
        # Save text before ignore filters (for diff calculation)
 | 
			
		||||
        text_content_before_ignored_filter = stripped_text
 | 
			
		||||
 | 
			
		||||
        # === DIFF FILTERING ===
 | 
			
		||||
        # If user wants specific diff types (added/removed/replaced only)
 | 
			
		||||
        if watch.has_special_diff_filter_options_set() and len(watch.history.keys()):
 | 
			
		||||
            # Now the content comes from the diff-parser and not the returned HTTP traffic, so could be some differences
 | 
			
		||||
            from changedetectionio import diff
 | 
			
		||||
            # needs to not include (added) etc or it may get used twice
 | 
			
		||||
            # Replace the processed text with the preferred result
 | 
			
		||||
            rendered_diff = diff.render_diff(previous_version_file_contents=watch.get_last_fetched_text_before_filters(),
 | 
			
		||||
                                             newest_version_file_contents=stripped_text_from_html,
 | 
			
		||||
                                             include_equal=False,  # not the same lines
 | 
			
		||||
                                             include_added=watch.get('filter_text_added', True),
 | 
			
		||||
                                             include_removed=watch.get('filter_text_removed', True),
 | 
			
		||||
                                             include_replaced=watch.get('filter_text_replaced', True),
 | 
			
		||||
                                             line_feed_sep="\n",
 | 
			
		||||
                                             include_change_type_prefix=False)
 | 
			
		||||
            stripped_text = self._apply_diff_filtering(watch, stripped_text, text_content_before_ignored_filter)
 | 
			
		||||
            if stripped_text is None:
 | 
			
		||||
                # No differences found, but content exists
 | 
			
		||||
                c = ChecksumCalculator.calculate(text_content_before_ignored_filter, ignore_whitespace=True)
 | 
			
		||||
                return False, {'previous_md5': c}, text_content_before_ignored_filter.encode('utf-8')
 | 
			
		||||
 | 
			
		||||
            watch.save_last_text_fetched_before_filters(text_content_before_ignored_filter.encode('utf-8'))
 | 
			
		||||
 | 
			
		||||
            if not rendered_diff and stripped_text_from_html:
 | 
			
		||||
                # We had some content, but no differences were found
 | 
			
		||||
                # Store our new file as the MD5 so it will trigger in the future
 | 
			
		||||
                c = hashlib.md5(stripped_text_from_html.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
 | 
			
		||||
                return False, {'previous_md5': c}, stripped_text_from_html.encode('utf-8')
 | 
			
		||||
            else:
 | 
			
		||||
                stripped_text_from_html = rendered_diff
 | 
			
		||||
 | 
			
		||||
        # Treat pages with no renderable text content as a change? No by default
 | 
			
		||||
        # === EMPTY PAGE CHECK ===
 | 
			
		||||
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
 | 
			
		||||
        if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
 | 
			
		||||
            raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url,
 | 
			
		||||
                                                            status_code=self.fetcher.get_last_status_code(),
 | 
			
		||||
                                                            screenshot=self.fetcher.screenshot,
 | 
			
		||||
                                                            has_filters=has_filter_rule,
 | 
			
		||||
                                                            html_content=html_content,
 | 
			
		||||
                                                            xpath_data=self.fetcher.xpath_data
 | 
			
		||||
                                                            )
 | 
			
		||||
 | 
			
		||||
        # We rely on the actual text in the html output.. many sites have random script vars etc,
 | 
			
		||||
        # in the future we'll implement other mechanisms.
 | 
			
		||||
        if not stream_content_type.is_json and not empty_pages_are_a_change and len(stripped_text.strip()) == 0:
 | 
			
		||||
            raise content_fetchers.exceptions.ReplyWithContentButNoText(
 | 
			
		||||
                url=url,
 | 
			
		||||
                status_code=self.fetcher.get_last_status_code(),
 | 
			
		||||
                screenshot=self.fetcher.screenshot,
 | 
			
		||||
                has_filters=filter_config.has_include_filters,
 | 
			
		||||
                html_content=html_content,
 | 
			
		||||
                xpath_data=self.fetcher.xpath_data
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
        update_obj["last_check_status"] = self.fetcher.get_last_status_code()
 | 
			
		||||
 | 
			
		||||
        # 615 Extract text by regex
 | 
			
		||||
        extract_text = watch.get('extract_text', [])
 | 
			
		||||
        extract_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='extract_text')
 | 
			
		||||
        if len(extract_text) > 0:
 | 
			
		||||
            regex_matched_output = []
 | 
			
		||||
            for s_re in extract_text:
 | 
			
		||||
                # incase they specified something in '/.../x'
 | 
			
		||||
                if re.search(PERL_STYLE_REGEX, s_re, re.IGNORECASE):
 | 
			
		||||
                    regex = html_tools.perl_style_slash_enclosed_regex_to_options(s_re)
 | 
			
		||||
                    result = re.findall(regex, stripped_text_from_html)
 | 
			
		||||
 | 
			
		||||
                    for l in result:
 | 
			
		||||
                        if type(l) is tuple:
 | 
			
		||||
                            # @todo - some formatter option default (between groups)
 | 
			
		||||
                            regex_matched_output += list(l) + ['\n']
 | 
			
		||||
                        else:
 | 
			
		||||
                            # @todo - some formatter option default (between each ungrouped result)
 | 
			
		||||
                            regex_matched_output += [l] + ['\n']
 | 
			
		||||
                else:
 | 
			
		||||
                    # Doesnt look like regex, just hunt for plaintext and return that which matches
 | 
			
		||||
                    # `stripped_text_from_html` will be bytes, so we must encode s_re also to bytes
 | 
			
		||||
                    r = re.compile(re.escape(s_re), re.IGNORECASE)
 | 
			
		||||
                    res = r.findall(stripped_text_from_html)
 | 
			
		||||
                    if res:
 | 
			
		||||
                        for match in res:
 | 
			
		||||
                            regex_matched_output += [match] + ['\n']
 | 
			
		||||
 | 
			
		||||
            ##########################################################
 | 
			
		||||
            stripped_text_from_html = ''
 | 
			
		||||
 | 
			
		||||
            if regex_matched_output:
 | 
			
		||||
                # @todo some formatter for presentation?
 | 
			
		||||
                stripped_text_from_html = ''.join(regex_matched_output)
 | 
			
		||||
        # === REGEX EXTRACTION ===
 | 
			
		||||
        if filter_config.extract_text:
 | 
			
		||||
            extracted = transformer.extract_by_regex(stripped_text, filter_config.extract_text)
 | 
			
		||||
            stripped_text = extracted
 | 
			
		||||
 | 
			
		||||
        # === MORE TEXT TRANSFORMATIONS ===
 | 
			
		||||
        if watch.get('remove_duplicate_lines'):
 | 
			
		||||
            stripped_text_from_html = '\n'.join(dict.fromkeys(line for line in stripped_text_from_html.replace("\n\n", "\n").splitlines()))
 | 
			
		||||
 | 
			
		||||
            stripped_text = transformer.remove_duplicate_lines(stripped_text)
 | 
			
		||||
 | 
			
		||||
        if watch.get('sort_text_alphabetically'):
 | 
			
		||||
            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
 | 
			
		||||
            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
 | 
			
		||||
            stripped_text_from_html = stripped_text_from_html.replace("\n\n", "\n")
 | 
			
		||||
            stripped_text_from_html = '\n'.join(sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower()))
 | 
			
		||||
            stripped_text = transformer.sort_alphabetically(stripped_text)
 | 
			
		||||
 | 
			
		||||
### CALCULATE MD5
 | 
			
		||||
        # If there's text to ignore
 | 
			
		||||
        text_to_ignore = watch.get('ignore_text', []) + self.datastore.data['settings']['application'].get('global_ignore_text', [])
 | 
			
		||||
        text_to_ignore += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='ignore_text')
 | 
			
		||||
        # === CHECKSUM CALCULATION ===
 | 
			
		||||
        text_for_checksuming = stripped_text
 | 
			
		||||
 | 
			
		||||
        text_for_checksuming = stripped_text_from_html
 | 
			
		||||
        if text_to_ignore:
 | 
			
		||||
            text_for_checksuming = html_tools.strip_ignore_text(stripped_text_from_html, text_to_ignore)
 | 
			
		||||
        # Apply ignore_text for checksum calculation
 | 
			
		||||
        if filter_config.ignore_text:
 | 
			
		||||
            text_for_checksuming = html_tools.strip_ignore_text(stripped_text, filter_config.ignore_text)
 | 
			
		||||
 | 
			
		||||
        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
 | 
			
		||||
        if text_for_checksuming and self.datastore.data['settings']['application'].get('ignore_whitespace', False):
 | 
			
		||||
            fetched_md5 = hashlib.md5(text_for_checksuming.translate(TRANSLATE_WHITESPACE_TABLE).encode('utf-8')).hexdigest()
 | 
			
		||||
        else:
 | 
			
		||||
            fetched_md5 = hashlib.md5(text_for_checksuming.encode('utf-8')).hexdigest()
 | 
			
		||||
            # Optionally remove ignored lines from output
 | 
			
		||||
            strip_ignored_lines = watch.get('strip_ignored_lines')
 | 
			
		||||
            if strip_ignored_lines is None:
 | 
			
		||||
                strip_ignored_lines = self.datastore.data['settings']['application'].get('strip_ignored_lines')
 | 
			
		||||
            if strip_ignored_lines:
 | 
			
		||||
                stripped_text = text_for_checksuming
 | 
			
		||||
 | 
			
		||||
        ############ Blocking rules, after checksum #################
 | 
			
		||||
        # Calculate checksum
 | 
			
		||||
        ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace', False)
 | 
			
		||||
        fetched_md5 = ChecksumCalculator.calculate(text_for_checksuming, ignore_whitespace=ignore_whitespace)
 | 
			
		||||
 | 
			
		||||
        # === BLOCKING RULES EVALUATION ===
 | 
			
		||||
        blocked = False
 | 
			
		||||
        trigger_text = watch.get('trigger_text', [])
 | 
			
		||||
        trigger_text += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='trigger_text')
 | 
			
		||||
        if len(trigger_text):
 | 
			
		||||
            # Assume blocked
 | 
			
		||||
 | 
			
		||||
        # Check trigger_text
 | 
			
		||||
        if rule_engine.evaluate_trigger_text(stripped_text, filter_config.trigger_text):
 | 
			
		||||
            blocked = True
 | 
			
		||||
            # Filter and trigger works the same, so reuse it
 | 
			
		||||
            # It should return the line numbers that match
 | 
			
		||||
            # Unblock flow if the trigger was found (some text remained after stripped what didnt match)
 | 
			
		||||
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
 | 
			
		||||
                                                  wordlist=trigger_text,
 | 
			
		||||
                                                  mode="line numbers")
 | 
			
		||||
            # Unblock if the trigger was found
 | 
			
		||||
            if result:
 | 
			
		||||
                blocked = False
 | 
			
		||||
 | 
			
		||||
        text_should_not_be_present = watch.get('text_should_not_be_present', [])
 | 
			
		||||
        text_should_not_be_present += self.datastore.get_tag_overrides_for_watch(uuid=watch.get('uuid'), attr='text_should_not_be_present')
 | 
			
		||||
        if len(text_should_not_be_present):
 | 
			
		||||
            # If anything matched, then we should block a change from happening
 | 
			
		||||
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
 | 
			
		||||
                                                  wordlist=text_should_not_be_present,
 | 
			
		||||
                                                  mode="line numbers")
 | 
			
		||||
            if result:
 | 
			
		||||
                blocked = True
 | 
			
		||||
        # Check text_should_not_be_present
 | 
			
		||||
        if rule_engine.evaluate_text_should_not_be_present(stripped_text, filter_config.text_should_not_be_present):
 | 
			
		||||
            blocked = True
 | 
			
		||||
 | 
			
		||||
        # And check if 'conditions' will let this pass through
 | 
			
		||||
        if watch.get('conditions') and watch.get('conditions_match_logic'):
 | 
			
		||||
            conditions_result = execute_ruleset_against_all_plugins(current_watch_uuid=watch.get('uuid'),
 | 
			
		||||
                                                                    application_datastruct=self.datastore.data,
 | 
			
		||||
                                                                    ephemeral_data={
 | 
			
		||||
                                                                        'text': stripped_text_from_html
 | 
			
		||||
                                                                    }
 | 
			
		||||
                                                                    )
 | 
			
		||||
        # Check custom conditions
 | 
			
		||||
        if rule_engine.evaluate_conditions(watch, self.datastore, stripped_text):
 | 
			
		||||
            blocked = True
 | 
			
		||||
 | 
			
		||||
            if not conditions_result.get('result'):
 | 
			
		||||
                # Conditions say "Condition not met" so we block it.
 | 
			
		||||
                blocked = True
 | 
			
		||||
 | 
			
		||||
        # Looks like something changed, but did it match all the rules?
 | 
			
		||||
        # === CHANGE DETECTION ===
 | 
			
		||||
        if blocked:
 | 
			
		||||
            changed_detected = False
 | 
			
		||||
        else:
 | 
			
		||||
            # The main thing that all this at the moment comes down to :)
 | 
			
		||||
            # Compare checksums
 | 
			
		||||
            if watch.get('previous_md5') != fetched_md5:
 | 
			
		||||
                changed_detected = True
 | 
			
		||||
 | 
			
		||||
            # Always record the new checksum
 | 
			
		||||
            update_obj["previous_md5"] = fetched_md5
 | 
			
		||||
 | 
			
		||||
            # On the first run of a site, watch['previous_md5'] will be None, set it the current one.
 | 
			
		||||
            # On first run, initialize previous_md5
 | 
			
		||||
            if not watch.get('previous_md5'):
 | 
			
		||||
                watch['previous_md5'] = fetched_md5
 | 
			
		||||
 | 
			
		||||
        logger.debug(f"Watch UUID {watch.get('uuid')} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}")
 | 
			
		||||
 | 
			
		||||
        if changed_detected:
 | 
			
		||||
            if watch.get('check_unique_lines', False):
 | 
			
		||||
                ignore_whitespace = self.datastore.data['settings']['application'].get('ignore_whitespace')
 | 
			
		||||
        # === UNIQUE LINES CHECK ===
 | 
			
		||||
        if changed_detected and watch.get('check_unique_lines', False):
 | 
			
		||||
            has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
 | 
			
		||||
                lines=stripped_text.splitlines(),
 | 
			
		||||
                ignore_whitespace=ignore_whitespace
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(
 | 
			
		||||
                    lines=stripped_text_from_html.splitlines(),
 | 
			
		||||
                    ignore_whitespace=ignore_whitespace
 | 
			
		||||
                )
 | 
			
		||||
            if not has_unique_lines:
 | 
			
		||||
                logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
 | 
			
		||||
                changed_detected = False
 | 
			
		||||
            else:
 | 
			
		||||
                logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
 | 
			
		||||
 | 
			
		||||
                # One or more lines? unsure?
 | 
			
		||||
                if not has_unique_lines:
 | 
			
		||||
                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} didnt have anything new setting change_detected=False")
 | 
			
		||||
                    changed_detected = False
 | 
			
		||||
                else:
 | 
			
		||||
                    logger.debug(f"check_unique_lines: UUID {watch.get('uuid')} had unique content")
 | 
			
		||||
        # Note: Explicit cleanup is only needed here because text_json_diff handles
 | 
			
		||||
        # large strings (100KB-300KB for RSS/HTML). The other processors work with
 | 
			
		||||
        # small strings and don't need this.
 | 
			
		||||
        #
 | 
			
		||||
        # Python would clean these up automatically, but explicit `del` frees memory
 | 
			
		||||
        # immediately rather than waiting for function return, reducing peak memory usage.
 | 
			
		||||
        del content
 | 
			
		||||
        if 'html_content' in locals() and html_content is not stripped_text:
 | 
			
		||||
            del html_content
 | 
			
		||||
        if 'text_content_before_ignored_filter' in locals() and text_content_before_ignored_filter is not stripped_text:
 | 
			
		||||
            del text_content_before_ignored_filter
 | 
			
		||||
        if 'text_for_checksuming' in locals() and text_for_checksuming is not stripped_text:
 | 
			
		||||
            del text_for_checksuming
 | 
			
		||||
 | 
			
		||||
        return changed_detected, update_obj, stripped_text
 | 
			
		||||
 | 
			
		||||
        # stripped_text_from_html - Everything after filters and NO 'ignored' content
 | 
			
		||||
        return changed_detected, update_obj, stripped_text_from_html
 | 
			
		||||
    def _apply_diff_filtering(self, watch, stripped_text, text_before_filter):
        """Reduce the text to only the change types the user asked for.

        Renders a diff between the previously stored pre-filter snapshot and
        the current text, keeping only added/removed/replaced lines according
        to the watch's filter_text_* settings, then stores the current
        snapshot so the next check compares against it.

        Returns the filtered diff text, or None when text was present but no
        qualifying differences were found.
        """
        from changedetectionio import diff

        previous_snapshot = watch.get_last_fetched_text_before_filters()
        diff_options = dict(
            previous_version_file_contents=previous_snapshot,
            newest_version_file_contents=stripped_text,
            include_equal=False,
            include_added=watch.get('filter_text_added', True),
            include_removed=watch.get('filter_text_removed', True),
            include_replaced=watch.get('filter_text_replaced', True),
            line_feed_sep="\n",
            include_change_type_prefix=False,
        )
        filtered = diff.render_diff(**diff_options)

        # Persist the current pre-filter text for the next comparison round
        watch.save_last_text_fetched_before_filters(text_before_filter.encode('utf-8'))

        # We had content but the diff produced nothing the user cares about
        if stripped_text and not filtered:
            return None

        return filtered
		||||
 
 | 
			
		||||
@@ -1,5 +1,5 @@
 | 
			
		||||
[pytest]
 | 
			
		||||
addopts = --no-start-live-server --live-server-port=5005
 | 
			
		||||
addopts = --no-start-live-server --live-server-port=0
 | 
			
		||||
#testpaths = tests pytest_invenio
 | 
			
		||||
#live_server_scope = function
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										435
									
								
								changedetectionio/queue_handlers.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										435
									
								
								changedetectionio/queue_handlers.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,435 @@
 | 
			
		||||
from blinker import signal
 | 
			
		||||
from loguru import logger
 | 
			
		||||
from typing import Dict, List, Any, Optional
 | 
			
		||||
import heapq
 | 
			
		||||
import queue
 | 
			
		||||
import threading
 | 
			
		||||
 | 
			
		||||
try:
 | 
			
		||||
    import janus
 | 
			
		||||
except ImportError:
 | 
			
		||||
    logger.critical(f"CRITICAL: janus library is required. Install with: pip install janus")
 | 
			
		||||
    raise
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class RecheckPriorityQueue:
    """
    Ultra-reliable priority queue using janus for async/sync bridging.

    CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
    - sync_q: Used by Flask routes, ticker threads, and other synchronous code
    - async_q: Used by async workers (the actual fetchers/processors) and coroutines

    DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts:
    - Synchronous code (Flask, threads) cannot use async methods without blocking
    - Async code cannot use sync methods without blocking the event loop
    - janus provides the only safe bridge between these two worlds

    Attempting to unify to async-only would require:
    - Converting all Flask routes to async (major breaking change)
    - Using asyncio.run() in sync contexts (causes deadlocks)
    - Thread-pool wrapping (adds complexity and overhead)

    Minimal implementation focused on reliability:
    - Pure janus for sync/async bridge
    - Thread-safe priority ordering
    - Bulletproof error handling with critical logging

    Implementation note: the janus queue only carries notification tokens
    (the value True); the real items live in ``self._priority_items``,
    which is kept in heap order under ``self._lock``.
    """

    def __init__(self, maxsize: int = 0):
        """Create the janus bridge, the priority heap and the UI signals.

        Initialisation failures are fatal - they are logged and re-raised.
        """
        try:
            self._janus_queue = janus.Queue(maxsize=maxsize)
            # BOTH interfaces required - see class docstring for why
            self.sync_q = self._janus_queue.sync_q   # Flask routes, ticker thread
            self.async_q = self._janus_queue.async_q # Async workers

            # Priority storage - a heap guarded by self._lock for thread safety
            self._priority_items = []
            self._lock = threading.RLock()

            # Signals for UI updates
            self.queue_length_signal = signal('queue_length')

            logger.debug("RecheckPriorityQueue initialized successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to initialize RecheckPriorityQueue: {str(e)}")
            raise

    # SYNC INTERFACE (for ticker thread)
    def put(self, item, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync put with priority ordering.

        Returns True on success, False on failure. On failure the item is
        rolled back out of the priority heap so storage stays consistent
        with the number of pending janus notifications.
        """
        try:
            # Add to priority storage
            with self._lock:
                heapq.heappush(self._priority_items, item)

            # Notify via janus sync queue
            self.sync_q.put(True, block=block, timeout=timeout)

            # Emit signals
            self._emit_put_signals(item)

            logger.debug(f"Successfully queued item: {self._get_item_uuid(item)}")
            return True

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to put item {self._get_item_uuid(item)}: {str(e)}")
            # Remove from priority storage if janus put failed
            self._rollback_item(item, context="put")
            return False

    def get(self, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync get with priority ordering.

        Waits for a notification token, then pops and returns the
        highest-priority item. Raises on timeout or on a notification/storage
        inconsistency.
        """
        try:
            # Wait for notification
            self.sync_q.get(block=block, timeout=timeout)

            # Get highest priority item
            with self._lock:
                if not self._priority_items:
                    logger.critical("CRITICAL: Queue notification received but no priority items available")
                    raise Exception("Priority queue inconsistency")
                item = heapq.heappop(self._priority_items)

            # Emit signals
            self._emit_get_signals()

            logger.debug(f"Successfully retrieved item: {self._get_item_uuid(item)}")
            return item

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get item from queue: {str(e)}")
            raise

    # ASYNC INTERFACE (for workers)
    async def async_put(self, item):
        """Pure async put with priority ordering.

        Returns True on success, False on failure (with rollback, like put()).
        """
        try:
            # Add to priority storage
            with self._lock:
                heapq.heappush(self._priority_items, item)

            # Notify via janus async queue
            await self.async_q.put(True)

            # Emit signals
            self._emit_put_signals(item)

            logger.debug(f"Successfully async queued item: {self._get_item_uuid(item)}")
            return True

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async put item {self._get_item_uuid(item)}: {str(e)}")
            # Remove from priority storage if janus put failed
            self._rollback_item(item, context="async put")
            return False

    async def async_get(self):
        """Pure async get with priority ordering.

        Awaits a notification token, then pops and returns the
        highest-priority item. Raises on inconsistency.
        """
        try:
            # Wait for notification
            await self.async_q.get()

            # Get highest priority item
            with self._lock:
                if not self._priority_items:
                    logger.critical("CRITICAL: Async queue notification received but no priority items available")
                    raise Exception("Priority queue inconsistency")
                item = heapq.heappop(self._priority_items)

            # Emit signals
            self._emit_get_signals()

            logger.debug(f"Successfully async retrieved item: {self._get_item_uuid(item)}")
            return item

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to async get item from queue: {str(e)}")
            raise

    # UTILITY METHODS
    def qsize(self) -> int:
        """Get current queue size (number of pending priority items)."""
        try:
            with self._lock:
                return len(self._priority_items)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue size: {str(e)}")
            return 0

    def empty(self) -> bool:
        """Check if queue is empty"""
        return self.qsize() == 0

    def close(self):
        """Close the janus queue"""
        try:
            self._janus_queue.close()
            logger.debug("RecheckPriorityQueue closed successfully")
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to close RecheckPriorityQueue: {str(e)}")

    # COMPATIBILITY METHODS (from original implementation)
    @property
    def queue(self):
        """Provide compatibility with original queue access.

        Returns a snapshot list of pending items (heap order, not sorted).
        """
        try:
            with self._lock:
                return list(self._priority_items)
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue list: {str(e)}")
            return []

    def get_uuid_position(self, target_uuid: str) -> Dict[str, Any]:
        """Find position of UUID in queue.

        Returns a dict with keys position/total_items/priority/found.
        Position counts queued items with a strictly smaller priority value
        (lower number = runs sooner).
        """
        try:
            with self._lock:
                queue_list = list(self._priority_items)
                total_items = len(queue_list)

                if total_items == 0:
                    return {'position': None, 'total_items': 0, 'priority': None, 'found': False}

                # Find target item
                for item in queue_list:
                    if (hasattr(item, 'item') and isinstance(item.item, dict) and
                        item.item.get('uuid') == target_uuid):

                        # Count items with higher priority (smaller number)
                        position = sum(1 for other in queue_list if other.priority < item.priority)
                        return {
                            'position': position,
                            'total_items': total_items,
                            'priority': item.priority,
                            'found': True
                        }

                return {'position': None, 'total_items': total_items, 'priority': None, 'found': False}

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get UUID position for {target_uuid}: {str(e)}")
            return {'position': None, 'total_items': 0, 'priority': None, 'found': False}

    def get_all_queued_uuids(self, limit: Optional[int] = None, offset: int = 0) -> Dict[str, Any]:
        """Get all queued UUIDs with pagination.

        Items are returned in priority order; ``limit``/``offset`` slice the
        sorted list. Returns a dict with items/total_items/returned_items/has_more.
        """
        try:
            with self._lock:
                queue_list = sorted(self._priority_items)  # Sort by priority
                total_items = len(queue_list)

                if total_items == 0:
                    return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}

                # Apply pagination (a falsy limit means "no limit")
                end_idx = min(offset + limit, total_items) if limit else total_items
                items_to_process = queue_list[offset:end_idx]

                result = []
                for position, item in enumerate(items_to_process, start=offset):
                    if (hasattr(item, 'item') and isinstance(item.item, dict) and
                        'uuid' in item.item):
                        result.append({
                            'uuid': item.item['uuid'],
                            'position': position,
                            'priority': item.priority
                        })

                return {
                    'items': result,
                    'total_items': total_items,
                    'returned_items': len(result),
                    'has_more': (offset + len(result)) < total_items
                }

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get all queued UUIDs: {str(e)}")
            return {'items': [], 'total_items': 0, 'returned_items': 0, 'has_more': False}

    def get_queue_summary(self) -> Dict[str, Any]:
        """Get queue summary statistics.

        Buckets: priority 1 = immediate, priority 5 = clone, priority > 100
        = scheduled; also returns a per-priority breakdown and min/max.
        """
        try:
            with self._lock:
                queue_list = list(self._priority_items)
                total_items = len(queue_list)

                if total_items == 0:
                    return {
                        'total_items': 0, 'priority_breakdown': {},
                        'immediate_items': 0, 'clone_items': 0, 'scheduled_items': 0
                    }

                immediate_items = clone_items = scheduled_items = 0
                priority_counts = {}

                for item in queue_list:
                    priority = item.priority
                    priority_counts[priority] = priority_counts.get(priority, 0) + 1

                    if priority == 1:
                        immediate_items += 1
                    elif priority == 5:
                        clone_items += 1
                    elif priority > 100:
                        scheduled_items += 1

                return {
                    'total_items': total_items,
                    'priority_breakdown': priority_counts,
                    'immediate_items': immediate_items,
                    'clone_items': clone_items,
                    'scheduled_items': scheduled_items,
                    'min_priority': min(priority_counts.keys()) if priority_counts else None,
                    'max_priority': max(priority_counts.keys()) if priority_counts else None
                }

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to get queue summary: {str(e)}")
            return {'total_items': 0, 'priority_breakdown': {}, 'immediate_items': 0,
                   'clone_items': 0, 'scheduled_items': 0}

    # PRIVATE METHODS
    def _rollback_item(self, item, context: str):
        """Best-effort removal of *item* from the priority heap after a failed
        janus put, keeping storage consistent with pending notifications.

        Shared by put() and async_put(); *context* ("put" / "async put") only
        affects the log message.
        """
        try:
            with self._lock:
                if item in self._priority_items:
                    self._priority_items.remove(item)
                    heapq.heapify(self._priority_items)
        except Exception as cleanup_e:
            # NOTE: log the cleanup error itself (the original code logged the
            # outer put exception here, losing the cleanup failure reason)
            logger.critical(f"CRITICAL: Failed to cleanup after {context} failure: {str(cleanup_e)}")

    def _get_item_uuid(self, item) -> str:
        """Safely extract UUID from item for logging (never raises)."""
        try:
            if hasattr(item, 'item') and isinstance(item.item, dict):
                return item.item.get('uuid', 'unknown')
        except Exception:
            pass
        return 'unknown'

    def _emit_put_signals(self, item):
        """Emit signals when item is added"""
        try:
            # Watch update signal
            if hasattr(item, 'item') and isinstance(item.item, dict) and 'uuid' in item.item:
                watch_check_update = signal('watch_check_update')
                if watch_check_update:
                    watch_check_update.send(watch_uuid=item.item['uuid'])

            # Queue length signal
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())

        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit put signals: {str(e)}")

    def _emit_get_signals(self):
        """Emit signals when item is removed"""
        try:
            if self.queue_length_signal:
                self.queue_length_signal.send(length=self.qsize())
        except Exception as e:
            logger.critical(f"CRITICAL: Failed to emit get signals: {str(e)}")
		||||
class NotificationQueue:
    """
    Ultra-reliable notification queue using pure janus.

    CRITICAL DESIGN NOTE: Both sync_q and async_q are required because:
    - sync_q: Used by Flask routes, ticker threads, and other synchronous code
    - async_q: Used by async workers and coroutines

    DO NOT REMOVE EITHER INTERFACE - they bridge different execution contexts.
    See RecheckPriorityQueue docstring above for detailed explanation.

    Simple wrapper around janus with bulletproof error handling.
    """

    def __init__(self, maxsize: int = 0):
        """Set up the janus queue and the blinker signal used for UI pushes."""
        try:
            self._janus_queue = janus.Queue(maxsize=maxsize)
            # BOTH interfaces required - see class docstring for why
            self.sync_q = self._janus_queue.sync_q   # Flask routes, threads
            self.async_q = self._janus_queue.async_q # Async workers
            self.notification_event_signal = signal('notification_event')
            logger.debug("NotificationQueue initialized successfully")
        except Exception as err:
            logger.critical(f"CRITICAL: Failed to initialize NotificationQueue: {str(err)}")
            raise

    def put(self, item: Dict[str, Any], block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync put with signal emission"""
        try:
            self.sync_q.put(item, block=block, timeout=timeout)
            self._emit_notification_signal(item)
            logger.debug(f"Successfully queued notification: {item.get('uuid', 'unknown')}")
            return True
        except Exception as err:
            logger.critical(f"CRITICAL: Failed to put notification {item.get('uuid', 'unknown')}: {str(err)}")
            return False

    async def async_put(self, item: Dict[str, Any]):
        """Pure async put with signal emission"""
        try:
            await self.async_q.put(item)
            self._emit_notification_signal(item)
            logger.debug(f"Successfully async queued notification: {item.get('uuid', 'unknown')}")
            return True
        except Exception as err:
            logger.critical(f"CRITICAL: Failed to async put notification {item.get('uuid', 'unknown')}: {str(err)}")
            return False

    def get(self, block: bool = True, timeout: Optional[float] = None):
        """Thread-safe sync get"""
        try:
            return self.sync_q.get(block=block, timeout=timeout)
        except queue.Empty:
            # Expected for non-blocking / timed-out gets - pass straight through
            raise
        except Exception as err:
            logger.critical(f"CRITICAL: Failed to get notification: {str(err)}")
            raise

    async def async_get(self):
        """Pure async get"""
        try:
            return await self.async_q.get()
        except queue.Empty:
            # Expected for empty-queue conditions - pass straight through
            raise
        except Exception as err:
            logger.critical(f"CRITICAL: Failed to async get notification: {str(err)}")
            raise

    def qsize(self) -> int:
        """Get current queue size"""
        try:
            return self.sync_q.qsize()
        except Exception as err:
            logger.critical(f"CRITICAL: Failed to get notification queue size: {str(err)}")
            return 0

    def empty(self) -> bool:
        """Check if queue is empty"""
        return self.qsize() == 0

    def close(self):
        """Close the janus queue"""
        try:
            self._janus_queue.close()
            logger.debug("NotificationQueue closed successfully")
        except Exception as err:
            logger.critical(f"CRITICAL: Failed to close NotificationQueue: {str(err)}")

    def _emit_notification_signal(self, item: Dict[str, Any]):
        """Emit notification signal"""
        try:
            sig = self.notification_event_signal
            if not sig or not isinstance(item, dict):
                return
            watch_uuid = item.get('uuid')
            if watch_uuid:
                sig.send(watch_uuid=watch_uuid)
            else:
                sig.send()
        except Exception as err:
            logger.critical(f"CRITICAL: Failed to emit notification signal: {str(err)}")
		||||
							
								
								
									
										124
									
								
								changedetectionio/realtime/README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										124
									
								
								changedetectionio/realtime/README.md
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,124 @@
 | 
			
		||||
# Real-time Socket.IO Implementation
 | 
			
		||||
 | 
			
		||||
This directory contains the Socket.IO implementation for changedetection.io's real-time updates.
 | 
			
		||||
 | 
			
		||||
## Architecture Overview
 | 
			
		||||
 | 
			
		||||
The real-time system provides live updates to the web interface for:
 | 
			
		||||
- Watch status changes (checking, completed, errors)
 | 
			
		||||
- Queue length updates  
 | 
			
		||||
- General statistics updates
 | 
			
		||||
 | 
			
		||||
## Current Implementation
 | 
			
		||||
 | 
			
		||||
### Socket.IO Configuration
 | 
			
		||||
- **Async Mode**: `threading` (default) or `gevent` (optional via SOCKETIO_MODE env var)
 | 
			
		||||
- **Server**: Flask-SocketIO with threading support
 | 
			
		||||
- **Background Tasks**: Python threading with daemon threads
 | 
			
		||||
 | 
			
		||||
### Async Worker Integration
 | 
			
		||||
- **Workers**: Async workers using asyncio for watch processing
 | 
			
		||||
- **Queue**: AsyncSignalPriorityQueue for job distribution
 | 
			
		||||
- **Signals**: Blinker signals for real-time updates between workers and Socket.IO
 | 
			
		||||
 | 
			
		||||
### Environment Variables
 | 
			
		||||
- `SOCKETIO_MODE=threading` (default, recommended)
 | 
			
		||||
- `SOCKETIO_MODE=gevent` (optional, has cross-platform limitations)
 | 
			
		||||
 | 
			
		||||
## Architecture Decision: Why Threading Mode?
 | 
			
		||||
 | 
			
		||||
### Previous Issues with Eventlet
 | 
			
		||||
**Eventlet was completely removed** due to fundamental compatibility issues:
 | 
			
		||||
 | 
			
		||||
1. **Monkey Patching Conflicts**: `eventlet.monkey_patch()` globally replaced Python's threading/socket modules, causing conflicts with:
 | 
			
		||||
   - Playwright's synchronous browser automation
 | 
			
		||||
   - Async worker event loops
 | 
			
		||||
   - Various Python libraries expecting real threading
 | 
			
		||||
 | 
			
		||||
2. **Python 3.12+ Compatibility**: Eventlet had issues with newer Python versions and asyncio integration
 | 
			
		||||
 | 
			
		||||
3. **CVE-2023-29483**: Security vulnerability in eventlet's dnspython dependency
 | 
			
		||||
 | 
			
		||||
### Current Solution Benefits
 | 
			
		||||
✅ **Threading Mode Advantages**:
 | 
			
		||||
- Full compatibility with async workers and Playwright
 | 
			
		||||
- No monkey patching - uses standard Python threading
 | 
			
		||||
- Better Python 3.12+ support
 | 
			
		||||
- Cross-platform compatibility (Windows, macOS, Linux)
 | 
			
		||||
- No external async library dependencies
 | 
			
		||||
- Fast shutdown capabilities
 | 
			
		||||
 | 
			
		||||
✅ **Optional Gevent Support**:
 | 
			
		||||
- Available via `SOCKETIO_MODE=gevent` for high-concurrency scenarios
 | 
			
		||||
- Cross-platform limitations documented in requirements.txt
 | 
			
		||||
- Not recommended as default due to Windows socket limits and macOS ARM build issues
 | 
			
		||||
 | 
			
		||||
## Socket.IO Mode Configuration
 | 
			
		||||
 | 
			
		||||
### Threading Mode (Default)
 | 
			
		||||
```python
 | 
			
		||||
# Enabled automatically
 | 
			
		||||
async_mode = 'threading'
 | 
			
		||||
socketio = SocketIO(app, async_mode='threading')
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
### Gevent Mode (Optional)
 | 
			
		||||
```bash
 | 
			
		||||
# Set environment variable
 | 
			
		||||
export SOCKETIO_MODE=gevent
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
## Background Tasks
 | 
			
		||||
 | 
			
		||||
### Queue Polling
 | 
			
		||||
- **Threading Mode**: `threading.Thread` with `threading.Event` for shutdown
 | 
			
		||||
- **Signal Handling**: Blinker signals for watch state changes
 | 
			
		||||
- **Real-time Updates**: Direct Socket.IO `emit()` calls to connected clients
 | 
			
		||||
 | 
			
		||||
### Worker Integration
 | 
			
		||||
- **Async Workers**: Run in separate asyncio event loop thread
 | 
			
		||||
- **Communication**: AsyncSignalPriorityQueue bridges async workers and Socket.IO
 | 
			
		||||
- **Updates**: Real-time updates sent when workers complete tasks
 | 
			
		||||
 | 
			
		||||
## Files in This Directory
 | 
			
		||||
 | 
			
		||||
- `socket_server.py`: Main Socket.IO initialization and event handling
 | 
			
		||||
- `events.py`: Watch operation event handlers  
 | 
			
		||||
- `__init__.py`: Module initialization
 | 
			
		||||
 | 
			
		||||
## Production Deployment
 | 
			
		||||
 | 
			
		||||
### Recommended WSGI Servers
 | 
			
		||||
For production with Socket.IO threading mode:
 | 
			
		||||
- **Gunicorn**: `gunicorn --worker-class gevent changedetection:app` (if using gevent mode)
 | 
			
		||||
- **uWSGI**: With threading support
 | 
			
		||||
- **Docker**: Built-in Flask server works well for containerized deployments
 | 
			
		||||
 | 
			
		||||
### Performance Considerations
 | 
			
		||||
- Threading mode: Better memory usage, standard Python threading
 | 
			
		||||
- Gevent mode: Higher concurrency but platform limitations
 | 
			
		||||
- Async workers: Separate from Socket.IO, provides scalability
 | 
			
		||||
 | 
			
		||||
## Environment Variables
 | 
			
		||||
 | 
			
		||||
| Variable | Default | Description |
 | 
			
		||||
|----------|---------|-------------|
 | 
			
		||||
| `SOCKETIO_MODE` | `threading` | Socket.IO async mode (`threading` or `gevent`) |
 | 
			
		||||
| `FETCH_WORKERS` | `10` | Number of async workers for watch processing |
 | 
			
		||||
| `CHANGEDETECTION_HOST` | `0.0.0.0` | Server bind address |
 | 
			
		||||
| `CHANGEDETECTION_PORT` | `5000` | Server port |
 | 
			
		||||
 | 
			
		||||
## Debugging Tips
 | 
			
		||||
 | 
			
		||||
1. **Socket.IO Issues**: Check browser dev tools for WebSocket connection errors
 | 
			
		||||
2. **Threading Issues**: Monitor with `ps -T` to check thread count  
 | 
			
		||||
3. **Worker Issues**: Use `/worker-health` endpoint to check async worker status
 | 
			
		||||
4. **Queue Issues**: Use `/queue-status` endpoint to monitor job queue
 | 
			
		||||
5. **Performance**: Use `/gc-cleanup` endpoint to trigger memory cleanup
 | 
			
		||||
 | 
			
		||||
## Migration Notes
 | 
			
		||||
 | 
			
		||||
If upgrading from eventlet-based versions:
 | 
			
		||||
- Remove any `EVENTLET_*` environment variables
 | 
			
		||||
- No code changes needed - Socket.IO mode is automatically configured
 | 
			
		||||
- Optional: Set `SOCKETIO_MODE=gevent` if high concurrency is required and platform supports it
 | 
			
		||||
							
								
								
									
										3
									
								
								changedetectionio/realtime/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										3
									
								
								changedetectionio/realtime/__init__.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,3 @@
 | 
			
		||||
"""
 | 
			
		||||
Socket.IO realtime updates module for changedetection.io
 | 
			
		||||
"""
 | 
			
		||||
							
								
								
									
										58
									
								
								changedetectionio/realtime/events.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										58
									
								
								changedetectionio/realtime/events.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,58 @@
 | 
			
		||||
from flask_socketio import emit
 | 
			
		||||
from loguru import logger
 | 
			
		||||
from blinker import signal
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def register_watch_operation_handlers(socketio, datastore):
    """Attach the Socket.IO handlers that drive per-watch operations.

    Registers a single 'watch_operation' event handler on *socketio* which
    performs pause/mute/recheck against *datastore* and reports the outcome
    back to the requesting client via an 'operation_result' event.
    """

    def _reply(payload):
        # Every outcome (success or failure) is reported on the same event name.
        emit('operation_result', payload)

    @socketio.on('watch_operation')
    def handle_watch_operation(data):
        """Handle watch operations like pause, mute, recheck via Socket.IO"""
        try:
            operation = data.get('op')
            watch_uuid = data.get('uuid')

            logger.debug(f"Socket.IO: Received watch operation '{operation}' for UUID {watch_uuid}")

            if not operation or not watch_uuid:
                _reply({'success': False, 'error': 'Missing operation or UUID'})
                return

            # Look the watch up once; a missing UUID means nothing to operate on.
            watch = datastore.data['watching'].get(watch_uuid)
            if not watch:
                _reply({'success': False, 'error': 'Watch not found'})
                return

            if operation == 'pause':
                watch.toggle_pause()
                logger.info(f"Socket.IO: Toggled pause for watch {watch_uuid}")
            elif operation == 'mute':
                watch.toggle_mute()
                logger.info(f"Socket.IO: Toggled mute for watch {watch_uuid}")
            elif operation == 'recheck':
                # Imported lazily to avoid circular imports at module load time.
                from changedetectionio.flask_app import update_q
                from changedetectionio import queuedWatchMetaData
                from changedetectionio import worker_handler

                worker_handler.queue_item_async_safe(update_q, queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid}))
                logger.info(f"Socket.IO: Queued recheck for watch {watch_uuid}")
            else:
                _reply({'success': False, 'error': f'Unknown operation: {operation}'})
                return

            # Nudge the UI: connected clients refresh this watch's row.
            watch_check_update = signal('watch_check_update')
            if watch_check_update:
                watch_check_update.send(watch_uuid=watch_uuid)

            _reply({'success': True, 'operation': operation, 'uuid': watch_uuid})

        except Exception as e:
            logger.error(f"Socket.IO error in handle_watch_operation: {str(e)}")
            emit('operation_result', {'success': False, 'error': str(e)})
 | 
			
		||||
							
								
								
									
										408
									
								
								changedetectionio/realtime/socket_server.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										408
									
								
								changedetectionio/realtime/socket_server.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,408 @@
 | 
			
		||||
import timeago
 | 
			
		||||
from flask_socketio import SocketIO
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
import os
 | 
			
		||||
from loguru import logger
 | 
			
		||||
from blinker import signal
 | 
			
		||||
 | 
			
		||||
from changedetectionio import strtobool
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class SignalHandler:
    """Bridge between blinker signals and Socket.IO clients.

    Subscribes to the application's blinker signals (watch check updates,
    queue length, watch deletion, favicon bumps, notification events) and
    re-emits each as a Socket.IO event to all connected clients.  Also
    starts a daemon polling thread that diffs the async workers' running
    set and pushes start/finish updates for individual watches.
    """

    def __init__(self, socketio_instance, datastore):
        # Socket.IO server used for all outbound emits.
        self.socketio_instance = socketio_instance
        # Application datastore; read to resolve watch UUIDs to watch objects.
        self.datastore = datastore

        # Connect to the watch_check_update signal (imported directly from
        # flask_app rather than looked up by name).  weak=False keeps the
        # subscription alive even though the bound method would otherwise
        # only be weakly referenced by blinker.
        from changedetectionio.flask_app import watch_check_update as wcc
        wcc.connect(self.handle_signal, weak=False)
        #        logger.info("SignalHandler: Connected to signal from direct import")

        # Connect to the queue_length signal
        queue_length_signal = signal('queue_length')
        queue_length_signal.connect(self.handle_queue_length, weak=False)
        #       logger.info("SignalHandler: Connected to queue_length signal")

        watch_delete_signal = signal('watch_deleted')
        watch_delete_signal.connect(self.handle_deleted_signal, weak=False)

        watch_favicon_bumped_signal = signal('watch_favicon_bump')
        watch_favicon_bumped_signal.connect(self.handle_watch_bumped_favicon_signal, weak=False)

        # Connect to the notification_event signal
        notification_event_signal = signal('notification_event')
        notification_event_signal.connect(self.handle_notification_event, weak=False)
        logger.info("SignalHandler: Connected to notification_event signal")

        # Create and start the queue update thread using standard threading.
        # daemon=True so a hung poll loop cannot block interpreter exit.
        import threading
        self.polling_emitter_thread = threading.Thread(
            target=self.polling_emit_running_or_queued_watches_threaded,
            daemon=True
        )
        self.polling_emitter_thread.start()
        logger.info("Started polling thread using threading (eventlet-free)")

        # Store the thread reference on the socketio object so its
        # shutdown() hook can join the thread during clean shutdown.
        self.socketio_instance.polling_emitter_thread = self.polling_emitter_thread

    def handle_signal(self, *args, **kwargs):
        """React to 'watch_check_update': re-emit the watch's state to clients.

        Expects ``watch_uuid`` and optionally ``app_context`` (a Flask app)
        in kwargs; silently ignores signals without a UUID.
        """
        logger.trace(f"SignalHandler: Signal received with {len(args)} args and {len(kwargs)} kwargs")
        # Safely extract the watch UUID from kwargs
        watch_uuid = kwargs.get('watch_uuid')
        app_context = kwargs.get('app_context')

        if watch_uuid:
            # Get the watch object from the datastore
            watch = self.datastore.data['watching'].get(watch_uuid)
            if watch:
                if app_context:
                    # Build a request context so the emitted payload can use
                    # Jinja/url helpers that require one (e.g. the datetime filter).
                    with app_context.app_context():
                        with app_context.test_request_context():
                            # Forward to handle_watch_update with the watch parameter
                            handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore)
                else:
                    handle_watch_update(self.socketio_instance, watch=watch, datastore=self.datastore)

                logger.trace(f"Signal handler processed watch UUID {watch_uuid}")
            else:
                logger.warning(f"Watch UUID {watch_uuid} not found in datastore")

    def handle_watch_bumped_favicon_signal(self, *args, **kwargs):
        """Tell all clients that a watch's favicon was refreshed."""
        watch_uuid = kwargs.get('watch_uuid')
        if watch_uuid:
            # Broadcast the favicon bump to all connected clients
            self.socketio_instance.emit("watch_bumped_favicon", {
                "uuid": watch_uuid,
                "event_timestamp": time.time()
            })
        logger.debug(f"Watch UUID {watch_uuid} got its favicon updated")

    def handle_deleted_signal(self, *args, **kwargs):
        """Tell all clients that a watch was deleted so they can drop its row."""
        watch_uuid = kwargs.get('watch_uuid')
        if watch_uuid:
            # Broadcast the deletion to all connected clients
            self.socketio_instance.emit("watch_deleted", {
                "uuid": watch_uuid,
                "event_timestamp": time.time()
            })
        logger.debug(f"Watch UUID {watch_uuid} was deleted")

    def handle_queue_length(self, *args, **kwargs):
        """Handle queue_length signal and emit to all clients"""
        try:
            # Defaults to 0 when the sender did not include a length.
            queue_length = kwargs.get('length', 0)
            logger.debug(f"SignalHandler: Queue length update received: {queue_length}")

            # Emit the queue size to all connected clients
            self.socketio_instance.emit("queue_size", {
                "q_length": queue_length,
                "event_timestamp": time.time()
            })

        except Exception as e:
            logger.error(f"Socket.IO error in handle_queue_length: {str(e)}")

    def handle_notification_event(self, *args, **kwargs):
        """Handle notification_event signal and emit to all clients"""
        try:
            watch_uuid = kwargs.get('watch_uuid')
            logger.debug(f"SignalHandler: Notification event received for watch UUID: {watch_uuid}")

            # Emit the notification event to all connected clients
            self.socketio_instance.emit("notification_event", {
                "watch_uuid": watch_uuid,
                "event_timestamp": time.time()
            })

            logger.trace(f"Socket.IO: Emitted notification_event for watch UUID {watch_uuid}")

        except Exception as e:
            logger.error(f"Socket.IO error in handle_notification_event: {str(e)}")

    def polling_emit_running_or_queued_watches_threaded(self):
        """Threading version of polling for Windows compatibility.

        Runs in a daemon thread: every cycle it diffs the set of watch
        UUIDs the async workers are currently processing against the
        previous cycle, and fires 'watch_check_update' for each UUID that
        started or finished, so clients see spinner state changes.
        """
        import time
        import threading
        logger.info("Queue update thread started (threading mode)")

        # Import here to avoid circular imports
        from changedetectionio.flask_app import app
        from changedetectionio import worker_handler
        watch_check_update = signal('watch_check_update')

        # Track previous state to avoid unnecessary emissions
        previous_running_uuids = set()

        # Run until app shutdown - check exit flag more frequently for fast shutdown.
        # NOTE(review): this reads an 'exit' *attribute* on app.config; if that
        # attribute is absent the fallback Event() is never set and the loop
        # only ends at process exit (the thread is a daemon) — confirm flask_app
        # actually sets app.config.exit as an attribute, not a dict key.
        exit_event = getattr(app.config, 'exit', threading.Event())

        while not exit_event.is_set():
            try:
                # Get current running UUIDs from async workers
                running_uuids = set(worker_handler.get_running_uuids())

                # Only send updates for UUIDs that changed state
                newly_running = running_uuids - previous_running_uuids
                no_longer_running = previous_running_uuids - running_uuids

                # Send updates for newly running UUIDs (but exit fast if shutdown requested)
                for uuid in newly_running:
                    if exit_event.is_set():
                        break
                    logger.trace(f"Threading polling: UUID {uuid} started processing")
                    with app.app_context():
                        watch_check_update.send(app_context=app, watch_uuid=uuid)
                    time.sleep(0.01)  # Small yield

                # Send updates for UUIDs that finished processing (but exit fast if shutdown requested)
                if not exit_event.is_set():
                    for uuid in no_longer_running:
                        if exit_event.is_set():
                            break
                        logger.trace(f"Threading polling: UUID {uuid} finished processing")
                        with app.app_context():
                            watch_check_update.send(app_context=app, watch_uuid=uuid)
                        time.sleep(0.01)  # Small yield

                # Update tracking for next iteration
                previous_running_uuids = running_uuids

                # Sleep between polling cycles, but check exit flag every 0.5 seconds for fast shutdown
                for _ in range(20):  # 20 * 0.5 = 10 seconds total
                    if exit_event.is_set():
                        break
                    time.sleep(0.5)

            except Exception as e:
                logger.error(f"Error in threading polling: {str(e)}")
                # Even during error recovery, check for exit quickly
                for _ in range(1):  # 1 * 0.5 = 0.5 seconds
                    if exit_event.is_set():
                        break
                    time.sleep(0.5)

        # Check if we're in pytest environment - if so, be more gentle with logging
        # (the logging sink may already be torn down by the test harness).
        import sys
        in_pytest = "pytest" in sys.modules or "PYTEST_CURRENT_TEST" in os.environ

        if not in_pytest:
            logger.info("Queue update thread stopped (threading mode)")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def handle_watch_update(socketio, **kwargs):
    """Push one watch's current state to every connected client.

    Expects ``watch`` and ``datastore`` keyword arguments.  Emits a
    'watch_update' event carrying a trimmed-down snapshot of the watch,
    followed by a 'general_stats_update' event with overall error and
    unread counters.  Any failure is logged rather than raised.
    """
    try:
        watch = kwargs.get('watch')
        datastore = kwargs.get('datastore')

        # Imported lazily to avoid circular imports at module load time.
        from changedetectionio.flask_app import update_q
        from changedetectionio.flask_app import _jinja2_filter_datetime
        from changedetectionio import worker_handler

        # Watches currently being processed by the async workers.
        running_uuids = worker_handler.get_running_uuids()

        # UUIDs currently waiting in the recheck queue.
        queue_list = [
            q_item.item['uuid']
            for q_item in update_q.queue
            if hasattr(q_item, 'item') and 'uuid' in q_item.item
        ]

        error_texts = watch.compile_error_texts()

        # A human-readable "last changed" only makes sense with 2+ snapshots.
        has_change_history = watch.history_n >= 2 and int(watch.last_changed) > 0

        # Trimmed-down snapshot of the watch for the browser UI.
        watch_data = {
            'checking_now': watch.get('uuid') in running_uuids,
            'error_text': error_texts,
            'event_timestamp': time.time(),
            'fetch_time': watch.get('fetch_time'),
            'has_error': bool(error_texts),
            'has_favicon': bool(watch.get_favicon_filename()),
            'history_n': watch.history_n,
            'last_changed_text': timeago.format(int(watch.last_changed), time.time()) if has_change_history else 'Not yet',
            'last_checked': watch.get('last_checked'),
            'last_checked_text': _jinja2_filter_datetime(watch),
            'notification_muted': bool(watch.get('notification_muted')),
            'paused': bool(watch.get('paused')),
            'queued': watch.get('uuid') in queue_list,
            'unviewed': watch.has_unviewed,
            'uuid': watch.get('uuid'),
        }

        # Overall error count across all watches, for the header badge.
        errored_count = sum(
            1 for watch_iter in datastore.data['watching'].values()
            if watch_iter.get('last_error')
        )

        general_stats = {
            'count_errors': errored_count,
            'unread_changes_count': datastore.unread_changes_count
        }

        # Emit to all clients (broadcast is the default behaviour, no
        # 'broadcast' parameter needed).
        socketio.emit("watch_update", {'watch': watch_data})
        socketio.emit("general_stats_update", general_stats)

        # Log after successful emit - use watch_data['uuid'] to avoid variable shadowing issues
        logger.trace(f"Socket.IO: Emitted update for watch {watch_data['uuid']}, Checking now: {watch_data['checking_now']}")

    except Exception as e:
        logger.error(f"Socket.IO error in handle_watch_update: {str(e)}")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def init_socketio(app, datastore):
    """Initialize SocketIO with the main Flask app.

    Chooses the async mode from the SOCKETIO_MODE environment variable
    (default 'threading', optional 'gevent'), creates the SocketIO server,
    registers connect/disconnect and operation handlers, wires up the
    blinker SignalHandler bridge, and attaches a shutdown() hook.

    Returns the configured SocketIO instance.
    """
    import platform
    import sys

    # Platform info is gathered only for the diagnostic log line below.
    system = platform.system().lower()
    python_version = sys.version_info

    # Check for SocketIO mode configuration via environment variable
    # Default is 'threading' for best cross-platform compatibility
    socketio_mode = os.getenv('SOCKETIO_MODE', 'threading').lower()

    if socketio_mode == 'gevent':
        # Use gevent mode (higher concurrency but platform limitations);
        # fall back to threading if gevent isn't installed.
        try:
            import gevent
            async_mode = 'gevent'
            logger.info(f"SOCKETIO_MODE=gevent: Using {async_mode} mode for Socket.IO")
        except ImportError:
            async_mode = 'threading'
            logger.warning(f"SOCKETIO_MODE=gevent but gevent not available, falling back to {async_mode} mode")
    elif socketio_mode == 'threading':
        # Use threading mode (default - best compatibility)
        async_mode = 'threading'
        logger.info(f"SOCKETIO_MODE=threading: Using {async_mode} mode for Socket.IO")
    else:
        # Invalid mode specified, use default
        async_mode = 'threading'
        logger.warning(f"Invalid SOCKETIO_MODE='{socketio_mode}', using default {async_mode} mode for Socket.IO")

    # Log platform info for debugging
    logger.info(f"Platform: {system}, Python: {python_version.major}.{python_version.minor}, Socket.IO mode: {async_mode}")

    # Restrict SocketIO CORS to same origin by default, can be overridden with env var
    cors_origins = os.environ.get('SOCKETIO_CORS_ORIGINS', None)

    socketio = SocketIO(app,
                        async_mode=async_mode,
                        cors_allowed_origins=cors_origins,  # None means same-origin only
                        logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')),
                        engineio_logger=strtobool(os.getenv('SOCKETIO_LOGGING', 'False')))

    # Set up event handlers
    logger.info("Socket.IO: Registering connect event handler")

    @socketio.on('checkbox-operation')
    def event_checkbox_operations(data):
        # Bulk checkbox operations (pause/mute/delete/etc on multiple UUIDs),
        # delegated to the same helper the UI blueprint uses.
        from changedetectionio.blueprint.ui import _handle_operations
        from changedetectionio import queuedWatchMetaData
        from changedetectionio import worker_handler
        from changedetectionio.flask_app import update_q, watch_check_update
        logger.trace(f"Got checkbox operations event: {data}")

        datastore = socketio.datastore

        _handle_operations(
            op=data.get('op'),
            uuids=data.get('uuids'),
            datastore=datastore,
            extra_data=data.get('extra_data'),
            worker_handler=worker_handler,
            update_q=update_q,
            queuedWatchMetaData=queuedWatchMetaData,
            watch_check_update=watch_check_update,
            emit_flash=False
        )

    @socketio.on('connect')
    def handle_connect():
        """Handle client connection"""
        #        logger.info("Socket.IO: CONNECT HANDLER CALLED - Starting connection process")
        from flask import request
        from flask_login import current_user
        from changedetectionio.flask_app import update_q

        # Access datastore from socketio (assigned below, before any client
        # can connect since the server only serves after init returns).
        datastore = socketio.datastore
        #        logger.info(f"Socket.IO: Current user authenticated: {current_user.is_authenticated if hasattr(current_user, 'is_authenticated') else 'No current_user'}")

        # Check if authentication is required and user is not authenticated
        has_password_enabled = datastore.data['settings']['application'].get('password') or os.getenv("SALTED_PASS", False)
        #        logger.info(f"Socket.IO: Password enabled: {has_password_enabled}")
        if has_password_enabled and not current_user.is_authenticated:
            logger.warning("Socket.IO: Rejecting unauthenticated connection")
            return False  # Reject the connection

        # Send the current queue size to the newly connected client
        try:
            queue_size = update_q.qsize()
            socketio.emit("queue_size", {
                "q_length": queue_size,
                "event_timestamp": time.time()
            }, room=request.sid)  # Send only to this client
            logger.debug(f"Socket.IO: Sent initial queue size {queue_size} to new client")
        except Exception as e:
            logger.error(f"Socket.IO error sending initial queue size: {str(e)}")

        logger.info("Socket.IO: Client connected")

    #    logger.info("Socket.IO: Registering disconnect event handler")
    @socketio.on('disconnect')
    def handle_disconnect():
        """Handle client disconnection"""
        logger.info("Socket.IO: Client disconnected")

    # Create a dedicated signal handler that will receive signals and emit them to clients.
    # NOTE(review): this starts the polling thread before socketio.datastore is
    # assigned below; harmless as long as no signal fires in that window — confirm.
    signal_handler = SignalHandler(socketio, datastore)

    # Register watch operation event handlers
    from .events import register_watch_operation_handlers
    register_watch_operation_handlers(socketio, datastore)

    # Store the datastore reference on the socketio object for later use
    socketio.datastore = datastore

    # No stop event needed for threading mode - threads check app.config.exit directly

    # Add a shutdown method to the socketio object
    def shutdown():
        """Shutdown the SocketIO server fast and aggressively"""
        try:
            logger.info("Socket.IO: Fast shutdown initiated...")

            # For threading mode, give the thread a very short time to exit gracefully;
            # it's a daemon thread so it cannot block process exit anyway.
            if hasattr(socketio, 'polling_emitter_thread'):
                if socketio.polling_emitter_thread.is_alive():
                    logger.info("Socket.IO: Waiting 1 second for polling thread to stop...")
                    socketio.polling_emitter_thread.join(timeout=1.0)  # Only 1 second timeout
                    if socketio.polling_emitter_thread.is_alive():
                        logger.info("Socket.IO: Polling thread still running after timeout - continuing with shutdown")
                    else:
                        logger.info("Socket.IO: Polling thread stopped quickly")
                else:
                    logger.info("Socket.IO: Polling thread already stopped")

            logger.info("Socket.IO: Fast shutdown complete")
        except Exception as e:
            logger.error(f"Socket.IO error during shutdown: {str(e)}")

    # Attach the shutdown method to the socketio object
    socketio.shutdown = shutdown

    logger.info("Socket.IO initialized and attached to main Flask app")
    logger.info(f"Socket.IO: Registered event handlers: {socketio.handlers if hasattr(socketio, 'handlers') else 'No handlers found'}")
    return socketio
 | 
			
		||||
							
								
								
									
										130
									
								
								changedetectionio/rss_tools.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										130
									
								
								changedetectionio/rss_tools.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,130 @@
 | 
			
		||||
"""
 | 
			
		||||
RSS/Atom feed processing tools for changedetection.io
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
from loguru import logger
 | 
			
		||||
import re
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
    """
    Process CDATA sections in HTML/XML content - inline replacement.

    Each ``<![CDATA[ ... ]]>`` section is converted to plain text via
    html_to_text, XML-escaped, stripped, and substituted back in place of
    the original CDATA block.

    Args:
        html_content: The HTML/XML content to process
        render_anchor_tag_content: Whether to render anchor tag content

    Returns:
        Processed HTML/XML content with CDATA sections replaced inline
    """
    from xml.sax.saxutils import escape as xml_escape
    from .html_tools import html_to_text

    # Raw string fix: the previous non-raw pattern relied on invalid escape
    # sequences (\[, \s, \]) which emit SyntaxWarning on Python >= 3.12 and
    # are slated to become errors. The tempered dot (.(?<!\]\]>)) stops the
    # match at the first closing ']]>' marker.
    pattern = r'<!\[CDATA\[(\s*(?:.(?<!\]\]>)\s*)*)\]\]>'

    def repl(m):
        text = m.group(1)
        return xml_escape(html_to_text(html_content=text, render_anchor_tag_content=render_anchor_tag_content)).strip()

    return re.sub(pattern, repl, html_content)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def format_rss_items(rss_content: str, render_anchor_tag_content=False) -> str:
    """
    Render RSS/Atom feed items as readable HTML using feedparser.

    Each RSS <item> / Atom <entry> is turned into a block of:
    - <title> → <h1>Title</h1>
    - <link> → Link: [url]
    - <guid> → Guid: [id]
    - <pubDate> → PubDate: [date]
    - <description> or <content> → raw HTML (feedparser already unwraps
      CDATA sections and unescapes entities)

    Args:
        rss_content: The RSS/Atom feed content
        render_anchor_tag_content: Unused, kept for call-site compatibility

    Returns:
        Formatted HTML content ready for html_to_text conversion
    """
    try:
        import feedparser
        from xml.sax.saxutils import escape as xml_escape

        # feedparser copes with every RSS/Atom variant, CDATA, entities, etc.
        parsed = feedparser.parse(rss_content)

        # feedparser reports the dialect in .version ('rss20', 'atom10', ...).
        # We only need to know whether this is Atom, to decide on the
        # "Summary:" label for <summary> content below.
        feed_is_atom = parsed.version and 'atom' in parsed.version

        rendered_entries = []

        for entry in parsed.entries:
            parts = []

            # Title (already CDATA/entity-unwrapped by feedparser)
            title = getattr(entry, 'title', None)
            if title:
                parts.append(f'<h1>{xml_escape(title)}</h1>')

            # Link
            link = getattr(entry, 'link', None)
            if link:
                parts.append(f'Link: {xml_escape(link)}<br>')

            # GUID/ID
            guid = getattr(entry, 'id', None)
            if guid:
                parts.append(f'Guid: {xml_escape(guid)}<br>')

            # Date - feedparser normalizes all date field names to 'published'
            published = getattr(entry, 'published', None)
            if published:
                parts.append(f'PubDate: {xml_escape(published)}<br>')

            # Body: Atom <content> is shown without a label; RSS
            # <description> and Atom <summary> both arrive as .summary,
            # and only the Atom flavour gets a "Summary:" label.
            body = None
            label_summary = False

            if getattr(entry, 'content', None):
                body = entry.content[0].value or None
            elif hasattr(entry, 'summary'):
                body = entry.summary or None
                if feed_is_atom:
                    label_summary = True

            if body:
                parts.append(f'Summary:<br>{body}' if label_summary else body)
            else:
                # No usable content at all - placeholder
                parts.append('<none>')

            if parts:
                rendered_entries.append('\n'.join(parts))

        # Wrap every entry in a <div> carrying positional classes
        # (first / last / item-N) so downstream filters can target them.
        last_index = len(rendered_entries) - 1
        wrapped = []
        for position, entry_html in enumerate(rendered_entries):
            css_classes = ['rss-item']
            if position == 0:
                css_classes.append('first')
            if position == last_index:
                css_classes.append('last')
            css_classes.append(f'item-{position + 1}')

            joined_classes = ' '.join(css_classes)
            wrapped.append(f'<div class="{joined_classes}">{entry_html}</div>')

        return '<html><body>\n' + "\n<br><br>".join(wrapped) + '\n</body></html>'

    except Exception as e:
        logger.warning(f"Error formatting RSS items: {str(e)}")
        # Fall back to original content
        return rss_content
 | 
			
		||||
@@ -11,32 +11,32 @@ set -e
 | 
			
		||||
 | 
			
		||||
SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
 | 
			
		||||
 | 
			
		||||
find tests/test_*py -type f|while read test_name
 | 
			
		||||
do
 | 
			
		||||
  echo "TEST RUNNING $test_name"
 | 
			
		||||
  # REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
 | 
			
		||||
  REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name
 | 
			
		||||
done
 | 
			
		||||
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
 | 
			
		||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -n 30 --dist load  tests/test_*.py
 | 
			
		||||
 | 
			
		||||
#time pytest -n auto --dist loadfile -vv --tb=long tests/test_*.py
 | 
			
		||||
echo "RUNNING WITH BASE_URL SET"
 | 
			
		||||
 | 
			
		||||
# Now re-run some tests with BASE_URL enabled
 | 
			
		||||
# Re #65 - Ability to include a link back to the installation, in the notification.
 | 
			
		||||
export BASE_URL="https://really-unique-domain.io"
 | 
			
		||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
 | 
			
		||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest -vv --maxfail=1 tests/test_notification.py
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Re-run with HIDE_REFERER set - could affect login
 | 
			
		||||
export HIDE_REFERER=True
 | 
			
		||||
pytest tests/test_access_control.py
 | 
			
		||||
pytest -vv -s --maxfail=1 tests/test_access_control.py
 | 
			
		||||
 | 
			
		||||
# Re-run a few tests that will trigger brotli based storage
 | 
			
		||||
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
 | 
			
		||||
pytest tests/test_access_control.py
 | 
			
		||||
pytest -vv -s --maxfail=1 tests/test_access_control.py
 | 
			
		||||
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
 | 
			
		||||
pytest tests/test_backend.py
 | 
			
		||||
pytest tests/test_rss.py
 | 
			
		||||
pytest tests/test_unique_lines.py
 | 
			
		||||
pytest -vv -s --maxfail=1 tests/test_backend.py
 | 
			
		||||
pytest -vv -s --maxfail=1 tests/test_rss.py
 | 
			
		||||
pytest -vv -s --maxfail=1 tests/test_unique_lines.py
 | 
			
		||||
 | 
			
		||||
# Try high concurrency
 | 
			
		||||
FETCH_WORKERS=130 pytest  tests/test_history_consistency.py -v -l
 | 
			
		||||
 | 
			
		||||
# Check file:// will pickup a file when enabled
 | 
			
		||||
echo "Hello world" > /tmp/test-file.txt
 | 
			
		||||
 
 | 
			
		||||
@@ -6,6 +6,8 @@
 | 
			
		||||
 | 
			
		||||
# enable debug
 | 
			
		||||
set -x
 | 
			
		||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
 | 
			
		||||
docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4
 | 
			
		||||
 | 
			
		||||
# A extra browser is configured, but we never chose to use it, so it should NOT show in the logs
 | 
			
		||||
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url'
 | 
			
		||||
 
 | 
			
		||||
@@ -19,12 +19,13 @@ docker run --network changedet-network -d \
 | 
			
		||||
  -v `pwd`/tests/proxy_list/squid-passwords.txt:/etc/squid3/passwords \
 | 
			
		||||
  ubuntu/squid:4.13-21.10_edge
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
sleep 5
 | 
			
		||||
## 2nd test actually choose the preferred proxy from proxies.json
 | 
			
		||||
# This will force a request via "proxy-two"
 | 
			
		||||
docker run --network changedet-network \
 | 
			
		||||
  -v `pwd`/tests/proxy_list/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
 | 
			
		||||
  -v `pwd`/tests/proxy_list/proxies.json-example:/tmp/proxies.json \
 | 
			
		||||
  test-changedetectionio \
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_multiple_proxy.py'
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest -s tests/proxy_list/test_multiple_proxy.py --datastore-path /tmp'
 | 
			
		||||
 | 
			
		||||
set +e
 | 
			
		||||
echo "- Looking for chosen.changedetection.io request in squid-one - it should NOT be here"
 | 
			
		||||
@@ -48,8 +49,10 @@ fi
 | 
			
		||||
# Test the UI configurable proxies
 | 
			
		||||
docker run --network changedet-network \
 | 
			
		||||
  test-changedetectionio \
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py'
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py --datastore-path /tmp'
 | 
			
		||||
 | 
			
		||||
# Give squid proxies a moment to flush their logs
 | 
			
		||||
sleep 2
 | 
			
		||||
 | 
			
		||||
# Should see a request for one.changedetection.io in there
 | 
			
		||||
echo "- Looking for .changedetection.io request in squid-custom"
 | 
			
		||||
@@ -63,7 +66,10 @@ fi
 | 
			
		||||
# Test "no-proxy" option
 | 
			
		||||
docker run --network changedet-network \
 | 
			
		||||
  test-changedetectionio \
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py'
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_noproxy.py --datastore-path /tmp'
 | 
			
		||||
 | 
			
		||||
# Give squid proxies a moment to flush their logs
 | 
			
		||||
sleep 2
 | 
			
		||||
 | 
			
		||||
# We need to handle grep returning 1
 | 
			
		||||
set +e
 | 
			
		||||
@@ -80,5 +86,29 @@ for c in $(echo "squid-one squid-two squid-custom"); do
 | 
			
		||||
  fi
 | 
			
		||||
done
 | 
			
		||||
 | 
			
		||||
echo "docker ps output"
 | 
			
		||||
docker ps
 | 
			
		||||
 | 
			
		||||
docker kill squid-one squid-two squid-custom
 | 
			
		||||
 | 
			
		||||
# Test that the UI is returning the correct error message when a proxy is not available
 | 
			
		||||
 | 
			
		||||
# Requests
 | 
			
		||||
docker run --network changedet-network \
 | 
			
		||||
  test-changedetectionio \
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
 | 
			
		||||
 | 
			
		||||
# Playwright
 | 
			
		||||
docker run --network changedet-network \
 | 
			
		||||
  test-changedetectionio \
 | 
			
		||||
  bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
 | 
			
		||||
 | 
			
		||||
# Puppeteer fast
 | 
			
		||||
docker run --network changedet-network \
 | 
			
		||||
  test-changedetectionio \
 | 
			
		||||
  bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
 | 
			
		||||
 | 
			
		||||
# Selenium
 | 
			
		||||
docker run --network changedet-network \
 | 
			
		||||
  test-changedetectionio \
 | 
			
		||||
  bash -c 'cd changedetectionio && WEBDRIVER_URL=http://selenium:4444/wd/hub pytest tests/proxy_list/test_proxy_noconnect.py --datastore-path /tmp'
 | 
			
		||||
 
 | 
			
		||||
@@ -5,22 +5,23 @@ set -e
 | 
			
		||||
# enable debug
 | 
			
		||||
set -x
 | 
			
		||||
 | 
			
		||||
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
 | 
			
		||||
 | 
			
		||||
# SOCKS5 related - start simple Socks5 proxy server
 | 
			
		||||
# SOCKSTEST=xyz should show in the logs of this service to confirm it fetched
 | 
			
		||||
docker run --network changedet-network -d --hostname socks5proxy --rm  --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy
 | 
			
		||||
docker run --network changedet-network -d --hostname socks5proxy-noauth --rm  -p 1081:1080 --name socks5proxy-noauth  serjs/go-socks5-proxy
 | 
			
		||||
docker run --network changedet-network -d --hostname socks5proxy-noauth --rm -p 1081:1080 --name socks5proxy-noauth -e REQUIRE_AUTH=false serjs/go-socks5-proxy
 | 
			
		||||
 | 
			
		||||
echo "---------------------------------- SOCKS5 -------------------"
 | 
			
		||||
# SOCKS5 related - test from proxies.json
 | 
			
		||||
docker run --network changedet-network \
 | 
			
		||||
  -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \
 | 
			
		||||
  -v `pwd`/tests/proxy_socks5/proxies.json-example:/tmp/proxies.json \
 | 
			
		||||
  --rm \
 | 
			
		||||
  -e "FLASK_SERVER_NAME=cdio" \
 | 
			
		||||
  --hostname cdio \
 | 
			
		||||
  -e "SOCKSTEST=proxiesjson" \
 | 
			
		||||
  test-changedetectionio \
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py'
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py  --datastore-path /tmp'
 | 
			
		||||
 | 
			
		||||
# SOCKS5 related - by manually entering in UI
 | 
			
		||||
docker run --network changedet-network \
 | 
			
		||||
@@ -29,18 +30,18 @@ docker run --network changedet-network \
 | 
			
		||||
  --hostname cdio \
 | 
			
		||||
  -e "SOCKSTEST=manual" \
 | 
			
		||||
  test-changedetectionio \
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy.py'
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy.py --datastore-path /tmp'
 | 
			
		||||
 | 
			
		||||
# SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY
 | 
			
		||||
docker run --network changedet-network \
 | 
			
		||||
  -e "SOCKSTEST=manual-playwright" \
 | 
			
		||||
  --hostname cdio \
 | 
			
		||||
  -e "FLASK_SERVER_NAME=cdio" \
 | 
			
		||||
  -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \
 | 
			
		||||
  -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/tmp/proxies.json \
 | 
			
		||||
  -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \
 | 
			
		||||
  --rm \
 | 
			
		||||
  test-changedetectionio \
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py'
 | 
			
		||||
  bash -c 'cd changedetectionio && pytest --live-server-host=0.0.0.0 --live-server-port=5004  -s tests/proxy_socks5/test_socks5_proxy_sources.py --datastore-path /tmp'
 | 
			
		||||
 | 
			
		||||
echo "socks5 server logs"
 | 
			
		||||
docker logs socks5proxy
 | 
			
		||||
 
 | 
			
		||||
@@ -1,18 +0,0 @@
 | 
			
		||||
"""
 | 
			
		||||
Safe Jinja2 render with max payload sizes
 | 
			
		||||
 | 
			
		||||
See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
 | 
			
		||||
"""
 | 
			
		||||
 | 
			
		||||
import jinja2.sandbox
 | 
			
		||||
import typing as t
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
# Output cap in bytes: JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB (KB) env override,
# defaulting to 10 MB.
JINJA2_MAX_RETURN_PAYLOAD_SIZE = 1024 * int(os.getenv("JINJA2_MAX_RETURN_PAYLOAD_SIZE_KB", 1024 * 10))


def render(template_str, **args: t.Any) -> str:
    """Render *template_str* inside an immutable Jinja2 sandbox and truncate
    the result to JINJA2_MAX_RETURN_PAYLOAD_SIZE characters.

    See https://jinja.palletsprojects.com/en/3.1.x/sandbox/#security-considerations
    """
    sandbox = jinja2.sandbox.ImmutableSandboxedEnvironment(extensions=['jinja2_time.TimeExtension'])
    rendered = sandbox.from_string(template_str).render(args)
    return rendered[:JINJA2_MAX_RETURN_PAYLOAD_SIZE]
 | 
			
		||||
 | 
			
		||||
Some files were not shown because too many files have changed in this diff Show More
		Reference in New Issue
	
	Block a user