Compare commits

..

11 Commits

Author SHA1 Message Date
dgtlmoon
d07cb2a7a8 Merge branch 'master' into url-validation-improvements-2 2025-10-28 12:51:08 +01:00
dgtlmoon
f9cd4da2bb safer 2025-10-28 11:46:00 +01:00
dgtlmoon
4e699cc13b File:// type doesnt go through universal URL validation 2025-10-28 11:44:11 +01:00
dgtlmoon
964302cf3c blah 2025-10-28 11:15:58 +01:00
dgtlmoon
00b31cabd5 Fix test 2025-10-28 10:58:44 +01:00
dgtlmoon
ac13d8cbde refactor and handle spaces etc 2025-10-28 10:38:51 +01:00
dgtlmoon
a037cf7b9a Adding deeper test 2025-10-28 10:27:09 +01:00
dgtlmoon
8a7ea79fb3 Extra XSS checking 2025-10-28 10:23:27 +01:00
dgtlmoon
ec0d7cff21 Merge branch 'master' into url-validation-improvements-2 2025-10-28 10:14:58 +01:00
dgtlmoon
d48c82052a Unify safe URL checking to the one function 2025-10-28 09:59:13 +01:00
dgtlmoon
552e98519b Merge commit from fork 2025-10-28 09:26:17 +01:00
16 changed files with 310 additions and 432 deletions

View File

@@ -1,48 +0,0 @@
name: 'Extract Memory Test Report'
description: 'Extracts and displays memory test report from a container'
inputs:
container-name:
description: 'Name of the container to extract logs from'
required: true
python-version:
description: 'Python version for artifact naming'
required: true
output-dir:
description: 'Directory to store output logs'
required: false
default: 'output-logs'
runs:
using: "composite"
steps:
- name: Create output directory
shell: bash
run: |
mkdir -p ${{ inputs.output-dir }}
- name: Dump container log
shell: bash
run: |
docker logs ${{ inputs.container-name }} > ${{ inputs.output-dir }}/${{ inputs.container-name }}-stdout-${{ inputs.python-version }}.txt 2>&1 || echo "Could not get stdout"
docker logs ${{ inputs.container-name }} 2> ${{ inputs.output-dir }}/${{ inputs.container-name }}-stderr-${{ inputs.python-version }}.txt || echo "Could not get stderr"
- name: Extract and display memory test report
shell: bash
run: |
echo "Extracting test-memory.log from container..."
docker cp ${{ inputs.container-name }}:/app/changedetectionio/test-memory.log ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log || echo "test-memory.log not found in container"
echo "=== Top 10 Highest Peak Memory Tests ==="
if [ -f ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log ]; then
grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log | \
sed 's/.*Peak memory: //' | \
paste -d'|' - <(grep "Peak memory:" ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log) | \
sort -t'|' -k1 -nr | \
cut -d'|' -f2 | \
head -10
echo ""
echo "=== Full Memory Test Report ==="
cat ${{ inputs.output-dir }}/test-memory-${{ inputs.python-version }}.log
else
echo "No memory log available"
fi

View File

@@ -15,14 +15,14 @@ on:
default: false default: false
jobs: jobs:
# Build the Docker image once and share it with all test jobs test-application:
build:
runs-on: ubuntu-latest runs-on: ubuntu-latest
env: env:
PYTHON_VERSION: ${{ inputs.python-version }} PYTHON_VERSION: ${{ inputs.python-version }}
steps: steps:
- uses: actions/checkout@v5 - uses: actions/checkout@v5
# Mainly just for link/flake8
- name: Set up Python ${{ env.PYTHON_VERSION }} - name: Set up Python ${{ env.PYTHON_VERSION }}
uses: actions/setup-python@v6 uses: actions/setup-python@v6
with: with:
@@ -31,333 +31,155 @@ jobs:
- name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }} - name: Build changedetection.io container for testing under Python ${{ env.PYTHON_VERSION }}
run: | run: |
echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----" echo "---- Building for Python ${{ env.PYTHON_VERSION }} -----"
# Build a changedetection.io container and start testing inside
docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio . docker build --build-arg PYTHON_VERSION=${{ env.PYTHON_VERSION }} --build-arg LOGGER_LEVEL=TRACE -t test-changedetectionio .
# Debug info
docker run test-changedetectionio bash -c 'pip list' docker run test-changedetectionio bash -c 'pip list'
- name: We should be Python ${{ env.PYTHON_VERSION }} ... - name: We should be Python ${{ env.PYTHON_VERSION }} ...
run: | run: |
docker run test-changedetectionio bash -c 'python3 --version' docker run test-changedetectionio bash -c 'python3 --version'
- name: Save Docker image - name: Spin up ancillary testable services
run: | run: |
docker save test-changedetectionio -o /tmp/test-changedetectionio.tar
- name: Upload Docker image artifact docker network create changedet-network
uses: actions/upload-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp/test-changedetectionio.tar
retention-days: 1
# Unit tests (lightweight, no ancillary services needed) # Selenium
unit-tests: docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- name: Download Docker image artifact # SocketPuppetBrowser + Extra for custom browser test
uses: actions/download-artifact@v5 docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
with: docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image - name: Spin up ancillary SMTP+Echo message test server
run: | run: |
docker load -i /tmp/test-changedetectionio.tar # Debug SMTP server/echo message back server, telnet 11080 to it should immediately bounce back the most recent message that tried to send (then you can see if cdio tried to send, the format, etc)
# 11025 is the SMTP port for testing
# apprise example would be 'mailto://changedetection@localhost:11025/?to=fff@home.com (it will also echo to STDOUT)
# telnet localhost 11080
docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
docker ps
- name: Show docker container state and other debug info
run: |
set -x
echo "Running processes in docker..."
docker ps
- name: Run Unit Tests - name: Run Unit Tests
run: | run: |
# Unit tests
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff' docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model' docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_watch_model'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security' docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_jinja2_security'
docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver' docker run test-changedetectionio bash -c 'python3 -m unittest changedetectionio.tests.unit.test_semver'
# Basic pytest tests with ancillary services - name: Test built container with Pytest (generally as requests/plaintext fetching)
basic-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 25
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: | run: |
docker load -i /tmp/test-changedetectionio.tar # All tests
echo "run test with pytest"
- name: Test built container with Pytest # The default pytest logger_level is TRACE
run: | # To change logger_level for pytest(test/conftest.py),
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network # append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG'
docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh' docker run --name test-cdio-basic-tests --network changedet-network test-changedetectionio bash -c 'cd changedetectionio && ./run_basic_tests.sh'
- name: Extract memory report and logs # PLAYWRIGHT/NODE-> CDP
if: always() - name: Playwright and SocketPuppetBrowser - Specific tests in built container
uses: ./.github/actions/extract-memory-report
with:
container-name: test-cdio-basic-tests
python-version: ${{ env.PYTHON_VERSION }}
- name: Store test artifacts
if: always()
uses: actions/upload-artifact@v5
with:
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
path: output-logs
# Playwright tests
playwright-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up ancillary services
run: |
docker network create changedet-network
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
- name: Playwright - Specific tests in built container
run: | run: |
# Playwright via Sockpuppetbrowser fetch
# tests/visualselector/test_fetch_data.py will do browser steps
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest -vv --capture=tee-sys --showlocals --tb=long --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
- name: Playwright - Headers and requests
- name: Playwright and SocketPuppetBrowser - Headers and requests
run: | run: |
# Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .' docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'find .; cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py; pwd;find .'
- name: Playwright - Restock detection - name: Playwright and SocketPuppetBrowser - Restock detection
run: | run: |
# restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
# Pyppeteer tests # STRAIGHT TO CDP
pyppeteer-tests: - name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container
runs-on: ubuntu-latest
needs: build
if: ${{ inputs.skip-pypuppeteer == false }} if: ${{ inputs.skip-pypuppeteer == false }}
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up ancillary services
run: |
docker network create changedet-network
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
- name: Pyppeteer - Specific tests in built container
run: | run: |
# Playwright via Sockpuppetbrowser fetch
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py'
docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py'
- name: Pyppeteer - Headers and requests checks - name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks
if: ${{ inputs.skip-pypuppeteer == false }}
run: | run: |
# Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers
docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py' docker run --name "changedet" --hostname changedet --rm -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
- name: Pyppeteer - Restock detection - name: Pyppeteer and SocketPuppetBrowser - Restock detection
if: ${{ inputs.skip-pypuppeteer == false }}
run: | run: |
# restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it
docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py'
# Selenium tests # SELENIUM
selenium-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up ancillary services
run: |
docker network create changedet-network
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
sleep 3
- name: Specific tests for headers and requests checks with Selenium
run: |
docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
- name: Specific tests in built container for Selenium - name: Specific tests in built container for Selenium
run: | run: |
# Selenium fetch
docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py'
- name: Specific tests in built container for headers and requests checks with Selenium
# SMTP tests
smtp-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: | run: |
docker load -i /tmp/test-changedetectionio.tar docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_request.py'
- name: Spin up SMTP test server
run: |
docker network create changedet-network
docker run --network changedet-network -d -p 11025:11025 -p 11080:11080 --hostname mailserver test-changedetectionio bash -c 'pip3 install aiosmtpd && python changedetectionio/tests/smtp/smtp-test-server.py'
# OTHER STUFF
- name: Test SMTP notification mime types - name: Test SMTP notification mime types
run: | run: |
# SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above
# "mailserver" hostname defined above
docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py' docker run --rm --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py'
# Proxy tests # @todo Add a test via playwright/puppeteer
proxy-tests: # squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up services
run: |
docker network create changedet-network
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
- name: Test proxy squid style interaction - name: Test proxy squid style interaction
run: | run: |
cd changedetectionio cd changedetectionio
./run_proxy_tests.sh ./run_proxy_tests.sh
cd ..
- name: Test proxy SOCKS5 style interaction - name: Test proxy SOCKS5 style interaction
run: | run: |
cd changedetectionio cd changedetectionio
./run_socks_proxy_tests.sh ./run_socks_proxy_tests.sh
cd ..
# Custom browser URL tests
custom-browser-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Spin up ancillary services
run: |
docker network create changedet-network
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest
docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest
- name: Test custom browser URL - name: Test custom browser URL
run: | run: |
cd changedetectionio cd changedetectionio
./run_custom_browser_url_tests.sh ./run_custom_browser_url_tests.sh
cd ..
# Container startup tests - name: Test changedetection.io container starts+runs basically without error
container-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Test container starts+runs basically without error
run: | run: |
docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio docker run --name test-changedetectionio -p 5556:5000 -d test-changedetectionio
sleep 3 sleep 3
# Should return 0 (no error) when grep finds it
curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid curl --retry-connrefused --retry 6 -s http://localhost:5556 |grep -q checkbox-uuid
# and IPv6
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
# Check whether TRACE log is enabled.
# Also, check whether TRACE came from STDOUT
docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1 docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
# Check whether DEBUG is came from STDOUT
docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
docker kill test-changedetectionio docker kill test-changedetectionio
- name: Test HTTPS SSL mode - name: Test HTTPS SSL mode
@@ -365,52 +187,44 @@ jobs:
openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost" openssl req -x509 -newkey rsa:4096 -keyout privkey.pem -out cert.pem -days 365 -nodes -subj "/CN=localhost"
docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio docker run --name test-changedetectionio-ssl --rm -e SSL_CERT_FILE=cert.pem -e SSL_PRIVKEY_FILE=privkey.pem -p 5000:5000 -v ./cert.pem:/app/cert.pem -v ./privkey.pem:/app/privkey.pem -d test-changedetectionio
sleep 3 sleep 3
# Should return 0 (no error) when grep finds it
# -k because its self-signed
curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid curl --retry-connrefused --retry 6 -k https://localhost:5000 -v|grep -q checkbox-uuid
docker kill test-changedetectionio-ssl docker kill test-changedetectionio-ssl
- name: Test IPv6 Mode - name: Test IPv6 Mode
run: | run: |
# IPv6 - :: bind to all interfaces inside container (like 0.0.0.0), ::1 would be localhost only
docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio docker run --name test-changedetectionio-ipv6 --rm -p 5000:5000 -e LISTEN_HOST=:: -d test-changedetectionio
sleep 3 sleep 3
# Should return 0 (no error) when grep finds it on localhost
curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid curl --retry-connrefused --retry 6 http://[::1]:5000 -v|grep -q checkbox-uuid
docker kill test-changedetectionio-ipv6 docker kill test-changedetectionio-ipv6
# Signal tests - name: Test changedetection.io SIGTERM and SIGINT signal shutdown
signal-tests:
runs-on: ubuntu-latest
needs: build
timeout-minutes: 10
env:
PYTHON_VERSION: ${{ inputs.python-version }}
steps:
- uses: actions/checkout@v5
- name: Download Docker image artifact
uses: actions/download-artifact@v5
with:
name: test-changedetectionio-${{ env.PYTHON_VERSION }}
path: /tmp
- name: Load Docker image
run: | run: |
docker load -i /tmp/test-changedetectionio.tar
- name: Test SIGTERM and SIGINT signal shutdown
run: |
echo SIGINT Shutdown request test echo SIGINT Shutdown request test
docker run --name sig-test -d test-changedetectionio docker run --name sig-test -d test-changedetectionio
sleep 3 sleep 3
echo ">>> Sending SIGINT to sig-test container" echo ">>> Sending SIGINT to sig-test container"
docker kill --signal=SIGINT sig-test docker kill --signal=SIGINT sig-test
sleep 3 sleep 3
# invert the check (it should be not 0/not running)
docker ps docker ps
# check signal catch(STDERR) log. Because of
# changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1 docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGINT' || exit 1
test -z "`docker ps|grep sig-test`" test -z "`docker ps|grep sig-test`"
if [ $? -ne 0 ]; then if [ $? -ne 0 ]
then
echo "Looks like container was running when it shouldnt be" echo "Looks like container was running when it shouldnt be"
docker ps docker ps
exit 1 exit 1
fi fi
# @todo - scan the container log to see the right "graceful shutdown" text exists
docker rm sig-test docker rm sig-test
echo SIGTERM Shutdown request test echo SIGTERM Shutdown request test
@@ -419,12 +233,56 @@ jobs:
echo ">>> Sending SIGTERM to sig-test container" echo ">>> Sending SIGTERM to sig-test container"
docker kill --signal=SIGTERM sig-test docker kill --signal=SIGTERM sig-test
sleep 3 sleep 3
# invert the check (it should be not 0/not running)
docker ps docker ps
# check signal catch(STDERR) log. Because of
# changedetectionio/__init__.py: logger.add(sys.stderr, level=logger_level)
docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1 docker logs sig-test 2>&1 | grep 'Shutdown: Got Signal - SIGTERM' || exit 1
test -z "`docker ps|grep sig-test`" test -z "`docker ps|grep sig-test`"
if [ $? -ne 0 ]; then if [ $? -ne 0 ]
then
echo "Looks like container was running when it shouldnt be" echo "Looks like container was running when it shouldnt be"
docker ps docker ps
exit 1 exit 1
fi fi
# @todo - scan the container log to see the right "graceful shutdown" text exists
docker rm sig-test docker rm sig-test
- name: Dump container log
if: always()
run: |
mkdir output-logs
docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout-${{ env.PYTHON_VERSION }}.txt
docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr-${{ env.PYTHON_VERSION }}.txt
- name: Extract and display memory test report
if: always()
run: |
# Extract test-memory.log from the container
echo "Extracting test-memory.log from container..."
docker cp test-cdio-basic-tests:/app/changedetectionio/test-memory.log output-logs/test-memory-${{ env.PYTHON_VERSION }}.log || echo "test-memory.log not found in container"
# Display the memory log contents for immediate visibility in workflow output
echo "=== Top 10 Highest Peak Memory Tests ==="
if [ -f output-logs/test-memory-${{ env.PYTHON_VERSION }}.log ]; then
# Sort by peak memory value (extract number before MB and sort numerically, reverse order)
grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log | \
sed 's/.*Peak memory: //' | \
paste -d'|' - <(grep "Peak memory:" output-logs/test-memory-${{ env.PYTHON_VERSION }}.log) | \
sort -t'|' -k1 -nr | \
cut -d'|' -f2 | \
head -10
echo ""
echo "=== Full Memory Test Report ==="
cat output-logs/test-memory-${{ env.PYTHON_VERSION }}.log
else
echo "No memory log available"
fi
- name: Store everything including test-datastore
if: always()
uses: actions/upload-artifact@v5
with:
name: test-cdio-basic-tests-output-py${{ env.PYTHON_VERSION }}
path: .

View File

@@ -1,10 +1,9 @@
import os
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from flask_restful import abort, Resource from flask_restful import abort, Resource
from flask import request from flask import request
import validators
from functools import wraps from functools import wraps
from . import auth, validate_openapi_request from . import auth, validate_openapi_request
from ..validate_url import is_safe_valid_url
def default_content_type(content_type='text/plain'): def default_content_type(content_type='text/plain'):
@@ -50,14 +49,13 @@ class Import(Resource):
urls = request.get_data().decode('utf8').splitlines() urls = request.get_data().decode('utf8').splitlines()
added = [] added = []
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
for url in urls: for url in urls:
url = url.strip() url = url.strip()
if not len(url): if not len(url):
continue continue
# If hosts that only contain alphanumerics are allowed ("localhost" for example) # If hosts that only contain alphanumerics are allowed ("localhost" for example)
if not validators.url(url, simple_host=allow_simplehost): if not is_safe_valid_url(url):
return f"Invalid or unsupported URL - {url}", 400 return f"Invalid or unsupported URL - {url}", 400
if dedupe and self.datastore.url_exists(url): if dedupe and self.datastore.url_exists(url):

View File

@@ -1,14 +1,12 @@
import os import os
from changedetectionio.strtobool import strtobool from changedetectionio.validate_url import is_safe_valid_url
from changedetectionio.html_tools import is_safe_url
from flask_expects_json import expects_json from flask_expects_json import expects_json
from changedetectionio import queuedWatchMetaData from changedetectionio import queuedWatchMetaData
from changedetectionio import worker_handler from changedetectionio import worker_handler
from flask_restful import abort, Resource from flask_restful import abort, Resource
from flask import request, make_response, send_from_directory from flask import request, make_response, send_from_directory
import validators
from . import auth from . import auth
import copy import copy
@@ -124,7 +122,7 @@ class Watch(Resource):
return validation_error, 400 return validation_error, 400
# XSS etc protection # XSS etc protection
if request.json.get('url') and not is_safe_url(request.json.get('url')): if request.json.get('url') and not is_safe_valid_url(request.json.get('url')):
return "Invalid URL", 400 return "Invalid URL", 400
watch.update(request.json) watch.update(request.json)
@@ -232,9 +230,7 @@ class CreateWatch(Resource):
json_data = request.get_json() json_data = request.get_json()
url = json_data['url'].strip() url = json_data['url'].strip()
# If hosts that only contain alphanumerics are allowed ("localhost" for example) if not is_safe_valid_url(url):
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
if not validators.url(url, simple_host=allow_simplehost):
return "Invalid or unsupported URL", 400 return "Invalid or unsupported URL", 400
if json_data.get('proxy'): if json_data.get('proxy'):

View File

@@ -133,10 +133,10 @@ def get_socketio_path():
# Socket.IO will be available at {prefix}/socket.io/ # Socket.IO will be available at {prefix}/socket.io/
return prefix return prefix
@app.template_global('is_safe_url') @app.template_global('is_safe_valid_url')
def _is_safe_url(test_url): def _is_safe_valid_url(test_url):
from .html_tools import is_safe_url from .validate_url import is_safe_valid_url
return is_safe_url(test_url) return is_safe_valid_url(test_url)
@app.template_filter('format_number_locale') @app.template_filter('format_number_locale')
@@ -387,7 +387,7 @@ def changedetection_app(config=None, datastore_o=None):
# We would sometimes get login loop errors on sites hosted in sub-paths # We would sometimes get login loop errors on sites hosted in sub-paths
# note for the future: # note for the future:
# if not is_safe_url(next): # if not is_safe_valid_url(next):
# return flask.abort(400) # return flask.abort(400)
return redirect(url_for('watchlist.index')) return redirect(url_for('watchlist.index'))

View File

@@ -28,11 +28,8 @@ from wtforms.utils import unset_value
from wtforms.validators import ValidationError from wtforms.validators import ValidationError
from validators.url import url as url_validator
from changedetectionio.widgets import TernaryNoneBooleanField from changedetectionio.widgets import TernaryNoneBooleanField
# default # default
# each select <option data-enabled="enabled-0-0" # each select <option data-enabled="enabled-0-0"
from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config
@@ -541,19 +538,10 @@ class validateURL(object):
def validate_url(test_url): def validate_url(test_url):
# If hosts that only contain alphanumerics are allowed ("localhost" for example) from changedetectionio.validate_url import is_safe_valid_url
try: if not is_safe_valid_url(test_url):
url_validator(test_url, simple_host=allow_simplehost)
except validators.ValidationError:
#@todo check for xss
message = f"'{test_url}' is not a valid URL."
# This should be wtforms.validators. # This should be wtforms.validators.
raise ValidationError(message) raise ValidationError('Watch protocol is not permitted or invalid URL format')
from changedetectionio.html_tools import is_safe_url
if not is_safe_url(test_url):
# This should be wtforms.validators.
raise ValidationError('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX or incorrect URL format')
class ValidateSinglePythonRegexString(object): class ValidateSinglePythonRegexString(object):

View File

@@ -15,8 +15,6 @@ TITLE_RE = re.compile(r"<title[^>]*>(.*?)</title>", re.I | re.S)
META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I) META_CS = re.compile(r'<meta[^>]+charset=["\']?\s*([a-z0-9_\-:+.]+)', re.I)
META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I) META_CT = re.compile(r'<meta[^>]+http-equiv=["\']?content-type["\']?[^>]*content=["\'][^>]*charset=([a-z0-9_\-:+.]+)', re.I)
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
# 'price' , 'lowPrice', 'highPrice' are usually under here # 'price' , 'lowPrice', 'highPrice' are usually under here
# All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here # All of those may or may not appear on different websites - I didnt find a way todo case-insensitive searching here
LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"] LD_JSON_PRODUCT_OFFER_SELECTORS = ["json:$..offers", "json:$..Offers"]
@@ -25,22 +23,6 @@ class JSONNotFound(ValueError):
def __init__(self, msg): def __init__(self, msg):
ValueError.__init__(self, msg) ValueError.__init__(self, msg)
def is_safe_url(test_url):
import os
# See https://github.com/dgtlmoon/changedetection.io/issues/1358
# Remove 'source:' prefix so we dont get 'source:javascript:' etc
# 'source:' is a valid way to tell us to return the source
r = re.compile(re.escape('source:'), re.IGNORECASE)
test_url = r.sub('', test_url)
pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', SAFE_PROTOCOL_REGEX), re.IGNORECASE)
if not pattern.match(test_url.strip()):
return False
return True
# Doesn't look like python supports forward slash auto enclosure in re.findall # Doesn't look like python supports forward slash auto enclosure in re.findall
# So convert it to inline flag "(?i)foobar" type configuration # So convert it to inline flag "(?i)foobar" type configuration
@lru_cache(maxsize=100) @lru_cache(maxsize=100)

View File

@@ -1,5 +1,6 @@
from blinker import signal from blinker import signal
from changedetectionio.html_tools import is_safe_url from changedetectionio.validate_url import is_safe_valid_url
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from changedetectionio.jinja2_custom import render as jinja_render from changedetectionio.jinja2_custom import render as jinja_render
from . import watch_base from . import watch_base
@@ -12,9 +13,6 @@ from .. import jinja2_custom as safe_jinja
from ..diff import ADDED_PLACEMARKER_OPEN from ..diff import ADDED_PLACEMARKER_OPEN
from ..html_tools import TRANSLATE_WHITESPACE_TABLE from ..html_tools import TRANSLATE_WHITESPACE_TABLE
# Allowable protocols, protects against javascript: etc
# file:// is further checked by ALLOW_FILE_URI
SAFE_PROTOCOL_REGEX='^(http|https|ftp|file):'
FAVICON_RESAVE_THRESHOLD_SECONDS=86400 FAVICON_RESAVE_THRESHOLD_SECONDS=86400
@@ -63,7 +61,7 @@ class model(watch_base):
def link(self): def link(self):
url = self.get('url', '') url = self.get('url', '')
if not is_safe_url(url): if not is_safe_valid_url(url):
return 'DISABLED' return 'DISABLED'
ready_url = url ready_url = url
@@ -84,7 +82,7 @@ class model(watch_base):
ready_url=ready_url.replace('source:', '') ready_url=ready_url.replace('source:', '')
# Also double check it after any Jinja2 formatting just incase # Also double check it after any Jinja2 formatting just incase
if not is_safe_url(ready_url): if not is_safe_valid_url(ready_url):
return 'DISABLED' return 'DISABLED'
return ready_url return ready_url

View File

@@ -6,8 +6,6 @@
# enable debug # enable debug
set -x set -x
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
docker run --network changedet-network -d --hostname selenium -p 4444:4444 --rm --shm-size="2g" selenium/standalone-chrome:4
# A extra browser is configured, but we never chose to use it, so it should NOT show in the logs # A extra browser is configured, but we never chose to use it, so it should NOT show in the logs
docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url' docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url'

View File

@@ -5,8 +5,6 @@ set -e
# enable debug # enable debug
set -x set -x
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
# Test proxy list handling, starting two squids on different ports # Test proxy list handling, starting two squids on different ports
# Each squid adds a different header to the response, which is the main thing we test for. # Each squid adds a different header to the response, which is the main thing we test for.
docker run --network changedet-network -d --name squid-one --hostname squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf ubuntu/squid:4.13-21.10_edge docker run --network changedet-network -d --name squid-one --hostname squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf ubuntu/squid:4.13-21.10_edge

View File

@@ -5,7 +5,6 @@ set -e
# enable debug # enable debug
set -x set -x
docker network inspect changedet-network >/dev/null 2>&1 || docker network create changedet-network
# SOCKS5 related - start simple Socks5 proxy server # SOCKS5 related - start simple Socks5 proxy server
# SOCKSTEST=xyz should show in the logs of this service to confirm it fetched # SOCKSTEST=xyz should show in the logs of this service to confirm it fetched

View File

@@ -1,5 +1,6 @@
from changedetectionio.strtobool import strtobool from changedetectionio.strtobool import strtobool
from changedetectionio.html_tools import is_safe_url
from changedetectionio.validate_url import is_safe_valid_url
from flask import ( from flask import (
flash flash
@@ -341,8 +342,10 @@ class ChangeDetectionStore:
logger.error(f"Error fetching metadata for shared watch link {url} {str(e)}") logger.error(f"Error fetching metadata for shared watch link {url} {str(e)}")
flash("Error fetching metadata for {}".format(url), 'error') flash("Error fetching metadata for {}".format(url), 'error')
return False return False
if not is_safe_url(url):
flash('Watch protocol is not permitted by SAFE_PROTOCOL_REGEX', 'error') if not is_safe_valid_url(url):
flash('Watch protocol is not permitted or invalid URL format', 'error')
return None return None
if tag and type(tag) == str: if tag and type(tag) == str:

View File

@@ -53,7 +53,7 @@
<a class="pure-menu-heading" href="{{url_for('watchlist.index')}}"> <a class="pure-menu-heading" href="{{url_for('watchlist.index')}}">
<strong>Change</strong>Detection.io</a> <strong>Change</strong>Detection.io</a>
{% endif %} {% endif %}
{% if current_diff_url and is_safe_url(current_diff_url) %} {% if current_diff_url and is_safe_valid_url(current_diff_url) %}
<a class="current-diff-url" href="{{ current_diff_url }}"> <a class="current-diff-url" href="{{ current_diff_url }}">
<span style="max-width: 30%; overflow: hidden">{{ current_diff_url }}</span></a> <span style="max-width: 30%; overflow: hidden">{{ current_diff_url }}</span></a>
{% else %} {% else %}

View File

@@ -64,29 +64,21 @@ def test_jinja2_time_offset_in_url_query(client, live_server, measure_memory_usa
# Should not have template error # Should not have template error
assert b'Invalid template' not in res.data assert b'Invalid template' not in res.data
# https://techtonics.medium.com/secure-templating-with-jinja2-understanding-ssti-and-jinja2-sandbox-environment-b956edd60456 # https://techtonics.medium.com/secure-templating-with-jinja2-understanding-ssti-and-jinja2-sandbox-environment-b956edd60456
def test_jinja2_security_url_query(client, live_server, measure_memory_usage): def test_jinja2_security_url_query(client, live_server, measure_memory_usage):
# Add our URL to the import page # Add our URL to the import page
test_url = url_for('test_return_query', _external=True) test_url = url_for('test_return_query', _external=True)
# because url_for() will URL-encode the var, but we dont here full_url = test_url + "?date={{ ''.__class__.__mro__[1].__subclasses__()}}"
full_url = "{}?{}".format(test_url,
"date={{ ''.__class__.__mro__[1].__subclasses__()}}", )
res = client.post( res = client.post(
url_for("ui.ui_views.form_quick_watch_add"), url_for("ui.ui_views.form_quick_watch_add"),
data={"url": full_url, "tags": "test"}, data={"url": full_url, "tags": "test"},
follow_redirects=True follow_redirects=True
) )
assert b"Watch added" in res.data assert b"Watch added" not in res.data
wait_for_all_checks(client)
# It should report nothing found (no new 'has-unread-changes' class)
res = client.get(url_for("watchlist.index"))
assert b'is invalid and cannot be used' in res.data
# Some of the spewed output from the subclasses
assert b'dict_values' not in res.data
def test_timezone(mocker): def test_timezone(mocker):
"""Verify that timezone is parsed.""" """Verify that timezone is parsed."""

View File

@@ -48,7 +48,7 @@ def test_bad_access(client, live_server, measure_memory_usage):
follow_redirects=True follow_redirects=True
) )
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data assert b'Watch protocol is not permitted or invalid URL format' in res.data
res = client.post( res = client.post(
url_for("ui.ui_views.form_quick_watch_add"), url_for("ui.ui_views.form_quick_watch_add"),
@@ -56,7 +56,7 @@ def test_bad_access(client, live_server, measure_memory_usage):
follow_redirects=True follow_redirects=True
) )
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data assert b'Watch protocol is not permitted or invalid URL format' in res.data
res = client.post( res = client.post(
url_for("ui.ui_views.form_quick_watch_add"), url_for("ui.ui_views.form_quick_watch_add"),
@@ -64,7 +64,7 @@ def test_bad_access(client, live_server, measure_memory_usage):
follow_redirects=True follow_redirects=True
) )
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data assert b'Watch protocol is not permitted or invalid URL format' in res.data
res = client.post( res = client.post(
@@ -73,8 +73,15 @@ def test_bad_access(client, live_server, measure_memory_usage):
follow_redirects=True follow_redirects=True
) )
assert b'Watch protocol is not permitted by SAFE_PROTOCOL_REGEX' in res.data assert b'Watch protocol is not permitted or invalid URL format' in res.data
res = client.post(
url_for("ui.ui_views.form_quick_watch_add"),
data={"url": 'https://i-wanna-xss-you.com?hereis=<script>alert(1)</script>', "tags": ''},
follow_redirects=True
)
assert b'Watch protocol is not permitted or invalid URL format' in res.data
def _runner_test_various_file_slash(client, file_uri): def _runner_test_various_file_slash(client, file_uri):
@@ -111,8 +118,8 @@ def test_file_slash_access(client, live_server, measure_memory_usage):
test_file_path = os.path.abspath(__file__) test_file_path = os.path.abspath(__file__)
_runner_test_various_file_slash(client, file_uri=f"file://{test_file_path}") _runner_test_various_file_slash(client, file_uri=f"file://{test_file_path}")
_runner_test_various_file_slash(client, file_uri=f"file:/{test_file_path}") # _runner_test_various_file_slash(client, file_uri=f"file:/{test_file_path}")
_runner_test_various_file_slash(client, file_uri=f"file:{test_file_path}") # CVE-2024-56509 # _runner_test_various_file_slash(client, file_uri=f"file:{test_file_path}") # CVE-2024-56509
def test_xss(client, live_server, measure_memory_usage): def test_xss(client, live_server, measure_memory_usage):

View File

@@ -0,0 +1,109 @@
from functools import lru_cache
from loguru import logger
from urllib.parse import urlparse, urlunparse, parse_qsl, urlencode
def normalize_url_encoding(url):
"""
Safely encode a URL's query parameters, regardless of whether they're already encoded.
Why this is necessary:
URLs can arrive in various states - some with already encoded query parameters (%20 for spaces),
some with unencoded parameters (literal spaces), or a mix of both. The validators.url() function
requires proper encoding, but simply encoding an already-encoded URL would double-encode it
(e.g., %20 would become %2520).
This function solves the problem by:
1. Parsing the URL to extract query parameters
2. parse_qsl() automatically decodes parameters if they're encoded
3. urlencode() re-encodes them properly
4. Returns a consistently encoded URL that will pass validation
Example:
- Input: "http://example.com/test?time=2025-10-28 09:19" (space not encoded)
- Output: "http://example.com/test?time=2025-10-28+09%3A19" (properly encoded)
- Input: "http://example.com/test?time=2025-10-28%2009:19" (already encoded)
- Output: "http://example.com/test?time=2025-10-28+09%3A19" (properly encoded)
Returns a properly encoded URL string.
"""
try:
# Parse the URL into components (scheme, netloc, path, params, query, fragment)
parsed = urlparse(url)
# Parse query string - this automatically decodes it if encoded
# parse_qsl handles both encoded and unencoded query strings gracefully
query_params = parse_qsl(parsed.query, keep_blank_values=True)
# Re-encode the query string properly using standard URL encoding
encoded_query = urlencode(query_params, safe='')
# Reconstruct the URL with properly encoded query string
normalized = urlunparse((
parsed.scheme,
parsed.netloc,
parsed.path,
parsed.params,
encoded_query, # Use the re-encoded query
parsed.fragment
))
return normalized
except Exception as e:
# If parsing fails for any reason, return original URL
logger.debug(f"URL normalization failed for '{url}': {e}")
return url
@lru_cache(maxsize=10000)
def is_safe_valid_url(test_url):
from changedetectionio import strtobool
from changedetectionio.jinja2_custom import render as jinja_render
import os
import re
import validators
allow_file_access = strtobool(os.getenv('ALLOW_FILE_URI', 'false'))
safe_protocol_regex = '^(http|https|ftp|file):' if allow_file_access else '^(http|https|ftp):'
# See https://github.com/dgtlmoon/changedetection.io/issues/1358
# Remove 'source:' prefix so we dont get 'source:javascript:' etc
# 'source:' is a valid way to tell us to return the source
r = re.compile('^source:', re.IGNORECASE)
test_url = r.sub('', test_url)
# Check the actual rendered URL in case of any Jinja markup
try:
test_url = jinja_render(test_url)
except Exception as e:
logger.error(f'URL "{test_url}" is not correct Jinja2? {str(e)}')
return False
# Check query parameters and fragment
if re.search(r'[<>]', test_url):
logger.warning(f'URL "{test_url}" contains suspicious characters')
return False
# Normalize URL encoding - handle both encoded and unencoded query parameters
test_url = normalize_url_encoding(test_url)
# Be sure the protocol is safe (no file, etcetc)
pattern = re.compile(os.getenv('SAFE_PROTOCOL_REGEX', safe_protocol_regex), re.IGNORECASE)
if not pattern.match(test_url.strip()):
logger.warning(f'URL "{test_url}" is not safe, aborting.')
return False
# If hosts that only contain alphanumerics are allowed ("localhost" for example)
allow_simplehost = not strtobool(os.getenv('BLOCK_SIMPLEHOSTS', 'False'))
try:
if not test_url.strip().lower().startswith('file:') and not validators.url(test_url, simple_host=allow_simplehost):
logger.warning(f'URL "{test_url}" failed validation, aborting.')
return False
except validators.ValidationError:
logger.warning(f'URL f"{test_url}" failed validation, aborting.')
return False
return True