mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 06:37:41 +00:00 
			
		
		
		
	Compare commits
	
		
			83 Commits
		
	
	
		
			0.45.13
			...
			exception-
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 284c464511 | ||
|   | e110b3ee93 | ||
|   | 3ae9bfa6f9 | ||
|   | 6f3c3b7dfb | ||
|   | 74707909f1 | ||
|   | d4dac23ba1 | ||
|   | f9954f93f3 | ||
|   | 1a43b112dc | ||
|   | db59bf73e1 | ||
|   | 8aac7bccbe | ||
|   | 9449c59fbb | ||
|   | 21f4ba2208 | ||
|   | daef1cd036 | ||
|   | 56b365df40 | ||
|   | 8e5bf91965 | ||
|   | 1ae59551be | ||
|   | a176468fb8 | ||
|   | 8fac593201 | ||
|   | e3b8c0f5af | ||
|   | 514fd7f91e | ||
|   | 38c4768b92 | ||
|   | 6555d99044 | ||
|   | e719dbd19b | ||
|   | b28a8316cc | ||
|   | e609a2d048 | ||
|   | 994d34c776 | ||
|   | de776800e9 | ||
|   | 8b8ed58f20 | ||
|   | 79c6d765de | ||
|   | c6db7fc90e | ||
|   | bc587efae2 | ||
|   | 6ee6be1a5f | ||
|   | c83485094b | ||
|   | 387ce32e6f | ||
|   | 6b9a788d75 | ||
|   | 14e632bc19 | ||
|   | 52c895b2e8 | ||
|   | a62043e086 | ||
|   | 3d390b6ea4 | ||
|   | 301a40ca34 | ||
|   | 1c099cdba6 | ||
|   | af747e6e3f | ||
|   | aefad0bdf6 | ||
|   | 904ef84f82 | ||
|   | d2569ba715 | ||
|   | ccb42bcb12 | ||
|   | 4163030805 | ||
|   | 140d375ad0 | ||
|   | 1a608d0ae6 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | e6ed91cfe3 | ||
|   | 008272cd77 | ||
|   | 823a0c99f4 | ||
|   | 1f57d9d0b6 | ||
|   | 3287283065 | ||
|   | c5a4e0aaa3 | ||
|   | 5119efe4fb | ||
|   | 78a2dceb81 | ||
|   | 72c7645f60 | ||
|   | e09eb47fb7 | ||
|   | 616c0b3f65 | ||
|   | c90b27823a | ||
|   | 3b16b19a94 | ||
|   | 4ee9fa79e1 | ||
|   | 4b49759113 | ||
|   | e9a9790cb0 | ||
|   | 593660e2f6 | ||
|   | 7d96b4ba83 | ||
|   | fca40e4d5b | ||
|   | 66e2dfcead | ||
|   | bce7eb68fb | ||
|   | 93c0385119 | ||
|   | e17f3be739 | ||
|   | 3a9f79b756 | ||
|   | 1f5670253e | ||
|   | fe3cf5ffd2 | ||
|   | d31a45d49a | ||
|   | 19ee65361d | ||
|   | 677082723c | ||
|   | 96793890f8 | ||
|   | 0439155127 | ||
| ![dependabot[bot]](/assets/img/avatar_default.png)  | 29ca2521eb | ||
|   | 7d67ad057c | ||
|   | 2e88872b7e | 
							
								
								
									
										14
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								.github/dependabot.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| version: 2 | ||||
| updates: | ||||
|   - package-ecosystem: github-actions | ||||
|     directory: / | ||||
|     schedule: | ||||
|       interval: "weekly" | ||||
|     "caronc/apprise": | ||||
|       versioning-strategy: "increase" | ||||
|       schedule: | ||||
|         interval: "daily" | ||||
|     groups: | ||||
|       all: | ||||
|         patterns: | ||||
|         - "*" | ||||
							
								
								
									
										2
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
								
							| @@ -12,8 +12,10 @@ RUN \ | ||||
|     cargo \ | ||||
|     g++ \ | ||||
|     gcc \ | ||||
|     jpeg-dev \ | ||||
|     libc-dev \ | ||||
|     libffi-dev \ | ||||
|     libjpeg \ | ||||
|     libxslt-dev \ | ||||
|     make \ | ||||
|     openssl-dev \ | ||||
|   | ||||
							
								
								
									
										6
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										6
									
								
								.github/workflows/codeql-analysis.yml
									
									
									
									
										vendored
									
									
								
							| @@ -34,7 +34,7 @@ jobs: | ||||
|  | ||||
|     # Initializes the CodeQL tools for scanning. | ||||
|     - name: Initialize CodeQL | ||||
|       uses: github/codeql-action/init@v2 | ||||
|       uses: github/codeql-action/init@v3 | ||||
|       with: | ||||
|         languages: ${{ matrix.language }} | ||||
|         # If you wish to specify custom queries, you can do so here or in a config file. | ||||
| @@ -45,7 +45,7 @@ jobs: | ||||
|     # Autobuild attempts to build any compiled languages  (C/C++, C#, or Java). | ||||
|     # If this step fails, then you should remove it and run the build manually (see below) | ||||
|     - name: Autobuild | ||||
|       uses: github/codeql-action/autobuild@v2 | ||||
|       uses: github/codeql-action/autobuild@v3 | ||||
|  | ||||
|     # ℹ️ Command-line programs to run using the OS shell. | ||||
|     # 📚 https://git.io/JvXDl | ||||
| @@ -59,4 +59,4 @@ jobs: | ||||
|     #   make release | ||||
|  | ||||
|     - name: Perform CodeQL Analysis | ||||
|       uses: github/codeql-action/analyze@v2 | ||||
|       uses: github/codeql-action/analyze@v3 | ||||
|   | ||||
							
								
								
									
										2
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							| @@ -41,7 +41,7 @@ jobs: | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - name: Set up Python 3.11 | ||||
|         uses: actions/setup-python@v4 | ||||
|         uses: actions/setup-python@v5 | ||||
|         with: | ||||
|           python-version: 3.11 | ||||
|  | ||||
|   | ||||
							
								
								
									
										21
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										21
									
								
								.github/workflows/pypi-release.yml
									
									
									
									
										vendored
									
									
								
							| @@ -9,9 +9,9 @@ jobs: | ||||
|     steps: | ||||
|     - uses: actions/checkout@v4 | ||||
|     - name: Set up Python | ||||
|       uses: actions/setup-python@v4 | ||||
|       uses: actions/setup-python@v5 | ||||
|       with: | ||||
|         python-version: "3.x" | ||||
|         python-version: "3.11" | ||||
|     - name: Install pypa/build | ||||
|       run: >- | ||||
|         python3 -m | ||||
| @@ -21,7 +21,7 @@ jobs: | ||||
|     - name: Build a binary wheel and a source tarball | ||||
|       run: python3 -m build | ||||
|     - name: Store the distribution packages | ||||
|       uses: actions/upload-artifact@v3 | ||||
|       uses: actions/upload-artifact@v4 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
| @@ -34,18 +34,23 @@ jobs: | ||||
|     - build | ||||
|     steps: | ||||
|     - name: Download all the dists | ||||
|       uses: actions/download-artifact@v3 | ||||
|       uses: actions/download-artifact@v4 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|     - name: Set up Python 3.11 | ||||
|       uses: actions/setup-python@v5 | ||||
|       with: | ||||
|         python-version: '3.11' | ||||
|     - name: Test that the basic pip built package runs without error | ||||
|       run: | | ||||
|         set -e | ||||
|         set -ex | ||||
|         sudo pip3 install --upgrade pip  | ||||
|         pip3 install dist/changedetection.io*.whl | ||||
|         changedetection.io -d /tmp -p 10000 & | ||||
|         sleep 3 | ||||
|         curl http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null | ||||
|         curl http://127.0.0.1:10000/ >/dev/null | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/static/styles/pure-min.css >/dev/null | ||||
|         curl --retry-connrefused --retry 6 http://127.0.0.1:10000/ >/dev/null | ||||
|         killall changedetection.io | ||||
|  | ||||
|  | ||||
| @@ -64,7 +69,7 @@ jobs: | ||||
|  | ||||
|     steps: | ||||
|     - name: Download all the dists | ||||
|       uses: actions/download-artifact@v3 | ||||
|       uses: actions/download-artifact@v4 | ||||
|       with: | ||||
|         name: python-package-distributions | ||||
|         path: dist/ | ||||
|   | ||||
							
								
								
									
										4
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										4
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
								
							| @@ -11,12 +11,14 @@ on: | ||||
|       - requirements.txt | ||||
|       - Dockerfile | ||||
|       - .github/workflows/* | ||||
|       - .github/test/Dockerfile* | ||||
|  | ||||
|   pull_request: | ||||
|     paths: | ||||
|       - requirements.txt | ||||
|       - Dockerfile | ||||
|       - .github/workflows/* | ||||
|       - .github/test/Dockerfile* | ||||
|  | ||||
|   # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing | ||||
|   # @todo: some kind of path filter for requirements.txt and Dockerfile | ||||
| @@ -26,7 +28,7 @@ jobs: | ||||
|     steps: | ||||
|         - uses: actions/checkout@v4 | ||||
|         - name: Set up Python 3.11 | ||||
|           uses: actions/setup-python@v4 | ||||
|           uses: actions/setup-python@v5 | ||||
|           with: | ||||
|             python-version: 3.11 | ||||
|  | ||||
|   | ||||
							
								
								
									
										120
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										120
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							| @@ -11,7 +11,7 @@ jobs: | ||||
|  | ||||
|       # Mainly just for link/flake8 | ||||
|       - name: Set up Python 3.11 | ||||
|         uses: actions/setup-python@v4 | ||||
|         uses: actions/setup-python@v5 | ||||
|         with: | ||||
|           python-version: '3.11' | ||||
|  | ||||
| @@ -27,13 +27,13 @@ jobs: | ||||
|         run: | | ||||
|            | ||||
|           docker network create changedet-network | ||||
|  | ||||
|           # Selenium+browserless | ||||
|           docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4 | ||||
|           docker run --network changedet-network -d --name browserless --hostname browserless -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.60-chrome-stable | ||||
|            | ||||
|           # For accessing custom browser tests | ||||
|           docker run --network changedet-network -d --name browserless-custom-url --hostname browserless-custom-url -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm --shm-size="2g"  browserless/chrome:1.60-chrome-stable | ||||
|           # Selenium | ||||
|           docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome:4 | ||||
|            | ||||
|           # SocketPuppetBrowser + Extra for custom browser test | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser --hostname sockpuppetbrowser --rm -p 3000:3000 dgtlmoon/sockpuppetbrowser:latest                     | ||||
|           docker run --network changedet-network -d -e "LOG_LEVEL=TRACE" --cap-add=SYS_ADMIN --name sockpuppetbrowser-custom-url --hostname sockpuppetbrowser-custom-url  -p 3001:3000 --rm dgtlmoon/sockpuppetbrowser:latest | ||||
|  | ||||
|       - name: Build changedetection.io container for testing | ||||
|         run: |          | ||||
| @@ -47,7 +47,13 @@ jobs: | ||||
|           # Debug SMTP server/echo message back server | ||||
|           docker run --network changedet-network -d -p 11025:11025 -p 11080:11080  --hostname mailserver test-changedetectionio  bash -c 'python changedetectionio/tests/smtp/smtp-test-server.py'  | ||||
|  | ||||
|       - name: Test built container with pytest | ||||
|       - name: Show docker container state and other debug info | ||||
|         run: | | ||||
|           set -x | ||||
|           echo "Running processes in docker..." | ||||
|           docker ps | ||||
|  | ||||
|       - name: Test built container with Pytest (generally as requests/plaintext fetching) | ||||
|         run: | | ||||
|           # Unit tests | ||||
|           echo "run test with unittest" | ||||
| @@ -59,40 +65,76 @@ jobs: | ||||
|           # The default pytest logger_level is TRACE | ||||
|           # To change logger_level for pytest(test/conftest.py), | ||||
|           # append the docker option. e.g. '-e LOGGER_LEVEL=DEBUG' | ||||
|           docker run --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio && ./run_basic_tests.sh' | ||||
|           docker run --name test-cdio-basic-tests --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio && ./run_basic_tests.sh' | ||||
|  | ||||
|       - name: Test built container selenium+browserless/playwright | ||||
| # PLAYWRIGHT/NODE-> CDP | ||||
|       - name: Playwright and SocketPuppetBrowser - Specific tests in built container | ||||
|         run: | | ||||
|           # Playwright via Sockpuppetbrowser fetch | ||||
|           # tests/visualselector/test_fetch_data.py will do browser steps   | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|  | ||||
|  | ||||
|       - name: Playwright and SocketPuppetBrowser - Headers and requests | ||||
|         run: |        | ||||
|           # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
|       - name: Playwright and SocketPuppetBrowser - Restock detection | ||||
|         run: |                             | ||||
|           # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|  | ||||
| # STRAIGHT TO CDP | ||||
|       - name: Pyppeteer and SocketPuppetBrowser - Specific tests in built container | ||||
|         run: | | ||||
|           # Playwright via Sockpuppetbrowser fetch  | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_content.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/test_errorhandling.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/visualselector/test_fetch_data.py' | ||||
|           docker run --rm -e "FLASK_SERVER_NAME=cdio" -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network --hostname=cdio test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-host=0.0.0.0 --live-server-port=5004 tests/fetchers/test_custom_js_before_content.py' | ||||
|  | ||||
|       - name: Pyppeteer and SocketPuppetBrowser - Headers and requests checks | ||||
|         run: |        | ||||
|           # Settings headers playwright tests - Call back in from Sockpuppetbrowser, check headers | ||||
|           docker run --name "changedet" --hostname changedet --rm  -e "FAST_PUPPETEER_CHROME_FETCHER=True" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
|       - name: Pyppeteer and SocketPuppetBrowser - Restock detection | ||||
|         run: |                             | ||||
|           # restock detection via playwright - added name=changedet here so that playwright and sockpuppetbrowser can connect to it | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet"  -e "FAST_PUPPETEER_CHROME_FETCHER=True"  -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|  | ||||
| # SELENIUM | ||||
|       - name: Specific tests in built container for Selenium | ||||
|         run: | | ||||
|            | ||||
|           # Selenium fetch | ||||
|           docker run --rm -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' | ||||
|            | ||||
|           # Playwright/Browserless fetch | ||||
|           docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py' | ||||
|            | ||||
|           # Settings headers playwright tests - Call back in from Browserless, check headers | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "USE_EXPERIMENTAL_PUPPETEER_FETCH=yes" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000?dumpio=true" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py'           | ||||
|            | ||||
|           # restock detection via playwright - added name=changedet here so that playwright/browserless can connect to it | ||||
|           docker run --rm --name "changedet" -e "FLASK_SERVER_NAME=changedet" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest --live-server-port=5004 --live-server-host=0.0.0.0 tests/restock/test_restock.py' | ||||
|  | ||||
|       - name: Specific tests in built container for headers and requests checks with Selenium | ||||
|         run: | | ||||
|           docker run --name "changedet" --hostname changedet --rm -e "FLASK_SERVER_NAME=changedet" -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio; pytest --live-server-host=0.0.0.0  --live-server-port=5004 tests/test_request.py' | ||||
|  | ||||
| # OTHER STUFF | ||||
|       - name: Test SMTP notification mime types | ||||
|         run: | | ||||
|           # SMTP content types - needs the 'Debug SMTP server/echo message back server' container from above | ||||
|           docker run --rm  --network changedet-network test-changedetectionio bash -c 'cd changedetectionio;pytest tests/smtp/test_notification_smtp.py' | ||||
|  | ||||
|       - name: Test with puppeteer fetcher and disk cache | ||||
|         run: | | ||||
|           docker run --rm -e "PUPPETEER_DISK_CACHE=/tmp/data/" -e "USE_EXPERIMENTAL_PUPPETEER_FETCH=yes" -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py' | ||||
|           # Browserless would have had -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" added above | ||||
|  | ||||
|       - name: Test proxy interaction | ||||
|       # @todo Add a test via playwright/puppeteer | ||||
|       # squid with auth is tested in run_proxy_tests.sh -> tests/proxy_list/test_select_custom_proxy.py | ||||
|       - name: Test proxy squid style interaction | ||||
|         run: | | ||||
|           cd changedetectionio | ||||
|           ./run_proxy_tests.sh | ||||
|           # And again with PLAYWRIGHT_DRIVER_URL=.. | ||||
|           cd .. | ||||
|  | ||||
|       - name: Test proxy SOCKS5 style interaction | ||||
|         run: | | ||||
|           cd changedetectionio | ||||
|           ./run_socks_proxy_tests.sh | ||||
|           cd .. | ||||
|  | ||||
|       - name: Test custom browser URL | ||||
| @@ -106,10 +148,10 @@ jobs: | ||||
|           docker run --name test-changedetectionio -p 5556:5000  -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it | ||||
|           curl -s http://localhost:5556 |grep -q checkbox-uuid | ||||
|           curl --retry-connrefused --retry 6  -s http://localhost:5556 |grep -q checkbox-uuid | ||||
|            | ||||
|           # and IPv6 | ||||
|           curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|           curl --retry-connrefused --retry 6  -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|  | ||||
|           # Check whether TRACE log is enabled. | ||||
|           # Also, check whether TRACE is came from STDERR | ||||
| @@ -166,6 +208,16 @@ jobs: | ||||
|           # @todo - scan the container log to see the right "graceful shutdown" text exists            | ||||
|           docker rm sig-test | ||||
|  | ||||
| #export WEBDRIVER_URL=http://localhost:4444/wd/hub | ||||
| #pytest tests/fetchers/test_content.py | ||||
| #pytest tests/test_errorhandling.py | ||||
|       - name: Dump container log | ||||
|         if: always() | ||||
|         run: | | ||||
|           mkdir output-logs | ||||
|           docker logs test-cdio-basic-tests > output-logs/test-cdio-basic-tests-stdout.txt | ||||
|           docker logs test-cdio-basic-tests 2> output-logs/test-cdio-basic-tests-stderr.txt | ||||
|  | ||||
|       - name: Store container log | ||||
|         if: always() | ||||
|         uses: actions/upload-artifact@v4 | ||||
|         with: | ||||
|           name: test-cdio-basic-tests-output | ||||
|           path: output-logs | ||||
|   | ||||
| @@ -2,7 +2,7 @@ Contributing is always welcome! | ||||
|  | ||||
| I am no professional flask developer, if you know a better way that something can be done, please let me know! | ||||
|  | ||||
| Otherwise, it's always best to PR into the `dev` branch. | ||||
| Otherwise, it's always best to PR into the `master` branch. | ||||
|  | ||||
| Please be sure that all new functionality has a matching test! | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,8 @@ | ||||
| # pip dependencies install stage | ||||
| FROM python:3.11-slim-bookworm as builder | ||||
|  | ||||
| # @NOTE! I would love to move to 3.11 but it breaks the async handler in changedetectionio/content_fetchers/puppeteer.py | ||||
| #        If you know how to fix it, please do! and test it for both 3.10 and 3.11 | ||||
| FROM python:3.10-slim-bookworm as builder | ||||
|  | ||||
| # See `cryptography` pin comment in requirements.txt | ||||
| ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 | ||||
| @@ -25,11 +28,11 @@ RUN pip install --target=/dependencies -r /requirements.txt | ||||
| # Playwright is an alternative to Selenium | ||||
| # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported) | ||||
| RUN pip install --target=/dependencies playwright~=1.40 \ | ||||
| RUN pip install --target=/dependencies playwright~=1.41.2 \ | ||||
|     || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." | ||||
|  | ||||
| # Final image stage | ||||
| FROM python:3.11-slim-bookworm | ||||
| FROM python:3.10-slim-bookworm | ||||
|  | ||||
| RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     libxslt1.1 \ | ||||
|   | ||||
| @@ -1,8 +1,8 @@ | ||||
| recursive-include changedetectionio/api * | ||||
| recursive-include changedetectionio/blueprint * | ||||
| recursive-include changedetectionio/content_fetchers * | ||||
| recursive-include changedetectionio/model * | ||||
| recursive-include changedetectionio/processors * | ||||
| recursive-include changedetectionio/res * | ||||
| recursive-include changedetectionio/static * | ||||
| recursive-include changedetectionio/templates * | ||||
| recursive-include changedetectionio/tests * | ||||
|   | ||||
| @@ -91,6 +91,14 @@ We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) glob | ||||
|  | ||||
| Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/ | ||||
|  | ||||
| ### We have a Chrome extension! | ||||
|  | ||||
| Easily add the current web page to your changedetection.io tool, simply install the extension and click "Sync" to connect it to your existing changedetection.io install. | ||||
|  | ||||
| [<img src="./docs/chrome-extension-screenshot.png" style="max-width:80%;" alt="Chrome Extension to easily add the current web-page to detect a change."  title="Chrome Extension to easily add the current web-page to detect a change."  />](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop) | ||||
|  | ||||
| [Goto the Chrome Webstore to download the extension.](https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop) | ||||
|  | ||||
| ## Installation | ||||
|  | ||||
| ### Docker | ||||
|   | ||||
| @@ -2,15 +2,15 @@ | ||||
|  | ||||
| # Read more https://github.com/dgtlmoon/changedetection.io/wiki | ||||
|  | ||||
| __version__ = '0.45.13' | ||||
| __version__ = '0.45.20' | ||||
|  | ||||
| from distutils.util import strtobool | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from json.decoder import JSONDecodeError | ||||
|  | ||||
| import os | ||||
| #os.environ['EVENTLET_NO_GREENDNS'] = 'yes' | ||||
| import eventlet | ||||
| import eventlet.wsgi | ||||
| import getopt | ||||
| import os | ||||
| import signal | ||||
| import socket | ||||
| import sys | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import os | ||||
| from distutils.util import strtobool | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from flask_expects_json import expects_json | ||||
| from changedetectionio import queuedWatchMetaData | ||||
|   | ||||
							
								
								
									
										7
									
								
								changedetectionio/blueprint/browser_steps/TODO.txt
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										7
									
								
								changedetectionio/blueprint/browser_steps/TODO.txt
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,7 @@ | ||||
| - This needs an abstraction to directly handle the puppeteer connection methods | ||||
| - Then remove the playwright stuff | ||||
| - Remove hack redirect at line 65 changedetectionio/processors/__init__.py | ||||
|  | ||||
| The screenshots are base64 encoded/decoded which is very CPU intensive for large screenshots (in playwright) but not | ||||
| in the direct puppeteer connection (they are binary end to end) | ||||
|  | ||||
| @@ -4,24 +4,15 @@ | ||||
| # Why? | ||||
| # `browsersteps_playwright_browser_interface.chromium.connect_over_cdp()` will only run once without async() | ||||
| # - this flask app is not async() | ||||
| # - browserless has a single timeout/keepalive which applies to the session made at .connect_over_cdp() | ||||
| # - A single timeout/keepalive which applies to the session made at .connect_over_cdp() | ||||
| # | ||||
| # So it means that we must unfortunately for now just keep a single timer since .connect_over_cdp() was run | ||||
| # and know when that reaches timeout/keepalive :( when that time is up, restart the connection and tell the user | ||||
| # that their time is up, insert another coin. (reload) | ||||
| # | ||||
| # Bigger picture | ||||
| # - It's horrible that we have this click+wait deal, some nice socket.io solution using something similar | ||||
| # to what the browserless debug UI already gives us would be smarter.. | ||||
| # | ||||
| # OR | ||||
| # - Some API call that should be hacked into browserless or playwright that we can "/api/bump-keepalive/{session_id}/60" | ||||
| # So we can tell it that we need more time (run this on each action) | ||||
| # | ||||
| # OR | ||||
| # - use multiprocessing to bump this over to its own process and add some transport layer (queue/pipes) | ||||
|  | ||||
| from distutils.util import strtobool | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from flask import Blueprint, request, make_response | ||||
| import os | ||||
|  | ||||
|   | ||||
| @@ -6,6 +6,8 @@ import re | ||||
| from random import randint | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers.base import manage_user_agent | ||||
|  | ||||
| # Two flags, tell the JS which of the "Selector" or "Value" field should be enabled in the front end | ||||
| # 0- off, 1- on | ||||
| browser_step_ui_config = {'Choose one': '0 0', | ||||
| @@ -169,7 +171,7 @@ class steppable_browser_interface(): | ||||
|         self.page.locator(selector, timeout=1000).uncheck(timeout=1000) | ||||
|  | ||||
|  | ||||
| # Responsible for maintaining a live 'context' with browserless | ||||
| # Responsible for maintaining a live 'context' with the chrome CDP | ||||
| # @todo - how long do contexts live for anyway? | ||||
| class browsersteps_live_ui(steppable_browser_interface): | ||||
|     context = None | ||||
| @@ -178,6 +180,7 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|     stale = False | ||||
|     # bump and kill this if idle after X sec | ||||
|     age_start = 0 | ||||
|     headers = {} | ||||
|  | ||||
|     # use a special driver, maybe locally etc | ||||
|     command_executor = os.getenv( | ||||
| @@ -192,7 +195,8 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|  | ||||
|     browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"') | ||||
|  | ||||
|     def __init__(self, playwright_browser, proxy=None): | ||||
|     def __init__(self, playwright_browser, proxy=None, headers=None): | ||||
|         self.headers = headers or {} | ||||
|         self.age_start = time.time() | ||||
|         self.playwright_browser = playwright_browser | ||||
|         if self.context is None: | ||||
| @@ -206,16 +210,17 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|  | ||||
|         # @todo handle multiple contexts, bind a unique id from the browser on each req? | ||||
|         self.context = self.playwright_browser.new_context( | ||||
|             # @todo | ||||
|             #                user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0', | ||||
|             #               proxy=self.proxy, | ||||
|             # This is needed to enable JavaScript execution on GitHub and others | ||||
|             bypass_csp=True, | ||||
|             # Should never be needed | ||||
|             accept_downloads=False, | ||||
|             proxy=proxy | ||||
|             accept_downloads=False,  # Should never be needed | ||||
|             bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others | ||||
|             extra_http_headers=self.headers, | ||||
|             ignore_https_errors=True, | ||||
|             proxy=proxy, | ||||
|             service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'), | ||||
|             # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers | ||||
|             user_agent=manage_user_agent(headers=self.headers), | ||||
|         ) | ||||
|  | ||||
|  | ||||
|         self.page = self.context.new_page() | ||||
|  | ||||
|         # self.page.set_default_navigation_timeout(keep_open) | ||||
| @@ -243,7 +248,7 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|     def get_current_state(self): | ||||
|         """Return the screenshot and interactive elements mapping, generally always called after action_()""" | ||||
|         from pkg_resources import resource_string | ||||
|         xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8') | ||||
|         xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8') | ||||
|         now = time.time() | ||||
|         self.page.wait_for_timeout(1 * 1000) | ||||
|  | ||||
| @@ -278,10 +283,10 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|         self.page.evaluate("var include_filters=''") | ||||
|         from pkg_resources import resource_string | ||||
|         # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector | ||||
|         xpath_element_js = resource_string(__name__, "../../res/xpath_element_scraper.js").decode('utf-8') | ||||
|         from changedetectionio.content_fetcher import visualselector_xpath_selectors | ||||
|         xpath_element_js = resource_string(__name__, "../../content_fetchers/res/xpath_element_scraper.js").decode('utf-8') | ||||
|         from changedetectionio.content_fetchers import visualselector_xpath_selectors | ||||
|         xpath_element_js = xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) | ||||
|         xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}") | ||||
|         screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) | ||||
|         screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|  | ||||
|         return (screenshot, xpath_data) | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| from playwright.sync_api import PlaywrightContextManager | ||||
| import asyncio | ||||
|  | ||||
| # So playwright wants to run as a context manager, but we do something horrible and hacky | ||||
| # we are holding the session open for as long as possible, then shutting it down, and opening a new one | ||||
|   | ||||
| @@ -1,14 +1,11 @@ | ||||
| from concurrent.futures import ThreadPoolExecutor | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
|  | ||||
| from functools import wraps | ||||
|  | ||||
| from flask import Blueprint | ||||
| from flask_login import login_required | ||||
|  | ||||
| from changedetectionio.processors import text_json_diff | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
|  | ||||
|  | ||||
| STATUS_CHECKING = 0 | ||||
| STATUS_FAILED = 1 | ||||
| STATUS_OK = 2 | ||||
| @@ -32,7 +29,8 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     @threadpool | ||||
|     def long_task(uuid, preferred_proxy): | ||||
|         import time | ||||
|         from changedetectionio import content_fetcher | ||||
|         from changedetectionio.content_fetchers import exceptions as content_fetcher_exceptions | ||||
|         from changedetectionio.processors import text_json_diff | ||||
|  | ||||
|         status = {'status': '', 'length': 0, 'text': ''} | ||||
|         from jinja2 import Environment, BaseLoader | ||||
| @@ -43,7 +41,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|             update_handler = text_json_diff.perform_site_check(datastore=datastore, watch_uuid=uuid) | ||||
|             update_handler.call_browser() | ||||
|         # title, size is len contents not len xfer | ||||
|         except content_fetcher.Non200ErrorCodeReceived as e: | ||||
|         except content_fetcher_exceptions.Non200ErrorCodeReceived as e: | ||||
|             if e.status_code == 404: | ||||
|                 status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but 404 (page not found)"}) | ||||
|             elif e.status_code == 403 or e.status_code == 401: | ||||
| @@ -52,12 +50,12 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|                 status.update({'status': 'ERROR', 'length': len(contents), 'text': f"Status code: {e.status_code}"}) | ||||
|         except text_json_diff.FilterNotFoundInResponse: | ||||
|             status.update({'status': 'OK', 'length': len(contents), 'text': f"OK but CSS/xPath filter not found (page changed layout?)"}) | ||||
|         except content_fetcher.EmptyReply as e: | ||||
|         except content_fetcher_exceptions.EmptyReply as e: | ||||
|             if e.status_code == 403 or e.status_code == 401: | ||||
|                 status.update({'status': 'ERROR OTHER', 'length': len(contents), 'text': f"Got empty reply with code {e.status_code} - Access denied"}) | ||||
|             else: | ||||
|                 status.update({'status': 'ERROR OTHER', 'length': len(contents) if contents else 0, 'text': f"Empty reply with code {e.status_code}, needs chrome?"}) | ||||
|         except content_fetcher.ReplyWithContentButNoText as e: | ||||
|         except content_fetcher_exceptions.ReplyWithContentButNoText as e: | ||||
|             txt = f"Got reply but with no content - Status code {e.status_code} - It's possible that the filters were found, but contained no usable text (or contained only an image)." | ||||
|             status.update({'status': 'ERROR', 'text': txt}) | ||||
|         except Exception as e: | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
|  | ||||
| from distutils.util import strtobool | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from flask import Blueprint, flash, redirect, url_for | ||||
| from flask_login import login_required | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| @@ -18,8 +18,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue | ||||
|     def accept(uuid): | ||||
|         datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT | ||||
|         update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|         return redirect(url_for("form_watch_checknow", uuid=uuid)) | ||||
|  | ||||
|         return redirect(url_for("index")) | ||||
|  | ||||
|     @login_required | ||||
|     @price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET']) | ||||
|   | ||||
| @@ -11,9 +11,16 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|     def tags_overview_page(): | ||||
|         from .form import SingleTag | ||||
|         add_form = SingleTag(request.form) | ||||
|         sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title']) | ||||
|  | ||||
|         from collections import Counter | ||||
|  | ||||
|         tag_count = Counter(tag for watch in datastore.data['watching'].values() if watch.get('tags') for tag in watch['tags']) | ||||
|  | ||||
|         output = render_template("groups-overview.html", | ||||
|                                  available_tags=sorted_tags, | ||||
|                                  form=add_form, | ||||
|                                  available_tags=datastore.data['settings']['application'].get('tags', {}), | ||||
|                                  tag_count=tag_count | ||||
|                                  ) | ||||
|  | ||||
|         return output | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
| {% from '_helpers.jinja' import render_field, render_checkbox_field, render_button %} | ||||
| {% from '_common_fields.jinja' import render_common_settings_form %} | ||||
| <script> | ||||
|     const notification_base_url="{{url_for('ajax_callback_send_notification_test', watch_uuid=uuid)}}"; | ||||
|     const notification_base_url="{{url_for('ajax_callback_send_notification_test', mode="group-settings")}}"; | ||||
| </script> | ||||
|  | ||||
| <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> | ||||
|   | ||||
| @@ -27,6 +27,7 @@ | ||||
|             <thead> | ||||
|             <tr> | ||||
|                 <th></th> | ||||
|                 <th># Watches</th> | ||||
|                 <th>Tag / Label name</th> | ||||
|                 <th></th> | ||||
|             </tr> | ||||
| @@ -40,12 +41,13 @@ | ||||
|                 <td colspan="3">No website organisational tags/groups configured</td> | ||||
|             </tr> | ||||
|             {% endif %} | ||||
|             {% for uuid, tag in available_tags.items()  %} | ||||
|             {% for uuid, tag in available_tags  %} | ||||
|             <tr id="{{ uuid }}" class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}"> | ||||
|                 <td class="watch-controls"> | ||||
|                     <a class="link-mute state-{{'on' if tag.notification_muted else 'off'}}" href="{{url_for('tags.mute', uuid=tag.uuid)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications" class="icon icon-mute" ></a> | ||||
|                 </td> | ||||
|                 <td class="title-col inline">{{tag.title}}</td> | ||||
|                 <td>{{ "{:,}".format(tag_count[uuid]) if uuid in tag_count else 0 }}</td> | ||||
|                 <td class="title-col inline"> <a href="{{url_for('index', tag=uuid) }}">{{ tag.title }}</a></td> | ||||
|                 <td> | ||||
|                     <a class="pure-button pure-button-primary" href="{{ url_for('tags.form_tag_edit', uuid=uuid) }}">Edit</a>  | ||||
|                     <a class="pure-button pure-button-primary" href="{{ url_for('tags.delete', uuid=uuid) }}" title="Deletes and removes tag">Delete</a> | ||||
|   | ||||
| @@ -1,766 +0,0 @@ | ||||
| from abc import abstractmethod | ||||
| from distutils.util import strtobool | ||||
| from urllib.parse import urlparse | ||||
| import chardet | ||||
| import hashlib | ||||
| import json | ||||
| import os | ||||
| import requests | ||||
| import sys | ||||
| import time | ||||
| import urllib.parse | ||||
| from loguru import logger | ||||
|  | ||||
| visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4, header, footer, section, article, aside, details, main, nav, section, summary' | ||||
|  | ||||
|  | ||||
| class Non200ErrorCodeReceived(Exception): | ||||
|     def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         self.screenshot = screenshot | ||||
|         self.xpath_data = xpath_data | ||||
|         self.page_text = None | ||||
|  | ||||
|         if page_html: | ||||
|             from changedetectionio import html_tools | ||||
|             self.page_text = html_tools.html_to_text(page_html) | ||||
|         return | ||||
|  | ||||
|  | ||||
| class checksumFromPreviousCheckWasTheSame(Exception): | ||||
|     def __init__(self): | ||||
|         return | ||||
|  | ||||
|  | ||||
| class JSActionExceptions(Exception): | ||||
|     def __init__(self, status_code, url, screenshot, message=''): | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         self.screenshot = screenshot | ||||
|         self.message = message | ||||
|         return | ||||
|  | ||||
|  | ||||
| class BrowserStepsStepException(Exception): | ||||
|     def __init__(self, step_n, original_e): | ||||
|         self.step_n = step_n | ||||
|         self.original_e = original_e | ||||
|         logger.debug(f"Browser Steps exception at step {self.step_n} {str(original_e)}") | ||||
|         return | ||||
|  | ||||
|  | ||||
| class PageUnloadable(Exception): | ||||
|     def __init__(self, status_code, url, message, screenshot=False): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         self.screenshot = screenshot | ||||
|         self.message = message | ||||
|         return | ||||
|  | ||||
|  | ||||
| class EmptyReply(Exception): | ||||
|     def __init__(self, status_code, url, screenshot=None): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         self.screenshot = screenshot | ||||
|         return | ||||
|  | ||||
|  | ||||
| class ScreenshotUnavailable(Exception): | ||||
|     def __init__(self, status_code, url, page_html=None): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         if page_html: | ||||
|             from html_tools import html_to_text | ||||
|             self.page_text = html_to_text(page_html) | ||||
|         return | ||||
|  | ||||
|  | ||||
| class ReplyWithContentButNoText(Exception): | ||||
|     def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         self.screenshot = screenshot | ||||
|         self.has_filters = has_filters | ||||
|         self.html_content = html_content | ||||
|         return | ||||
|  | ||||
|  | ||||
| class Fetcher(): | ||||
|     browser_connection_is_custom = None | ||||
|     browser_connection_url = None | ||||
|     browser_steps = None | ||||
|     browser_steps_screenshot_path = None | ||||
|     content = None | ||||
|     error = None | ||||
|     fetcher_description = "No description" | ||||
|     headers = {} | ||||
|     instock_data = None | ||||
|     instock_data_js = "" | ||||
|     status_code = None | ||||
|     webdriver_js_execute_code = None | ||||
|     xpath_data = None | ||||
|     xpath_element_js = "" | ||||
|  | ||||
|     # Will be needed in the future by the VisualSelector, always get this where possible. | ||||
|     screenshot = False | ||||
|     system_http_proxy = os.getenv('HTTP_PROXY') | ||||
|     system_https_proxy = os.getenv('HTTPS_PROXY') | ||||
|  | ||||
|     # Time ONTOP of the system defined env minimum time | ||||
|     render_extract_delay = 0 | ||||
|  | ||||
|     def __init__(self): | ||||
|         from pkg_resources import resource_string | ||||
|         # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector | ||||
|         self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8') | ||||
|         self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8') | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_error(self): | ||||
|         return self.error | ||||
|  | ||||
|     @abstractmethod | ||||
|     def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|         # Should set self.error, self.status_code and self.content | ||||
|         pass | ||||
|  | ||||
|     @abstractmethod | ||||
|     def quit(self): | ||||
|         return | ||||
|  | ||||
|     @abstractmethod | ||||
|     def get_last_status_code(self): | ||||
|         return self.status_code | ||||
|  | ||||
|     @abstractmethod | ||||
|     def screenshot_step(self, step_n): | ||||
|         return None | ||||
|  | ||||
|     @abstractmethod | ||||
|     # Return true/false if this checker is ready to run, in the case it needs todo some special config check etc | ||||
|     def is_ready(self): | ||||
|         return True | ||||
|  | ||||
|     def get_all_headers(self): | ||||
|         """ | ||||
|         Get all headers but ensure all keys are lowercase | ||||
|         :return: | ||||
|         """ | ||||
|         return {k.lower(): v for k, v in self.headers.items()} | ||||
|  | ||||
|     def browser_steps_get_valid_steps(self): | ||||
|         if self.browser_steps is not None and len(self.browser_steps): | ||||
|             valid_steps = filter( | ||||
|                 lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), | ||||
|                 self.browser_steps) | ||||
|  | ||||
|             return valid_steps | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     def iterate_browser_steps(self): | ||||
|         from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|         from playwright._impl._errors import TimeoutError, Error | ||||
|         from jinja2 import Environment | ||||
|         jinja2_env = Environment(extensions=['jinja2_time.TimeExtension']) | ||||
|  | ||||
|         step_n = 0 | ||||
|  | ||||
|         if self.browser_steps is not None and len(self.browser_steps): | ||||
|             interface = steppable_browser_interface() | ||||
|             interface.page = self.page | ||||
|             valid_steps = self.browser_steps_get_valid_steps() | ||||
|  | ||||
|             for step in valid_steps: | ||||
|                 step_n += 1 | ||||
|                 logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...") | ||||
|                 self.screenshot_step("before-" + str(step_n)) | ||||
|                 self.save_step_html("before-" + str(step_n)) | ||||
|                 try: | ||||
|                     optional_value = step['optional_value'] | ||||
|                     selector = step['selector'] | ||||
|                     # Support for jinja2 template in step values, with date module added | ||||
|                     if '{%' in step['optional_value'] or '{{' in step['optional_value']: | ||||
|                         optional_value = str(jinja2_env.from_string(step['optional_value']).render()) | ||||
|                     if '{%' in step['selector'] or '{{' in step['selector']: | ||||
|                         selector = str(jinja2_env.from_string(step['selector']).render()) | ||||
|  | ||||
|                     getattr(interface, "call_action")(action_name=step['operation'], | ||||
|                                                       selector=selector, | ||||
|                                                       optional_value=optional_value) | ||||
|                     self.screenshot_step(step_n) | ||||
|                     self.save_step_html(step_n) | ||||
|                 except (Error, TimeoutError) as e: | ||||
|                     logger.debug(str(e)) | ||||
|                     # Stop processing here | ||||
|                     raise BrowserStepsStepException(step_n=step_n, original_e=e) | ||||
|  | ||||
|     # It's always good to reset these | ||||
|     def delete_browser_steps_screenshots(self): | ||||
|         import glob | ||||
|         if self.browser_steps_screenshot_path is not None: | ||||
|             dest = os.path.join(self.browser_steps_screenshot_path, 'step_*.jpeg') | ||||
|             files = glob.glob(dest) | ||||
|             for f in files: | ||||
|                 if os.path.isfile(f): | ||||
|                     os.unlink(f) | ||||
|  | ||||
|  | ||||
| #   Maybe for the future, each fetcher provides its own diff output, could be used for text, image | ||||
| #   the current one would return javascript output (as we use JS to generate the diff) | ||||
| # | ||||
| def available_fetchers(): | ||||
|     # See the if statement at the bottom of this file for how we switch between playwright and webdriver | ||||
|     import inspect | ||||
|     p = [] | ||||
|     for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass): | ||||
|         if inspect.isclass(obj): | ||||
|             # @todo html_ is maybe better as fetcher_ or something | ||||
|             # In this case, make sure to edit the default one in store.py and fetch_site_status.py | ||||
|             if name.startswith('html_'): | ||||
|                 t = tuple([name, obj.fetcher_description]) | ||||
|                 p.append(t) | ||||
|  | ||||
|     return p | ||||
|  | ||||
|  | ||||
| class base_html_playwright(Fetcher): | ||||
|     fetcher_description = "Playwright {}/Javascript".format( | ||||
|         os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize() | ||||
|     ) | ||||
|     if os.getenv("PLAYWRIGHT_DRIVER_URL"): | ||||
|         fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL")) | ||||
|  | ||||
|     browser_type = '' | ||||
|     command_executor = '' | ||||
|  | ||||
|     # Configs for Proxy setup | ||||
|     # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server" | ||||
|     playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password'] | ||||
|  | ||||
|     proxy = None | ||||
|  | ||||
|     def __init__(self, proxy_override=None, custom_browser_connection_url=None): | ||||
|         super().__init__() | ||||
|  | ||||
|         self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"') | ||||
|  | ||||
|         if custom_browser_connection_url: | ||||
|             self.browser_connection_is_custom = True | ||||
|             self.browser_connection_url = custom_browser_connection_url | ||||
|         else: | ||||
|             # Fallback to fetching from system | ||||
|             # .strip('"') is going to save someone a lot of time when they accidently wrap the env value | ||||
|             self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"') | ||||
|  | ||||
|  | ||||
|         # If any proxy settings are enabled, then we should setup the proxy object | ||||
|         proxy_args = {} | ||||
|         for k in self.playwright_proxy_settings_mappings: | ||||
|             v = os.getenv('playwright_proxy_' + k, False) | ||||
|             if v: | ||||
|                 proxy_args[k] = v.strip('"') | ||||
|  | ||||
|         if proxy_args: | ||||
|             self.proxy = proxy_args | ||||
|  | ||||
|         # allow per-watch proxy selection override | ||||
|         if proxy_override: | ||||
|             self.proxy = {'server': proxy_override} | ||||
|  | ||||
|         if self.proxy: | ||||
|             # Playwright needs separate username and password values | ||||
|             parsed = urlparse(self.proxy.get('server')) | ||||
|             if parsed.username: | ||||
|                 self.proxy['username'] = parsed.username | ||||
|                 self.proxy['password'] = parsed.password | ||||
|  | ||||
|     def screenshot_step(self, step_n=''): | ||||
|         screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85) | ||||
|  | ||||
|         if self.browser_steps_screenshot_path is not None: | ||||
|             destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n)) | ||||
|             logger.debug(f"Saving step screenshot to {destination}") | ||||
|             with open(destination, 'wb') as f: | ||||
|                 f.write(screenshot) | ||||
|  | ||||
|     def save_step_html(self, step_n): | ||||
|         content = self.page.content() | ||||
|         destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n)) | ||||
|         logger.debug(f"Saving step HTML to {destination}") | ||||
|         with open(destination, 'w') as f: | ||||
|             f.write(content) | ||||
|  | ||||
|     def run_fetch_browserless_puppeteer(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|  | ||||
|         from pkg_resources import resource_string | ||||
|  | ||||
|         extra_wait_ms = (int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) * 1000 | ||||
|  | ||||
|         self.xpath_element_js = self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) | ||||
|         code = resource_string(__name__, "res/puppeteer_fetch.js").decode('utf-8') | ||||
|         # In the future inject this is a proper JS package | ||||
|         code = code.replace('%xpath_scrape_code%', self.xpath_element_js) | ||||
|         code = code.replace('%instock_scrape_code%', self.instock_data_js) | ||||
|  | ||||
|         from requests.exceptions import ConnectTimeout, ReadTimeout | ||||
|         wait_browserless_seconds = 240 | ||||
|  | ||||
|         browserless_function_url = os.getenv('BROWSERLESS_FUNCTION_URL') | ||||
|         from urllib.parse import urlparse | ||||
|         if not browserless_function_url: | ||||
|             # Convert/try to guess from PLAYWRIGHT_DRIVER_URL | ||||
|             o = urlparse(os.getenv('PLAYWRIGHT_DRIVER_URL')) | ||||
|             browserless_function_url = o._replace(scheme="http")._replace(path="function").geturl() | ||||
|  | ||||
|  | ||||
|         # Append proxy connect string | ||||
|         if self.proxy: | ||||
|             # Remove username/password if it exists in the URL or you will receive "ERR_NO_SUPPORTED_PROXIES" error | ||||
|             # Actual authentication handled by Puppeteer/node | ||||
|             o = urlparse(self.proxy.get('server')) | ||||
|             proxy_url = urllib.parse.quote(o._replace(netloc="{}:{}".format(o.hostname, o.port)).geturl()) | ||||
|             browserless_function_url = f"{browserless_function_url}&--proxy-server={proxy_url}" | ||||
|  | ||||
|         try: | ||||
|             amp = '&' if '?' in browserless_function_url else '?' | ||||
|             response = requests.request( | ||||
|                 method="POST", | ||||
|                 json={ | ||||
|                     "code": code, | ||||
|                     "context": { | ||||
|                         # Very primitive disk cache - USE WITH EXTREME CAUTION | ||||
|                         # Run browserless container  with -e "FUNCTION_BUILT_INS=[\"fs\",\"crypto\"]" | ||||
|                         'disk_cache_dir': os.getenv("PUPPETEER_DISK_CACHE", False), # or path to disk cache ending in /, ie /tmp/cache/ | ||||
|                         'execute_js': self.webdriver_js_execute_code, | ||||
|                         'extra_wait_ms': extra_wait_ms, | ||||
|                         'include_filters': current_include_filters, | ||||
|                         'req_headers': request_headers, | ||||
|                         'screenshot_quality': int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)), | ||||
|                         'url': url, | ||||
|                         'user_agent': {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None), | ||||
|                         'proxy_username': self.proxy.get('username', '') if self.proxy else False, | ||||
|                         'proxy_password': self.proxy.get('password', '') if self.proxy and self.proxy.get('username') else False, | ||||
|                         'no_cache_list': [ | ||||
|                             'twitter', | ||||
|                             '.pdf' | ||||
|                         ], | ||||
|                         # Could use https://github.com/easylist/easylist here, or install a plugin | ||||
|                         'block_url_list': [ | ||||
|                             'adnxs.com', | ||||
|                             'analytics.twitter.com', | ||||
|                             'doubleclick.net', | ||||
|                             'google-analytics.com', | ||||
|                             'googletagmanager', | ||||
|                             'trustpilot.com' | ||||
|                         ] | ||||
|                     } | ||||
|                 }, | ||||
|                 # @todo /function needs adding ws:// to http:// rebuild this | ||||
|                 url=browserless_function_url+f"{amp}--disable-features=AudioServiceOutOfProcess&dumpio=true&--disable-remote-fonts", | ||||
|                 timeout=wait_browserless_seconds) | ||||
|  | ||||
|         except ReadTimeout: | ||||
|             raise PageUnloadable(url=url, status_code=None, message=f"No response from browserless in {wait_browserless_seconds}s") | ||||
|         except ConnectTimeout: | ||||
|             raise PageUnloadable(url=url, status_code=None, message=f"Timed out connecting to browserless, retrying..") | ||||
|         else: | ||||
|             # 200 Here means that the communication to browserless worked only, not the page state | ||||
|             if response.status_code == 200: | ||||
|                 import base64 | ||||
|  | ||||
|                 x = response.json() | ||||
|                 if not x.get('screenshot'): | ||||
|                     # https://github.com/puppeteer/puppeteer/blob/v1.0.0/docs/troubleshooting.md#tips | ||||
|                     # https://github.com/puppeteer/puppeteer/issues/1834 | ||||
|                     # https://github.com/puppeteer/puppeteer/issues/1834#issuecomment-381047051 | ||||
|                     # Check your memory is shared and big enough | ||||
|                     raise ScreenshotUnavailable(url=url, status_code=None) | ||||
|  | ||||
|                 if not x.get('content', '').strip(): | ||||
|                     raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|                 if x.get('status_code', 200) != 200 and not ignore_status_codes: | ||||
|                     raise Non200ErrorCodeReceived(url=url, status_code=x.get('status_code', 200), page_html=x['content']) | ||||
|  | ||||
|                 self.content = x.get('content') | ||||
|                 self.headers = x.get('headers') | ||||
|                 self.instock_data = x.get('instock_data') | ||||
|                 self.screenshot = base64.b64decode(x.get('screenshot')) | ||||
|                 self.status_code = x.get('status_code') | ||||
|                 self.xpath_data = x.get('xpath_data') | ||||
|  | ||||
|             else: | ||||
|                 # Some other error from browserless | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=response.content.decode('utf-8')) | ||||
|  | ||||
|     def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|  | ||||
|  | ||||
|         # For now, USE_EXPERIMENTAL_PUPPETEER_FETCH is not supported by watches with BrowserSteps (for now!) | ||||
|         # browser_connection_is_custom doesnt work with puppeteer style fetch (use playwright native too in this case) | ||||
|         if not self.browser_connection_is_custom and not self.browser_steps and os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH'): | ||||
|             if strtobool(os.getenv('USE_EXPERIMENTAL_PUPPETEER_FETCH')): | ||||
|                 # Temporary backup solution until we rewrite the playwright code | ||||
|                 return self.run_fetch_browserless_puppeteer( | ||||
|                     url, | ||||
|                     timeout, | ||||
|                     request_headers, | ||||
|                     request_body, | ||||
|                     request_method, | ||||
|                     ignore_status_codes, | ||||
|                     current_include_filters, | ||||
|                     is_binary) | ||||
|  | ||||
|         from playwright.sync_api import sync_playwright | ||||
|         import playwright._impl._errors | ||||
|  | ||||
|         self.delete_browser_steps_screenshots() | ||||
|         response = None | ||||
|  | ||||
|         with sync_playwright() as p: | ||||
|             browser_type = getattr(p, self.browser_type) | ||||
|  | ||||
|             # Seemed to cause a connection Exception even tho I can see it connect | ||||
|             # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000) | ||||
|             # 60,000 connection timeout only | ||||
|             browser = browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000) | ||||
|  | ||||
|             # SOCKS5 with authentication is not supported (yet) | ||||
|             # https://github.com/microsoft/playwright/issues/10567 | ||||
|  | ||||
|             # Set user agent to prevent Cloudflare from blocking the browser | ||||
|             # Use the default one configured in the App.py model that's passed from fetch_site_status.py | ||||
|             context = browser.new_context( | ||||
|                 user_agent={k.lower(): v for k, v in request_headers.items()}.get('user-agent', None), | ||||
|                 proxy=self.proxy, | ||||
|                 # This is needed to enable JavaScript execution on GitHub and others | ||||
|                 bypass_csp=True, | ||||
|                 # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers | ||||
|                 service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'), | ||||
|                 # Should never be needed | ||||
|                 accept_downloads=False | ||||
|             ) | ||||
|  | ||||
|             self.page = context.new_page() | ||||
|             if len(request_headers): | ||||
|                 context.set_extra_http_headers(request_headers) | ||||
|  | ||||
|             # Listen for all console events and handle errors | ||||
|             self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|             # Re-use as much code from browser steps as possible so its the same | ||||
|             from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|             browsersteps_interface = steppable_browser_interface() | ||||
|             browsersteps_interface.page = self.page | ||||
|  | ||||
|             response = browsersteps_interface.action_goto_url(value=url) | ||||
|             self.headers = response.all_headers() | ||||
|  | ||||
|             if response is None: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 logger.debug("Content Fetcher > Response object was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             try: | ||||
|                 if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code): | ||||
|                     browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None) | ||||
|             except playwright._impl._errors.TimeoutError as e: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 # This can be ok, we will try to grab what we could retrieve | ||||
|                 pass | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}") | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|             self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|  | ||||
|             self.status_code = response.status | ||||
|  | ||||
|             if self.status_code != 200 and not ignore_status_codes: | ||||
|  | ||||
|                 screenshot=self.page.screenshot(type='jpeg', full_page=True, | ||||
|                                      quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) | ||||
|  | ||||
|                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) | ||||
|  | ||||
|             if len(self.page.content().strip()) == 0: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 logger.debug("Content Fetcher > Content was empty") | ||||
|                 raise EmptyReply(url=url, status_code=response.status) | ||||
|  | ||||
|             # Run Browser Steps here | ||||
|             if self.browser_steps_get_valid_steps(): | ||||
|                 self.iterate_browser_steps() | ||||
|                  | ||||
|             self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|             # So we can find an element on the page where its selector was entered manually (maybe not xPath etc) | ||||
|             if current_include_filters is not None: | ||||
|                 self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters))) | ||||
|             else: | ||||
|                 self.page.evaluate("var include_filters=''") | ||||
|  | ||||
|             self.xpath_data = self.page.evaluate( | ||||
|                 "async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}") | ||||
|             self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}") | ||||
|  | ||||
|             self.content = self.page.content() | ||||
|             # Bug 3 in Playwright screenshot handling | ||||
|             # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|             # JPEG is better here because the screenshots can be very very large | ||||
|  | ||||
|             # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded | ||||
|             # which will significantly increase the IO size between the server and client, it's recommended to use the lowest | ||||
|             # acceptable screenshot quality here | ||||
|             try: | ||||
|                 # The actual screenshot | ||||
|                 self.screenshot = self.page.screenshot(type='jpeg', full_page=True, | ||||
|                                                        quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) | ||||
|             except Exception as e: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise ScreenshotUnavailable(url=url, status_code=response.status_code) | ||||
|  | ||||
|             context.close() | ||||
|             browser.close() | ||||
|  | ||||
|  | ||||
class base_html_webdriver(Fetcher):
    """Fetcher that drives a remote Chrome via Selenium WebDriver."""
    if os.getenv("WEBDRIVER_URL"):
        fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
    else:
        fetcher_description = "WebDriver Chrome/Javascript"

    # Configs for Proxy setup
    # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy"
    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
    proxy = None

    def __init__(self, proxy_override=None, custom_browser_connection_url=None):
        """Resolve the remote WebDriver URL and assemble the Selenium proxy config.

        :param proxy_override: per-request proxy URL, overrides env/system proxies
        :param custom_browser_connection_url: per-watch WebDriver endpoint
        """
        super().__init__()
        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy

        # .strip('"') is going to save someone a lot of time when they accidently wrap the env value
        if not custom_browser_connection_url:
            self.browser_connection_url = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
        else:
            self.browser_connection_is_custom = True
            self.browser_connection_url = custom_browser_connection_url

        # If any proxy settings are enabled, then we should setup the proxy object
        proxy_args = {}
        for k in self.selenium_proxy_settings_mappings:
            v = os.getenv('webdriver_' + k, False)
            if v:
                # Note: stored WITHOUT the 'webdriver_' prefix
                proxy_args[k] = v.strip('"')

        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
        # BUGFIX: keys in proxy_args are unprefixed, so the previous
        # .get('webdriver_httpProxy') / .get('webdriver_sslProxy') lookups never
        # matched and any env-configured value was clobbered.
        if not proxy_args.get('httpProxy') and self.system_http_proxy:
            proxy_args['httpProxy'] = self.system_http_proxy
        # BUGFIX: Selenium's Proxy object understands 'sslProxy' (see the mappings
        # list above); 'httpsProxy' is not a recognised field and was ignored.
        if not proxy_args.get('sslProxy') and self.system_https_proxy:
            proxy_args['sslProxy'] = self.system_https_proxy

        # Allows override the proxy on a per-request basis
        if proxy_override is not None:
            proxy_args['httpProxy'] = proxy_override

        if proxy_args:
            self.proxy = SeleniumProxy(raw=proxy_args)

    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):
        """Fetch `url` via the remote WebDriver and populate content/screenshot.

        Selenium cannot expose response headers or the real HTTP status, so
        status_code is hard-wired to 200 and headers stay empty.
        """
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options as ChromeOptions
        from selenium.common.exceptions import WebDriverException
        # request_body, request_method unused for now, until some magic in the future happens.

        options = ChromeOptions()
        if self.proxy:
            options.proxy = self.proxy

        self.driver = webdriver.Remote(
            command_executor=self.browser_connection_url,
            options=options)

        try:
            self.driver.get(url)
        except WebDriverException:
            # Be sure we close the session window
            self.quit()
            raise

        self.driver.set_window_size(1280, 1024)
        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

        if self.webdriver_js_execute_code is not None:
            self.driver.execute_script(self.webdriver_js_execute_code)
            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))

        # @todo - how to check this? is it possible?
        self.status_code = 200
        # @todo somehow we should try to get this working for WebDriver
        # raise EmptyReply(url=url, status_code=r.status_code)

        # @todo - dom wait loaded?
        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay)
        self.content = self.driver.page_source
        self.headers = {}

        self.screenshot = self.driver.get_screenshot_as_png()

    def is_ready(self):
        """Run a test connection to the remote webdriver; True on success."""
        from selenium import webdriver
        from selenium.webdriver.chrome.options import Options as ChromeOptions

        # BUGFIX: self.command_executor is never set on this class; use the
        # browser_connection_url resolved in __init__.
        self.driver = webdriver.Remote(
            command_executor=self.browser_connection_url,
            options=ChromeOptions())

        # driver.quit() seems to cause better exceptions
        self.quit()
        return True

    def quit(self):
        """Best-effort shutdown of the WebDriver session; never raises."""
        # BUGFIX: guard with getattr - quit() may be called before run()/is_ready()
        # ever created self.driver, which previously raised AttributeError.
        if getattr(self, 'driver', None):
            try:
                self.driver.quit()
            except Exception as e:
                logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
|  | ||||
|  | ||||
| # "html_requests" is listed as the default fetcher in store.py! | ||||
# "html_requests" is listed as the default fetcher in store.py!
class html_requests(Fetcher):
    """Plain HTTP(S) fetcher built on the `requests` library - no JavaScript."""
    fetcher_description = "Basic fast Plaintext/HTTP Client"

    def __init__(self, proxy_override=None, custom_browser_connection_url=None):
        super().__init__()
        # browser_connection_url is none because its always 'launched locally'
        self.proxy_override = proxy_override

    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):
        """Perform the HTTP request and populate content/headers/status_code.

        Raises EmptyReply on a zero-byte body and Non200ErrorCodeReceived on a
        non-200 status (unless ignore_status_codes is set).
        """
        # Make requests use a more modern looking user-agent
        lowered = {k.lower(): v for k, v in request_headers.items()}
        if not lowered.get('user-agent'):
            request_headers['User-Agent'] = os.getenv(
                "DEFAULT_SETTINGS_HEADERS_USERAGENT",
                'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36')

        # Allows override the proxy on a per-request basis
        # https://requests.readthedocs.io/en/latest/user/advanced/#socks
        # Should also work with `socks5://user:pass@host:port` type syntax.
        if self.proxy_override:
            proxies = {'http': self.proxy_override, 'https': self.proxy_override, 'ftp': self.proxy_override}
        else:
            proxies = {}
            if self.system_http_proxy:
                proxies['http'] = self.system_http_proxy
            if self.system_https_proxy:
                proxies['https'] = self.system_https_proxy

        # NOTE(review): verify=False disables TLS certificate verification.
        r = requests.request(method=request_method,
                             data=request_body,
                             url=url,
                             headers=request_headers,
                             timeout=timeout,
                             proxies=proxies,
                             verify=False)

        # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
        # For example - some sites don't tell us it's utf-8, but return utf-8 content
        # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably.
        # https://github.com/psf/requests/issues/1604 good info about requests encoding detection
        if not is_binary:
            # Don't run this for PDF (and requests identified as binary) takes a _long_ time
            declared_type = r.headers.get('content-type')
            if not declared_type or 'charset=' not in declared_type:
                detected = chardet.detect(r.content)['encoding']
                if detected:
                    r.encoding = detected

        if not r.content or not len(r.content):
            raise EmptyReply(url=url, status_code=r.status_code)

        # @todo test this
        # @todo maybe you really want to test zero-byte return pages?
        if r.status_code != 200 and not ignore_status_codes:
            # maybe check with content works?
            raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text)

        self.status_code = r.status_code
        if is_binary:
            # Binary files just return their checksum until we add something smarter
            self.content = hashlib.md5(r.content).hexdigest()
        else:
            self.content = r.text

        self.headers = r.headers
        self.raw_content = r.content
|  | ||||
|  | ||||
# Decide which is the 'real' HTML webdriver, this is more a system wide config
# rather than site-specific.
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
html_webdriver = base_html_playwright if use_playwright_as_chrome_fetcher else base_html_webdriver
							
								
								
									
										43
									
								
								changedetectionio/content_fetchers/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								changedetectionio/content_fetchers/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,43 @@ | ||||
| import sys | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from loguru import logger | ||||
| from changedetectionio.content_fetchers.exceptions import BrowserStepsStepException | ||||
| import os | ||||
|  | ||||
| visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary' | ||||
|  | ||||
| # available_fetchers() will scan this implementation looking for anything starting with html_ | ||||
| # this information is used in the form selections | ||||
| from changedetectionio.content_fetchers.requests import fetcher as html_requests | ||||
|  | ||||
def available_fetchers():
    """Return [(class_name, fetcher_description), ...] for each fetcher class.

    Used to populate the fetcher selection in the settings forms.
    """
    # See the if statement at the bottom of this file for how we switch between playwright and webdriver
    import inspect
    p = []
    # getmembers() with the inspect.isclass predicate already returns only
    # classes, so no second isclass() check is needed (previous code re-checked).
    for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
        # @todo html_ is maybe better as fetcher_ or something
        # In this case, make sure to edit the default one in store.py and fetch_site_status.py
        if name.startswith('html_'):
            p.append((name, obj.fetcher_description))

    return p
|  | ||||
|  | ||||
# Decide which is the 'real' HTML webdriver, this is more a system wide config
# rather than site-specific.
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
if not use_playwright_as_chrome_fetcher:
    logger.debug("Falling back to selenium as fetcher")
    from .webdriver_selenium import fetcher as html_webdriver
elif not strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')):
    # @note - For now, browser steps always uses playwright
    logger.debug('Using Playwright library as fetcher')
    from .playwright import fetcher as html_webdriver
else:
    logger.debug('Using direct Python Puppeteer library as fetcher')
    from .puppeteer import fetcher as html_webdriver
|  | ||||
							
								
								
									
										171
									
								
								changedetectionio/content_fetchers/base.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										171
									
								
								changedetectionio/content_fetchers/base.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,171 @@ | ||||
| import os | ||||
| from abc import abstractmethod | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers import BrowserStepsStepException | ||||
|  | ||||
|  | ||||
def manage_user_agent(headers, current_ua=''):
    """
    Basic setting of user-agent

    NOTE!!!!!! The service that does the actual Chrome fetching should handle any anti-robot techniques
    THERE ARE MANY WAYS THAT IT CAN BE DETECTED AS A ROBOT!!
    This does not take care of
    - Scraping of 'navigator' (platform, productSub, vendor, oscpu etc etc) browser object (navigator.appVersion) etc
    - TCP/IP fingerprint JA3 etc
    - Graphic rendering fingerprinting
    - Your IP being obviously in a pool of bad actors
    - Too many requests
    - Scraping of SCH-UA browser replies (thanks google!!)
    - Scraping of ServiceWorker, new window calls etc

    See https://filipvitas.medium.com/how-to-set-user-agent-header-with-puppeteer-js-and-not-fail-28c7a02165da
    Puppeteer requests https://github.com/dgtlmoon/pyppeteerstealth

    :param headers: request headers dict (key lookup is case-insensitive)
    :param current_ua: the UA string the browser itself reports, may be ''
    :return: the UA string to use, or None when neither source supplies one
    """
    # A user-agent in the custom headers always wins
    ua_in_custom_headers = next((v for k, v in headers.items() if k.lower() == "user-agent"), None)
    if ua_in_custom_headers:
        return ua_in_custom_headers

    # No custom UA: de-headless the browser's own UA if we have it.
    # (The early return above makes re-testing ua_in_custom_headers redundant.)
    if current_ua:
        return current_ua.replace('HeadlessChrome', 'Chrome')

    return None
|  | ||||
|  | ||||
class Fetcher():
    """Abstract base for all content fetchers (requests, playwright, puppeteer, selenium).

    A subclass implements run() and populates content, status_code, headers,
    screenshot etc so the rest of the app can treat every fetcher identically.
    """
    browser_connection_is_custom = None
    browser_connection_url = None
    browser_steps = None
    browser_steps_screenshot_path = None
    content = None
    error = None
    fetcher_description = "No description"
    # NOTE(review): mutable class-level default - subclasses rebind it per
    # instance in run(); avoid mutating it in place.
    headers = {}
    instock_data = None
    instock_data_js = ""
    status_code = None
    webdriver_js_execute_code = None
    xpath_data = None
    xpath_element_js = ""

    # Will be needed in the future by the VisualSelector, always get this where possible.
    screenshot = False
    system_http_proxy = os.getenv('HTTP_PROXY')
    system_https_proxy = os.getenv('HTTPS_PROXY')

    # Time ONTOP of the system defined env minimum time
    render_extract_delay = 0

    def __init__(self):
        from pkg_resources import resource_string
        # The code that scrapes elements and makes a list of elements/size/position to click on in the VisualSelector
        self.xpath_element_js = resource_string(__name__, "res/xpath_element_scraper.js").decode('utf-8')
        self.instock_data_js = resource_string(__name__, "res/stock-not-in-stock.js").decode('utf-8')

    @abstractmethod
    def get_error(self):
        return self.error

    @abstractmethod
    def run(self,
            url,
            timeout,
            request_headers,
            request_body,
            request_method,
            ignore_status_codes=False,
            current_include_filters=None,
            is_binary=False):
        # Should set self.error, self.status_code and self.content
        pass

    @abstractmethod
    def quit(self):
        return

    @abstractmethod
    def get_last_status_code(self):
        return self.status_code

    @abstractmethod
    def screenshot_step(self, step_n):
        return None

    @abstractmethod
    # Return true/false if this checker is ready to run, in the case it needs todo some special config check etc
    def is_ready(self):
        return True

    def get_all_headers(self):
        """
        Get all headers but ensure all keys are lowercase
        :return:
        """
        return {k.lower(): v for k, v in self.headers.items()}

    def browser_steps_get_valid_steps(self):
        """Return the list of actionable browser steps, or None if none configured.

        BUGFIX: previously returned a lazy filter object, which is ALWAYS truthy
        (even when it would yield nothing) and can only be iterated once - that
        made callers' `if self.browser_steps_get_valid_steps():` checks
        meaningless. A concrete list keeps the same iteration behaviour while
        being falsy when empty and safely re-iterable.
        """
        if self.browser_steps is not None and len(self.browser_steps):
            return [s for s in self.browser_steps
                    if s['operation'] and len(s['operation'])
                    and s['operation'] != 'Choose one'
                    and s['operation'] != 'Goto site']

        return None

    def iterate_browser_steps(self):
        """Execute each configured browser step against self.page, in order.

        Jinja2 templates (with the date extension) are rendered in step values.
        Raises BrowserStepsStepException on the first failing step.
        """
        from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface
        from playwright._impl._errors import TimeoutError, Error
        from jinja2 import Environment
        jinja2_env = Environment(extensions=['jinja2_time.TimeExtension'])

        step_n = 0

        if self.browser_steps is not None and len(self.browser_steps):
            interface = steppable_browser_interface()
            interface.page = self.page
            valid_steps = self.browser_steps_get_valid_steps()

            for step in valid_steps:
                step_n += 1
                logger.debug(f">> Iterating check - browser Step n {step_n} - {step['operation']}...")
                self.screenshot_step("before-" + str(step_n))
                self.save_step_html("before-" + str(step_n))
                try:
                    optional_value = step['optional_value']
                    selector = step['selector']
                    # Support for jinja2 template in step values, with date module added
                    if '{%' in step['optional_value'] or '{{' in step['optional_value']:
                        optional_value = str(jinja2_env.from_string(step['optional_value']).render())
                    if '{%' in step['selector'] or '{{' in step['selector']:
                        selector = str(jinja2_env.from_string(step['selector']).render())

                    getattr(interface, "call_action")(action_name=step['operation'],
                                                      selector=selector,
                                                      optional_value=optional_value)
                    self.screenshot_step(step_n)
                    self.save_step_html(step_n)
                except (Error, TimeoutError) as e:
                    logger.debug(str(e))
                    # Stop processing here
                    raise BrowserStepsStepException(step_n=step_n, original_e=e)

    # It's always good to reset these
    def delete_browser_steps_screenshots(self):
        """Remove any step_*.jpeg screenshots left over from a previous run."""
        import glob
        if self.browser_steps_screenshot_path is not None:
            dest = os.path.join(self.browser_steps_screenshot_path, 'step_*.jpeg')
            files = glob.glob(dest)
            for f in files:
                if os.path.isfile(f):
                    os.unlink(f)

    def save_step_html(self, param):
        # Hook for subclasses that persist per-step HTML; default is a no-op.
        pass
							
								
								
									
										97
									
								
								changedetectionio/content_fetchers/exceptions/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										97
									
								
								changedetectionio/content_fetchers/exceptions/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,97 @@ | ||||
| from loguru import logger | ||||
|  | ||||
|  | ||||
| class Non200ErrorCodeReceived(Exception): | ||||
|     def __init__(self, status_code, url, screenshot=None, xpath_data=None, page_html=None): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         self.screenshot = screenshot | ||||
|         self.xpath_data = xpath_data | ||||
|         self.page_text = None | ||||
|  | ||||
|         if page_html: | ||||
|             from changedetectionio import html_tools | ||||
|             self.page_text = html_tools.html_to_text(page_html) | ||||
|         return | ||||
|  | ||||
|  | ||||
| class checksumFromPreviousCheckWasTheSame(Exception): | ||||
|     def __init__(self): | ||||
|         return | ||||
|  | ||||
|  | ||||
| class JSActionExceptions(Exception): | ||||
|     def __init__(self, status_code, url, screenshot, message=''): | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         self.screenshot = screenshot | ||||
|         self.message = message | ||||
|         return | ||||
|  | ||||
| class BrowserConnectError(Exception): | ||||
|     msg = '' | ||||
|     def __init__(self, msg): | ||||
|         self.msg = msg | ||||
|         logger.error(f"Browser connection error {msg}") | ||||
|         return | ||||
|  | ||||
| class BrowserFetchTimedOut(Exception): | ||||
|     msg = '' | ||||
|     def __init__(self, msg): | ||||
|         self.msg = msg | ||||
|         logger.error(f"Browser processing took too long - {msg}") | ||||
|         return | ||||
|  | ||||
| class BrowserStepsStepException(Exception): | ||||
|     def __init__(self, step_n, original_e): | ||||
|         self.step_n = step_n | ||||
|         self.original_e = original_e | ||||
|         logger.debug(f"Browser Steps exception at step {self.step_n} {str(original_e)}") | ||||
|         return | ||||
|  | ||||
|  | ||||
| # @todo - make base Exception class that announces via logger() | ||||
| class PageUnloadable(Exception): | ||||
|     def __init__(self, status_code=None, url='', message='', screenshot=False): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         self.screenshot = screenshot | ||||
|         self.message = message | ||||
|         return | ||||
|  | ||||
| class BrowserStepsInUnsupportedFetcher(Exception): | ||||
|     def __init__(self, url): | ||||
|         self.url = url | ||||
|         return | ||||
|  | ||||
| class EmptyReply(Exception): | ||||
|     def __init__(self, status_code, url, screenshot=None): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         self.screenshot = screenshot | ||||
|         return | ||||
|  | ||||
|  | ||||
| class ScreenshotUnavailable(Exception): | ||||
|     def __init__(self, status_code, url, page_html=None): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         if page_html: | ||||
|             from changedetectionio import html_tools | ||||
|             self.page_text = html_tools.html_to_text(page_html) | ||||
|         return | ||||
|  | ||||
|  | ||||
| class ReplyWithContentButNoText(Exception): | ||||
|     def __init__(self, status_code, url, screenshot=None, has_filters=False, html_content=''): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         self.screenshot = screenshot | ||||
|         self.has_filters = has_filters | ||||
|         self.html_content = html_content | ||||
|         return | ||||
							
								
								
									
										208
									
								
								changedetectionio/content_fetchers/playwright.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										208
									
								
								changedetectionio/content_fetchers/playwright.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,208 @@ | ||||
| import json | ||||
| import os | ||||
| from urllib.parse import urlparse | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent | ||||
| from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable | ||||
|  | ||||
| class fetcher(Fetcher): | ||||
|     fetcher_description = "Playwright {}/Javascript".format( | ||||
|         os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize() | ||||
|     ) | ||||
|     if os.getenv("PLAYWRIGHT_DRIVER_URL"): | ||||
|         fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL")) | ||||
|  | ||||
|     browser_type = '' | ||||
|     command_executor = '' | ||||
|  | ||||
|     # Configs for Proxy setup | ||||
|     # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server" | ||||
|     playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password'] | ||||
|  | ||||
|     proxy = None | ||||
|  | ||||
|     def __init__(self, proxy_override=None, custom_browser_connection_url=None): | ||||
|         super().__init__() | ||||
|  | ||||
|         self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"') | ||||
|  | ||||
|         if custom_browser_connection_url: | ||||
|             self.browser_connection_is_custom = True | ||||
|             self.browser_connection_url = custom_browser_connection_url | ||||
|         else: | ||||
|             # Fallback to fetching from system | ||||
|             # .strip('"') is going to save someone a lot of time when they accidently wrap the env value | ||||
|             self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"') | ||||
|  | ||||
|         # If any proxy settings are enabled, then we should setup the proxy object | ||||
|         proxy_args = {} | ||||
|         for k in self.playwright_proxy_settings_mappings: | ||||
|             v = os.getenv('playwright_proxy_' + k, False) | ||||
|             if v: | ||||
|                 proxy_args[k] = v.strip('"') | ||||
|  | ||||
|         if proxy_args: | ||||
|             self.proxy = proxy_args | ||||
|  | ||||
|         # allow per-watch proxy selection override | ||||
|         if proxy_override: | ||||
|             self.proxy = {'server': proxy_override} | ||||
|  | ||||
|         if self.proxy: | ||||
|             # Playwright needs separate username and password values | ||||
|             parsed = urlparse(self.proxy.get('server')) | ||||
|             if parsed.username: | ||||
|                 self.proxy['username'] = parsed.username | ||||
|                 self.proxy['password'] = parsed.password | ||||
|  | ||||
|     def screenshot_step(self, step_n=''): | ||||
|         screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|  | ||||
|         if self.browser_steps_screenshot_path is not None: | ||||
|             destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n)) | ||||
|             logger.debug(f"Saving step screenshot to {destination}") | ||||
|             with open(destination, 'wb') as f: | ||||
|                 f.write(screenshot) | ||||
|  | ||||
|     def save_step_html(self, step_n): | ||||
|         content = self.page.content() | ||||
|         destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n)) | ||||
|         logger.debug(f"Saving step HTML to {destination}") | ||||
|         with open(destination, 'w') as f: | ||||
|             f.write(content) | ||||
|  | ||||
|     def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|  | ||||
|         from playwright.sync_api import sync_playwright | ||||
|         import playwright._impl._errors | ||||
|         from changedetectionio.content_fetchers import visualselector_xpath_selectors | ||||
|         self.delete_browser_steps_screenshots() | ||||
|         response = None | ||||
|  | ||||
|         with sync_playwright() as p: | ||||
|             browser_type = getattr(p, self.browser_type) | ||||
|  | ||||
|             # Seemed to cause a connection Exception even tho I can see it connect | ||||
|             # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000) | ||||
|             # 60,000 connection timeout only | ||||
|             browser = browser_type.connect_over_cdp(self.browser_connection_url, timeout=60000) | ||||
|  | ||||
|             # SOCKS5 with authentication is not supported (yet) | ||||
|             # https://github.com/microsoft/playwright/issues/10567 | ||||
|  | ||||
|             # Set user agent to prevent Cloudflare from blocking the browser | ||||
|             # Use the default one configured in the App.py model that's passed from fetch_site_status.py | ||||
|             context = browser.new_context( | ||||
|                 accept_downloads=False,  # Should never be needed | ||||
|                 bypass_csp=True,  # This is needed to enable JavaScript execution on GitHub and others | ||||
|                 extra_http_headers=request_headers, | ||||
|                 ignore_https_errors=True, | ||||
|                 proxy=self.proxy, | ||||
|                 service_workers=os.getenv('PLAYWRIGHT_SERVICE_WORKERS', 'allow'), # Should be `allow` or `block` - sites like YouTube can transmit large amounts of data via Service Workers | ||||
|                 user_agent=manage_user_agent(headers=request_headers), | ||||
|             ) | ||||
|  | ||||
|             self.page = context.new_page() | ||||
|  | ||||
|             # Listen for all console events and handle errors | ||||
|             self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|             # Re-use as much code from browser steps as possible so its the same | ||||
|             from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|             browsersteps_interface = steppable_browser_interface() | ||||
|             browsersteps_interface.page = self.page | ||||
|  | ||||
|             response = browsersteps_interface.action_goto_url(value=url) | ||||
|  | ||||
|             if response is None: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 logger.debug("Content Fetcher > Response object was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             self.headers = response.all_headers() | ||||
|  | ||||
|             try: | ||||
|                 if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code): | ||||
|                     browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None) | ||||
|             except playwright._impl._errors.TimeoutError as e: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 # This can be ok, we will try to grab what we could retrieve | ||||
|                 pass | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Content Fetcher > Other exception when executing custom JS code {str(e)}") | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|             self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|             try: | ||||
|                 self.status_code = response.status | ||||
|             except Exception as e: | ||||
|                 # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962 | ||||
|                 logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.") | ||||
|                 logger.critical(response) | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             if self.status_code != 200 and not ignore_status_codes: | ||||
|                 screenshot = self.page.screenshot(type='jpeg', full_page=True, | ||||
|                                                   quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|  | ||||
|                 raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) | ||||
|  | ||||
|             if len(self.page.content().strip()) == 0: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 logger.debug("Content Fetcher > Content was empty") | ||||
|                 raise EmptyReply(url=url, status_code=response.status) | ||||
|  | ||||
|             # Run Browser Steps here | ||||
|             if self.browser_steps_get_valid_steps(): | ||||
|                 self.iterate_browser_steps() | ||||
|  | ||||
|             self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
|             # So we can find an element on the page where its selector was entered manually (maybe not xPath etc) | ||||
|             if current_include_filters is not None: | ||||
|                 self.page.evaluate("var include_filters={}".format(json.dumps(current_include_filters))) | ||||
|             else: | ||||
|                 self.page.evaluate("var include_filters=''") | ||||
|  | ||||
|             self.xpath_data = self.page.evaluate( | ||||
|                 "async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}") | ||||
|             self.instock_data = self.page.evaluate("async () => {" + self.instock_data_js + "}") | ||||
|  | ||||
|             self.content = self.page.content() | ||||
|             # Bug 3 in Playwright screenshot handling | ||||
|             # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|             # JPEG is better here because the screenshots can be very very large | ||||
|  | ||||
|             # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded | ||||
|             # which will significantly increase the IO size between the server and client, it's recommended to use the lowest | ||||
|             # acceptable screenshot quality here | ||||
|             try: | ||||
|                 # The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage | ||||
|                 self.screenshot = self.page.screenshot(type='jpeg', | ||||
|                                                        full_page=True, | ||||
|                                                        quality=int(os.getenv("SCREENSHOT_QUALITY", 72)), | ||||
|                                                        ) | ||||
|             except Exception as e: | ||||
|                 # It's likely the screenshot was too long/big and something crashed | ||||
|                 raise ScreenshotUnavailable(url=url, status_code=self.status_code) | ||||
|             finally: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
							
								
								
									
										247
									
								
								changedetectionio/content_fetchers/puppeteer.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										247
									
								
								changedetectionio/content_fetchers/puppeteer.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,247 @@ | ||||
| import asyncio | ||||
| import json | ||||
| import os | ||||
| import websockets.exceptions | ||||
| from urllib.parse import urlparse | ||||
|  | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent | ||||
| from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, BrowserFetchTimedOut, BrowserConnectError | ||||
|  | ||||
|  | ||||
| class fetcher(Fetcher): | ||||
|     fetcher_description = "Puppeteer/direct {}/Javascript".format( | ||||
|         os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize() | ||||
|     ) | ||||
|     if os.getenv("PLAYWRIGHT_DRIVER_URL"): | ||||
|         fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL")) | ||||
|  | ||||
|     browser_type = '' | ||||
|     command_executor = '' | ||||
|  | ||||
|     proxy = None | ||||
|  | ||||
|     def __init__(self, proxy_override=None, custom_browser_connection_url=None): | ||||
|         super().__init__() | ||||
|  | ||||
|         if custom_browser_connection_url: | ||||
|             self.browser_connection_is_custom = True | ||||
|             self.browser_connection_url = custom_browser_connection_url | ||||
|         else: | ||||
|             # Fallback to fetching from system | ||||
|             # .strip('"') is going to save someone a lot of time when they accidently wrap the env value | ||||
|             self.browser_connection_url = os.getenv("PLAYWRIGHT_DRIVER_URL", 'ws://playwright-chrome:3000').strip('"') | ||||
|  | ||||
|         # allow per-watch proxy selection override | ||||
|         # @todo check global too? | ||||
|         if proxy_override: | ||||
|             # Playwright needs separate username and password values | ||||
|             parsed = urlparse(proxy_override) | ||||
|             if parsed: | ||||
|                 self.proxy = {'username': parsed.username, 'password': parsed.password} | ||||
|                 # Add the proxy server chrome start option, the username and password never gets added here | ||||
|                 # (It always goes in via await self.page.authenticate(self.proxy)) | ||||
|  | ||||
|                 # @todo filter some injection attack? | ||||
|                 # check scheme when no scheme | ||||
|                 proxy_url = parsed.scheme + "://" if parsed.scheme else 'http://' | ||||
|                 r = "?" if not '?' in self.browser_connection_url else '&' | ||||
|                 port = ":"+str(parsed.port) if parsed.port else '' | ||||
|                 q = "?"+parsed.query if parsed.query else '' | ||||
|                 proxy_url += f"{parsed.hostname}{port}{parsed.path}{q}" | ||||
|                 self.browser_connection_url += f"{r}--proxy-server={proxy_url}" | ||||
|  | ||||
|     # def screenshot_step(self, step_n=''): | ||||
|     #     screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=85) | ||||
|     # | ||||
|     #     if self.browser_steps_screenshot_path is not None: | ||||
|     #         destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n)) | ||||
|     #         logger.debug(f"Saving step screenshot to {destination}") | ||||
|     #         with open(destination, 'wb') as f: | ||||
|     #             f.write(screenshot) | ||||
|     # | ||||
|     # def save_step_html(self, step_n): | ||||
|     #     content = self.page.content() | ||||
|     #     destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.html'.format(step_n)) | ||||
|     #     logger.debug(f"Saving step HTML to {destination}") | ||||
|     #     with open(destination, 'w') as f: | ||||
|     #         f.write(content) | ||||
|  | ||||
|     async def fetch_page(self, | ||||
|                          url, | ||||
|                          timeout, | ||||
|                          request_headers, | ||||
|                          request_body, | ||||
|                          request_method, | ||||
|                          ignore_status_codes, | ||||
|                          current_include_filters, | ||||
|                          is_binary | ||||
|                          ): | ||||
|  | ||||
|         from changedetectionio.content_fetchers import visualselector_xpath_selectors | ||||
|         self.delete_browser_steps_screenshots() | ||||
|         extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|  | ||||
|         from pyppeteer import Pyppeteer | ||||
|         pyppeteer_instance = Pyppeteer() | ||||
|  | ||||
|         # Connect directly using the specified browser_ws_endpoint | ||||
|         # @todo timeout | ||||
|         try: | ||||
|             browser = await pyppeteer_instance.connect(browserWSEndpoint=self.browser_connection_url, | ||||
|                                                        ignoreHTTPSErrors=True | ||||
|                                                        ) | ||||
|         except websockets.exceptions.InvalidStatusCode as e: | ||||
|             raise BrowserConnectError(msg=f"Error while trying to connect the browser, Code {e.status_code} (check your access)") | ||||
|         except websockets.exceptions.InvalidURI: | ||||
|             raise BrowserConnectError(msg=f"Error connecting to the browser, check your browser connection address (should be ws:// or wss://") | ||||
|         except Exception as e: | ||||
|             raise BrowserConnectError(msg=f"Error connecting to the browser {str(e)}") | ||||
|         else: | ||||
|             self.page = await browser.newPage() | ||||
|  | ||||
|         await self.page.setUserAgent(manage_user_agent(headers=request_headers, current_ua=await self.page.evaluate('navigator.userAgent'))) | ||||
|  | ||||
|         await self.page.setBypassCSP(True) | ||||
|         if request_headers: | ||||
|             await self.page.setExtraHTTPHeaders(request_headers) | ||||
|  | ||||
|         # SOCKS5 with authentication is not supported (yet) | ||||
|         # https://github.com/microsoft/playwright/issues/10567 | ||||
|         self.page.setDefaultNavigationTimeout(0) | ||||
|         await self.page.setCacheEnabled(True) | ||||
|         if self.proxy and self.proxy.get('username'): | ||||
|             # Setting Proxy-Authentication header is deprecated, and doing so can trigger header change errors from Puppeteer | ||||
|             # https://github.com/puppeteer/puppeteer/issues/676 ? | ||||
|             # https://help.brightdata.com/hc/en-us/articles/12632549957649-Proxy-Manager-How-to-Guides#h_01HAKWR4Q0AFS8RZTNYWRDFJC2 | ||||
|             # https://cri.dev/posts/2020-03-30-How-to-solve-Puppeteer-Chrome-Error-ERR_INVALID_ARGUMENT/ | ||||
|             await self.page.authenticate(self.proxy) | ||||
|  | ||||
|         # Re-use as much code from browser steps as possible so its the same | ||||
|         # from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface | ||||
|  | ||||
|         # not yet used here, we fallback to playwright when browsersteps is required | ||||
|         #            browsersteps_interface = steppable_browser_interface() | ||||
|         #            browsersteps_interface.page = self.page | ||||
|  | ||||
|         response = await self.page.goto(url, waitUntil="load") | ||||
|  | ||||
|  | ||||
|         if response is None: | ||||
|             await self.page.close() | ||||
|             await browser.close() | ||||
|             logger.warning("Content Fetcher > Response object was none") | ||||
|             raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|         self.headers = response.headers | ||||
|  | ||||
|         try: | ||||
|             if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code): | ||||
|                 await self.page.evaluate(self.webdriver_js_execute_code) | ||||
|         except Exception as e: | ||||
|             logger.warning("Got exception when running evaluate on custom JS code") | ||||
|             logger.error(str(e)) | ||||
|             await self.page.close() | ||||
|             await browser.close() | ||||
|             # This can be ok, we will try to grab what we could retrieve | ||||
|             raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|         try: | ||||
|             self.status_code = response.status | ||||
|         except Exception as e: | ||||
|             # https://github.com/dgtlmoon/changedetection.io/discussions/2122#discussioncomment-8241962 | ||||
|             logger.critical(f"Response from the browser/Playwright did not have a status_code! Response follows.") | ||||
|             logger.critical(response) | ||||
|             await self.page.close() | ||||
|             await browser.close() | ||||
|             raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|         if self.status_code != 200 and not ignore_status_codes: | ||||
|             screenshot = await self.page.screenshot(type_='jpeg', | ||||
|                                                     fullPage=True, | ||||
|                                                     quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|  | ||||
|             raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot) | ||||
|         content = await self.page.content | ||||
|         if len(content.strip()) == 0: | ||||
|             await self.page.close() | ||||
|             await browser.close() | ||||
|             logger.error("Content Fetcher > Content was empty") | ||||
|             raise EmptyReply(url=url, status_code=response.status) | ||||
|  | ||||
|         # Run Browser Steps here | ||||
|         # @todo not yet supported, we switch to playwright in this case | ||||
|         #            if self.browser_steps_get_valid_steps(): | ||||
|         #                self.iterate_browser_steps() | ||||
|  | ||||
|         await asyncio.sleep(1 + extra_wait) | ||||
|  | ||||
|         # So we can find an element on the page where its selector was entered manually (maybe not xPath etc) | ||||
|         # Setup the xPath/VisualSelector scraper | ||||
|         if current_include_filters is not None: | ||||
|             js = json.dumps(current_include_filters) | ||||
|             await self.page.evaluate(f"var include_filters={js}") | ||||
|         else: | ||||
|             await self.page.evaluate(f"var include_filters=''") | ||||
|  | ||||
|         self.xpath_data = await self.page.evaluate( | ||||
|             "async () => {" + self.xpath_element_js.replace('%ELEMENTS%', visualselector_xpath_selectors) + "}") | ||||
|         self.instock_data = await self.page.evaluate("async () => {" + self.instock_data_js + "}") | ||||
|  | ||||
|         self.content = await self.page.content | ||||
|         # Bug 3 in Playwright screenshot handling | ||||
|         # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|         # JPEG is better here because the screenshots can be very very large | ||||
|  | ||||
|         # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded | ||||
|         # which will significantly increase the IO size between the server and client, it's recommended to use the lowest | ||||
|         # acceptable screenshot quality here | ||||
|         try: | ||||
|             self.screenshot = await self.page.screenshot(type_='jpeg', | ||||
|                                                          fullPage=True, | ||||
|                                                          quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|         except Exception as e: | ||||
|             logger.error("Error fetching screenshot") | ||||
|             # // May fail on very large pages with 'WARNING: tile memory limits exceeded, some content may not draw' | ||||
|             # // @ todo after text extract, we can place some overlay text with red background to say 'croppped' | ||||
|             logger.error('ERROR: content-fetcher page was maybe too large for a screenshot, reverting to viewport only screenshot') | ||||
|             try: | ||||
|                 self.screenshot = await self.page.screenshot(type_='jpeg', | ||||
|                                                              fullPage=False, | ||||
|                                                              quality=int(os.getenv("SCREENSHOT_QUALITY", 72))) | ||||
|             except Exception as e: | ||||
|                 logger.error('ERROR: Failed to get viewport-only reduced screenshot :(') | ||||
|                 pass | ||||
|         finally: | ||||
|             # It's good to log here in the case that the browser crashes on shutting down but we still get the data we need | ||||
|             logger.success(f"Fetching '{url}' complete, closing page") | ||||
|             await self.page.close() | ||||
|             logger.success(f"Fetching '{url}' complete, closing browser") | ||||
|             await browser.close() | ||||
|         logger.success(f"Fetching '{url}' complete, exiting puppeteer fetch.") | ||||
|  | ||||
|     async def main(self, **kwargs): | ||||
|         await self.fetch_page(**kwargs) | ||||
|  | ||||
|     def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False, | ||||
|             current_include_filters=None, is_binary=False): | ||||
|  | ||||
|         #@todo make update_worker async which could run any of these content_fetchers within memory and time constraints | ||||
|         max_time = int(os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)) | ||||
|  | ||||
|         # This will work in 3.10 but not >= 3.11 because 3.11 wants tasks only | ||||
|         try: | ||||
|             asyncio.run(asyncio.wait_for(self.main( | ||||
|                 url=url, | ||||
|                 timeout=timeout, | ||||
|                 request_headers=request_headers, | ||||
|                 request_body=request_body, | ||||
|                 request_method=request_method, | ||||
|                 ignore_status_codes=ignore_status_codes, | ||||
|                 current_include_filters=current_include_filters, | ||||
|                 is_binary=is_binary | ||||
|             ), timeout=max_time)) | ||||
|         except asyncio.TimeoutError: | ||||
|             raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds.")) | ||||
|  | ||||
							
								
								
									
										91
									
								
								changedetectionio/content_fetchers/requests.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										91
									
								
								changedetectionio/content_fetchers/requests.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,91 @@ | ||||
| import hashlib | ||||
| import os | ||||
|  | ||||
| import chardet | ||||
| import requests | ||||
|  | ||||
| from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived | ||||
| from changedetectionio.content_fetchers.base import Fetcher | ||||
|  | ||||
|  | ||||
| # "html_requests" is listed as the default fetcher in store.py! | ||||
| class fetcher(Fetcher): | ||||
|     fetcher_description = "Basic fast Plaintext/HTTP Client" | ||||
|  | ||||
|     def __init__(self, proxy_override=None, custom_browser_connection_url=None): | ||||
|         super().__init__() | ||||
|         self.proxy_override = proxy_override | ||||
|         # browser_connection_url is none because its always 'launched locally' | ||||
|  | ||||
|     def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|  | ||||
|         if self.browser_steps_get_valid_steps(): | ||||
|             raise BrowserStepsInUnsupportedFetcher(url=url) | ||||
|  | ||||
|         # Make requests use a more modern looking user-agent | ||||
|         if not {k.lower(): v for k, v in request_headers.items()}.get('user-agent', None): | ||||
|             request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", | ||||
|                                                       'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36') | ||||
|  | ||||
|         proxies = {} | ||||
|  | ||||
|         # Allows override the proxy on a per-request basis | ||||
|  | ||||
|         # https://requests.readthedocs.io/en/latest/user/advanced/#socks | ||||
|         # Should also work with `socks5://user:pass@host:port` type syntax. | ||||
|  | ||||
|         if self.proxy_override: | ||||
|             proxies = {'http': self.proxy_override, 'https': self.proxy_override, 'ftp': self.proxy_override} | ||||
|         else: | ||||
|             if self.system_http_proxy: | ||||
|                 proxies['http'] = self.system_http_proxy | ||||
|             if self.system_https_proxy: | ||||
|                 proxies['https'] = self.system_https_proxy | ||||
|  | ||||
|         r = requests.request(method=request_method, | ||||
|                              data=request_body, | ||||
|                              url=url, | ||||
|                              headers=request_headers, | ||||
|                              timeout=timeout, | ||||
|                              proxies=proxies, | ||||
|                              verify=False) | ||||
|  | ||||
|         # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks. | ||||
|         # For example - some sites don't tell us it's utf-8, but return utf-8 content | ||||
|         # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably. | ||||
|         # https://github.com/psf/requests/issues/1604 good info about requests encoding detection | ||||
|         if not is_binary: | ||||
|             # Don't run this for PDF (and requests identified as binary) takes a _long_ time | ||||
|             if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'): | ||||
|                 encoding = chardet.detect(r.content)['encoding'] | ||||
|                 if encoding: | ||||
|                     r.encoding = encoding | ||||
|  | ||||
|         self.headers = r.headers | ||||
|  | ||||
|         if not r.content or not len(r.content): | ||||
|             raise EmptyReply(url=url, status_code=r.status_code) | ||||
|  | ||||
|         # @todo test this | ||||
|         # @todo maybe you really want to test zero-byte return pages? | ||||
|         if r.status_code != 200 and not ignore_status_codes: | ||||
|             # maybe check with content works? | ||||
|             raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text) | ||||
|  | ||||
|         self.status_code = r.status_code | ||||
|         if is_binary: | ||||
|             # Binary files just return their checksum until we add something smarter | ||||
|             self.content = hashlib.md5(r.content).hexdigest() | ||||
|         else: | ||||
|             self.content = r.text | ||||
|  | ||||
|  | ||||
|         self.raw_content = r.content | ||||
| @@ -146,7 +146,7 @@ module.exports = async ({page, context}) => { | ||||
|     var xpath_data; | ||||
|     var instock_data; | ||||
|     try { | ||||
|         // Not sure the best way here, in the future this should be a new package added to npm then run in browserless
 | ||||
|         // Not sure the best way here, in the future this should be a new package added to npm then run in evaluatedCode
 | ||||
|         // (Once the old playwright is removed)
 | ||||
|         xpath_data = await page.evaluate((include_filters) => {%xpath_scrape_code%}, include_filters); | ||||
|         instock_data = await page.evaluate(() => {%instock_scrape_code%}); | ||||
							
								
								
									
										192
									
								
								changedetectionio/content_fetchers/res/stock-not-in-stock.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										192
									
								
								changedetectionio/content_fetchers/res/stock-not-in-stock.js
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,192 @@ | ||||
| // Restock Detector | ||||
| // (c) Leigh Morresi dgtlmoon@gmail.com | ||||
| // | ||||
| // Assumes the product is in stock to begin with, unless the following appears above the fold ; | ||||
| // - outOfStockTexts appears above the fold (out of stock) | ||||
| // - negateOutOfStockRegex (really is in stock) | ||||
|  | ||||
| function isItemInStock() { | ||||
|     // @todo Pass these in so the same list can be used in non-JS fetchers | ||||
|     const outOfStockTexts = [ | ||||
|         ' أخبرني عندما يتوفر', | ||||
|         '0 in stock', | ||||
|         'actuellement indisponible', | ||||
|         'agotado', | ||||
|         'article épuisé', | ||||
|         'artikel zurzeit vergriffen', | ||||
|         'as soon as stock is available', | ||||
|         'ausverkauft', // sold out | ||||
|         'available for back order', | ||||
|         'awaiting stock', | ||||
|         'back in stock soon', | ||||
|         'back-order or out of stock', | ||||
|         'backordered', | ||||
|         'benachrichtigt mich', // notify me | ||||
|         'brak na stanie', | ||||
|         'brak w magazynie', | ||||
|         'coming soon', | ||||
|         'currently have any tickets for this', | ||||
|         'currently unavailable', | ||||
|         'dieser artikel ist bald wieder verfügbar', | ||||
|         'dostępne wkrótce', | ||||
|         'en rupture de stock', | ||||
|         'ist derzeit nicht auf lager', | ||||
|         'item is no longer available', | ||||
|         'let me know when it\'s available', | ||||
|         'message if back in stock', | ||||
|         'nachricht bei', | ||||
|         'nicht auf lager', | ||||
|         'nicht lieferbar', | ||||
|         'nicht zur verfügung', | ||||
|         'niet beschikbaar', | ||||
|         'niet leverbaar', | ||||
|         'niet op voorraad', | ||||
|         'no disponible temporalmente', | ||||
|         'no longer in stock', | ||||
|         'no tickets available', | ||||
|         'not available', | ||||
|         'not currently available', | ||||
|         'not in stock', | ||||
|         'notify me when available', | ||||
|         'notify when available', | ||||
|         'não estamos a aceitar encomendas', | ||||
|         'out of stock', | ||||
|         'out-of-stock', | ||||
|         'prodotto esaurito', | ||||
|         'produkt niedostępny', | ||||
|         'sold out', | ||||
|         'sold-out', | ||||
|         'temporarily out of stock', | ||||
|         'temporarily unavailable', | ||||
|         'there were no search results for', | ||||
|         'this item is currently unavailable', | ||||
|         'tickets unavailable', | ||||
|         'tijdelijk uitverkocht', | ||||
|         'unavailable tickets', | ||||
|         'vorbestellung ist bald möglich', | ||||
|         'we couldn\'t find any products that match', | ||||
|         'we do not currently have an estimate of when this product will be back in stock.', | ||||
|         'we don\'t know when or if this item will be back in stock.', | ||||
|         'we were not able to find a match', | ||||
|         'zur zeit nicht an lager', | ||||
|         '品切れ', | ||||
|         '已售', | ||||
|         '已售完', | ||||
|         '품절' | ||||
|     ]; | ||||
|  | ||||
|  | ||||
|     const vh = Math.max(document.documentElement.clientHeight || 0, window.innerHeight || 0); | ||||
|  | ||||
|     function getElementBaseText(element) { | ||||
|         // .textContent can include text from children which may give the wrong results | ||||
|         // scan only immediate TEXT_NODEs, which will be a child of the element | ||||
|         var text = ""; | ||||
|         for (var i = 0; i < element.childNodes.length; ++i) | ||||
|             if (element.childNodes[i].nodeType === Node.TEXT_NODE) | ||||
|                 text += element.childNodes[i].textContent; | ||||
|         return text.toLowerCase().trim(); | ||||
|     } | ||||
|  | ||||
|     const negateOutOfStockRegex = new RegExp('^([0-9] in stock|add to cart|in stock)', 'ig'); | ||||
|  | ||||
|     // The out-of-stock or in-stock-text is generally always above-the-fold | ||||
|     // and often below-the-fold is a list of related products that may or may not contain trigger text | ||||
|     // so it's good to filter to just the 'above the fold' elements | ||||
|     // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist | ||||
|  | ||||
|  | ||||
| // @todo - if it's SVG or IMG, go into image diff mode | ||||
| // %ELEMENTS% replaced at injection time because different interfaces use it with different settings | ||||
|  | ||||
|     console.log("Scanning %ELEMENTS%"); | ||||
|  | ||||
|     function collectVisibleElements(parent, visibleElements) { | ||||
|         if (!parent) return; // Base case: if parent is null or undefined, return | ||||
|  | ||||
|         // Add the parent itself to the visible elements array if it's of the specified types | ||||
|         visibleElements.push(parent); | ||||
|  | ||||
|         // Iterate over the parent's children | ||||
|         const children = parent.children; | ||||
|         for (let i = 0; i < children.length; i++) { | ||||
|             const child = children[i]; | ||||
|             if ( | ||||
|                 child.nodeType === Node.ELEMENT_NODE && | ||||
|                 window.getComputedStyle(child).display !== 'none' && | ||||
|                 window.getComputedStyle(child).visibility !== 'hidden' && | ||||
|                 child.offsetWidth >= 0 && | ||||
|                 child.offsetHeight >= 0 && | ||||
|                 window.getComputedStyle(child).contentVisibility !== 'hidden' | ||||
|             ) { | ||||
|                 // If the child is an element and is visible, recursively collect visible elements | ||||
|                 collectVisibleElements(child, visibleElements); | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     const elementsToScan = []; | ||||
|     collectVisibleElements(document.body, elementsToScan); | ||||
|  | ||||
|     var elementText = ""; | ||||
|  | ||||
|     // REGEXS THAT REALLY MEAN IT'S IN STOCK | ||||
|     for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|         const element = elementsToScan[i]; | ||||
|  | ||||
|         // outside the 'fold' or some weird text in the heading area | ||||
|         // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|         if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|             continue | ||||
|         } | ||||
|  | ||||
|         elementText = ""; | ||||
|         if (element.tagName.toLowerCase() === "input") { | ||||
|             elementText = element.value.toLowerCase().trim(); | ||||
|         } else { | ||||
|             elementText = getElementBaseText(element); | ||||
|         } | ||||
|  | ||||
|         if (elementText.length) { | ||||
|             // try which ones could mean its in stock | ||||
|             if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) { | ||||
|                 console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`) | ||||
|                 return 'Possibly in stock'; | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK | ||||
|     for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|         const element = elementsToScan[i]; | ||||
|         // outside the 'fold' or some weird text in the heading area | ||||
|         // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|         if (element.getBoundingClientRect().top + window.scrollY >= vh + 150 || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|             continue | ||||
|         } | ||||
|         elementText = ""; | ||||
|         if (element.tagName.toLowerCase() === "input") { | ||||
|             elementText = element.value.toLowerCase().trim(); | ||||
|         } else { | ||||
|             elementText = getElementBaseText(element); | ||||
|         } | ||||
|  | ||||
|         if (elementText.length) { | ||||
|             // and these mean its out of stock | ||||
|             for (const outOfStockText of outOfStockTexts) { | ||||
|                 if (elementText.includes(outOfStockText)) { | ||||
|                     console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}"`) | ||||
|                     return outOfStockText; // item is out of stock | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     console.log(`Returning 'Possibly in stock' - cant' find any useful matching text`) | ||||
|     return 'Possibly in stock'; // possibly in stock, cant decide otherwise. | ||||
| } | ||||
|  | ||||
| // returns the element text that makes it think it's out of stock | ||||
| return isItemInStock().trim() | ||||
|  | ||||
|  | ||||
| @@ -16,24 +16,23 @@ try { | ||||
| } | ||||
| 
 | ||||
| 
 | ||||
| 
 | ||||
| // Include the getXpath script directly, easier than fetching
 | ||||
| function getxpath(e) { | ||||
|         var n = e; | ||||
|         if (n && n.id) return '//*[@id="' + n.id + '"]'; | ||||
|         for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) { | ||||
|             for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling; | ||||
|             for (d = n.nextSibling; d;) { | ||||
|                 if (d.nodeName === n.nodeName) { | ||||
|                     r = !0; | ||||
|                     break | ||||
|                 } | ||||
|                 d = d.nextSibling | ||||
|     var n = e; | ||||
|     if (n && n.id) return '//*[@id="' + n.id + '"]'; | ||||
|     for (var o = []; n && Node.ELEMENT_NODE === n.nodeType;) { | ||||
|         for (var i = 0, r = !1, d = n.previousSibling; d;) d.nodeType !== Node.DOCUMENT_TYPE_NODE && d.nodeName === n.nodeName && i++, d = d.previousSibling; | ||||
|         for (d = n.nextSibling; d;) { | ||||
|             if (d.nodeName === n.nodeName) { | ||||
|                 r = !0; | ||||
|                 break | ||||
|             } | ||||
|             o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode | ||||
|             d = d.nextSibling | ||||
|         } | ||||
|         return o.length ? "/" + o.reverse().join("/") : "" | ||||
|         o.push((n.prefix ? n.prefix + ":" : "") + n.localName + (i || r ? "[" + (i + 1) + "]" : "")), n = n.parentNode | ||||
|     } | ||||
|     return o.length ? "/" + o.reverse().join("/") : "" | ||||
| } | ||||
| 
 | ||||
| const findUpTag = (el) => { | ||||
|     let r = el | ||||
| @@ -59,14 +58,14 @@ const findUpTag = (el) => { | ||||
| 
 | ||||
|     // Strategy 2: Keep going up until we hit an ID tag, imagine it's like  #list-widget div h4
 | ||||
|     while (r.parentNode) { | ||||
|         if (depth == 5) { | ||||
|         if (depth === 5) { | ||||
|             break; | ||||
|         } | ||||
|         if ('' !== r.id) { | ||||
|             chained_css.unshift("#" + CSS.escape(r.id)); | ||||
|             final_selector = chained_css.join(' > '); | ||||
|             // Be sure theres only one, some sites have multiples of the same ID tag :-(
 | ||||
|             if (window.document.querySelectorAll(final_selector).length == 1) { | ||||
|             if (window.document.querySelectorAll(final_selector).length === 1) { | ||||
|                 return final_selector; | ||||
|             } | ||||
|             return null; | ||||
| @@ -82,30 +81,60 @@ const findUpTag = (el) => { | ||||
| 
 | ||||
| // @todo - if it's SVG or IMG, go into image diff mode
 | ||||
| // %ELEMENTS% replaced at injection time because different interfaces use it with different settings
 | ||||
| var elements = window.document.querySelectorAll("%ELEMENTS%"); | ||||
| 
 | ||||
| var size_pos = []; | ||||
| // after page fetch, inject this JS
 | ||||
| // build a map of all elements and their positions (maybe that only include text?)
 | ||||
| var bbox; | ||||
| for (var i = 0; i < elements.length; i++) { | ||||
|     bbox = elements[i].getBoundingClientRect(); | ||||
| console.log("Scanning %ELEMENTS%"); | ||||
| 
 | ||||
|     // Exclude items that are not interactable or visible
 | ||||
|     if(elements[i].style.opacity === "0") { | ||||
|         continue | ||||
| function collectVisibleElements(parent, visibleElements) { | ||||
|     if (!parent) return; // Base case: if parent is null or undefined, return
 | ||||
| 
 | ||||
| 
 | ||||
|     // Add the parent itself to the visible elements array if it's of the specified types
 | ||||
|     const tagName = parent.tagName.toLowerCase(); | ||||
|     if ("%ELEMENTS%".split(',').includes(tagName)) { | ||||
|         visibleElements.push(parent); | ||||
|     } | ||||
|     if(elements[i].style.display === "none" || elements[i].style.pointerEvents === "none" ) { | ||||
|         continue | ||||
| 
 | ||||
|     // Iterate over the parent's children
 | ||||
|     const children = parent.children; | ||||
|     for (let i = 0; i < children.length; i++) { | ||||
|         const child = children[i]; | ||||
|         if ( | ||||
|             child.nodeType === Node.ELEMENT_NODE && | ||||
|             window.getComputedStyle(child).display !== 'none' && | ||||
|             window.getComputedStyle(child).visibility !== 'hidden' && | ||||
|             child.offsetWidth >= 0 && | ||||
|             child.offsetHeight >= 0 && | ||||
|             window.getComputedStyle(child).contentVisibility !== 'hidden' | ||||
|         ) { | ||||
|             // If the child is an element and is visible, recursively collect visible elements
 | ||||
|             collectVisibleElements(child, visibleElements); | ||||
|         } | ||||
|     } | ||||
| } | ||||
| 
 | ||||
| // Create an array to hold the visible elements
 | ||||
| const visibleElementsArray = []; | ||||
| 
 | ||||
| // Call collectVisibleElements with the starting parent element
 | ||||
| collectVisibleElements(document.body, visibleElementsArray); | ||||
| 
 | ||||
| 
 | ||||
| visibleElementsArray.forEach(function (element) { | ||||
| 
 | ||||
|     bbox = element.getBoundingClientRect(); | ||||
| 
 | ||||
|     // Skip really small ones, and where width or height ==0
 | ||||
|     if (bbox['width'] * bbox['height'] < 100) { | ||||
|         continue; | ||||
|     if (bbox['width'] * bbox['height'] < 10) { | ||||
|         return | ||||
|     } | ||||
| 
 | ||||
|     // Don't include elements that are offset from canvas
 | ||||
|     if (bbox['top']+scroll_y < 0 || bbox['left'] < 0) { | ||||
|         continue; | ||||
|     if (bbox['top'] + scroll_y < 0 || bbox['left'] < 0) { | ||||
|         return | ||||
|     } | ||||
| 
 | ||||
|     // @todo the getXpath kind of sucks, it doesnt know when there is for example just one ID sometimes
 | ||||
| @@ -114,46 +143,41 @@ for (var i = 0; i < elements.length; i++) { | ||||
| 
 | ||||
|     // 1st primitive - if it has class, try joining it all and select, if theres only one.. well thats us.
 | ||||
|     xpath_result = false; | ||||
| 
 | ||||
|     try { | ||||
|         var d = findUpTag(elements[i]); | ||||
|         var d = findUpTag(element); | ||||
|         if (d) { | ||||
|             xpath_result = d; | ||||
|         } | ||||
|     } catch (e) { | ||||
|         console.log(e); | ||||
|     } | ||||
| 
 | ||||
|     // You could swap it and default to getXpath and then try the smarter one
 | ||||
|     // default back to the less intelligent one
 | ||||
|     if (!xpath_result) { | ||||
|         try { | ||||
|             // I've seen on FB and eBay that this doesnt work
 | ||||
|             // ReferenceError: getXPath is not defined at eval (eval at evaluate (:152:29), <anonymous>:67:20) at UtilityScript.evaluate (<anonymous>:159:18) at UtilityScript.<anonymous> (<anonymous>:1:44)
 | ||||
|             xpath_result = getxpath(elements[i]); | ||||
|             xpath_result = getxpath(element); | ||||
|         } catch (e) { | ||||
|             console.log(e); | ||||
|             continue; | ||||
|             return | ||||
|         } | ||||
|     } | ||||
| 
 | ||||
|     if (window.getComputedStyle(elements[i]).visibility === "hidden") { | ||||
|         continue; | ||||
|     } | ||||
| 
 | ||||
|     // @todo Possible to ONLY list where it's clickable to save JSON xfer size
 | ||||
|     size_pos.push({ | ||||
|         xpath: xpath_result, | ||||
|         width: Math.round(bbox['width']), | ||||
|         height: Math.round(bbox['height']), | ||||
|         left: Math.floor(bbox['left']), | ||||
|         top: Math.floor(bbox['top'])+scroll_y, | ||||
|         tagName: (elements[i].tagName) ? elements[i].tagName.toLowerCase() : '', | ||||
|         tagtype: (elements[i].tagName == 'INPUT' && elements[i].type) ? elements[i].type.toLowerCase() : '', | ||||
|         isClickable: (elements[i].onclick) || window.getComputedStyle(elements[i]).cursor == "pointer" | ||||
|         top: Math.floor(bbox['top']) + scroll_y, | ||||
|         tagName: (element.tagName) ? element.tagName.toLowerCase() : '', | ||||
|         tagtype: (element.tagName.toLowerCase() === 'input' && element.type) ? element.type.toLowerCase() : '', | ||||
|         isClickable: window.getComputedStyle(element).cursor == "pointer" | ||||
|     }); | ||||
| 
 | ||||
| } | ||||
| }); | ||||
| 
 | ||||
| 
 | ||||
| // Inject the current one set in the include_filters, which may be a CSS rule
 | ||||
| // used for displaying the current one in VisualSelector, where its not one we generated.
 | ||||
| @@ -180,7 +204,7 @@ if (include_filters.length) { | ||||
|             } | ||||
|         } catch (e) { | ||||
|             // Maybe catch DOMException and alert?
 | ||||
|             console.log("xpath_element_scraper: Exception selecting element from filter "+f); | ||||
|             console.log("xpath_element_scraper: Exception selecting element from filter " + f); | ||||
|             console.log(e); | ||||
|         } | ||||
| 
 | ||||
| @@ -210,8 +234,8 @@ if (include_filters.length) { | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|          | ||||
|         if(!q) { | ||||
| 
 | ||||
|         if (!q) { | ||||
|             console.log("xpath_element_scraper: filter element " + f + " was not found"); | ||||
|         } | ||||
| 
 | ||||
| @@ -221,7 +245,7 @@ if (include_filters.length) { | ||||
|                 width: parseInt(bbox['width']), | ||||
|                 height: parseInt(bbox['height']), | ||||
|                 left: parseInt(bbox['left']), | ||||
|                 top: parseInt(bbox['top'])+scroll_y | ||||
|                 top: parseInt(bbox['top']) + scroll_y | ||||
|             }); | ||||
|         } | ||||
|     } | ||||
| @@ -229,7 +253,7 @@ if (include_filters.length) { | ||||
| 
 | ||||
| // Sort the elements so we find the smallest one first, in other words, we find the smallest one matching in that area
 | ||||
| // so that we dont select the wrapping element by mistake and be unable to select what we want
 | ||||
| size_pos.sort((a, b) => (a.width*a.height > b.width*b.height) ? 1 : -1) | ||||
| size_pos.sort((a, b) => (a.width * a.height > b.width * b.height) ? 1 : -1) | ||||
| 
 | ||||
| // Window.width required for proper scaling in the frontend
 | ||||
| return {'size_pos': size_pos, 'browser_width': window.innerWidth}; | ||||
							
								
								
									
										119
									
								
								changedetectionio/content_fetchers/webdriver_selenium.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										119
									
								
								changedetectionio/content_fetchers/webdriver_selenium.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,119 @@ | ||||
| import os | ||||
| import time | ||||
|  | ||||
| from loguru import logger | ||||
| from changedetectionio.content_fetchers.base import Fetcher | ||||
|  | ||||
| class fetcher(Fetcher): | ||||
|     if os.getenv("WEBDRIVER_URL"): | ||||
|         fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL")) | ||||
|     else: | ||||
|         fetcher_description = "WebDriver Chrome/Javascript" | ||||
|  | ||||
|     # Configs for Proxy setup | ||||
|     # In the ENV vars, is prefixed with "webdriver_", so it is for example "webdriver_sslProxy" | ||||
|     selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy', | ||||
|                                         'proxyAutoconfigUrl', 'sslProxy', 'autodetect', | ||||
|                                         'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword'] | ||||
|     proxy = None | ||||
|  | ||||
|     def __init__(self, proxy_override=None, custom_browser_connection_url=None): | ||||
|         super().__init__() | ||||
|         from selenium.webdriver.common.proxy import Proxy as SeleniumProxy | ||||
|  | ||||
|         # .strip('"') is going to save someone a lot of time when they accidently wrap the env value | ||||
|         if not custom_browser_connection_url: | ||||
|             self.browser_connection_url = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"') | ||||
|         else: | ||||
|             self.browser_connection_is_custom = True | ||||
|             self.browser_connection_url = custom_browser_connection_url | ||||
|  | ||||
|         # If any proxy settings are enabled, then we should setup the proxy object | ||||
|         proxy_args = {} | ||||
|         for k in self.selenium_proxy_settings_mappings: | ||||
|             v = os.getenv('webdriver_' + k, False) | ||||
|             if v: | ||||
|                 proxy_args[k] = v.strip('"') | ||||
|  | ||||
|         # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy | ||||
|         if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy: | ||||
|             proxy_args['httpProxy'] = self.system_http_proxy | ||||
|         if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy: | ||||
|             proxy_args['httpsProxy'] = self.system_https_proxy | ||||
|  | ||||
|         # Allows override the proxy on a per-request basis | ||||
|         if proxy_override is not None: | ||||
|             proxy_args['httpProxy'] = proxy_override | ||||
|  | ||||
|         if proxy_args: | ||||
|             self.proxy = SeleniumProxy(raw=proxy_args) | ||||
|  | ||||
|     def run(self, | ||||
|             url, | ||||
|             timeout, | ||||
|             request_headers, | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|  | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|         from selenium.common.exceptions import WebDriverException | ||||
|         # request_body, request_method unused for now, until some magic in the future happens. | ||||
|  | ||||
|         options = ChromeOptions() | ||||
|         if self.proxy: | ||||
|             options.proxy = self.proxy | ||||
|  | ||||
|         self.driver = webdriver.Remote( | ||||
|             command_executor=self.browser_connection_url, | ||||
|             options=options) | ||||
|  | ||||
|         try: | ||||
|             self.driver.get(url) | ||||
|         except WebDriverException as e: | ||||
|             # Be sure we close the session window | ||||
|             self.quit() | ||||
|             raise | ||||
|  | ||||
|         self.driver.set_window_size(1280, 1024) | ||||
|         self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|         if self.webdriver_js_execute_code is not None: | ||||
|             self.driver.execute_script(self.webdriver_js_execute_code) | ||||
|             # Selenium doesn't automatically wait for actions as good as Playwright, so wait again | ||||
|             self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|         # @todo - how to check this? is it possible? | ||||
|         self.status_code = 200 | ||||
|         # @todo somehow we should try to get this working for WebDriver | ||||
|         # raise EmptyReply(url=url, status_code=r.status_code) | ||||
|  | ||||
|         # @todo - dom wait loaded? | ||||
|         time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay) | ||||
|         self.content = self.driver.page_source | ||||
|         self.headers = {} | ||||
|  | ||||
|         self.screenshot = self.driver.get_screenshot_as_png() | ||||
|  | ||||
|     # Does the connection to the webdriver work? run a test connection. | ||||
|     def is_ready(self): | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.chrome.options import Options as ChromeOptions | ||||
|  | ||||
|         self.driver = webdriver.Remote( | ||||
|             command_executor=self.command_executor, | ||||
|             options=ChromeOptions()) | ||||
|  | ||||
|         # driver.quit() seems to cause better exceptions | ||||
|         self.quit() | ||||
|         return True | ||||
|  | ||||
|     def quit(self): | ||||
|         if self.driver: | ||||
|             try: | ||||
|                 self.driver.quit() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}") | ||||
| @@ -1,26 +1,19 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from copy import deepcopy | ||||
| from distutils.util import strtobool | ||||
| from feedgen.feed import FeedGenerator | ||||
| from flask_compress import Compress as FlaskCompress | ||||
| from flask_login import current_user | ||||
| from flask_restful import abort, Api | ||||
| from flask_wtf import CSRFProtect | ||||
| from functools import wraps | ||||
| from threading import Event | ||||
| import datetime | ||||
| import flask_login | ||||
| from loguru import logger | ||||
| import sys | ||||
| import os | ||||
| import pytz | ||||
| import queue | ||||
| import threading | ||||
| import time | ||||
| import timeago | ||||
| from copy import deepcopy | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from functools import wraps | ||||
| from threading import Event | ||||
|  | ||||
| import flask_login | ||||
| import pytz | ||||
| import timeago | ||||
| from feedgen.feed import FeedGenerator | ||||
| from flask import ( | ||||
|     Flask, | ||||
|     abort, | ||||
| @@ -33,10 +26,16 @@ from flask import ( | ||||
|     session, | ||||
|     url_for, | ||||
| ) | ||||
|  | ||||
| from flask_compress import Compress as FlaskCompress | ||||
| from flask_login import current_user | ||||
| from flask_paginate import Pagination, get_page_parameter | ||||
| from flask_restful import abort, Api | ||||
| from flask_cors import CORS | ||||
| from flask_wtf import CSRFProtect | ||||
| from loguru import logger | ||||
|  | ||||
| from changedetectionio import html_tools, __version__ | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio.api import api_v1 | ||||
|  | ||||
| datastore = None | ||||
| @@ -55,6 +54,9 @@ app = Flask(__name__, | ||||
|             static_folder="static", | ||||
|             template_folder="templates") | ||||
|  | ||||
| # Enable CORS, especially useful for the Chrome extension to operate from anywhere | ||||
| CORS(app) | ||||
|  | ||||
| # Super handy for compressing large BrowserSteps responses and others | ||||
| FlaskCompress(app) | ||||
|  | ||||
| @@ -317,6 +319,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|     @app.route("/rss", methods=['GET']) | ||||
|     def rss(): | ||||
|         from jinja2 import Environment, BaseLoader | ||||
|         jinja2_env = Environment(loader=BaseLoader) | ||||
|         now = time.time() | ||||
|         # Always requires token set | ||||
|         app_rss_token = datastore.data['settings']['application'].get('rss_access_token') | ||||
|         rss_url_token = request.args.get('token') | ||||
| @@ -380,8 +385,12 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                                              include_equal=False, | ||||
|                                              line_feed_sep="<br>") | ||||
|  | ||||
|                 fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff), | ||||
|                            type='CDATA') | ||||
|                 # @todo Make this configurable and also consider html-colored markup | ||||
|                 # @todo User could decide if <link> goes to the diff page, or to the watch link | ||||
|                 rss_template = "<html><body>\n<h4><a href=\"{{watch_url}}\">{{watch_title}}</a></h4>\n<p>{{html_diff}}</p>\n</body></html>\n" | ||||
|                 content = jinja2_env.from_string(rss_template).render(watch_title=watch_title, html_diff=html_diff, watch_url=watch.link) | ||||
|  | ||||
|                 fe.content(content=content, type='CDATA') | ||||
|  | ||||
|                 fe.guid(guid, permalink=False) | ||||
|                 dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key)) | ||||
| @@ -390,6 +399,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         response = make_response(fg.rss_str()) | ||||
|         response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8') | ||||
|         logger.trace(f"RSS generated in {time.time() - now:.3f}s") | ||||
|         return response | ||||
|  | ||||
|     @app.route("/", methods=['GET']) | ||||
| @@ -398,17 +408,21 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         global datastore | ||||
|         from changedetectionio import forms | ||||
|  | ||||
|         limit_tag = request.args.get('tag', '').lower().strip() | ||||
|         active_tag_req = request.args.get('tag', '').lower().strip() | ||||
|         active_tag_uuid = active_tag = None | ||||
|  | ||||
|         # Be sure limit_tag is a uuid | ||||
|         for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items(): | ||||
|             if limit_tag == tag.get('title', '').lower().strip(): | ||||
|                 limit_tag = uuid | ||||
|         if active_tag_req: | ||||
|             for uuid, tag in datastore.data['settings']['application'].get('tags', {}).items(): | ||||
|                 if active_tag_req == tag.get('title', '').lower().strip() or active_tag_req == uuid: | ||||
|                     active_tag = tag | ||||
|                     active_tag_uuid = uuid | ||||
|                     break | ||||
|  | ||||
|  | ||||
|         # Redirect for the old rss path which used the /?rss=true | ||||
|         if request.args.get('rss'): | ||||
|             return redirect(url_for('rss', tag=limit_tag)) | ||||
|             return redirect(url_for('rss', tag=active_tag_uuid)) | ||||
|  | ||||
|         op = request.args.get('op') | ||||
|         if op: | ||||
| @@ -419,7 +433,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 datastore.data['watching'][uuid].toggle_mute() | ||||
|  | ||||
|             datastore.needs_write = True | ||||
|             return redirect(url_for('index', tag = limit_tag)) | ||||
|             return redirect(url_for('index', tag = active_tag_uuid)) | ||||
|  | ||||
|         # Sort by last_changed and add the uuid which is usually the key.. | ||||
|         sorted_watches = [] | ||||
| @@ -430,7 +444,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             if with_errors and not watch.get('last_error'): | ||||
|                 continue | ||||
|  | ||||
|             if limit_tag and not limit_tag in watch['tags']: | ||||
|             if active_tag_uuid and not active_tag_uuid in watch['tags']: | ||||
|                     continue | ||||
|             if watch.get('last_error'): | ||||
|                 errored_count += 1 | ||||
| @@ -449,11 +463,12 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                                 total=total_count, | ||||
|                                 per_page=datastore.data['settings']['application'].get('pager_size', 50), css_framework="semantic") | ||||
|  | ||||
|  | ||||
|         sorted_tags = sorted(datastore.data['settings']['application'].get('tags').items(), key=lambda x: x[1]['title']) | ||||
|         output = render_template( | ||||
|             "watch-overview.html", | ||||
|                                  # Don't link to hosting when we're on the hosting environment | ||||
|                                  active_tag=limit_tag, | ||||
|                                  active_tag=active_tag, | ||||
|                                  active_tag_uuid=active_tag_uuid, | ||||
|                                  app_rss_token=datastore.data['settings']['application']['rss_access_token'], | ||||
|                                  datastore=datastore, | ||||
|                                  errored_count=errored_count, | ||||
| @@ -468,7 +483,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                                  sort_attribute=request.args.get('sort') if request.args.get('sort') else request.cookies.get('sort'), | ||||
|                                  sort_order=request.args.get('order') if request.args.get('order') else request.cookies.get('order'), | ||||
|                                  system_default_fetcher=datastore.data['settings']['application'].get('fetch_backend'), | ||||
|                                  tags=datastore.data['settings']['application'].get('tags'), | ||||
|                                  tags=sorted_tags, | ||||
|                                  watches=sorted_watches | ||||
|                                  ) | ||||
|  | ||||
| @@ -501,21 +516,38 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         watch = datastore.data['watching'].get(watch_uuid) if watch_uuid else None | ||||
|  | ||||
|         # validate URLS | ||||
|         if not len(request.form['notification_urls'].strip()): | ||||
|             return make_response({'error': 'No Notification URLs set'}, 400) | ||||
|         notification_urls = request.form['notification_urls'].strip().splitlines() | ||||
|  | ||||
|         for server_url in request.form['notification_urls'].splitlines(): | ||||
|             if len(server_url.strip()): | ||||
|                 if not apobj.add(server_url): | ||||
|                     message = '{} is not a valid AppRise URL.'.format(server_url) | ||||
|                     return make_response({'error': message}, 400) | ||||
|         if not notification_urls: | ||||
|             logger.debug("Test notification - Trying by group/tag in the edit form if available") | ||||
|             # On an edit page, we should also fire off to the tags if they have notifications | ||||
|             if request.form.get('tags') and request.form['tags'].strip(): | ||||
|                 for k in request.form['tags'].split(','): | ||||
|                     tag = datastore.tag_exists_by_name(k.strip()) | ||||
|                     notification_urls = tag.get('notifications_urls') if tag and tag.get('notifications_urls') else None | ||||
|  | ||||
|         is_global_settings_form = request.args.get('mode', '') == 'global-settings' | ||||
|         is_group_settings_form = request.args.get('mode', '') == 'group-settings' | ||||
|         if not notification_urls and not is_global_settings_form and not is_group_settings_form: | ||||
|             # In the global settings, use only what is typed currently in the text box | ||||
|             logger.debug("Test notification - Trying by global system settings notifications") | ||||
|             if datastore.data['settings']['application'].get('notification_urls'): | ||||
|                 notification_urls = datastore.data['settings']['application']['notification_urls'] | ||||
|  | ||||
|  | ||||
|         if not notification_urls: | ||||
|             return 'No Notification URLs set/found' | ||||
|  | ||||
|         for n_url in notification_urls: | ||||
|             if len(n_url.strip()): | ||||
|                 if not apobj.add(n_url): | ||||
|                     return f'Error - {n_url} is not a valid AppRise URL.' | ||||
|  | ||||
|         try: | ||||
|             # use the same as when it is triggered, but then override it with the form test values | ||||
|             n_object = { | ||||
|                 'watch_url': request.form['window_url'], | ||||
|                 'notification_urls': request.form['notification_urls'].splitlines() | ||||
|                 'notification_urls': notification_urls | ||||
|             } | ||||
|  | ||||
|             # Only use if present, if not set in n_object it should use the default system value | ||||
| @@ -534,7 +566,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         except Exception as e: | ||||
|             return make_response({'error': str(e)}, 400) | ||||
|  | ||||
|         return 'OK' | ||||
|         return 'OK - Sent test notifications' | ||||
|  | ||||
|  | ||||
|     @app.route("/clear_history/<string:uuid>", methods=['GET']) | ||||
| @@ -571,6 +603,12 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         output = render_template("clear_all_history.html") | ||||
|         return output | ||||
|  | ||||
|     def _watch_has_tag_options_set(watch): | ||||
|         """This should be fixed better so that Tag is some proper Model, a tag is just a Watch also""" | ||||
|         for tag_uuid, tag in datastore.data['settings']['application'].get('tags', {}).items(): | ||||
|             if tag_uuid in watch.get('tags', []) and (tag.get('include_filters') or tag.get('subtractive_selectors')): | ||||
|                 return True | ||||
|  | ||||
|     @app.route("/edit/<string:uuid>", methods=['GET', 'POST']) | ||||
|     @login_optionally_required | ||||
|     # https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists | ||||
| @@ -741,6 +779,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                                      has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False, | ||||
|                                      has_empty_checktime=using_default_check_time, | ||||
|                                      has_extra_headers_file=len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0, | ||||
|                                      has_special_tag_options=_watch_has_tag_options_set(watch=watch), | ||||
|                                      is_html_webdriver=is_html_webdriver, | ||||
|                                      jq_support=jq_support, | ||||
|                                      playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False), | ||||
| @@ -756,7 +795,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     @app.route("/settings", methods=['GET', "POST"]) | ||||
|     @login_optionally_required | ||||
|     def settings_page(): | ||||
|         from changedetectionio import content_fetcher, forms | ||||
|         from changedetectionio import forms | ||||
|  | ||||
|         default = deepcopy(datastore.data['settings']) | ||||
|         if datastore.proxy_list is not None: | ||||
| @@ -1264,9 +1303,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         url = request.form.get('url').strip() | ||||
|         if datastore.url_exists(url): | ||||
|             flash('The URL {} already exists'.format(url), "error") | ||||
|             return redirect(url_for('index')) | ||||
|  | ||||
|             flash(f'Warning, URL {url} already exists', "notice") | ||||
|              | ||||
|         add_paused = request.form.get('edit_and_watch_submit_button') != None | ||||
|         processor = request.form.get('processor', 'text_json_diff') | ||||
|         new_uuid = datastore.add_watch(url=url, tag=request.form.get('tags').strip(), extras={'paused': add_paused, 'processor': processor}) | ||||
| @@ -1416,6 +1454,13 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|             flash("{} watches queued for rechecking".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'clear-errors'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     datastore.data['watching'][uuid]["last_error"] = False | ||||
|             flash(f"{len(uuids)} watches errors cleared") | ||||
|  | ||||
|         elif (op == 'clear-history'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
| @@ -1603,7 +1648,7 @@ def notification_runner(): | ||||
|                     n_object['notification_title'] = datastore.data['settings']['application'].get('notification_title') | ||||
|  | ||||
|                 if not n_object.get('notification_format') and datastore.data['settings']['application'].get('notification_format'): | ||||
|                     n_object['notification_title'] = datastore.data['settings']['application'].get('notification_format') | ||||
|                     n_object['notification_format'] = datastore.data['settings']['application'].get('notification_format') | ||||
|  | ||||
|                 sent_obj = notification.process_notification(n_object, datastore) | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| import os | ||||
| import re | ||||
| from distutils.util import strtobool | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from wtforms import ( | ||||
|     BooleanField, | ||||
| @@ -27,7 +27,7 @@ from validators.url import url as url_validator | ||||
| # each select <option data-enabled="enabled-0-0" | ||||
| from changedetectionio.blueprint.browser_steps.browser_steps import browser_step_ui_config | ||||
|  | ||||
| from changedetectionio import content_fetcher, html_tools | ||||
| from changedetectionio import html_tools, content_fetchers | ||||
|  | ||||
| from changedetectionio.notification import ( | ||||
|     valid_notification_formats, | ||||
| @@ -167,33 +167,31 @@ class ValidateContentFetcherIsReady(object): | ||||
|         self.message = message | ||||
|  | ||||
|     def __call__(self, form, field): | ||||
|         import urllib3.exceptions | ||||
|         from changedetectionio import content_fetcher | ||||
|         return | ||||
|  | ||||
| # AttributeError: module 'changedetectionio.content_fetcher' has no attribute 'extra_browser_unlocked<>ASDF213r123r' | ||||
|         # Better would be a radiohandler that keeps a reference to each class | ||||
|         if field.data is not None and field.data != 'system': | ||||
|             klass = getattr(content_fetcher, field.data) | ||||
|             some_object = klass() | ||||
|             try: | ||||
|                 ready = some_object.is_ready() | ||||
|  | ||||
|             except urllib3.exceptions.MaxRetryError as e: | ||||
|                 driver_url = some_object.command_executor | ||||
|                 message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data)) | ||||
|                 message += '<br>' + field.gettext( | ||||
|                     'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.') | ||||
|                 message += '<br>' + field.gettext('Did you follow the instructions in the wiki?') | ||||
|                 message += '<br><br>' + field.gettext('WebDriver Host: %s' % (driver_url)) | ||||
|                 message += '<br><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>' | ||||
|                 message += '<br>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e))) | ||||
|  | ||||
|                 raise ValidationError(message) | ||||
|  | ||||
|             except Exception as e: | ||||
|                 message = field.gettext('Content fetcher \'%s\' did not respond properly, unable to use it.\n %s') | ||||
|                 raise ValidationError(message % (field.data, e)) | ||||
|         # if field.data is not None and field.data != 'system': | ||||
|         #     klass = getattr(content_fetcher, field.data) | ||||
|         #     some_object = klass() | ||||
|         #     try: | ||||
|         #         ready = some_object.is_ready() | ||||
|         # | ||||
|         #     except urllib3.exceptions.MaxRetryError as e: | ||||
|         #         driver_url = some_object.command_executor | ||||
|         #         message = field.gettext('Content fetcher \'%s\' did not respond.' % (field.data)) | ||||
|         #         message += '<br>' + field.gettext( | ||||
|         #             'Be sure that the selenium/webdriver runner is running and accessible via network from this container/host.') | ||||
|         #         message += '<br>' + field.gettext('Did you follow the instructions in the wiki?') | ||||
|         #         message += '<br><br>' + field.gettext('WebDriver Host: %s' % (driver_url)) | ||||
|         #         message += '<br><a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">Go here for more information</a>' | ||||
|         #         message += '<br>'+field.gettext('Content fetcher did not respond properly, unable to use it.\n %s' % (str(e))) | ||||
|         # | ||||
|         #         raise ValidationError(message) | ||||
|         # | ||||
|         #     except Exception as e: | ||||
|         #         message = field.gettext('Content fetcher \'%s\' did not respond properly, unable to use it.\n %s') | ||||
|         #         raise ValidationError(message % (field.data, e)) | ||||
|  | ||||
|  | ||||
| class ValidateNotificationBodyAndTitleWhenURLisSet(object): | ||||
| @@ -421,7 +419,7 @@ class commonSettingsForm(Form): | ||||
|     notification_title = StringField('Notification Title', default='ChangeDetection.io Notification - {{ watch_url }}', validators=[validators.Optional(), ValidateJinja2Template()]) | ||||
|     notification_body = TextAreaField('Notification Body', default='{{ watch_url }} had a change.', validators=[validators.Optional(), ValidateJinja2Template()]) | ||||
|     notification_format = SelectField('Notification format', choices=valid_notification_formats.keys()) | ||||
|     fetch_backend = RadioField(u'Fetch Method', choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     fetch_backend = RadioField(u'Fetch Method', choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     extract_title_as_title = BooleanField('Extract <title> from document and use as watch title', default=False) | ||||
|     webdriver_delay = IntegerField('Wait seconds before extracting text', validators=[validators.Optional(), validators.NumberRange(min=1, | ||||
|                                                                                                                                     message="Should contain one or more seconds")]) | ||||
| @@ -465,6 +463,7 @@ class watchForm(commonSettingsForm): | ||||
|     method = SelectField('Request method', choices=valid_method, default=default_method) | ||||
|     ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False) | ||||
|     check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False) | ||||
|     sort_text_alphabetically =  BooleanField('Sort text alphabetically', default=False) | ||||
|  | ||||
|     filter_text_added = BooleanField('Added lines', default=True) | ||||
|     filter_text_replaced = BooleanField('Replaced/changed lines', default=True) | ||||
| @@ -551,7 +550,7 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|                            render_kw={"placeholder": os.getenv('BASE_URL', 'Not set')} | ||||
|                            ) | ||||
|     empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False) | ||||
|     fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetchers.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) | ||||
|     global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) | ||||
|     ignore_whitespace = BooleanField('Ignore whitespace') | ||||
|   | ||||
| @@ -169,14 +169,14 @@ def xpath1_filter(xpath_filter, html_content, append_pretty_line_formatting=Fals | ||||
|         # And where the matched result doesn't include something that will cause Inscriptis to add a newline | ||||
|         # (This way each 'match' reliably has a new-line in the diff) | ||||
|         # Divs are converted to 4 whitespaces by inscriptis | ||||
|         if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])): | ||||
|         if append_pretty_line_formatting and len(html_block) and (not hasattr(element, 'tag') or not element.tag in (['br', 'hr', 'div', 'p'])): | ||||
|             html_block += TEXT_FILTER_LIST_LINE_SUFFIX | ||||
|  | ||||
|         if type(element) == etree._ElementStringResult: | ||||
|             html_block += str(element) | ||||
|         elif type(element) == etree._ElementUnicodeResult: | ||||
|             html_block += str(element) | ||||
|         # Some kind of text, UTF-8 or other | ||||
|         if isinstance(element, (str, bytes)): | ||||
|             html_block += element | ||||
|         else: | ||||
|             # Return the HTML which will get parsed as text | ||||
|             html_block += etree.tostring(element, pretty_print=True).decode('utf-8') | ||||
|  | ||||
|     return html_block | ||||
|   | ||||
| @@ -57,7 +57,7 @@ class import_url_list(Importer): | ||||
|  | ||||
|             # Flask wtform validators wont work with basic auth, use validators package | ||||
|             # Up to 5000 per batch so we dont flood the server | ||||
|             # @todo validators.url failed on local hostnames (such as referring to ourself when using browserless) | ||||
|             # @todo validators.url will fail when you add your own IP etc | ||||
|             if len(url) and 'http' in url.lower() and good < 5000: | ||||
|                 extras = None | ||||
|                 if processor: | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| from distutils.util import strtobool | ||||
| from changedetectionio.strtobool import strtobool | ||||
| import os | ||||
| import re | ||||
| import time | ||||
| @@ -45,6 +45,7 @@ base_config = { | ||||
|     'last_error': False, | ||||
|     'last_viewed': 0,  # history key value of the last viewed via the [diff] link | ||||
|     'method': 'GET', | ||||
|     'notification_alert_count': 0, | ||||
|     # Custom notification content | ||||
|     'notification_body': None, | ||||
|     'notification_format': default_notification_format_for_watch, | ||||
| @@ -56,6 +57,8 @@ base_config = { | ||||
|     'previous_md5': False, | ||||
|     'previous_md5_before_filters': False,  # Used for skipping changedetection entirely | ||||
|     'proxy': None,  # Preferred proxy connection | ||||
|     'remote_server_reply': None, # From 'server' reply header | ||||
|     'sort_text_alphabetically': False, | ||||
|     'subtractive_selectors': [], | ||||
|     'tag': '', # Old system of text name for a tag, to be removed | ||||
|     'tags': [], # list of UUIDs to App.Tags | ||||
| @@ -246,10 +249,10 @@ class model(dict): | ||||
|     @property | ||||
|     def has_browser_steps(self): | ||||
|         has_browser_steps = self.get('browser_steps') and list(filter( | ||||
|                 lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), | ||||
|                 self.get('browser_steps'))) | ||||
|             lambda s: (s['operation'] and len(s['operation']) and s['operation'] != 'Choose one' and s['operation'] != 'Goto site'), | ||||
|             self.get('browser_steps'))) | ||||
|  | ||||
|         return  has_browser_steps | ||||
|         return has_browser_steps | ||||
|  | ||||
|     # Returns the newest key, but if theres only 1 record, then it's counted as not being new, so return 0. | ||||
|     @property | ||||
| @@ -359,6 +362,7 @@ class model(dict): | ||||
|         # @todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status | ||||
|         return snapshot_fname | ||||
|  | ||||
|     @property | ||||
|     @property | ||||
|     def has_empty_checktime(self): | ||||
|         # using all() + dictionary comprehension | ||||
|   | ||||
| @@ -116,6 +116,9 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs): | ||||
|  | ||||
| def process_notification(n_object, datastore): | ||||
|  | ||||
|     now = time.time() | ||||
|     if n_object.get('notification_timestamp'): | ||||
|         logger.trace(f"Time since queued {now-n_object['notification_timestamp']:.3f}s") | ||||
|     # Insert variables into the notification content | ||||
|     notification_parameters = create_notification_parameters(n_object, datastore) | ||||
|  | ||||
| @@ -133,6 +136,8 @@ def process_notification(n_object, datastore): | ||||
|         # Initially text or whatever | ||||
|         n_format = datastore.data['settings']['application'].get('notification_format', valid_notification_formats[default_notification_format]) | ||||
|  | ||||
|     logger.trace(f"Complete notification body including Jinja and placeholders calculated in  {time.time() - now:.3f}s") | ||||
|  | ||||
|     # https://github.com/caronc/apprise/wiki/Development_LogCapture | ||||
|     # Anything higher than or equal to WARNING (which covers things like Connection errors) | ||||
|     # raise it as an exception | ||||
| @@ -147,6 +152,10 @@ def process_notification(n_object, datastore): | ||||
|     with apprise.LogCapture(level=apprise.logging.DEBUG) as logs: | ||||
|         for url in n_object['notification_urls']: | ||||
|             url = url.strip() | ||||
|             if not url: | ||||
|                 logger.warning(f"Process Notification: skipping empty notification URL.") | ||||
|                 continue | ||||
|  | ||||
|             logger.info(">> Process Notification: AppRise notifying {}".format(url)) | ||||
|             url = jinja2_env.from_string(url).render(**notification_parameters) | ||||
|  | ||||
|   | ||||
| @@ -2,9 +2,8 @@ from abc import abstractmethod | ||||
| import os | ||||
| import hashlib | ||||
| import re | ||||
| from changedetectionio import content_fetcher | ||||
| from copy import deepcopy | ||||
| from distutils.util import strtobool | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from loguru import logger | ||||
|  | ||||
| class difference_detection_processor(): | ||||
| @@ -50,7 +49,7 @@ class difference_detection_processor(): | ||||
|             connection = list( | ||||
|                 filter(lambda s: (s['browser_name'] == key), self.datastore.data['settings']['requests'].get('extra_browsers', []))) | ||||
|             if connection: | ||||
|                 prefer_fetch_backend = 'base_html_playwright' | ||||
|                 prefer_fetch_backend = 'html_webdriver' | ||||
|                 custom_browser_connection_url = connection[0].get('browser_connection_url') | ||||
|  | ||||
|         # PDF should be html_requests because playwright will serve it up (so far) in a embedded page | ||||
| @@ -60,17 +59,28 @@ class difference_detection_processor(): | ||||
|            prefer_fetch_backend = "html_requests" | ||||
|  | ||||
|         # Grab the right kind of 'fetcher', (playwright, requests, etc) | ||||
|         if hasattr(content_fetcher, prefer_fetch_backend): | ||||
|             fetcher_obj = getattr(content_fetcher, prefer_fetch_backend) | ||||
|         from changedetectionio import content_fetchers | ||||
|         if hasattr(content_fetchers, prefer_fetch_backend): | ||||
|             # @todo TEMPORARY HACK - SWITCH BACK TO PLAYWRIGHT FOR BROWSERSTEPS | ||||
|             if prefer_fetch_backend == 'html_webdriver' and self.watch.has_browser_steps: | ||||
|                 # This is never supported in selenium anyway | ||||
|                 logger.warning("Using playwright fetcher override for possible puppeteer request in browsersteps, because puppetteer:browser steps is incomplete.") | ||||
|                 from changedetectionio.content_fetchers.playwright import fetcher as playwright_fetcher | ||||
|                 fetcher_obj = playwright_fetcher | ||||
|             else: | ||||
|                 fetcher_obj = getattr(content_fetchers, prefer_fetch_backend) | ||||
|         else: | ||||
|             # If the klass doesnt exist, just use a default | ||||
|             fetcher_obj = getattr(content_fetcher, "html_requests") | ||||
|  | ||||
|             # What it referenced doesnt exist, Just use a default | ||||
|             fetcher_obj = getattr(content_fetchers, "html_requests") | ||||
|  | ||||
|         proxy_url = None | ||||
|         if preferred_proxy_id: | ||||
|             proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url') | ||||
|             logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}") | ||||
|             # Custom browser endpoints should NOT have a proxy added | ||||
|             if not prefer_fetch_backend.startswith('extra_browser_'): | ||||
|                 proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url') | ||||
|                 logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}") | ||||
|             else: | ||||
|                 logger.debug(f"Skipping adding proxy data when custom Browser endpoint is specified. ") | ||||
|  | ||||
|         # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need. | ||||
|         # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc) | ||||
|   | ||||
| @@ -1,8 +1,9 @@ | ||||
|  | ||||
| import hashlib | ||||
| import urllib3 | ||||
| from . import difference_detection_processor | ||||
| from copy import deepcopy | ||||
| from loguru import logger | ||||
| import hashlib | ||||
| import urllib3 | ||||
|  | ||||
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | ||||
|  | ||||
| @@ -43,11 +44,13 @@ class perform_site_check(difference_detection_processor): | ||||
|             fetched_md5 = hashlib.md5(self.fetcher.instock_data.encode('utf-8')).hexdigest() | ||||
|             # 'Possibly in stock' comes from stock-not-in-stock.js when no string found above the fold. | ||||
|             update_obj["in_stock"] = True if self.fetcher.instock_data == 'Possibly in stock' else False | ||||
|             logger.debug(f"Watch UUID {uuid} restock check returned '{self.fetcher.instock_data}' from JS scraper.") | ||||
|         else: | ||||
|             raise UnableToExtractRestockData(status_code=self.fetcher.status_code) | ||||
|  | ||||
|         # The main thing that all this at the moment comes down to :) | ||||
|         changed_detected = False | ||||
|         logger.debug(f"Watch UUID {uuid} restock check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") | ||||
|  | ||||
|         if watch.get('previous_md5') and watch.get('previous_md5') != fetched_md5: | ||||
|             # Yes if we only care about it going to instock, AND we are in stock | ||||
| @@ -60,5 +63,4 @@ class perform_site_check(difference_detection_processor): | ||||
|  | ||||
|         # Always record the new checksum | ||||
|         update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|         return changed_detected, update_obj, self.fetcher.instock_data.encode('utf-8').strip() | ||||
|   | ||||
| @@ -6,11 +6,12 @@ import os | ||||
| import re | ||||
| import urllib3 | ||||
|  | ||||
| from changedetectionio import content_fetcher, html_tools | ||||
| from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT | ||||
| from copy import deepcopy | ||||
| from . import difference_detection_processor | ||||
| from ..html_tools import PERL_STYLE_REGEX, cdata_in_document_to_text | ||||
| from changedetectionio import html_tools, content_fetchers | ||||
| from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT | ||||
| import changedetectionio.content_fetchers | ||||
| from copy import deepcopy | ||||
| from loguru import logger | ||||
|  | ||||
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | ||||
| @@ -60,7 +61,7 @@ class perform_site_check(difference_detection_processor): | ||||
|         update_obj['previous_md5_before_filters'] = hashlib.md5(self.fetcher.content.encode('utf-8')).hexdigest() | ||||
|         if skip_when_checksum_same: | ||||
|             if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'): | ||||
|                 raise content_fetcher.checksumFromPreviousCheckWasTheSame() | ||||
|                 raise content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame() | ||||
|  | ||||
|         # Fetching complete, now filters | ||||
|  | ||||
| @@ -116,7 +117,9 @@ class perform_site_check(difference_detection_processor): | ||||
|         # and then use getattr https://docs.python.org/3/reference/datamodel.html#object.__getitem__ | ||||
|         # https://realpython.com/inherit-python-dict/ instead of doing it procedurely | ||||
|         include_filters_from_tags = self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='include_filters') | ||||
|         include_filters_rule = [*watch.get('include_filters', []), *include_filters_from_tags] | ||||
|  | ||||
|         # 1845 - remove duplicated filters in both group and watch include filter | ||||
|         include_filters_rule = list(dict.fromkeys(watch.get('include_filters', []) + include_filters_from_tags)) | ||||
|  | ||||
|         subtractive_selectors = [*self.datastore.get_tag_overrides_for_watch(uuid=uuid, attr='subtractive_selectors'), | ||||
|                                  *watch.get("subtractive_selectors", []), | ||||
| @@ -202,6 +205,12 @@ class perform_site_check(difference_detection_processor): | ||||
|                             is_rss=is_rss # #1874 activate the <title workaround hack | ||||
|                         ) | ||||
|  | ||||
|         if watch.get('sort_text_alphabetically') and stripped_text_from_html: | ||||
|             # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap | ||||
|             # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here. | ||||
|             stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n') | ||||
|             stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() )) | ||||
|  | ||||
|         # Re #340 - return the content before the 'ignore text' was applied | ||||
|         text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') | ||||
|  | ||||
| @@ -235,7 +244,7 @@ class perform_site_check(difference_detection_processor): | ||||
|         # Treat pages with no renderable text content as a change? No by default | ||||
|         empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|         if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: | ||||
|             raise content_fetcher.ReplyWithContentButNoText(url=url, | ||||
|             raise content_fetchers.exceptions.ReplyWithContentButNoText(url=url, | ||||
|                                                             status_code=self.fetcher.get_last_status_code(), | ||||
|                                                             screenshot=screenshot, | ||||
|                                                             has_filters=has_filter_rule, | ||||
| @@ -335,6 +344,8 @@ class perform_site_check(difference_detection_processor): | ||||
|                 if not watch['title'] or not len(watch['title']): | ||||
|                     update_obj['title'] = html_tools.extract_element(find='title', html_content=self.fetcher.content) | ||||
|  | ||||
|         logger.debug(f"Watch UUID {uuid} content check - Previous MD5: {watch.get('previous_md5')}, Fetched MD5 {fetched_md5}") | ||||
|  | ||||
|         if changed_detected: | ||||
|             if watch.get('check_unique_lines', False): | ||||
|                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines()) | ||||
|   | ||||
| @@ -1,132 +0,0 @@ | ||||
| function isItemInStock() { | ||||
|     // @todo Pass these in so the same list can be used in non-JS fetchers | ||||
|     const outOfStockTexts = [ | ||||
|         ' أخبرني عندما يتوفر', | ||||
|         '0 in stock', | ||||
|         'agotado', | ||||
|         'article épuisé', | ||||
|         'artikel zurzeit vergriffen', | ||||
|         'as soon as stock is available', | ||||
|         'ausverkauft', // sold out | ||||
|         'available for back order', | ||||
|         'back-order or out of stock', | ||||
|         'backordered', | ||||
|         'benachrichtigt mich', // notify me | ||||
|         'brak na stanie', | ||||
|         'brak w magazynie', | ||||
|         'coming soon', | ||||
|         'currently have any tickets for this', | ||||
|         'currently unavailable', | ||||
|         'dostępne wkrótce', | ||||
|         'en rupture de stock', | ||||
|         'ist derzeit nicht auf lager', | ||||
|         'item is no longer available', | ||||
|         'let me know when it\'s available', | ||||
|         'message if back in stock', | ||||
|         'nachricht bei', | ||||
|         'nicht auf lager', | ||||
|         'nicht lieferbar', | ||||
|         'nicht zur verfügung', | ||||
|         'niet beschikbaar', | ||||
|         'niet leverbaar', | ||||
|         'no disponible temporalmente', | ||||
|         'no longer in stock', | ||||
|         'no tickets available', | ||||
|         'not available', | ||||
|         'not currently available', | ||||
|         'not in stock',         | ||||
|         'notify me when available', | ||||
|         'notify when available',             | ||||
|         'não estamos a aceitar encomendas', | ||||
|         'out of stock', | ||||
|         'out-of-stock', | ||||
|         'produkt niedostępny', | ||||
|         'sold out', | ||||
|         'sold-out', | ||||
|         'temporarily out of stock', | ||||
|         'temporarily unavailable', | ||||
|         'tickets unavailable', | ||||
|         'tijdelijk uitverkocht', | ||||
|         'unavailable tickets', | ||||
|         'we do not currently have an estimate of when this product will be back in stock.', | ||||
|         'we don\'t know when or if this item will be back in stock.', | ||||
|         'zur zeit nicht an lager', | ||||
|         '品切れ', | ||||
|         '已售完', | ||||
|         '품절' | ||||
|     ]; | ||||
|  | ||||
|     function getElementBaseText(element) { | ||||
|         // .textContent can include text from children which may give the wrong results | ||||
|         // scan only immediate TEXT_NODEs, which will be a child of the element | ||||
|         var text = ""; | ||||
|         for (var i = 0; i < element.childNodes.length; ++i) | ||||
|             if (element.childNodes[i].nodeType === Node.TEXT_NODE) | ||||
|                 text += element.childNodes[i].textContent; | ||||
|         return text.toLowerCase().trim(); | ||||
|     } | ||||
|  | ||||
|     const negateOutOfStockRegexs = [ | ||||
|         '[0-9] in stock' | ||||
|     ] | ||||
|     var negateOutOfStockRegexs_r = []; | ||||
|     for (let i = 0; i < negateOutOfStockRegexs.length; i++) { | ||||
|         negateOutOfStockRegexs_r.push(new RegExp(negateOutOfStockRegexs[0], 'g')); | ||||
|     } | ||||
|  | ||||
|     // The out-of-stock or in-stock-text is generally always above-the-fold | ||||
|     // and often below-the-fold is a list of related products that may or may not contain trigger text | ||||
|     // so it's good to filter to just the 'above the fold' elements | ||||
|     // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist | ||||
|     const elementsToScan = Array.from(document.getElementsByTagName('*')).filter(element => element.getBoundingClientRect().top + window.scrollY <= window.innerHeight && element.getBoundingClientRect().top + window.scrollY >= 100); | ||||
|  | ||||
|     var elementText = ""; | ||||
|  | ||||
|     // REGEXS THAT REALLY MEAN IT'S IN STOCK | ||||
|     for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|         const element = elementsToScan[i]; | ||||
|         elementText = ""; | ||||
|         if (element.tagName.toLowerCase() === "input") { | ||||
|             elementText = element.value.toLowerCase(); | ||||
|         } else { | ||||
|             elementText = getElementBaseText(element); | ||||
|         } | ||||
|  | ||||
|         if (elementText.length) { | ||||
|             // try which ones could mean its in stock | ||||
|             for (let i = 0; i < negateOutOfStockRegexs.length; i++) { | ||||
|                 if (negateOutOfStockRegexs_r[i].test(elementText)) { | ||||
|                     return 'Possibly in stock'; | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK | ||||
|     for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|         const element = elementsToScan[i]; | ||||
|         if (element.offsetWidth > 0 || element.offsetHeight > 0 || element.getClientRects().length > 0) { | ||||
|             elementText = ""; | ||||
|             if (element.tagName.toLowerCase() === "input") { | ||||
|                 elementText = element.value.toLowerCase(); | ||||
|             } else { | ||||
|                 elementText = getElementBaseText(element); | ||||
|             } | ||||
|  | ||||
|             if (elementText.length) { | ||||
|                 // and these mean its out of stock | ||||
|                 for (const outOfStockText of outOfStockTexts) { | ||||
|                     if (elementText.includes(outOfStockText)) { | ||||
|                         return outOfStockText; // item is out of stock | ||||
|                     } | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     return 'Possibly in stock'; // possibly in stock, cant decide otherwise. | ||||
| } | ||||
|  | ||||
| // returns the element text that makes it think it's out of stock | ||||
| return isItemInStock().trim() | ||||
|  | ||||
| @@ -2,20 +2,22 @@ | ||||
|  | ||||
| # run some tests and look if the 'custom-browser-search-string=1' connect string appeared in the correct containers | ||||
|  | ||||
| # @todo do it again but with the puppeteer one | ||||
|  | ||||
| # enable debug | ||||
| set -x | ||||
|  | ||||
| # A extra browser is configured, but we never chose to use it, so it should NOT show in the logs | ||||
| docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url' | ||||
| docker logs browserless-custom-url &>log.txt | ||||
| grep 'custom-browser-search-string=1' log.txt | ||||
| docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_not_via_custom_browser_url' | ||||
| docker logs sockpuppetbrowser-custom-url &>log-custom.txt | ||||
| grep 'custom-browser-search-string=1' log-custom.txt | ||||
| if [ $? -ne 1 ] | ||||
| then | ||||
|   echo "Saw a request in 'browserless-custom-url' container with 'custom-browser-search-string=1' when I should not" | ||||
|   echo "Saw a request in 'sockpuppetbrowser-custom-url' container with 'custom-browser-search-string=1' when I should not - log-custom.txt" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| docker logs browserless &>log.txt | ||||
| docker logs sockpuppetbrowser &>log.txt | ||||
| grep 'custom-browser-search-string=1' log.txt | ||||
| if [ $? -ne 1 ] | ||||
| then | ||||
| @@ -24,16 +26,16 @@ then | ||||
| fi | ||||
|  | ||||
| # Special connect string should appear in the custom-url container, but not in the 'default' one | ||||
| docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_via_custom_browser_url' | ||||
| docker logs browserless-custom-url &>log.txt | ||||
| grep 'custom-browser-search-string=1' log.txt | ||||
| docker run --rm -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" --network changedet-network test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/custom_browser_url/test_custom_browser_url.py::test_request_via_custom_browser_url' | ||||
| docker logs sockpuppetbrowser-custom-url &>log-custom.txt | ||||
| grep 'custom-browser-search-string=1' log-custom.txt | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see request in 'browserless-custom-url' container with 'custom-browser-search-string=1' when I should" | ||||
|   echo "Did not see request in 'sockpuppetbrowser-custom-url' container with 'custom-browser-search-string=1' when I should - log-custom.txt" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| docker logs browserless &>log.txt | ||||
| docker logs sockpuppetbrowser &>log.txt | ||||
| grep 'custom-browser-search-string=1' log.txt | ||||
| if [ $? -ne 1 ] | ||||
| then | ||||
|   | ||||
| @@ -10,41 +10,7 @@ set -x | ||||
| docker run --network changedet-network -d --name squid-one --hostname squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf ubuntu/squid:4.13-21.10_edge | ||||
| docker run --network changedet-network -d --name squid-two --hostname squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf ubuntu/squid:4.13-21.10_edge | ||||
|  | ||||
| # SOCKS5 related - start simple Socks5 proxy server | ||||
| # SOCKSTEST=xyz should show in the logs of this service to confirm it fetched | ||||
| docker run --network changedet-network -d --hostname socks5proxy --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy | ||||
| docker run --network changedet-network -d --hostname socks5proxy-noauth -p 1081:1080 --name socks5proxy-noauth  serjs/go-socks5-proxy | ||||
|  | ||||
| echo "---------------------------------- SOCKS5 -------------------" | ||||
| # SOCKS5 related - test from proxies.json | ||||
| docker run --network changedet-network \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   --rm \ | ||||
|   -e "SOCKSTEST=proxiesjson" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|  | ||||
| # SOCKS5 related - by manually entering in UI | ||||
| docker run --network changedet-network \ | ||||
|   --rm \ | ||||
|   -e "SOCKSTEST=manual" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy.py' | ||||
|  | ||||
| # SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY | ||||
| docker run --network changedet-network \ | ||||
|   -e "SOCKSTEST=manual-playwright" \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" \ | ||||
|   --rm \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|  | ||||
| echo "socks5 server logs" | ||||
| docker logs socks5proxy | ||||
| echo "----------------------------------" | ||||
|  | ||||
| # Used for configuring a custom proxy URL via the UI | ||||
| # Used for configuring a custom proxy URL via the UI - with username+password auth | ||||
| docker run --network changedet-network -d \ | ||||
|   --name squid-custom \ | ||||
|   --hostname squid-custom \ | ||||
| @@ -60,15 +26,17 @@ docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_multiple_proxy.py' | ||||
|  | ||||
|  | ||||
| ## Should be a request in the default "first" squid | ||||
| set +e | ||||
| echo "- Looking for chosen.changedetection.io request in squid-one - it should NOT be here" | ||||
| docker logs squid-one 2>/dev/null|grep chosen.changedetection.io | ||||
| if [ $? -ne 0 ] | ||||
| if [ $? -ne 1 ] | ||||
| then | ||||
|   echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy - squid one)" | ||||
|   echo "Saw a request to chosen.changedetection.io in the squid logs (while checking preferred proxy - squid one) WHEN I SHOULD NOT" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| set -e | ||||
| echo "- Looking for chosen.changedetection.io request in squid-two" | ||||
| # And one in the 'second' squid (user selects this as preferred) | ||||
| docker logs squid-two 2>/dev/null|grep chosen.changedetection.io | ||||
| if [ $? -ne 0 ] | ||||
| @@ -77,7 +45,6 @@ then | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
|  | ||||
| # Test the UI configurable proxies | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
| @@ -85,6 +52,7 @@ docker run --network changedet-network \ | ||||
|  | ||||
|  | ||||
| # Should see a request for one.changedetection.io in there | ||||
| echo "- Looking for .changedetection.io request in squid-custom" | ||||
| docker logs squid-custom 2>/dev/null|grep "TCP_TUNNEL.200.*changedetection.io" | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
| @@ -101,7 +69,7 @@ docker run --network changedet-network \ | ||||
| set +e | ||||
| # Check request was never seen in any container | ||||
| for c in $(echo "squid-one squid-two squid-custom"); do | ||||
|   echo Checking $c | ||||
|   echo ....Checking $c | ||||
|   docker logs $c &> $c.txt | ||||
|   grep noproxy $c.txt | ||||
|   if [ $? -ne 1 ] | ||||
|   | ||||
							
								
								
									
										43
									
								
								changedetectionio/run_socks_proxy_tests.sh
									
									
									
									
									
										Executable file
									
								
							
							
						
						
									
										43
									
								
								changedetectionio/run_socks_proxy_tests.sh
									
									
									
									
									
										Executable file
									
								
							| @@ -0,0 +1,43 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| # exit when any command fails | ||||
| set -e | ||||
| # enable debug | ||||
| set -x | ||||
|  | ||||
|  | ||||
| # SOCKS5 related - start simple Socks5 proxy server | ||||
| # SOCKSTEST=xyz should show in the logs of this service to confirm it fetched | ||||
| docker run --network changedet-network -d --hostname socks5proxy --rm  --name socks5proxy -p 1080:1080 -e PROXY_USER=proxy_user123 -e PROXY_PASSWORD=proxy_pass123 serjs/go-socks5-proxy | ||||
| docker run --network changedet-network -d --hostname socks5proxy-noauth --rm  -p 1081:1080 --name socks5proxy-noauth  serjs/go-socks5-proxy | ||||
|  | ||||
| echo "---------------------------------- SOCKS5 -------------------" | ||||
| # SOCKS5 related - test from proxies.json | ||||
| docker run --network changedet-network \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   --rm \ | ||||
|   -e "SOCKSTEST=proxiesjson" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|  | ||||
| # SOCKS5 related - by manually entering in UI | ||||
| docker run --network changedet-network \ | ||||
|   --rm \ | ||||
|   -e "SOCKSTEST=manual" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy.py' | ||||
|  | ||||
| # SOCKS5 related - test from proxies.json via playwright - NOTE- PLAYWRIGHT DOESNT SUPPORT AUTHENTICATING PROXY | ||||
| docker run --network changedet-network \ | ||||
|   -e "SOCKSTEST=manual-playwright" \ | ||||
|   -v `pwd`/tests/proxy_socks5/proxies.json-example-noauth:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   -e "PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000" \ | ||||
|   --rm \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_socks5/test_socks5_proxy_sources.py' | ||||
|  | ||||
| echo "socks5 server logs" | ||||
| docker logs socks5proxy | ||||
| echo "----------------------------------" | ||||
|  | ||||
| docker kill socks5proxy socks5proxy-noauth | ||||
							
								
								
									
										44
									
								
								changedetectionio/static/images/steps.svg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										44
									
								
								changedetectionio/static/images/steps.svg
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,44 @@ | ||||
| <?xml version="1.0" encoding="UTF-8" standalone="no"?> | ||||
| <svg | ||||
|    aria-hidden="true" | ||||
|    viewBox="0 0 19.966091 17.999964" | ||||
|    class="css-1oqmxjn" | ||||
|    version="1.1" | ||||
|    id="svg4" | ||||
|    sodipodi:docname="steps.svg" | ||||
|    width="19.966091" | ||||
|    height="17.999964" | ||||
|    inkscape:version="1.1.2 (0a00cf5339, 2022-02-04)" | ||||
|    xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" | ||||
|    xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" | ||||
|    xmlns="http://www.w3.org/2000/svg" | ||||
|    xmlns:svg="http://www.w3.org/2000/svg"> | ||||
|   <defs | ||||
|      id="defs8" /> | ||||
|   <sodipodi:namedview | ||||
|      id="namedview6" | ||||
|      pagecolor="#ffffff" | ||||
|      bordercolor="#666666" | ||||
|      borderopacity="1.0" | ||||
|      inkscape:pageshadow="2" | ||||
|      inkscape:pageopacity="0.0" | ||||
|      inkscape:pagecheckerboard="0" | ||||
|      showgrid="false" | ||||
|      fit-margin-top="0" | ||||
|      fit-margin-left="0" | ||||
|      fit-margin-right="0" | ||||
|      fit-margin-bottom="0" | ||||
|      inkscape:zoom="8.6354167" | ||||
|      inkscape:cx="-1.3896261" | ||||
|      inkscape:cy="6.1375151" | ||||
|      inkscape:window-width="1280" | ||||
|      inkscape:window-height="667" | ||||
|      inkscape:window-x="2419" | ||||
|      inkscape:window-y="250" | ||||
|      inkscape:window-maximized="0" | ||||
|      inkscape:current-layer="svg4" /> | ||||
|   <path | ||||
|      d="m 16.95807,12.000003 c -0.7076,0.0019 -1.3917,0.2538 -1.9316,0.7113 -0.5398,0.4575 -0.9005,1.091 -1.0184,1.7887 H 5.60804 c -0.80847,0.0297 -1.60693,-0.1865 -2.29,-0.62 -0.26632,-0.1847 -0.48375,-0.4315 -0.63356,-0.7189 -0.14982,-0.2874 -0.22753,-0.607 -0.22644,-0.9311 -0.02843,-0.3931 0.03646,-0.7873 0.1894,-1.1505 0.15293,-0.3632 0.38957,-0.6851 0.6906,-0.9395 0.66628,-0.4559004 1.4637,-0.6807004 2.27,-0.6400004 h 8.35003 c 0.8515,-0.0223 1.6727,-0.3206 2.34,-0.85 0.3971,-0.3622 0.7076,-0.8091 0.9084,-1.3077 0.2008,-0.49857 0.2868,-1.03596 0.2516,-1.57229 0.0113,-0.47161 -0.0887,-0.93924 -0.292,-1.36493 -0.2033,-0.4257 -0.5041,-0.79745 -0.878,-1.08507 -0.7801,-0.55815 -1.7212,-0.84609 -2.68,-0.82 H 5.95804 c -0.12537,-0.7417 -0.5248,-1.40924 -1.11913,-1.87032996 -0.59434,-0.46108 -1.3402,-0.68207 -2.08979,-0.61917 -0.74958,0.06291 -1.44818,0.40512 -1.95736,0.95881 C 0.28259,1.5230126 0,2.2477926 0,3.0000126 c 0,0.75222 0.28259,1.47699 0.79176,2.03068 0.50918,0.55369 1.20778,0.8959 1.95736,0.95881 0.74959,0.0629 1.49545,-0.15808 2.08979,-0.61917 0.59433,-0.46109 0.99376,-1.12863 1.11913,-1.87032 h 7.70003 c 0.7353,-0.03061 1.4599,0.18397 2.06,0.61 0.2548,0.19335 0.4595,0.445 0.597,0.73385 0.1375,0.28884 0.2036,0.60644 0.193,0.92615 0.0316,0.38842 -0.0247,0.77898 -0.165,1.14258 -0.1402,0.36361 -0.3607,0.69091 -0.645,0.95741 -0.5713,0.4398 -1.2799,0.663 -2,0.63 H 5.69804 c -1.03259,-0.0462 -2.05065,0.2568 -2.89,0.86 -0.43755,0.3361 -0.78838,0.7720004 -1.02322,1.2712004 -0.23484,0.4993 -0.34688,1.0474 -0.32678,1.5988 -0.00726,0.484 0.10591,0.9622 0.32934,1.3916 0.22344,0.4295 0.55012,0.7966 0.95066,1.0684 0.85039,0.5592 1.85274,0.8421 2.87,0.81 h 8.40003 c 0.0954,0.5643 0.3502,1.0896 0.7343,1.5138 0.3842,0.4242 0.8817,0.7297 1.4338,0.8803 0.5521,0.1507 1.1358,0.1403 1.6822,-0.0299 0.5464,-0.1702 1.0328,-0.4932 1.4016,-0.9308 0.3688,-0.4376 0.6048,-0.9716 0.6801,-1.5389 0.0752,-0.5673 -0.0134,-1.1444 -0.2554,-1.663 -0.242,-0.5186 -0.6273,-0.9572 
-1.1104,-1.264 -0.4831,-0.3068 -1.0439,-0.469 -1.6162,-0.4675 z m 0,5 c -0.3956,0 -0.7823,-0.1173 -1.1112,-0.3371 -0.3289,-0.2197 -0.5852,-0.5321 -0.7366,-0.8975 -0.1514,-0.3655 -0.191,-0.7676 -0.1138,-1.1556 0.0772,-0.3879 0.2677,-0.7443 0.5474,-1.024 0.2797,-0.2797 0.636,-0.4702 1.024,-0.5474 0.388,-0.0771 0.7901,-0.0375 1.1555,0.1138 0.3655,0.1514 0.6778,0.4078 0.8976,0.7367 0.2198,0.3289 0.3371,0.7155 0.3371,1.1111 0,0.5304 -0.2107,1.0391 -0.5858,1.4142 -0.3751,0.3751 -0.8838,0.5858 -1.4142,0.5858 z" | ||||
|      id="path2" | ||||
|      style="fill:#777777;fill-opacity:1" /> | ||||
| </svg> | ||||
| After Width: | Height: | Size: 3.7 KiB | 
| @@ -10,7 +10,7 @@ $(document).ready(function () { | ||||
|         } | ||||
|     }) | ||||
|     var browsersteps_session_id; | ||||
|     var browserless_seconds_remaining = 0; | ||||
|     var browser_interface_seconds_remaining = 0; | ||||
|     var apply_buttons_disabled = false; | ||||
|     var include_text_elements = $("#include_text_elements"); | ||||
|     var xpath_data = false; | ||||
| @@ -49,7 +49,7 @@ $(document).ready(function () { | ||||
|         $('#browsersteps-img').removeAttr('src'); | ||||
|         $("#browsersteps-click-start").show(); | ||||
|         $("#browsersteps-selector-wrapper .spinner").hide(); | ||||
|         browserless_seconds_remaining = 0; | ||||
|         browser_interface_seconds_remaining = 0; | ||||
|         browsersteps_session_id = false; | ||||
|         apply_buttons_disabled = false; | ||||
|         ctx.clearRect(0, 0, c.width, c.height); | ||||
| @@ -61,12 +61,12 @@ $(document).ready(function () { | ||||
|         $('#browser_steps >li:first-child').css('opacity', '0.5'); | ||||
|     } | ||||
|  | ||||
|     // Show seconds remaining until playwright/browserless needs to restart the session | ||||
|     // Show seconds remaining until the browser interface needs to restart the session | ||||
|     // (See comment at the top of changedetectionio/blueprint/browser_steps/__init__.py ) | ||||
|     setInterval(() => { | ||||
|         if (browserless_seconds_remaining >= 1) { | ||||
|             document.getElementById('browserless-seconds-remaining').innerText = browserless_seconds_remaining + " seconds remaining in session"; | ||||
|             browserless_seconds_remaining -= 1; | ||||
|         if (browser_interface_seconds_remaining >= 1) { | ||||
|             document.getElementById('browser-seconds-remaining').innerText = browser_interface_seconds_remaining + " seconds remaining in session"; | ||||
|             browser_interface_seconds_remaining -= 1; | ||||
|         } | ||||
|     }, "1000") | ||||
|  | ||||
| @@ -160,6 +160,12 @@ $(document).ready(function () { | ||||
|                     e.offsetX > item.left * y_scale && e.offsetX < item.left * y_scale + item.width * y_scale | ||||
|  | ||||
|                 ) { | ||||
|                     // Ignore really large ones, because we are scraping 'div' also from xpath_element_scraper but | ||||
|                     // that div or whatever could be some wrapper and would generally make you select the whole page | ||||
|                     if (item.width > 800 && item.height > 400) { | ||||
|                         return | ||||
|                     } | ||||
|  | ||||
|                     // There could be many elements here, record them all and then we'll find out which is the most 'useful' | ||||
|                     // (input, textarea, button, A etc) | ||||
|                     if (item.width < xpath_data['browser_width']) { | ||||
| @@ -261,7 +267,7 @@ $(document).ready(function () { | ||||
|             // This should trigger 'Goto site' | ||||
|             console.log("Got startup response, requesting Goto-Site (first) step fake click"); | ||||
|             $('#browser_steps >li:first-child .apply').click(); | ||||
|             browserless_seconds_remaining = 500; | ||||
|             browser_interface_seconds_remaining = 500; | ||||
|             set_first_gotosite_disabled(); | ||||
|         }).fail(function (data) { | ||||
|             console.log(data); | ||||
|   | ||||
| @@ -90,5 +90,10 @@ $(document).ready(function () { | ||||
|         } | ||||
|     } | ||||
|  | ||||
|  | ||||
|     $('#diff-form').on('submit', function (e) { | ||||
|         if ($('select[name=from_version]').val() === $('select[name=to_version]').val()) { | ||||
|             e.preventDefault(); | ||||
|             alert('Error - You are trying to compare the same version.'); | ||||
|         } | ||||
|     }); | ||||
| }); | ||||
|   | ||||
| @@ -28,15 +28,11 @@ $(document).ready(function() { | ||||
|       notification_format: $('#notification_format').val(), | ||||
|       notification_title: $('#notification_title').val(), | ||||
|       notification_urls: $('.notification-urls').val(), | ||||
|       tags: $('#tags').val(), | ||||
|       window_url: window.location.href, | ||||
|     } | ||||
|  | ||||
|  | ||||
|     if (!data['notification_urls'].length) { | ||||
|       alert("Notification URL list is empty, cannot send test.") | ||||
|       return; | ||||
|     } | ||||
|  | ||||
|     $.ajax({ | ||||
|       type: "POST", | ||||
|       url: notification_base_url, | ||||
| @@ -49,7 +45,7 @@ $(document).ready(function() { | ||||
|       } | ||||
|     }).done(function(data){ | ||||
|       console.log(data); | ||||
|       alert('Sent'); | ||||
|       alert(data); | ||||
|     }).fail(function(data){ | ||||
|       console.log(data); | ||||
|       alert('There was an error communicating with the server.'); | ||||
|   | ||||
| @@ -126,6 +126,8 @@ html[data-darkmode="true"] { | ||||
|   html[data-darkmode="true"] .watch-table .title-col a[target="_blank"]::after, | ||||
|   html[data-darkmode="true"] .watch-table .current-diff-url::after { | ||||
|     filter: invert(0.5) hue-rotate(10deg) brightness(2); } | ||||
|   html[data-darkmode="true"] .watch-table .status-browsersteps { | ||||
|     filter: invert(0.5) hue-rotate(10deg) brightness(1.5); } | ||||
|   html[data-darkmode="true"] .watch-table .watch-controls .state-off img { | ||||
|     opacity: 0.3; } | ||||
|   html[data-darkmode="true"] .watch-table .watch-controls .state-on img { | ||||
|   | ||||
| @@ -152,6 +152,10 @@ html[data-darkmode="true"] { | ||||
|       filter: invert(.5) hue-rotate(10deg) brightness(2); | ||||
|     } | ||||
|  | ||||
|     .status-browsersteps { | ||||
|       filter: invert(.5) hue-rotate(10deg) brightness(1.5); | ||||
|     } | ||||
|  | ||||
|     .watch-controls { | ||||
|       .state-off { | ||||
|         img { | ||||
|   | ||||
| @@ -1096,3 +1096,16 @@ ul { | ||||
|   white-space: nowrap; | ||||
| } | ||||
|  | ||||
| #chrome-extension-link { | ||||
|   img { | ||||
|     height: 21px; | ||||
|     padding: 2px; | ||||
|     vertical-align: middle; | ||||
|   } | ||||
|  | ||||
|   padding: 9px; | ||||
|   border: 1px solid var(--color-grey-800); | ||||
|   border-radius: 10px; | ||||
|   vertical-align: middle; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -342,6 +342,8 @@ html[data-darkmode="true"] { | ||||
|   html[data-darkmode="true"] .watch-table .title-col a[target="_blank"]::after, | ||||
|   html[data-darkmode="true"] .watch-table .current-diff-url::after { | ||||
|     filter: invert(0.5) hue-rotate(10deg) brightness(2); } | ||||
|   html[data-darkmode="true"] .watch-table .status-browsersteps { | ||||
|     filter: invert(0.5) hue-rotate(10deg) brightness(1.5); } | ||||
|   html[data-darkmode="true"] .watch-table .watch-controls .state-off img { | ||||
|     opacity: 0.3; } | ||||
|   html[data-darkmode="true"] .watch-table .watch-controls .state-on img { | ||||
| @@ -1178,3 +1180,13 @@ ul { | ||||
|   .restock-label.not-in-stock { | ||||
|     background-color: var(--color-background-button-cancel); | ||||
|     color: #777; } | ||||
|  | ||||
| #chrome-extension-link { | ||||
|   padding: 9px; | ||||
|   border: 1px solid var(--color-grey-800); | ||||
|   border-radius: 10px; | ||||
|   vertical-align: middle; } | ||||
|   #chrome-extension-link img { | ||||
|     height: 21px; | ||||
|     padding: 2px; | ||||
|     vertical-align: middle; } | ||||
|   | ||||
| @@ -1,4 +1,4 @@ | ||||
| from distutils.util import strtobool | ||||
| from changedetectionio.strtobool import strtobool | ||||
|  | ||||
| from flask import ( | ||||
|     flash | ||||
| @@ -255,6 +255,7 @@ class ChangeDetectionStore: | ||||
|                 'last_viewed': 0, | ||||
|                 'previous_md5': False, | ||||
|                 'previous_md5_before_filters': False, | ||||
|                 'remote_server_reply': None, | ||||
|                 'track_ldjson_price_data': None, | ||||
|             }) | ||||
|  | ||||
| @@ -656,7 +657,10 @@ class ChangeDetectionStore: | ||||
|         return res | ||||
|  | ||||
|     def tag_exists_by_name(self, tag_name): | ||||
|         return any(v.get('title', '').lower() == tag_name.lower() for k, v in self.__data['settings']['application']['tags'].items()) | ||||
|         # Check if any tag dictionary has a 'title' attribute matching the provided tag_name | ||||
|         tags = self.__data['settings']['application']['tags'].values() | ||||
|         return next((v for v in tags if v.get('title', '').lower() == tag_name.lower()), | ||||
|                     None) | ||||
|  | ||||
|     def get_updates_available(self): | ||||
|         import inspect | ||||
|   | ||||
							
								
								
									
										23
									
								
								changedetectionio/strtobool.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										23
									
								
								changedetectionio/strtobool.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,23 @@ | ||||
| # Because strtobool was removed in python 3.12 distutils | ||||
|  | ||||
| _MAP = { | ||||
|     'y': True, | ||||
|     'yes': True, | ||||
|     't': True, | ||||
|     'true': True, | ||||
|     'on': True, | ||||
|     '1': True, | ||||
|     'n': False, | ||||
|     'no': False, | ||||
|     'f': False, | ||||
|     'false': False, | ||||
|     'off': False, | ||||
|     '0': False | ||||
| } | ||||
|  | ||||
|  | ||||
| def strtobool(value): | ||||
|     try: | ||||
|         return _MAP[str(value).lower()] | ||||
|     except KeyError: | ||||
|         raise ValueError('"{}" is not a valid bool value'.format(value)) | ||||
| @@ -115,6 +115,12 @@ | ||||
| 									Warning: Contents of <code>{{ '{{diff}}' }}</code>, <code>{{ '{{diff_removed}}' }}</code>, and <code>{{ '{{diff_added}}' }}</code> depend on how the difference algorithm perceives the change. <br> | ||||
|                                     For example, an addition or removal could be perceived as a change in some cases. <a target="_new" href="https://github.com/dgtlmoon/changedetection.io/wiki/Using-the-%7B%7Bdiff%7D%7D,-%7B%7Bdiff_added%7D%7D,-and-%7B%7Bdiff_removed%7D%7D-notification-tokens">More Here</a> <br> | ||||
|                                     </p> | ||||
|                                     <p> | ||||
|                                         For JSON payloads, use <strong>|tojson</strong> without quotes for automatic escaping, for example - <code>{ "name": {{ '{{ watch_title|tojson }}' }} }</code> | ||||
|                                     </p> | ||||
|                                     <p> | ||||
|                                         URL encoding, use <strong>|urlencode</strong>, for example - <code>gets://hook-website.com/test.php?title={{ '{{ watch_title|urlencode }}' }}</code> | ||||
|                                     </p> | ||||
|                                 </div> | ||||
|                             </div> | ||||
|                             <div class="pure-control-group"> | ||||
|   | ||||
| @@ -147,7 +147,19 @@ | ||||
|     <section class="content"> | ||||
|         <div id="overlay"> | ||||
|             <div class="content"> | ||||
|                 <strong>changedetection.io needs your support!</strong><br> | ||||
|                 <h4>Try our Chrome extension</h4> | ||||
|                 <p> | ||||
|                     <a id="chrome-extension-link" | ||||
|                        title="Try our new Chrome Extension!" | ||||
|                        href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop"> | ||||
|                         <img src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}"> | ||||
|                         Chrome Webstore | ||||
|                     </a> | ||||
|                 </p> | ||||
|  | ||||
|                 Easily add the current web-page from your browser directly into your changedetection.io tool, more great features coming soon! | ||||
|  | ||||
|                 <h4>Changedetection.io needs your support!</h4> | ||||
|                 <p> | ||||
|                     You can help us by supporting changedetection.io on these platforms; | ||||
|                 </p> | ||||
|   | ||||
| @@ -13,7 +13,7 @@ | ||||
| <script src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script> | ||||
|  | ||||
| <div id="settings"> | ||||
|     <form class="pure-form " action="" method="GET"> | ||||
|     <form class="pure-form " action="" method="GET" id="diff-form"> | ||||
|         <fieldset> | ||||
|             {% if versions|length >= 1 %} | ||||
|                 <strong>Compare</strong> | ||||
|   | ||||
| @@ -7,7 +7,8 @@ | ||||
| <script> | ||||
|     const browser_steps_available_screenshots=JSON.parse('{{ watch.get_browsersteps_available_screenshots|tojson }}'); | ||||
|     const browser_steps_config=JSON.parse('{{ browser_steps_config|tojson }}'); | ||||
|     const browser_steps_fetch_screenshot_image_url="{{url_for('browser_steps.browser_steps_fetch_screenshot_image', uuid=uuid)}}"; | ||||
|     <!-- Should be _external so that firefox and others load it more reliably --> | ||||
|     const browser_steps_fetch_screenshot_image_url="{{url_for('browser_steps.browser_steps_fetch_screenshot_image', uuid=uuid, _external=True)}}"; | ||||
|     const browser_steps_last_error_step={{ watch.browser_steps_last_error_step|tojson }}; | ||||
|     const browser_steps_start_url="{{url_for('browser_steps.browsersteps_start_session', uuid=uuid)}}"; | ||||
|     const browser_steps_sync_url="{{url_for('browser_steps.browsersteps_ui_update', uuid=uuid)}}"; | ||||
| @@ -31,6 +32,7 @@ | ||||
| <script src="{{url_for('static_content', group='js', filename='browser-steps.js')}}" defer></script> | ||||
| {% endif %} | ||||
|  | ||||
| {% set has_tag_filters_extra="WARNING: Watch has tag/groups set with special filters\n" if has_special_tag_options else '' %} | ||||
| <script src="{{url_for('static_content', group='js', filename='recheck-proxy.js')}}" defer></script> | ||||
|  | ||||
| <div class="edit-form monospaced-textarea"> | ||||
| @@ -228,7 +230,7 @@ User-Agent: wonderbra 1.0") }} | ||||
|                                 </div> | ||||
|                             </div> | ||||
|                             <div id="browser-steps-fieldlist" style="padding-left: 1em;  width: 350px; font-size: 80%;" > | ||||
|                                 <span id="browserless-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span> | ||||
|                                 <span id="browser-seconds-remaining">Loading</span> <span style="font-size: 80%;"> (<a target=_new href="https://github.com/dgtlmoon/changedetection.io/pull/478/files#diff-1a79d924d1840c485238e66772391268a89c95b781d69091384cf1ea1ac146c9R4">?</a>) </span> | ||||
|                                 {{ render_field(form.browser_steps) }} | ||||
|                             </div> | ||||
|                         </div> | ||||
| @@ -280,7 +282,7 @@ User-Agent: wonderbra 1.0") }} | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% set field = render_field(form.include_filters, | ||||
|                             rows=5, | ||||
|                             placeholder="#example | ||||
|                             placeholder=has_tag_filters_extra+"#example | ||||
| xpath://body/div/span[contains(@class, 'example-class')]", | ||||
|                             class="m-d") | ||||
|                         %} | ||||
| @@ -316,13 +318,14 @@ xpath://body/div/span[contains(@class, 'example-class')]", | ||||
|                 </span> | ||||
|                     </div> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_field(form.subtractive_selectors, rows=5, placeholder="header | ||||
|                     {{ render_field(form.subtractive_selectors, rows=5, placeholder=has_tag_filters_extra+"header | ||||
| footer | ||||
| nav | ||||
| .stockticker") }} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                           <li> Remove HTML element(s) by CSS selector before text conversion. </li> | ||||
|                           <li> Don't paste HTML here, use only CSS selectors </li> | ||||
|                           <li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li> | ||||
|                         </ul> | ||||
|                       </span> | ||||
| @@ -339,6 +342,10 @@ nav | ||||
|                     <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span> | ||||
|                 </fieldset> | ||||
|  | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.sort_text_alphabetically) }} | ||||
|                     <span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span> | ||||
|                 </fieldset> | ||||
|                 <fieldset class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.check_unique_lines) }} | ||||
|                     <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span> | ||||
| @@ -401,6 +408,7 @@ Unavailable") }} | ||||
|                                 <li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br></li> | ||||
|                                 <li>Keyword example ‐ example <code>Out of stock</code></li> | ||||
|                                 <li>Use groups to extract just that text ‐ example <code>/reports.+?(\d+)/i</code> returns a list of years only</li> | ||||
|                                 <li>Example - match lines containing a keyword <code>/.*icecream.*/</code></li> | ||||
|                             </ul> | ||||
|                         </li> | ||||
|                         <li>One line per regular-expression/string match</li> | ||||
| @@ -431,7 +439,7 @@ Unavailable") }} | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% if visualselector_enabled %} | ||||
|                             <span class="pure-form-message-inline"> | ||||
|                                 The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection ‐ after the <i>Browser Steps</i> has completed.<br><br> | ||||
|                                 The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection ‐ after the <i>Browser Steps</i> has completed, this tool is a helper to manage filters in the  "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. | ||||
|                             </span> | ||||
|  | ||||
|                             <div id="selector-header"> | ||||
| @@ -482,6 +490,10 @@ Unavailable") }} | ||||
|                             <td>Last fetch time</td> | ||||
|                             <td>{{ watch.fetch_time }}s</td> | ||||
|                         </tr> | ||||
|                         <tr> | ||||
|                             <td>Notification alert count</td> | ||||
|                             <td>{{ watch.notification_alert_count }}</td> | ||||
|                         </tr> | ||||
|                         </tbody> | ||||
|                     </table> | ||||
|                 </div> | ||||
|   | ||||
| @@ -107,7 +107,7 @@ | ||||
|                                     <option value="" style="color: #aaa"> -- none --</option> | ||||
|                                     <option value="url">URL</option> | ||||
|                                     <option value="title">Title</option> | ||||
|                                     <option value="include_filter">CSS/xPath filter</option> | ||||
|                                     <option value="include_filters">CSS/xPath filter</option> | ||||
|                                     <option value="tag">Group / Tag name(s)</option> | ||||
|                                     <option value="interval_minutes">Recheck time (minutes)</option> | ||||
|                                 </select></td> | ||||
|   | ||||
| @@ -4,7 +4,7 @@ | ||||
| {% from '_helpers.jinja' import render_field, render_checkbox_field, render_button %} | ||||
| {% from '_common_fields.jinja' import render_common_settings_form %} | ||||
| <script> | ||||
|     const notification_base_url="{{url_for('ajax_callback_send_notification_test', watch_uuid=uuid)}}"; | ||||
|     const notification_base_url="{{url_for('ajax_callback_send_notification_test', mode="global-settings")}}"; | ||||
| {% if emailprefix %} | ||||
|     const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}'); | ||||
| {% endif %} | ||||
| @@ -168,12 +168,12 @@ nav | ||||
|            </div> | ||||
|  | ||||
|             <div class="tab-pane-inner" id="api"> | ||||
|  | ||||
|                 <h4>API Access</h4> | ||||
|                 <p>Drive your changedetection.io via API, More about <a href="https://github.com/dgtlmoon/changedetection.io/wiki/API-Reference">API access here</a></p> | ||||
|  | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_checkbox_field(form.application.form.api_access_token_enabled) }} | ||||
|                     <div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header</div><br> | ||||
|                     <div class="pure-form-message-inline">Restrict API access limit by using <code>x-api-key</code> header - required for the Chrome Extension to work</div><br> | ||||
|                     <div class="pure-form-message-inline"><br>API Key <span id="api-key">{{api_key}}</span> | ||||
|                         <span style="display:none;" id="api-key-copy" >copy</span> | ||||
|                     </div> | ||||
| @@ -181,6 +181,20 @@ nav | ||||
|                 <div class="pure-control-group"> | ||||
|                     <a href="{{url_for('settings_reset_api_key')}}" class="pure-button button-small button-cancel">Regenerate API key</a> | ||||
|                 </div> | ||||
|                 <div class="pure-control-group"> | ||||
|                     <h4>Chrome Extension</h4> | ||||
|                     <p>Easily add any web-page to your changedetection.io installation from within Chrome.</p> | ||||
|                     <strong>Step 1</strong> Install the extension, <strong>Step 2</strong> Navigate to this page, | ||||
|                     <strong>Step 3</strong> Open the extension from the toolbar and click "<i>Sync API Access</i>" | ||||
|                     <p> | ||||
|                         <a id="chrome-extension-link" | ||||
|                            title="Try our new Chrome Extension!" | ||||
|                            href="https://chromewebstore.google.com/detail/changedetectionio-website/kefcfmgmlhmankjmnbijimhofdjekbop"> | ||||
|                             <img src="{{ url_for('static_content', group='images', filename='Google-Chrome-icon.png') }}"> | ||||
|                             Chrome Webstore | ||||
|                         </a> | ||||
|                     </p> | ||||
|                 </div> | ||||
|             </div> | ||||
|             <div class="tab-pane-inner" id="proxies"> | ||||
|                 <div id="recommended-proxy"> | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| {% extends 'base.html' %} | ||||
| {% block content %} | ||||
| {% from '_helpers.jinja' import render_simple_field, render_field, render_nolabel_field %} | ||||
| {% from '_helpers.jinja' import render_simple_field, render_field, render_nolabel_field, sort_by_title %} | ||||
| <script src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script> | ||||
|  | ||||
| @@ -13,7 +13,7 @@ | ||||
|             <div id="watch-add-wrapper-zone"> | ||||
|  | ||||
|                     {{ render_nolabel_field(form.url, placeholder="https://...", required=true) }} | ||||
|                     {{ render_nolabel_field(form.tags, value=tags[active_tag].title if active_tag else '', placeholder="watch label / tag") }} | ||||
|                     {{ render_nolabel_field(form.tags, value=active_tag.title if active_tag else '', placeholder="watch label / tag") }} | ||||
|                     {{ render_nolabel_field(form.watch_submit_button, title="Watch this URL!" ) }} | ||||
|                     {{ render_nolabel_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }} | ||||
|             </div> | ||||
| @@ -37,6 +37,7 @@ | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="assign-tag" id="checkbox-assign-tag">Tag</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="mark-viewed">Mark viewed</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="notification-default">Use default notification</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" name="op" value="clear-errors">Clear errors</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="clear-history">Clear/reset history</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="background: #dd4242;" name="op" value="delete">Delete</button> | ||||
|     </div> | ||||
| @@ -46,11 +47,13 @@ | ||||
|     {% if search_q %}<div id="search-result-info">Searching "<strong><i>{{search_q}}</i></strong>"</div>{% endif %} | ||||
|     <div> | ||||
|         <a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a> | ||||
|         {% for uuid, tag in tags.items() %} | ||||
|             {% if tag != "" %} | ||||
|                 <a href="{{url_for('index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag == uuid }}">{{ tag.title }}</a> | ||||
|             {% endif %} | ||||
|         {% endfor %} | ||||
|  | ||||
|     <!-- tag list --> | ||||
|     {% for uuid, tag in tags %} | ||||
|         {% if tag != "" %} | ||||
|             <a href="{{url_for('index', tag=uuid) }}" class="pure-button button-tag {{'active' if active_tag_uuid == uuid }}">{{ tag.title }}</a> | ||||
|         {% endif %} | ||||
|     {% endfor %} | ||||
|     </div> | ||||
|  | ||||
|     {% set sort_order = sort_order or 'asc' %} | ||||
| @@ -110,6 +113,7 @@ | ||||
|                     {% endif %} | ||||
|  | ||||
|                     {%if watch.is_pdf  %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" >{% endif %} | ||||
|                     {% if watch.has_browser_steps %}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" title="Browser Steps is enabled" >{% endif %} | ||||
|                     {% if watch.last_error is defined and watch.last_error != False %} | ||||
|                     <div class="fetch-error">{{ watch.last_error }} | ||||
|  | ||||
| @@ -196,8 +200,8 @@ | ||||
|             </li> | ||||
|             {% endif %} | ||||
|             <li> | ||||
|                <a href="{{ url_for('form_watch_checknow', tag=active_tag, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Recheck | ||||
|                 all {% if active_tag%} in "{{tags[active_tag].title}}"{%endif%}</a> | ||||
|                <a href="{{ url_for('form_watch_checknow', tag=active_tag_uuid, with_errors=request.args.get('with_errors',0)) }}" class="pure-button button-tag ">Recheck | ||||
|                 all {% if active_tag_uuid %} in "{{active_tag.title}}"{%endif%}</a> | ||||
|             </li> | ||||
|             <li> | ||||
|                 <a href="{{ url_for('rss', tag=active_tag , token=app_rss_token)}}"><img alt="RSS Feed" id="feed-icon" src="{{url_for('static_content', group='images', filename='Generic_Feed-icon.svg')}}" height="15"></a> | ||||
|   | ||||
| @@ -7,10 +7,11 @@ from ..util import live_server_setup, wait_for_all_checks | ||||
| def do_test(client, live_server, make_test_use_extra_browser=False): | ||||
|  | ||||
|     # Grep for this string in the logs? | ||||
|     test_url = f"https://changedetection.io/ci-test.html" | ||||
|     test_url = f"https://changedetection.io/ci-test.html?non-custom-default=true" | ||||
|     # "non-custom-default" should not appear in the custom browser connection | ||||
|     custom_browser_name = 'custom browser URL' | ||||
|  | ||||
|     # needs to be set and something like 'ws://127.0.0.1:3000?stealth=1&--disable-web-security=true' | ||||
|     # needs to be set and something like 'ws://127.0.0.1:3000' | ||||
|     assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|  | ||||
|     ##################### | ||||
| @@ -19,9 +20,7 @@ def do_test(client, live_server, make_test_use_extra_browser=False): | ||||
|         data={"application-empty_pages_are_a_change": "", | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_webdriver", | ||||
|               # browserless-custom-url is setup in  .github/workflows/test-only.yml | ||||
|               # the test script run_custom_browser_url_test.sh will look for 'custom-browser-search-string' in the container logs | ||||
|               'requests-extra_browsers-0-browser_connection_url': 'ws://browserless-custom-url:3000?stealth=1&--disable-web-security=true&custom-browser-search-string=1', | ||||
|               'requests-extra_browsers-0-browser_connection_url': 'ws://sockpuppetbrowser-custom-url:3000', | ||||
|               'requests-extra_browsers-0-browser_name': custom_browser_name | ||||
|               }, | ||||
|         follow_redirects=True | ||||
| @@ -51,7 +50,8 @@ def do_test(client, live_server, make_test_use_extra_browser=False): | ||||
|         res = client.post( | ||||
|             url_for("edit_page", uuid="first"), | ||||
|             data={ | ||||
|                   "url": test_url, | ||||
|                 # 'run_customer_browser_url_tests.sh' will search for this string to know if we hit the right browser container or not | ||||
|                   "url": f"https://changedetection.io/ci-test.html?custom-browser-search-string=1", | ||||
|                   "tags": "", | ||||
|                   "headers": "", | ||||
|                   'fetch_backend': f"extra_browser_{custom_browser_name}", | ||||
|   | ||||
| @@ -0,0 +1,56 @@ | ||||
| import os | ||||
| from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
|  | ||||
|  | ||||
| def test_execute_custom_js(client, live_server): | ||||
|  | ||||
|     live_server_setup(live_server) | ||||
|     assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|  | ||||
|     test_url = url_for('test_interactive_html_endpoint', _external=True) | ||||
|     test_url = test_url.replace('localhost.localdomain', 'cdio') | ||||
|     test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Watch added in Paused state, saving will unpause" in res.data | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first", unpause_on_save=1), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             "tags": "", | ||||
|             'fetch_backend': "html_webdriver", | ||||
|             'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();', | ||||
|             'headers': "testheader: yes\buser-agent: MyCustomAgent", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"unpaused" in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|     assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n >= 1, "Watch history had atleast 1 (everything fetched OK)" | ||||
|  | ||||
|     assert b"This text should be removed" not in res.data | ||||
|  | ||||
|     # Check HTML conversion detected and workd | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid=uuid), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"This text should be removed" not in res.data | ||||
|     assert b"I smell JavaScript because the button was pressed" in res.data | ||||
|  | ||||
|     assert b"testheader: yes" in res.data | ||||
|     assert b"user-agent: mycustomagent" in res.data | ||||
|  | ||||
|     client.get( | ||||
|         url_for("form_delete", uuid="all"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -1,6 +1,6 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| import os | ||||
| from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
| @@ -9,22 +9,20 @@ def test_preferred_proxy(client, live_server): | ||||
|     live_server_setup(live_server) | ||||
|     url = "http://chosen.changedetection.io" | ||||
|  | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         # Because a URL wont show in squid/proxy logs due it being SSLed | ||||
|         # Use plain HTTP or a specific domain-name here | ||||
|         data={"urls": url}, | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|     assert b"Watch added in Paused state, saving will unpause" in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         url_for("edit_page", uuid="first", unpause_on_save=1), | ||||
|         data={ | ||||
|                 "include_filters": "", | ||||
|                 "fetch_backend": "html_requests", | ||||
|                 "fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests', | ||||
|                 "headers": "", | ||||
|                 "proxy": "proxy-two", | ||||
|                 "tags": "", | ||||
| @@ -32,6 +30,6 @@ def test_preferred_proxy(client, live_server): | ||||
|               }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     assert b"unpaused" in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     # Now the request should appear in the second-squid logs | ||||
|   | ||||
| @@ -3,6 +3,7 @@ | ||||
| import time | ||||
| from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks | ||||
| import os | ||||
|  | ||||
| # just make a request, we will grep in the docker logs to see it actually got called | ||||
| def test_select_custom(client, live_server): | ||||
| @@ -14,7 +15,7 @@ def test_select_custom(client, live_server): | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-ignore_whitespace": "y", | ||||
|             "application-fetch_backend": "html_requests", | ||||
|             "application-fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests', | ||||
|             "requests-extra_proxies-0-proxy_name": "custom-test-proxy", | ||||
|             # test:awesome is set in tests/proxy_list/squid-passwords.txt | ||||
|             "requests-extra_proxies-0-proxy_url": "http://test:awesome@squid-custom:3128", | ||||
|   | ||||
| @@ -95,7 +95,7 @@ def test_restock_detection(client, live_server): | ||||
|  | ||||
|     # We should have a notification | ||||
|     time.sleep(2) | ||||
|     assert os.path.isfile("test-datastore/notification.txt") | ||||
|     assert os.path.isfile("test-datastore/notification.txt"), "Notification received" | ||||
|     os.unlink("test-datastore/notification.txt") | ||||
|  | ||||
|     # Default behaviour is to only fire notification when it goes OUT OF STOCK -> IN STOCK | ||||
| @@ -103,4 +103,9 @@ def test_restock_detection(client, live_server): | ||||
|     set_original_response() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     wait_for_all_checks(client) | ||||
|     assert not os.path.isfile("test-datastore/notification.txt") | ||||
|     assert not os.path.isfile("test-datastore/notification.txt"), "No notification should have fired when it went OUT OF STOCK by default" | ||||
|  | ||||
|     # BUT we should see that it correctly shows "not in stock" | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'not-in-stock' in res.data, "Correctly showing NOT IN STOCK in the list after it changed from IN STOCK" | ||||
|  | ||||
|   | ||||
| @@ -163,6 +163,7 @@ def test_api_simple(client, live_server): | ||||
|     # Loading the most recent snapshot should force viewed to become true | ||||
|     client.get(url_for("diff_history_page", uuid="first"), follow_redirects=True) | ||||
|  | ||||
|     time.sleep(3) | ||||
|     # Fetch the whole watch again, viewed should be true | ||||
|     res = client.get( | ||||
|         url_for("watch", uuid=watch_uuid), | ||||
|   | ||||
| @@ -29,7 +29,7 @@ def test_check_basic_change_detection_functionality(client, live_server): | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Do this a few times.. ensures we dont accidently set the status | ||||
|     for n in range(3): | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
| import time | ||||
|  | ||||
| from flask import url_for | ||||
| from . util import live_server_setup | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
| from ..html_tools import * | ||||
|  | ||||
| @@ -30,7 +30,7 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text): | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(2) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("index")) | ||||
|     # no change | ||||
| @@ -57,7 +57,7 @@ def _runner_test_http_errors(client, live_server, http_code, expected_text): | ||||
| def test_http_error_handler(client, live_server): | ||||
|     _runner_test_http_errors(client, live_server, 403, 'Access denied') | ||||
|     _runner_test_http_errors(client, live_server, 404, 'Page not found') | ||||
|     _runner_test_http_errors(client, live_server, 500, '(Internal server Error) received') | ||||
|     _runner_test_http_errors(client, live_server, 500, '(Internal server error) received') | ||||
|     _runner_test_http_errors(client, live_server, 400, 'Error - Request returned a HTTP error code 400') | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
| @@ -76,7 +76,7 @@ def test_DNS_errors(client, live_server): | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("index")) | ||||
|     found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data | ||||
| @@ -104,7 +104,7 @@ def test_low_level_errors_clear_correctly(client, live_server): | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(2) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # We should see the DNS error | ||||
|     res = client.get(url_for("index")) | ||||
| @@ -121,7 +121,7 @@ def test_low_level_errors_clear_correctly(client, live_server): | ||||
|     ) | ||||
|  | ||||
|     # Now the error should be gone | ||||
|     time.sleep(2) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("index")) | ||||
|     found_name_resolution_error = b"Temporary failure in name resolution" in res.data or b"Name or service not known" in res.data | ||||
|     assert not found_name_resolution_error | ||||
|   | ||||
| @@ -100,6 +100,12 @@ def test_setup_group_tag(client, live_server): | ||||
|     assert b'Should be only this' in res.data | ||||
|     assert b'And never this' not in res.data | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     # 2307 the UI notice should appear in the placeholder | ||||
|     assert b'WARNING: Watch has tag/groups set with special filters' in res.data | ||||
|  | ||||
|     # RSS Group tag filter | ||||
|     # An extra one that should be excluded | ||||
| @@ -321,3 +327,154 @@ def test_clone_tag_on_quickwatchform_add(client, live_server): | ||||
|  | ||||
|     res = client.get(url_for("tags.delete_all"), follow_redirects=True) | ||||
|     assert b'All tags deleted' in res.data | ||||
|  | ||||
| def test_order_of_filters_tag_filter_and_watch_filter(client, live_server): | ||||
|  | ||||
|     # Add a tag with some config, import a tag and it should roughly work | ||||
|     res = client.post( | ||||
|         url_for("tags.form_tag_add"), | ||||
|         data={"name": "test-tag-keep-order"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Tag added" in res.data | ||||
|     assert b"test-tag-keep-order" in res.data | ||||
|     tag_filters = [ | ||||
|             '#only-this', # duplicated filters | ||||
|             '#only-this', | ||||
|             '#only-this', | ||||
|             '#only-this', | ||||
|             ] | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("tags.form_tag_edit_submit", uuid="first"), | ||||
|         data={"name": "test-tag-keep-order", | ||||
|               "include_filters": '\n'.join(tag_filters) }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated" in res.data | ||||
|     tag_uuid = get_UUID_for_tag_name(client, name="test-tag-keep-order") | ||||
|     res = client.get( | ||||
|         url_for("tags.form_tag_edit", uuid="first") | ||||
|     ) | ||||
|     assert b"#only-this" in res.data | ||||
|  | ||||
|  | ||||
|     d = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p id="only-this">And 1 this</p> | ||||
|      <br> | ||||
|      <p id="not-this">And 2 this</p> | ||||
|      <p id="">And 3 this</p><!--/html/body/p[3]/--> | ||||
|      <p id="">And 4 this</p><!--/html/body/p[4]/--> | ||||
|      <p id="">And 5 this</p><!--/html/body/p[5]/--> | ||||
|      <p id="">And 6 this</p><!--/html/body/p[6]/--> | ||||
|      <p id="">And 7 this</p><!--/html/body/p[7]/--> | ||||
|      <p id="">And 8 this</p><!--/html/body/p[8]/--> | ||||
|      <p id="">And 9 this</p><!--/html/body/p[9]/--> | ||||
|      <p id="">And 10 this</p><!--/html/body/p[10]/--> | ||||
|      <p id="">And 11 this</p><!--/html/body/p[11]/--> | ||||
|      <p id="">And 12 this</p><!--/html/body/p[12]/--> | ||||
|      <p id="">And 13 this</p><!--/html/body/p[13]/--> | ||||
|      <p id="">And 14 this</p><!--/html/body/p[14]/--> | ||||
|      <p id="not-this">And 15 this</p><!--/html/body/p[15]/--> | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(d) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     filters = [ | ||||
|             '/html/body/p[3]', | ||||
|             '/html/body/p[4]', | ||||
|             '/html/body/p[5]', | ||||
|             '/html/body/p[6]', | ||||
|             '/html/body/p[7]', | ||||
|             '/html/body/p[8]', | ||||
|             '/html/body/p[9]', | ||||
|             '/html/body/p[10]', | ||||
|             '/html/body/p[11]', | ||||
|             '/html/body/p[12]', | ||||
|             '/html/body/p[13]', # duplicated tags | ||||
|             '/html/body/p[13]', | ||||
|             '/html/body/p[13]', | ||||
|             '/html/body/p[13]', | ||||
|             '/html/body/p[13]', | ||||
|             '/html/body/p[14]', | ||||
|             ] | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"include_filters": '\n'.join(filters), | ||||
|             "url": test_url, | ||||
|             "tags": "test-tag-keep-order", | ||||
|             "headers": "", | ||||
|             'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"And 1 this" in res.data  # test-tag-keep-order | ||||
|  | ||||
|     a_tag_filter_check = b'And 1 this' #'#only-this' of tag_filters | ||||
|     # check there is no duplication of tag_filters | ||||
|     assert res.data.count(a_tag_filter_check) == 1, f"duplicated filters didn't removed {res.data.count(a_tag_filter_check)} of {a_tag_filter_check} in {res.data=}" | ||||
|  | ||||
|     a_filter_check = b"And 13 this" # '/html/body/p[13]' | ||||
|     # check there is no duplication of filters | ||||
|     assert res.data.count(a_filter_check) == 1, f"duplicated filters didn't removed. {res.data.count(a_filter_check)} of {a_filter_check} in {res.data=}" | ||||
|  | ||||
|     a_filter_check_not_include = b"And 2 this" # '/html/body/p[2]' | ||||
|     assert a_filter_check_not_include not in res.data | ||||
|  | ||||
|     checklist = [ | ||||
|             b"And 3 this", | ||||
|             b"And 4 this", | ||||
|             b"And 5 this", | ||||
|             b"And 6 this", | ||||
|             b"And 7 this", | ||||
|             b"And 8 this", | ||||
|             b"And 9 this", | ||||
|             b"And 10 this", | ||||
|             b"And 11 this", | ||||
|             b"And 12 this", | ||||
|             b"And 13 this", | ||||
|             b"And 14 this", | ||||
|             b"And 1 this", # result of filter from tag. | ||||
|             ] | ||||
|     # check whether everything a user requested is there | ||||
|     for test in checklist: | ||||
|         assert test in res.data | ||||
|  | ||||
|     # check whether everything a user requested is in order of filters. | ||||
|     n = 0 | ||||
|     for test in checklist: | ||||
|         t_index = res.data[n:].find(test) | ||||
|         # if the text is not searched, return -1. | ||||
|         assert t_index >= 0, f"""failed because {test=} not in {res.data[n:]=} | ||||
| ##################### | ||||
| Looks like some feature changed the order of result of filters. | ||||
| ##################### | ||||
| the {test} appeared before. {test in res.data[:n]=} | ||||
| {res.data[:n]=} | ||||
|         """ | ||||
|         n += t_index + len(test) | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|   | ||||
| @@ -456,7 +456,7 @@ def test_ignore_json_order(client, live_server): | ||||
|  | ||||
| def test_correct_header_detect(client, live_server): | ||||
|     # Like in https://github.com/dgtlmoon/changedetection.io/pull/1593 | ||||
|     # Specify extra html that JSON is sometimes wrapped in - when using Browserless/Puppeteer etc | ||||
|     # Specify extra html that JSON is sometimes wrapped in - when using SockpuppetBrowser / Puppeteer / Playwrightetc | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write('<html><body>{"hello" : 123, "world": 123}') | ||||
|  | ||||
|   | ||||
| @@ -29,7 +29,8 @@ def test_fetch_pdf(client, live_server): | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b'PDF-1.5' not in res.data | ||||
|     # PDF header should not be there (it was converted to text) | ||||
|     assert b'PDF' not in res.data[:10] | ||||
|     assert b'hello world' in res.data | ||||
|  | ||||
|     # So we know if the file changes in other ways | ||||
|   | ||||
| @@ -10,11 +10,11 @@ def test_setup(live_server): | ||||
| # Hard to just add more live server URLs when one test is already running (I think) | ||||
| # So we add our test here (was in a different file) | ||||
| def test_headers_in_request(client, live_server): | ||||
|     #live_server_setup(live_server) | ||||
|     #ve_server_setup(live_server) | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_headers', _external=True) | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL'): | ||||
|         # Because its no longer calling back to localhost but from browserless, set in test-only.yml | ||||
|         # Because its no longer calling back to localhost but from the browser container, set in test-only.yml | ||||
|         test_url = test_url.replace('localhost', 'changedet') | ||||
|  | ||||
|     # Add the test URL twice, we will check | ||||
| @@ -70,16 +70,17 @@ def test_headers_in_request(client, live_server): | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Re #137 -  Examine the JSON index file, it should have only one set of headers entered | ||||
|     # Re #137 -  It should have only one set of headers entered | ||||
|     watches_with_headers = 0 | ||||
|     with open('test-datastore/url-watches.json') as f: | ||||
|         app_struct = json.load(f) | ||||
|         for uuid in app_struct['watching']: | ||||
|             if (len(app_struct['watching'][uuid]['headers'])): | ||||
|     for k, watch in client.application.config.get('DATASTORE').data.get('watching').items(): | ||||
|             if (len(watch['headers'])): | ||||
|                 watches_with_headers += 1 | ||||
|     assert watches_with_headers == 1 | ||||
|  | ||||
|     # 'server' http header was automatically recorded | ||||
|     for k, watch in client.application.config.get('DATASTORE').data.get('watching').items(): | ||||
|         assert 'custom' in watch.get('remote_server_reply') # added in util.py | ||||
|  | ||||
|     # Should be only one with headers set | ||||
|     assert watches_with_headers==1 | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| @@ -88,7 +89,7 @@ def test_body_in_request(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_body', _external=True) | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL'): | ||||
|         # Because its no longer calling back to localhost but from browserless, set in test-only.yml | ||||
|         # Because its no longer calling back to localhost but from the browser container, set in test-only.yml | ||||
|         test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     res = client.post( | ||||
| @@ -180,7 +181,7 @@ def test_method_in_request(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_method', _external=True) | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL'): | ||||
|         # Because its no longer calling back to localhost but from browserless, set in test-only.yml | ||||
|         # Because its no longer calling back to localhost but from the browser container, set in test-only.yml | ||||
|         test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     # Add the test URL twice, we will check | ||||
| @@ -257,7 +258,7 @@ def test_headers_textfile_in_request(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_headers', _external=True) | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL'): | ||||
|         # Because its no longer calling back to localhost but from browserless, set in test-only.yml | ||||
|         # Because its no longer calling back to localhost but from the browser container, set in test-only.yml | ||||
|         test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     print ("TEST URL IS ",test_url) | ||||
|   | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup | ||||
| from .util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
| @@ -34,6 +34,23 @@ def set_modified_swapped_lines(): | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
| def set_modified_swapped_lines_with_extra_text_for_sorting(): | ||||
|     test_return_data = """<html> | ||||
|      <body> | ||||
|      <p> Which is across multiple lines</p>      | ||||
|      <p>Some initial text</p> | ||||
|      <p>   So let's see what happens.</p> | ||||
|      <p>Z last</p> | ||||
|      <p>0 numerical</p> | ||||
|      <p>A uppercase</p> | ||||
|      <p>a lowercase</p>      | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def set_modified_with_trigger_text_response(): | ||||
|     test_return_data = """<html> | ||||
| @@ -49,15 +66,14 @@ def set_modified_with_trigger_text_response(): | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def test_unique_lines_functionality(client, live_server): | ||||
| def test_setup(client, live_server): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     sleep_time_for_fetch_thread = 3 | ||||
| def test_unique_lines_functionality(client, live_server): | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|  | ||||
|     set_original_ignore_response() | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
| @@ -67,7 +83,7 @@ def test_unique_lines_functionality(client, live_server): | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
| @@ -83,12 +99,11 @@ def test_unique_lines_functionality(client, live_server): | ||||
|     #  Make a change | ||||
|     set_modified_swapped_lines() | ||||
|  | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
| @@ -97,7 +112,57 @@ def test_unique_lines_functionality(client, live_server): | ||||
|     # Now set the content which contains the new text and re-ordered existing text | ||||
|     set_modified_with_trigger_text_response() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| def test_sort_lines_functionality(client, live_server): | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     set_modified_swapped_lines_with_extra_text_for_sorting() | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"sort_text_alphabetically": "n", | ||||
|               "url": test_url, | ||||
|               "fetch_backend": "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|  | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|  | ||||
|     res = client.get(url_for("index")) | ||||
|     # Should be a change registered | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert res.data.find(b'0 numerical') < res.data.find(b'Z last') | ||||
|     assert res.data.find(b'A uppercase') < res.data.find(b'Z last') | ||||
|     assert res.data.find(b'Some initial text') < res.data.find(b'Which is across multiple lines') | ||||
|      | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
| @@ -1,4 +1,4 @@ | ||||
| #!/usr/bin/python3 | ||||
| # -*- coding: utf-8 -*- | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| @@ -255,6 +255,69 @@ def test_xpath23_prefix_validation(client, live_server): | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| def test_xpath1_lxml(client, live_server): | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     d = '''<?xml version="1.0" encoding="UTF-8"?> | ||||
|     <rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0"> | ||||
|     	<channel> | ||||
|     		<title>rpilocator.com</title> | ||||
|     		<link>https://rpilocator.com</link> | ||||
|     		<description>Find Raspberry Pi Computers in Stock</description> | ||||
|     		<lastBuildDate>Thu, 19 May 2022 23:27:30 GMT</lastBuildDate> | ||||
|     		<image> | ||||
|     			<url>https://rpilocator.com/favicon.png</url> | ||||
|     			<title>rpilocator.com</title> | ||||
|     			<link>https://rpilocator.com/</link> | ||||
|     			<width>32</width> | ||||
|     			<height>32</height> | ||||
|     		</image> | ||||
|     		<item> | ||||
|     			<title>Stock Alert (UK): RPi CM4</title> | ||||
|     			<foo>something else unrelated</foo> | ||||
|     		</item> | ||||
|     		<item> | ||||
|     			<title>Stock Alert (UK): Big monitorěěěě</title> | ||||
|     			<foo>something else unrelated</foo> | ||||
|     		</item>		 | ||||
|     	</channel> | ||||
|     </rss>'''.encode('utf-8') | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "wb") as f: | ||||
|         f.write(d) | ||||
|  | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"include_filters": "xpath1://title/text()", "url": test_url, "tags": "", "headers": "", | ||||
|               'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     ##### #2312 | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'_ElementStringResult' not in res.data # tested with 5.1.1 when it was removed and 5.1.0 | ||||
|     assert b'Exception' not in res.data | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"rpilocator.com" in res.data  # in selector | ||||
|     assert "Stock Alert (UK): Big monitorěěěě".encode('utf-8') in res.data  # not in selector | ||||
|  | ||||
|     ##### | ||||
|  | ||||
|  | ||||
| def test_xpath1_validation(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|   | ||||
| @@ -175,12 +175,16 @@ def live_server_setup(live_server): | ||||
|     @live_server.app.route('/test-headers') | ||||
|     def test_headers(): | ||||
|  | ||||
|         output= [] | ||||
|         output = [] | ||||
|  | ||||
|         for header in request.headers: | ||||
|              output.append("{}:{}".format(str(header[0]),str(header[1])   )) | ||||
|             output.append("{}:{}".format(str(header[0]), str(header[1]))) | ||||
|  | ||||
|         return "\n".join(output) | ||||
|         content = "\n".join(output) | ||||
|  | ||||
|         resp = make_response(content, 200) | ||||
|         resp.headers['server'] = 'custom' | ||||
|         return resp | ||||
|  | ||||
|     # Just return the body in the request | ||||
|     @live_server.app.route('/test-body', methods=['POST', 'GET']) | ||||
| @@ -238,5 +242,28 @@ def live_server_setup(live_server): | ||||
|             resp.headers['Content-Type'] = 'application/pdf' | ||||
|             return resp | ||||
|  | ||||
|     @live_server.app.route('/test-interactive-html-endpoint') | ||||
|     def test_interactive_html_endpoint(): | ||||
|         header_text="" | ||||
|         for k,v in request.headers.items(): | ||||
|             header_text += f"{k}: {v}<br>" | ||||
|  | ||||
|         resp = make_response(f""" | ||||
|         <html> | ||||
|           <body> | ||||
|           Primitive JS check for <pre>changedetectionio/tests/visualselector/test_fetch_data.py</pre> | ||||
|             <p id="remove">This text should be removed</p> | ||||
|               <form onsubmit="event.preventDefault();"> | ||||
|             <!-- obfuscated text so that we dont accidentally get a false positive due to conversion of the source :) ---> | ||||
|                 <button name="test-button" onclick="getElementById('remove').remove();getElementById('some-content').innerHTML = atob('SSBzbWVsbCBKYXZhU2NyaXB0IGJlY2F1c2UgdGhlIGJ1dHRvbiB3YXMgcHJlc3NlZCE=')">Click here</button> | ||||
|                 <div id=some-content></div> | ||||
|                 <pre> | ||||
|                 {header_text.lower()} | ||||
|                 </pre> | ||||
|               </body> | ||||
|          </html>""", 200) | ||||
|         resp.headers['Content-Type'] = 'text/html' | ||||
|         return resp | ||||
|  | ||||
|     live_server.start() | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,5 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| import os | ||||
| from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
| @@ -8,15 +7,19 @@ from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_cli | ||||
| def test_setup(client, live_server): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|  | ||||
| # Add a site in paused mode, add an invalid filter, we should still have visual selector data ready | ||||
| def test_visual_selector_content_ready(client, live_server): | ||||
|  | ||||
|     import os | ||||
|     import json | ||||
|  | ||||
|     assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|  | ||||
|     # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url | ||||
|     test_url = "https://changedetection.io/ci-test/test-runjs.html" | ||||
|     test_url = url_for('test_interactive_html_endpoint', _external=True) | ||||
|     test_url = test_url.replace('localhost.localdomain', 'cdio') | ||||
|     test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("form_quick_watch_add"), | ||||
| @@ -24,28 +27,31 @@ def test_visual_selector_content_ready(client, live_server): | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Watch added in Paused state, saving will unpause" in res.data | ||||
|  | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first", unpause_on_save=1), | ||||
|         url_for("edit_page", uuid=uuid, unpause_on_save=1), | ||||
|         data={ | ||||
|               "url": test_url, | ||||
|               "tags": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_webdriver", | ||||
|               'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();' | ||||
|             "url": test_url, | ||||
|             "tags": "", | ||||
|             # For now, cookies doesnt work in headers because it must be a full cookiejar object | ||||
|             'headers': "testheader: yes\buser-agent: MyCustomAgent", | ||||
|             'fetch_backend': "html_webdriver", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"unpaused" in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|  | ||||
|     # Check the JS execute code before extract worked | ||||
|  | ||||
|     assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n >= 1, "Watch history had atleast 1 (everything fetched OK)" | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         url_for("preview_page", uuid=uuid), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b'I smell JavaScript' in res.data | ||||
|     assert b"testheader: yes" in res.data | ||||
|     assert b"user-agent: mycustomagent" in res.data | ||||
|  | ||||
|  | ||||
|     assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist" | ||||
|     assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist" | ||||
| @@ -75,30 +81,33 @@ def test_visual_selector_content_ready(client, live_server): | ||||
|  | ||||
| def test_basic_browserstep(client, live_server): | ||||
|  | ||||
|     assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|     #live_server_setup(live_server) | ||||
|     assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|  | ||||
|     # Add our URL to the import page, because the docker container (playwright/selenium) wont be able to connect to our usual test url | ||||
|     test_url = "https://changedetection.io/ci-test/test-runjs.html" | ||||
|     test_url = url_for('test_interactive_html_endpoint', _external=True) | ||||
|     test_url = test_url.replace('localhost.localdomain', 'cdio') | ||||
|     test_url = test_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Watch added in Paused state, saving will unpause" in res.data | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first", unpause_on_save=1), | ||||
|         data={ | ||||
|               "url": test_url, | ||||
|               "tags": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_webdriver", | ||||
|               'browser_steps-0-operation': 'Goto site', | ||||
|               'browser_steps-1-operation': 'Click element', | ||||
|               'browser_steps-1-selector': 'button[name=test-button]', | ||||
|               'browser_steps-1-optional_value': '' | ||||
|             "url": test_url, | ||||
|             "tags": "", | ||||
|             'fetch_backend': "html_webdriver", | ||||
|             'browser_steps-0-operation': 'Goto site', | ||||
|             'browser_steps-1-operation': 'Click element', | ||||
|             'browser_steps-1-selector': 'button[name=test-button]', | ||||
|             'browser_steps-1-optional_value': '', | ||||
|             # For now, cookies doesnt work in headers because it must be a full cookiejar object | ||||
|             'headers': "testheader: yes\buser-agent: MyCustomAgent", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -106,6 +115,9 @@ def test_basic_browserstep(client, live_server): | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|     assert live_server.app.config['DATASTORE'].data['watching'][uuid].history_n >= 1, "Watch history had atleast 1 (everything fetched OK)" | ||||
|  | ||||
|     assert b"This text should be removed" not in res.data | ||||
|  | ||||
|     # Check HTML conversion detected and workd | ||||
|     res = client.get( | ||||
| @@ -115,13 +127,19 @@ def test_basic_browserstep(client, live_server): | ||||
|     assert b"This text should be removed" not in res.data | ||||
|     assert b"I smell JavaScript because the button was pressed" in res.data | ||||
|  | ||||
|     assert b"testheader: yes" in res.data | ||||
|     assert b"user-agent: mycustomagent" in res.data | ||||
|  | ||||
|     four_o_four_url =  url_for('test_endpoint', status_code=404, _external=True) | ||||
|     four_o_four_url = four_o_four_url.replace('localhost.localdomain', 'cdio') | ||||
|     four_o_four_url = four_o_four_url.replace('localhost', 'cdio') | ||||
|  | ||||
|     # now test for 404 errors | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid=uuid, unpause_on_save=1), | ||||
|         data={ | ||||
|               "url": "https://changedetection.io/404", | ||||
|               "url": four_o_four_url, | ||||
|               "tags": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_webdriver", | ||||
|               'browser_steps-0-operation': 'Goto site', | ||||
|               'browser_steps-1-operation': 'Click element', | ||||
|   | ||||
| @@ -2,8 +2,8 @@ import os | ||||
| import threading | ||||
| import queue | ||||
| import time | ||||
|  | ||||
| from changedetectionio import content_fetcher, html_tools | ||||
| from . import content_fetchers | ||||
| from changedetectionio import html_tools | ||||
| from .processors.text_json_diff import FilterNotFoundInResponse | ||||
| from .processors.restock_diff import UnableToExtractRestockData | ||||
|  | ||||
| @@ -31,6 +31,8 @@ class update_worker(threading.Thread): | ||||
|         dates = [] | ||||
|         trigger_text = '' | ||||
|  | ||||
|         now = time.time() | ||||
|  | ||||
|         if watch: | ||||
|             watch_history = watch.history | ||||
|             dates = list(watch_history.keys()) | ||||
| @@ -72,13 +74,14 @@ class update_worker(threading.Thread): | ||||
|             'diff_full': diff.render_diff(prev_snapshot, current_snapshot, include_equal=True, line_feed_sep=line_feed_sep), | ||||
|             'diff_patch': diff.render_diff(prev_snapshot, current_snapshot, line_feed_sep=line_feed_sep, patch_format=True), | ||||
|             'diff_removed': diff.render_diff(prev_snapshot, current_snapshot, include_added=False, line_feed_sep=line_feed_sep), | ||||
|             'notification_timestamp': now, | ||||
|             'screenshot': watch.get_screenshot() if watch and watch.get('notification_screenshot') else None, | ||||
|             'triggered_text': triggered_text, | ||||
|             'uuid': watch.get('uuid') if watch else None, | ||||
|             'watch_url': watch.get('url') if watch else None, | ||||
|         }) | ||||
|  | ||||
|         logger.debug(">> SENDING NOTIFICATION") | ||||
|         logger.trace(f"Main rendered notification placeholders (diff_added etc) calculated in {time.time()-now:.3f}s") | ||||
|         logger.debug("Queued notification for sending") | ||||
|         notification_q.put(n_object) | ||||
|  | ||||
|     # Prefer - Individual watch settings > Tag settings >  Global settings (in that order) | ||||
| @@ -147,6 +150,10 @@ class update_worker(threading.Thread): | ||||
|         queued = False | ||||
|         if n_object and n_object.get('notification_urls'): | ||||
|             queued = True | ||||
|  | ||||
|             count = watch.get('notification_alert_count', 0) + 1 | ||||
|             self.datastore.update_watch(uuid=watch_uuid, update_obj={'notification_alert_count': count}) | ||||
|  | ||||
|             self.queue_notification_for_watch(notification_q=self.notification_q, n_object=n_object, watch=watch) | ||||
|  | ||||
|         return queued | ||||
| @@ -283,7 +290,7 @@ class update_worker(threading.Thread): | ||||
|                         logger.critical(f"File permission error updating file, watch: {uuid}") | ||||
|                         logger.critical(str(e)) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetcher.ReplyWithContentButNoText as e: | ||||
|                     except content_fetchers.exceptions.ReplyWithContentButNoText as e: | ||||
|                         # Totally fine, it's by choice - just continue on, nothing more to care about | ||||
|                         # Page had elements/content but no renderable text | ||||
|                         # Backend (not filters) gave zero output | ||||
| @@ -305,13 +312,15 @@ class update_worker(threading.Thread): | ||||
|                             self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot) | ||||
|                         process_changedetection_results = False | ||||
|  | ||||
|                     except content_fetcher.Non200ErrorCodeReceived as e: | ||||
|                     except content_fetchers.exceptions.Non200ErrorCodeReceived as e: | ||||
|                         if e.status_code == 403: | ||||
|                             err_text = "Error - 403 (Access denied) received" | ||||
|                         elif e.status_code == 404: | ||||
|                             err_text = "Error - 404 (Page not found) received" | ||||
|                         elif e.status_code == 407: | ||||
|                             err_text = "Error - 407 (Proxy authentication required) received, did you need a username and password for the proxy?" | ||||
|                         elif e.status_code == 500: | ||||
|                             err_text = "Error - 500 (Internal server Error) received" | ||||
|                             err_text = "Error - 500 (Internal server error) received from the web site" | ||||
|                         else: | ||||
|                             err_text = "Error - Request returned a HTTP error code {}".format(str(e.status_code)) | ||||
|  | ||||
| @@ -349,13 +358,24 @@ class update_worker(threading.Thread): | ||||
|  | ||||
|                         process_changedetection_results = False | ||||
|  | ||||
|                     except content_fetcher.checksumFromPreviousCheckWasTheSame as e: | ||||
|                     except content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame as e: | ||||
|                         # Yes fine, so nothing todo, don't continue to process. | ||||
|                         process_changedetection_results = False | ||||
|                         changed_detected = False | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': False}) | ||||
|  | ||||
|                     except content_fetcher.BrowserStepsStepException as e: | ||||
|                     except content_fetchers.exceptions.BrowserConnectError as e: | ||||
|                         self.datastore.update_watch(uuid=uuid, | ||||
|                                                     update_obj={'last_error': e.msg | ||||
|                                                                 } | ||||
|                                                     ) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetchers.exceptions.BrowserFetchTimedOut as e: | ||||
|                         self.datastore.update_watch(uuid=uuid, | ||||
|                                                     update_obj={'last_error': e.msg | ||||
|                                                                 } | ||||
|                                                     ) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetchers.exceptions.BrowserStepsStepException as e: | ||||
|  | ||||
|                         if not self.datastore.data['watching'].get(uuid): | ||||
|                             continue | ||||
| @@ -397,25 +417,25 @@ class update_worker(threading.Thread): | ||||
|  | ||||
|                         process_changedetection_results = False | ||||
|  | ||||
|                     except content_fetcher.EmptyReply as e: | ||||
|                     except content_fetchers.exceptions.EmptyReply as e: | ||||
|                         # Some kind of custom to-str handler in the exception handler that does this? | ||||
|                         err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code) | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            'last_check_status': e.status_code}) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetcher.ScreenshotUnavailable as e: | ||||
|                         err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'" | ||||
|                     except content_fetchers.exceptions.ScreenshotUnavailable as e: | ||||
|                         err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'" | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            'last_check_status': e.status_code}) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetcher.JSActionExceptions as e: | ||||
|                     except content_fetchers.exceptions.JSActionExceptions as e: | ||||
|                         err_text = "Error running JS Actions - Page request - "+e.message | ||||
|                         if e.screenshot: | ||||
|                             self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True) | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            'last_check_status': e.status_code}) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetcher.PageUnloadable as e: | ||||
|                     except content_fetchers.exceptions.PageUnloadable as e: | ||||
|                         err_text = "Page request from server didnt respond correctly" | ||||
|                         if e.message: | ||||
|                             err_text = "{} - {}".format(err_text, e.message) | ||||
| @@ -427,6 +447,12 @@ class update_worker(threading.Thread): | ||||
|                                                                            'last_check_status': e.status_code, | ||||
|                                                                            'has_ldjson_price_data': None}) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetchers.exceptions.BrowserStepsInUnsupportedFetcher as e: | ||||
|                         err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher." | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text}) | ||||
|                         process_changedetection_results = False | ||||
|                         logger.error(f"Exception (BrowserStepsInUnsupportedFetcher) reached processing watch UUID: {uuid}") | ||||
|  | ||||
|                     except UnableToExtractRestockData as e: | ||||
|                         # Usually when fetcher.instock_data returns empty | ||||
|                         logger.error(f"Exception (UnableToExtractRestockData) reached processing watch UUID: {uuid}") | ||||
| @@ -436,7 +462,7 @@ class update_worker(threading.Thread): | ||||
|                     except Exception as e: | ||||
|                         logger.error(f"Exception reached processing watch UUID: {uuid}") | ||||
|                         logger.error(str(e)) | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Exception: " + str(e)}) | ||||
|                         # Other serious error | ||||
|                         process_changedetection_results = False | ||||
| #                        import traceback | ||||
| @@ -471,13 +497,13 @@ class update_worker(threading.Thread): | ||||
|  | ||||
|                             # A change was detected | ||||
|                             if changed_detected: | ||||
|                                 logger.debug(f">> Change detected in UUID {uuid} - {watch['url']}") | ||||
|  | ||||
|                                 # Notifications should only trigger on the second time (first time, we gather the initial snapshot) | ||||
|                                 if watch.history_n >= 2: | ||||
|                                     logger.info(f"Change detected in UUID {uuid} - {watch['url']}") | ||||
|                                     if not self.datastore.data['watching'][uuid].get('notification_muted'): | ||||
|                                         self.send_content_changed_notification(watch_uuid=uuid) | ||||
|  | ||||
|                                 else: | ||||
|                                     logger.info(f"Change triggered in UUID {uuid} due to first history saving (no notifications sent) - {watch['url']}") | ||||
|  | ||||
|                         except Exception as e: | ||||
|                             # Catch everything possible here, so that if a worker crashes, we don't lose it until restart! | ||||
| @@ -488,6 +514,16 @@ class update_worker(threading.Thread): | ||||
|                     if self.datastore.data['watching'].get(uuid): | ||||
|                         # Always record that we atleast tried | ||||
|                         count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1 | ||||
|  | ||||
|                         # Record the 'server' header reply, can be used for actions in the future like cloudflare/akamai workarounds | ||||
|                         try: | ||||
|                             server_header = update_handler.fetcher.headers.get('server', '').strip().lower()[:255] | ||||
|                             self.datastore.update_watch(uuid=uuid, | ||||
|                                                         update_obj={'remote_server_reply': server_header} | ||||
|                                                         ) | ||||
|                         except Exception as e: | ||||
|                             pass | ||||
|  | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), | ||||
|                                                                            'last_checked': round(time.time()), | ||||
|                                                                            'check_count': count | ||||
|   | ||||
| @@ -30,7 +30,7 @@ services: | ||||
|   #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy | ||||
|   # | ||||
|   #       Alternative Playwright URL, do not use "'s or 's! | ||||
|   #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true | ||||
|   #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000 | ||||
|   # | ||||
|   #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password | ||||
|   # | ||||
| @@ -71,30 +71,23 @@ services: | ||||
| #            condition: service_started | ||||
|  | ||||
|      # Used for fetching pages via Playwright+Chrome where you need Javascript support. | ||||
|      # Note: Playwright/browserless not supported on ARM type devices (rPi etc) | ||||
|      # RECOMMENDED FOR FETCHING PAGES WITH CHROME | ||||
| #    playwright-chrome: | ||||
| #        hostname: playwright-chrome | ||||
| #        image: browserless/chrome:1.60-chrome-stable | ||||
| #        image: dgtlmoon/sockpuppetbrowser:latest | ||||
| #        cap_add: | ||||
| #            - SYS_ADMIN | ||||
| ## SYS_ADMIN might be too much, but it can be needed on your platform https://github.com/puppeteer/puppeteer/blob/main/docs/troubleshooting.md#running-puppeteer-on-gitlabci | ||||
| #        restart: unless-stopped | ||||
| #        environment: | ||||
| #            - SCREEN_WIDTH=1920 | ||||
| #            - SCREEN_HEIGHT=1024 | ||||
| #            - SCREEN_DEPTH=16 | ||||
| #            - ENABLE_DEBUGGER=false | ||||
| #            - PREBOOT_CHROME=true | ||||
| #            - CONNECTION_TIMEOUT=300000 | ||||
| #            - MAX_CONCURRENT_SESSIONS=10 | ||||
| #            - CHROME_REFRESH_TIME=600000 | ||||
| #            - DEFAULT_BLOCK_ADS=true | ||||
| #            - DEFAULT_STEALTH=true | ||||
| # | ||||
| #             Ignore HTTPS errors, like for self-signed certs | ||||
| #            - DEFAULT_IGNORE_HTTPS_ERRORS=true | ||||
| # | ||||
| #            - MAX_CONCURRENT_CHROME_PROCESSES=10 | ||||
|  | ||||
|      # Used for fetching pages via Playwright+Chrome where you need Javascript support. | ||||
|      # Note: works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector) and other issues | ||||
|      # Note: Works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector) | ||||
|      #       Does not report status codes (200, 404, 403) and other issues | ||||
| #    browser-chrome: | ||||
| #        hostname: browser-chrome | ||||
| #        image: selenium/standalone-chrome:4 | ||||
|   | ||||
							
								
								
									
										
											BIN
										
									
								
								docs/chrome-extension-screenshot.png
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								docs/chrome-extension-screenshot.png
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 125 KiB | 
| @@ -1,4 +1,7 @@ | ||||
| eventlet>=0.33.3 # related to dnspython fixes | ||||
| # Used by Pyppeteer | ||||
| pyee | ||||
|  | ||||
| eventlet==0.33.3 # related to dnspython fixes | ||||
| feedgen~=0.9 | ||||
| flask-compress | ||||
| # 0.6.3 included compatibility fix for werkzeug 3.x (2.x had deprecation of url handlers) | ||||
| @@ -6,6 +9,7 @@ flask-login>=0.6.3 | ||||
| flask-paginate | ||||
| flask_expects_json~=1.7 | ||||
| flask_restful | ||||
| flask_cors # For the Chrome extension to operate | ||||
| flask_wtf~=1.2 | ||||
| flask~=2.3 | ||||
| inscriptis~=2.2 | ||||
| @@ -19,21 +23,25 @@ validators~=0.21 | ||||
| brotli~=1.0 | ||||
| requests[socks] | ||||
|  | ||||
| urllib3>1.26 | ||||
| urllib3==1.26.18 | ||||
| chardet>2.3.0 | ||||
|  | ||||
| wtforms~=3.0 | ||||
| jsonpath-ng~=1.5.3 | ||||
|  | ||||
| dnspython~=2.4 # related to eventlet fixes | ||||
| # Pinned: module 'eventlet.green.select' has no attribute 'epoll' | ||||
| # https://github.com/eventlet/eventlet/issues/805#issuecomment-1640463482 | ||||
| dnspython==2.3.0 # related to eventlet fixes | ||||
|  | ||||
| # jq not available on Windows so must be installed manually | ||||
|  | ||||
| # Notification library | ||||
| apprise~=1.7.1 | ||||
| apprise~=1.7.4 | ||||
|  | ||||
| # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315 | ||||
| paho-mqtt | ||||
| # and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible | ||||
| # use v1.x due to https://github.com/eclipse/paho.mqtt.python/issues/814 | ||||
| paho-mqtt < 2.0.0 | ||||
|  | ||||
| # This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1" | ||||
| # so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found" | ||||
| @@ -44,10 +52,10 @@ cryptography~=3.4 | ||||
| beautifulsoup4 | ||||
|  | ||||
| # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe. | ||||
| lxml | ||||
| lxml >=4.8.0,<6 | ||||
|  | ||||
| # XPath 2.0-3.1 support | ||||
| elementpath | ||||
| # XPath 2.0-3.1 support - 4.2.0 broke something? | ||||
| elementpath==4.1.5 | ||||
|  | ||||
| selenium~=4.14.0 | ||||
|  | ||||
| @@ -66,6 +74,9 @@ jq~=1.3; python_version >= "3.8" and sys_platform == "linux" | ||||
| pillow | ||||
| # playwright is installed at Dockerfile build time because it's not available on all platforms | ||||
|  | ||||
| # experimental release | ||||
| pyppeteer-ng==2.0.0rc5 | ||||
|  | ||||
| # Include pytest, so if theres a support issue we can ask them to run these tests on their setup | ||||
| pytest ~=7.2 | ||||
| pytest-flask ~=1.2 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user