Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-10-31 06:37:41 +00:00)

Compare commits: auto-sugge...ipv6

42 Commits
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | c81cda08f7 | ||
|   | 5d4dee2a1e | ||
|   | 4a86637f2d | ||
|   | 9b4b5cae1c | ||
|   | fc6424c39e | ||
|   | 285a65ced4 | ||
|   | 467cd099e9 | ||
|   | 794a6d59de | ||
|   | 5f997e5d1b | ||
|   | e412fd6146 | ||
|   | c950ab5219 | ||
|   | fcbda7829a | ||
|   | f0966eb23a | ||
|   | e4fb5ab4da | ||
|   | e99f07a51d | ||
|   | 08ee223b5f | ||
|   | 572f9b8a31 | ||
|   | fcfd1b5e10 | ||
|   | 0790dd555e | ||
|   | 0b20dc7712 | ||
|   | 13c4121f52 | ||
|   | e8e176f3bd | ||
|   | 7a1d2d924e | ||
|   | c3731cf055 | ||
|   | a287e5a86c | ||
|   | 235535c327 | ||
|   | 44dc62da2d | ||
|   | 0c380c170f | ||
|   | b7a2501d64 | ||
|   | e970fef991 | ||
|   | b76148a0f4 | ||
|   | 93cc30437f | ||
|   | 6562d6e0d4 | ||
|   | 6c217cc3b6 | ||
|   | f30cdf0674 | ||
|   | 14da0646a7 | ||
|   | b413cdecc7 | ||
|   | 7bf52d9275 | ||
|   | 09e6624afd | ||
|   | b58fd995b5 | ||
|   | 366baaf322 | ||
|   | 6c1b9bcc5c | ||

.github/workflows/containers.yml (1 change, vendored)
							| @@ -50,7 +50,6 @@ jobs: | ||||
|           python -m pip install --upgrade pip | ||||
|           pip install flake8 pytest | ||||
|           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi | ||||
|           if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi | ||||
|  | ||||
|       - name: Create release metadata | ||||
|         run: | | ||||
|   | ||||
							
								
								
									
.github/workflows/pypi.yml (6 changes, vendored)
							| @@ -19,12 +19,6 @@ jobs: | ||||
|         with: | ||||
|           python-version: 3.9 | ||||
|  | ||||
| #      - name: Install dependencies | ||||
| #        run: | | ||||
| #          python -m pip install --upgrade pip | ||||
| #          pip install flake8 pytest | ||||
| #          if [ -f requirements.txt ]; then pip install -r requirements.txt; fi | ||||
| #          if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi | ||||
|  | ||||
|       - name: Test that pip builds without error | ||||
|         run: | | ||||
|   | ||||
							
								
								
									
.github/workflows/test-only.yml (66 changes, vendored)
							| @@ -8,32 +8,70 @@ jobs: | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|       - uses: actions/checkout@v2 | ||||
|       - name: Set up Python 3.9 | ||||
|  | ||||
|       # Mainly just for link/flake8 | ||||
|       - name: Set up Python 3.10 | ||||
|         uses: actions/setup-python@v2 | ||||
|         with: | ||||
|           python-version: 3.9 | ||||
|  | ||||
|       - name: Install dependencies | ||||
|         run: | | ||||
|           python -m pip install --upgrade pip | ||||
|           pip install flake8 pytest | ||||
|           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi | ||||
|           if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi | ||||
|           python-version: '3.10' | ||||
|  | ||||
|       - name: Lint with flake8 | ||||
|         run: | | ||||
|           pip3 install flake8 | ||||
|           # stop the build if there are Python syntax errors or undefined names | ||||
|           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics | ||||
|           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide | ||||
|           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics | ||||
|  | ||||
|       - name: Unit tests | ||||
|       - name: Spin up ancillary testable services | ||||
|         run: | | ||||
|           python3 -m unittest changedetectionio.tests.unit.test_notification_diff | ||||
|            | ||||
|           docker network create changedet-network | ||||
|  | ||||
|       - name: Test with pytest | ||||
|           # Selenium+browserless | ||||
|           docker run --network changedet-network -d --hostname selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome-debug:3.141.59 | ||||
|           docker run --network changedet-network -d --hostname browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.53-chrome-stable | ||||
|  | ||||
|       - name: Build changedetection.io container for testing | ||||
|         run: |          | ||||
|           # Build a changedetection.io container and start testing inside | ||||
|           docker build . -t test-changedetectionio | ||||
|  | ||||
|       - name: Test built container with pytest | ||||
|         run: | | ||||
|           # Each test is totally isolated and performs its own cleanup/reset | ||||
|           cd changedetectionio; ./run_all_tests.sh | ||||
|            | ||||
|           # Unit tests | ||||
|           docker run test-changedetectionio  bash -c 'python3 -m unittest changedetectionio.tests.unit.test_notification_diff' | ||||
|            | ||||
|           # All tests | ||||
|           docker run --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio && ./run_basic_tests.sh' | ||||
|  | ||||
|       - name: Test built container selenium+browserless/playwright | ||||
|         run: | | ||||
|            | ||||
|           # Selenium fetch | ||||
|           docker run -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py' | ||||
|            | ||||
|           # Playwright/Browserless fetch | ||||
|           docker run -e "PLAYWRIGHT_DRIVER_URL=ws://browserless:3000" --network changedet-network  test-changedetectionio  bash -c 'cd changedetectionio;pytest tests/fetchers/test_content.py && pytest tests/test_errorhandling.py && pytest tests/visualselector/test_fetch_data.py' | ||||
|  | ||||
|       - name: Test proxy interaction | ||||
|         run: | | ||||
|           cd changedetectionio | ||||
|           ./run_proxy_tests.sh | ||||
|           cd .. | ||||
|  | ||||
|       - name: Test changedetection.io container starts+runs basically without error | ||||
|         run: | | ||||
|           docker run -p 5556:5000 -d test-changedetectionio | ||||
|           sleep 3 | ||||
|           # Should return 0 (no error) when grep finds it | ||||
|           curl -s http://localhost:5556 |grep -q checkbox-uuid | ||||
|           curl -s http://localhost:5556/rss|grep -q rss-specification | ||||
|           # and IPv6 | ||||
|           curl -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|           curl -s -g -6 "http://[::1]:5556/rss"|grep -q rss-specification | ||||
|  | ||||
| #export WEBDRIVER_URL=http://localhost:4444/wd/hub | ||||
| #pytest tests/fetchers/test_content.py | ||||
| #pytest tests/test_errorhandling.py | ||||
| @@ -7,9 +7,3 @@ Otherwise, it's always best to PR into the `dev` branch. | ||||
| Please be sure that all new functionality has a matching test! | ||||
|  | ||||
| Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example | ||||
|  | ||||
| ``` | ||||
| pip3 install -r requirements-dev | ||||
| ``` | ||||
|  | ||||
| this is from https://github.com/dgtlmoon/changedetection.io/blob/master/requirements-dev.txt | ||||
|   | ||||
							
								
								
									
Dockerfile (26 changes)
							| @@ -1,7 +1,7 @@ | ||||
| # pip dependencies install stage | ||||
| FROM python:3.8-slim as builder | ||||
| FROM python:3.10-slim as builder | ||||
|  | ||||
| # rustc compiler would be needed on ARM type devices but theres an issue with some deps not building.. | ||||
| # See `cryptography` pin comment in requirements.txt | ||||
| ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 | ||||
|  | ||||
| RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
| @@ -29,22 +29,16 @@ RUN pip install --target=/dependencies playwright~=1.27.1 \ | ||||
|     || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." | ||||
|  | ||||
| # Final image stage | ||||
| FROM python:3.8-slim | ||||
| FROM python:3.10-slim | ||||
|  | ||||
| # Actual packages needed at runtime, usually due to the notification (apprise) backend | ||||
| # rustc compiler would be needed on ARM type devices but theres an issue with some deps not building.. | ||||
| ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 | ||||
|  | ||||
| # Re #93, #73, excluding rustc (adds another 430Mb~) | ||||
| RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     g++ \ | ||||
|     gcc \ | ||||
|     libc-dev \ | ||||
|     libffi-dev \ | ||||
|     libjpeg-dev \ | ||||
|     libssl-dev \ | ||||
|     libxslt-dev \ | ||||
|     zlib1g-dev | ||||
|     libssl1.1 \ | ||||
|     libxslt1.1 \ | ||||
|     # For pdftohtml | ||||
|     poppler-utils \ | ||||
|     zlib1g \ | ||||
|     && apt-get clean && rm -rf /var/lib/apt/lists/* | ||||
|  | ||||
|  | ||||
| # https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops | ||||
| ENV PYTHONUNBUFFERED=1 | ||||
|   | ||||
| @@ -1,9 +1,10 @@ | ||||
| recursive-include changedetectionio/api * | ||||
| recursive-include changedetectionio/templates * | ||||
| recursive-include changedetectionio/static * | ||||
| recursive-include changedetectionio/blueprint * | ||||
| recursive-include changedetectionio/model * | ||||
| recursive-include changedetectionio/tests * | ||||
| recursive-include changedetectionio/res * | ||||
| recursive-include changedetectionio/static * | ||||
| recursive-include changedetectionio/templates * | ||||
| recursive-include changedetectionio/tests * | ||||
| prune changedetectionio/static/package-lock.json | ||||
| prune changedetectionio/static/styles/node_modules | ||||
| prune changedetectionio/static/styles/package-lock.json | ||||
|   | ||||
							
								
								
									
README.md (24 changes)
							| @@ -43,6 +43,7 @@ Requires Playwright to be enabled. | ||||
|  | ||||
| - Products and services have a change in pricing | ||||
| - _Out of stock notification_ and _Back In stock notification_ | ||||
| - Monitor and track PDF file changes, know when a PDF file has text changes. | ||||
| - Governmental department updates (changes are often only on their websites) | ||||
| - New software releases, security advisories when you're not on their mailing list. | ||||
| - Festivals with changes | ||||
| @@ -68,6 +69,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W | ||||
| - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions! | ||||
| - Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq | ||||
| - Switch between fast non-JS and Chrome JS based "fetchers" | ||||
| - Track changes in PDF files (Monitor text changed in the PDF, Also monitor PDF filesize and checksums) | ||||
| - Easily specify how often a site should be checked | ||||
| - Execute JS before extracting text (Good for logging in, see examples in the UI!) | ||||
| - Override Request Headers, Specify `POST` or `GET` and other methods | ||||
| @@ -187,11 +189,29 @@ When you enable a `json:` or `jq:` filter, you can even automatically extract an | ||||
| <html> | ||||
| ... | ||||
| <script type="application/ld+json"> | ||||
|   {"@context":"http://schema.org","@type":"Product","name":"Nan Optipro Stage 1 Baby Formula  800g","price": 23.50 } | ||||
|  | ||||
| { | ||||
|    "@context":"http://schema.org/", | ||||
|    "@type":"Product", | ||||
|    "offers":{ | ||||
|       "@type":"Offer", | ||||
|       "availability":"http://schema.org/InStock", | ||||
|       "price":"3949.99", | ||||
|       "priceCurrency":"USD", | ||||
|       "url":"https://www.newegg.com/p/3D5-000D-001T1" | ||||
|    }, | ||||
|    "description":"Cobratype King Cobra Hero Desktop Gaming PC", | ||||
|    "name":"Cobratype King Cobra Hero Desktop Gaming PC", | ||||
|    "sku":"3D5-000D-001T1", | ||||
|    "itemCondition":"NewCondition" | ||||
| } | ||||
| </script> | ||||
| ```   | ||||
|  | ||||
| `json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure | ||||
| `json:$..price` or `jq:..price` would give `3949.99`, or you can extract the whole structure (use a JSONpath test website to validate with) | ||||
|  | ||||
| The application also supports notifying you that it can follow this information automatically | ||||
|  | ||||
|  | ||||
| ## Proxy Configuration | ||||
|  | ||||
|   | ||||
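The README snippet above shows why `json:$..price` / `jq:..price` reach the nested `offers.price` value: the descendant operator searches every level of the document. For clarity, a minimal dependency-free Python sketch of that lookup (the `find_prices` helper and the sample document are illustrative, not part of the project):

```python
import json

# Hypothetical helper that mimics a JSONPath descendant query such as json:$..price
def find_prices(node, key="price"):
    """Recursively collect every value stored under `key`, at any depth."""
    found = []
    if isinstance(node, dict):
        for k, v in node.items():
            if k == key:
                found.append(v)
            found.extend(find_prices(v, key))
    elif isinstance(node, list):
        for item in node:
            found.extend(find_prices(item, key))
    return found

ldjson = """
{
   "@context": "http://schema.org/",
   "@type": "Product",
   "offers": {"@type": "Offer", "price": "3949.99", "priceCurrency": "USD"}
}
"""

print(find_prices(json.loads(ldjson)))  # ['3949.99'], the same value json:$..price selects
```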
| @@ -10,6 +10,7 @@ import threading | ||||
| import time | ||||
| import timeago | ||||
|  | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from copy import deepcopy | ||||
| from distutils.util import strtobool | ||||
| from feedgen.feed import FeedGenerator | ||||
| @@ -35,7 +36,7 @@ from flask_wtf import CSRFProtect | ||||
| from changedetectionio import html_tools | ||||
| from changedetectionio.api import api_v1 | ||||
|  | ||||
| __version__ = '0.39.22.1' | ||||
| __version__ = '0.40.0.4' | ||||
|  | ||||
| datastore = None | ||||
|  | ||||
| @@ -404,19 +405,21 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 sorted_watches.append(watch) | ||||
|  | ||||
|         existing_tags = datastore.get_all_tags() | ||||
|  | ||||
|         form = forms.quickWatchForm(request.form) | ||||
|         output = render_template("watch-overview.html", | ||||
|                                  form=form, | ||||
|                                  watches=sorted_watches, | ||||
|                                  tags=existing_tags, | ||||
|         output = render_template( | ||||
|             "watch-overview.html", | ||||
|                                  # Don't link to hosting when we're on the hosting environment | ||||
|                                  active_tag=limit_tag, | ||||
|                                  app_rss_token=datastore.data['settings']['application']['rss_access_token'], | ||||
|                                  has_unviewed=datastore.has_unviewed, | ||||
|                                  # Don't link to hosting when we're on the hosting environment | ||||
|                                  hosted_sticky=os.getenv("SALTED_PASS", False) == False, | ||||
|                                  form=form, | ||||
|                                  guid=datastore.data['app_guid'], | ||||
|                                  queued_uuids=[uuid for p,uuid in update_q.queue]) | ||||
|                                  has_proxies=datastore.proxy_list, | ||||
|                                  has_unviewed=datastore.has_unviewed, | ||||
|                                  hosted_sticky=os.getenv("SALTED_PASS", False) == False, | ||||
|                                  queued_uuids=[q_uuid.item['uuid'] for q_uuid in update_q.queue], | ||||
|                                  tags=existing_tags, | ||||
|                                  watches=sorted_watches | ||||
|                                  ) | ||||
|  | ||||
|  | ||||
|         if session.get('share-link'): | ||||
| @@ -596,25 +599,16 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                     using_default_check_time = False | ||||
|                     break | ||||
|  | ||||
|             # Use the default if its the same as system wide | ||||
|             # Use the default if it's the same as system-wide. | ||||
|             if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']: | ||||
|                 extra_update_obj['fetch_backend'] = None | ||||
|  | ||||
|  | ||||
|  | ||||
|              # Ignore text | ||||
|             form_ignore_text = form.ignore_text.data | ||||
|             datastore.data['watching'][uuid]['ignore_text'] = form_ignore_text | ||||
|  | ||||
|             # Reset the previous_md5 so we process a new snapshot including stripping ignore text. | ||||
|             if form_ignore_text: | ||||
|                 if len(datastore.data['watching'][uuid].history): | ||||
|                     extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid) | ||||
|  | ||||
|             # Reset the previous_md5 so we process a new snapshot including stripping ignore text. | ||||
|             if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []): | ||||
|                 if len(datastore.data['watching'][uuid].history): | ||||
|                     extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid) | ||||
|  | ||||
|             # Be sure proxy value is None | ||||
|             if datastore.proxy_list is not None and form.data['proxy'] == '': | ||||
|                 extra_update_obj['proxy'] = None | ||||
| @@ -632,7 +626,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             datastore.needs_write_urgent = True | ||||
|  | ||||
|             # Queue the watch for immediate recheck, with a higher priority | ||||
|             update_q.put((1, uuid)) | ||||
|             update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|  | ||||
|             # Diff page [edit] link should go back to diff page | ||||
|             if request.args.get("next") and request.args.get("next") == 'diff': | ||||
| @@ -773,7 +767,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 importer = import_url_list() | ||||
|                 importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore) | ||||
|                 for uuid in importer.new_uuids: | ||||
|                     update_q.put((1, uuid)) | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|  | ||||
|                 if len(importer.remaining_data) == 0: | ||||
|                     return redirect(url_for('index')) | ||||
| @@ -786,7 +780,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 d_importer = import_distill_io_json() | ||||
|                 d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore) | ||||
|                 for uuid in d_importer.new_uuids: | ||||
|                     update_q.put((1, uuid)) | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -1151,7 +1145,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         if not add_paused and new_uuid: | ||||
|             # Straight into the queue. | ||||
|             update_q.put((1, new_uuid)) | ||||
|             update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid})) | ||||
|             flash("Watch added.") | ||||
|  | ||||
|         if add_paused: | ||||
| @@ -1188,7 +1182,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             uuid = list(datastore.data['watching'].keys()).pop() | ||||
|  | ||||
|         new_uuid = datastore.clone(uuid) | ||||
|         update_q.put((5, new_uuid)) | ||||
|         update_q.put(queuedWatchMetaData.PrioritizedItem(priority=5, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) | ||||
|         flash('Cloned.') | ||||
|  | ||||
|         return redirect(url_for('index')) | ||||
| @@ -1196,7 +1190,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     @app.route("/api/checknow", methods=['GET']) | ||||
|     @login_required | ||||
|     def form_watch_checknow(): | ||||
|  | ||||
|         # Forced recheck will skip the 'skip if content is the same' rule (, 'reprocess_existing_data': True}))) | ||||
|         tag = request.args.get('tag') | ||||
|         uuid = request.args.get('uuid') | ||||
|         i = 0 | ||||
| @@ -1205,11 +1199,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         for t in running_update_threads: | ||||
|             running_uuids.append(t.current_uuid) | ||||
|  | ||||
|         # @todo check thread is running and skip | ||||
|  | ||||
|         if uuid: | ||||
|             if uuid not in running_uuids: | ||||
|                 update_q.put((1, uuid)) | ||||
|                 update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|             i = 1 | ||||
|  | ||||
|         elif tag != None: | ||||
| @@ -1217,14 +1209,14 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             for watch_uuid, watch in datastore.data['watching'].items(): | ||||
|                 if (tag != None and tag in watch['tag']): | ||||
|                     if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: | ||||
|                         update_q.put((1, watch_uuid)) | ||||
|                         update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})) | ||||
|                         i += 1 | ||||
|  | ||||
|         else: | ||||
|             # No tag, no uuid, add everything. | ||||
|             for watch_uuid, watch in datastore.data['watching'].items(): | ||||
|                 if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: | ||||
|                     update_q.put((1, watch_uuid)) | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': watch_uuid, 'skip_when_checksum_same': False})) | ||||
|                     i += 1 | ||||
|         flash("{} watches are queued for rechecking.".format(i)) | ||||
|         return redirect(url_for('index', tag=tag)) | ||||
| @@ -1271,6 +1263,14 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                     datastore.data['watching'][uuid.strip()]['notification_muted'] = False | ||||
|             flash("{} watches un-muted".format(len(uuids))) | ||||
|  | ||||
|         elif (op == 'recheck'): | ||||
|             for uuid in uuids: | ||||
|                 uuid = uuid.strip() | ||||
|                 if datastore.data['watching'].get(uuid): | ||||
|                     # Recheck and require a full reprocessing | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|  | ||||
|             flash("{} watches un-muted".format(len(uuids))) | ||||
|         elif (op == 'notification-default'): | ||||
|             from changedetectionio.notification import ( | ||||
|                 default_notification_format_for_watch | ||||
| @@ -1343,6 +1343,10 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     import changedetectionio.blueprint.browser_steps as browser_steps | ||||
|     app.register_blueprint(browser_steps.construct_blueprint(datastore), url_prefix='/browser-steps') | ||||
|  | ||||
|     import changedetectionio.blueprint.price_data_follower as price_data_follower | ||||
|     app.register_blueprint(price_data_follower.construct_blueprint(datastore, update_q), url_prefix='/price_data_follower') | ||||
|  | ||||
|  | ||||
|     # @todo handle ctrl break | ||||
|     ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start() | ||||
|     threading.Thread(target=notification_runner).start() | ||||
| @@ -1448,7 +1452,11 @@ def ticker_thread_check_time_launch_checks(): | ||||
|         watch_uuid_list = [] | ||||
|         while True: | ||||
|             try: | ||||
|                 watch_uuid_list = datastore.data['watching'].keys() | ||||
|                 # Get a list of watches sorted by last_checked, [1] because it gets passed a tuple | ||||
|                 # This is so we examine the most over-due first | ||||
|                 for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked',0)): | ||||
|                     watch_uuid_list.append(k[0]) | ||||
|  | ||||
|             except RuntimeError as e: | ||||
|                 # RuntimeError: dictionary changed size during iteration | ||||
|                 time.sleep(0.1) | ||||
| @@ -1488,7 +1496,7 @@ def ticker_thread_check_time_launch_checks(): | ||||
|             seconds_since_last_recheck = now - watch['last_checked'] | ||||
|  | ||||
|             if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds: | ||||
|                 if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]: | ||||
|                 if not uuid in running_uuids and uuid not in [q_uuid.item['uuid'] for q_uuid in update_q.queue]: | ||||
|  | ||||
|                     # Proxies can be set to have a limit on seconds between which they can be called | ||||
|                     watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid) | ||||
| @@ -1519,8 +1527,9 @@ def ticker_thread_check_time_launch_checks(): | ||||
|                             priority, | ||||
|                             watch.jitter_seconds, | ||||
|                             now - watch['last_checked'])) | ||||
|  | ||||
|                     # Into the queue with you | ||||
|                     update_q.put((priority, uuid)) | ||||
|                     update_q.put(queuedWatchMetaData.PrioritizedItem(priority=priority, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|  | ||||
|                     # Reset for next time | ||||
|                     watch.jitter_seconds = 0 | ||||
|   | ||||
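The recurring change throughout this file swaps bare `(priority, uuid)` tuples on `update_q` for `queuedWatchMetaData.PrioritizedItem(...)` objects that also carry flags such as `skip_when_checksum_same`. The `queuedWatchMetaData` module is not shown in this compare; a plausible sketch, assuming it follows the standard dataclass pattern for `queue.PriorityQueue` entries:

```python
from dataclasses import dataclass, field
from queue import PriorityQueue
from typing import Any

@dataclass(order=True)
class PrioritizedItem:
    """Queue entry ordered by priority; the payload dict is excluded from comparisons."""
    priority: int
    item: Any = field(compare=False)

update_q = PriorityQueue()
update_q.put(PrioritizedItem(priority=1, item={'uuid': 'some-uuid', 'skip_when_checksum_same': False}))
print(update_q.get().item['uuid'])  # lowest priority number comes out first
```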
| @@ -1,3 +1,4 @@ | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from flask_restful import abort, Resource | ||||
| from flask import request, make_response | ||||
| import validators | ||||
| @@ -24,7 +25,7 @@ class Watch(Resource): | ||||
|             abort(404, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
|  | ||||
|         if request.args.get('recheck'): | ||||
|             self.update_q.put((1, uuid)) | ||||
|             self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|             return "OK", 200 | ||||
|  | ||||
|         # Return without history, get that via another API call | ||||
| @@ -100,7 +101,7 @@ class CreateWatch(Resource): | ||||
|         extras = {'title': json_data['title'].strip()} if json_data.get('title') else {} | ||||
|  | ||||
|         new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras) | ||||
|         self.update_q.put((1, new_uuid)) | ||||
|         self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': new_uuid, 'skip_when_checksum_same': True})) | ||||
|         return {'uuid': new_uuid}, 201 | ||||
|  | ||||
|     # Return concise list of available watches and some very basic info | ||||
| @@ -118,7 +119,7 @@ class CreateWatch(Resource): | ||||
|  | ||||
|         if request.args.get('recheck_all'): | ||||
|             for uuid in self.datastore.data['watching'].keys(): | ||||
|                 self.update_q.put((1, uuid)) | ||||
|                 self.update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True})) | ||||
|             return {'status': "OK"}, 200 | ||||
|  | ||||
|         return list, 200 | ||||
|   | ||||
| @@ -75,15 +75,13 @@ class steppable_browser_interface(): | ||||
|     def action_goto_url(self, url, optional_value): | ||||
|         # self.page.set_viewport_size({"width": 1280, "height": 5000}) | ||||
|         now = time.time() | ||||
|         response = self.page.goto(url, timeout=0, wait_until='domcontentloaded') | ||||
|         print("Time to goto URL", time.time() - now) | ||||
|         response = self.page.goto(url, timeout=0, wait_until='commit') | ||||
|  | ||||
|         # Wait_until = commit | ||||
|         # - `'commit'` - consider operation to be finished when network response is received and the document started loading. | ||||
|         # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds | ||||
|         # This seemed to solve nearly all 'TimeoutErrors' | ||||
|         extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) | ||||
|         self.page.wait_for_timeout(extra_wait * 1000) | ||||
|         print("Time to goto URL ", time.time() - now) | ||||
|  | ||||
|     def action_click_element_containing_text(self, selector=None, value=''): | ||||
|         if not len(value.strip()): | ||||
|   | ||||
							
								
								
									
changedetectionio/blueprint/price_data_follower/__init__.py (33 changes, new file)
changedetectionio/blueprint/price_data_follower/__init__.py (33 changes, new file)
							| @@ -0,0 +1,33 @@ | ||||
|  | ||||
| from distutils.util import strtobool | ||||
| from flask import Blueprint, flash, redirect, url_for | ||||
| from flask_login import login_required | ||||
| from changedetectionio.store import ChangeDetectionStore | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from queue import PriorityQueue | ||||
|  | ||||
| PRICE_DATA_TRACK_ACCEPT = 'accepted' | ||||
| PRICE_DATA_TRACK_REJECT = 'rejected' | ||||
|  | ||||
| def construct_blueprint(datastore: ChangeDetectionStore, update_q: PriorityQueue): | ||||
|  | ||||
|     price_data_follower_blueprint = Blueprint('price_data_follower', __name__) | ||||
|  | ||||
|     @login_required | ||||
|     @price_data_follower_blueprint.route("/<string:uuid>/accept", methods=['GET']) | ||||
|     def accept(uuid): | ||||
|         datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_ACCEPT | ||||
|         update_q.put(queuedWatchMetaData.PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': False})) | ||||
|         return redirect(url_for("form_watch_checknow", uuid=uuid)) | ||||
|  | ||||
|  | ||||
|     @login_required | ||||
|     @price_data_follower_blueprint.route("/<string:uuid>/reject", methods=['GET']) | ||||
|     def reject(uuid): | ||||
|         datastore.data['watching'][uuid]['track_ldjson_price_data'] = PRICE_DATA_TRACK_REJECT | ||||
|         return redirect(url_for("index")) | ||||
|  | ||||
|  | ||||
|     return price_data_follower_blueprint | ||||
|  | ||||
|  | ||||
| @@ -9,6 +9,7 @@ import getopt | ||||
| import os | ||||
| import signal | ||||
| import sys | ||||
| import socket | ||||
|  | ||||
| from . import store, changedetection_app, content_fetcher | ||||
| from . import __version__ | ||||
| @@ -126,11 +127,11 @@ def main(): | ||||
|  | ||||
|     if ssl_mode: | ||||
|         # @todo finalise SSL config, but this should get you in the right direction if you need it. | ||||
|         eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port)), | ||||
|         eventlet.wsgi.server(eventlet.wrap_ssl(eventlet.listen((host, port), socket.AF_INET6), | ||||
|                                                certfile='cert.pem', | ||||
|                                                keyfile='privkey.pem', | ||||
|                                                server_side=True), app) | ||||
|  | ||||
|     else: | ||||
|         eventlet.wsgi.server(eventlet.listen((host, int(port))), app) | ||||
|         eventlet.wsgi.server(eventlet.listen((host, int(port)), socket.AF_INET6), app) | ||||
|  | ||||
|   | ||||
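Passing `socket.AF_INET6` to `eventlet.listen()` is what lets the new `curl -g -6 "http://[::1]:5556"` checks in the workflow succeed. Whether that one socket also serves IPv4 clients depends on the platform's dual-stack default (`IPV6_V6ONLY`); a small stdlib-only sketch to check, assuming IPv6 is enabled on the host (the port matches the workflow, everything else is illustrative):

```python
import socket

# Bind an IPv6 wildcard socket and report whether it will also accept IPv4 clients.
s = socket.socket(socket.AF_INET6, socket.SOCK_STREAM)
# 0 means dual-stack (IPv4-mapped addresses accepted), 1 means IPv6 only.
v6only = s.getsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY)
s.bind(("::", 5556))
s.listen()
print("IPv6-only socket" if v6only else "Dual-stack socket (also accepts IPv4)")
s.close()
```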
| @@ -1,3 +1,4 @@ | ||||
| import hashlib | ||||
| from abc import abstractmethod | ||||
| import chardet | ||||
| import json | ||||
| @@ -23,6 +24,9 @@ class Non200ErrorCodeReceived(Exception): | ||||
|             self.page_text = html_tools.html_to_text(page_html) | ||||
|         return | ||||
|  | ||||
| class checksumFromPreviousCheckWasTheSame(Exception): | ||||
|     def __init__(self): | ||||
|         return | ||||
|  | ||||
| class JSActionExceptions(Exception): | ||||
|     def __init__(self, status_code, url, screenshot, message=''): | ||||
| @@ -39,7 +43,7 @@ class BrowserStepsStepTimout(Exception): | ||||
|  | ||||
|  | ||||
| class PageUnloadable(Exception): | ||||
|     def __init__(self, status_code, url, screenshot=False, message=False): | ||||
|     def __init__(self, status_code, url, message, screenshot=False): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
| @@ -113,7 +117,8 @@ class Fetcher(): | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None): | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|         # Should set self.error, self.status_code and self.content | ||||
|         pass | ||||
|  | ||||
| @@ -238,6 +243,14 @@ class base_html_playwright(Fetcher): | ||||
|         if proxy_override: | ||||
|             self.proxy = {'server': proxy_override} | ||||
|  | ||||
|         if self.proxy: | ||||
|             # Playwright needs separate username and password values | ||||
|             from urllib.parse import urlparse | ||||
|             parsed = urlparse(self.proxy.get('server')) | ||||
|             if parsed.username: | ||||
|                 self.proxy['username'] = parsed.username | ||||
|                 self.proxy['password'] = parsed.password | ||||
|  | ||||
|     def screenshot_step(self, step_n=''): | ||||
|  | ||||
|         # There's a bug where we need to do it twice or it doesnt take the whole page, dont know why. | ||||
| @@ -264,7 +277,8 @@ class base_html_playwright(Fetcher): | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None): | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|  | ||||
|         from playwright.sync_api import sync_playwright | ||||
|         import playwright._impl._api_types | ||||
| @@ -286,6 +300,8 @@ class base_html_playwright(Fetcher): | ||||
|                 proxy=self.proxy, | ||||
|                 # This is needed to enable JavaScript execution on GitHub and others | ||||
|                 bypass_csp=True, | ||||
|                 # Can't think why we need the service workers for our use case? | ||||
|                 service_workers='block', | ||||
|                 # Should never be needed | ||||
|                 accept_downloads=False | ||||
|             ) | ||||
| @@ -294,24 +310,34 @@ class base_html_playwright(Fetcher): | ||||
|             if len(request_headers): | ||||
|                 context.set_extra_http_headers(request_headers) | ||||
|  | ||||
|             try: | ||||
|                 self.page.set_default_navigation_timeout(90000) | ||||
|                 self.page.set_default_timeout(90000) | ||||
|  | ||||
|                 # Listen for all console events and handle errors | ||||
|                 self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|                 # Bug - never set viewport size BEFORE page.goto | ||||
|  | ||||
|  | ||||
|                 # Waits for the next navigation. Using Python context manager | ||||
|                 # prevents a race condition between clicking and waiting for a navigation. | ||||
|                 with self.page.expect_navigation(): | ||||
|                     response = self.page.goto(url, wait_until='load') | ||||
|             # Goto page | ||||
|             try: | ||||
|                 # Wait_until = commit | ||||
|                 # - `'commit'` - consider operation to be finished when network response is received and the document started loading. | ||||
|                 # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds | ||||
|                 # This seemed to solve nearly all 'TimeoutErrors' | ||||
|                 response = self.page.goto(url, wait_until='commit') | ||||
|             except playwright._impl._api_types.Error as e: | ||||
|                 # Retry once - https://github.com/browserless/chrome/issues/2485 | ||||
|                 # Sometimes errors related to invalid cert's and other can be random | ||||
|                 print ("Content Fetcher > retrying request got error - ", str(e)) | ||||
|                 time.sleep(1) | ||||
|                 response = self.page.goto(url, wait_until='commit') | ||||
|  | ||||
|             except Exception as e: | ||||
|                 print ("Content Fetcher > Other exception when page.goto", str(e)) | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             # Execute any browser steps | ||||
|             try: | ||||
|                 extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|                 self.page.wait_for_timeout(extra_wait * 1000) | ||||
|  | ||||
| @@ -324,17 +350,15 @@ class base_html_playwright(Fetcher): | ||||
|                 # This can be ok, we will try to grab what we could retrieve | ||||
|                 pass | ||||
|             except Exception as e: | ||||
|                 print ("other exception when page.goto") | ||||
|                 print (str(e)) | ||||
|                 print ("Content Fetcher > Other exception when executing custom JS code", str(e)) | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None) | ||||
|  | ||||
|                 raise PageUnloadable(url=url, status_code=None, message=str(e)) | ||||
|  | ||||
|             if response is None: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 print ("response object was none") | ||||
|                 print ("Content Fetcher > Response object was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             # Bug 2(?) Set the viewport size AFTER loading the page | ||||
| @@ -353,8 +377,8 @@ class base_html_playwright(Fetcher): | ||||
|             if len(self.page.content().strip()) == 0: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 print ("Content was empty") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|                 print ("Content Fetcher > Content was empty") | ||||
|                 raise EmptyReply(url=url, status_code=response.status) | ||||
|  | ||||
|             # Bug 2(?) Set the viewport size AFTER loading the page | ||||
|             self.page.set_viewport_size({"width": 1280, "height": 1024}) | ||||
| @@ -440,7 +464,8 @@ class base_html_webdriver(Fetcher): | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None): | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|  | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.common.desired_capabilities import DesiredCapabilities | ||||
| @@ -498,7 +523,7 @@ class base_html_webdriver(Fetcher): | ||||
|             try: | ||||
|                 self.driver.quit() | ||||
|             except Exception as e: | ||||
|                 print("Exception in chrome shutdown/quit" + str(e)) | ||||
|                 print("Content Fetcher > Exception in chrome shutdown/quit" + str(e)) | ||||
|  | ||||
|  | ||||
| # "html_requests" is listed as the default fetcher in store.py! | ||||
| @@ -515,7 +540,8 @@ class html_requests(Fetcher): | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_include_filters=None): | ||||
|             current_include_filters=None, | ||||
|             is_binary=False): | ||||
|  | ||||
|         # Make requests use a more modern looking user-agent | ||||
|         if not 'User-Agent' in request_headers: | ||||
| @@ -545,10 +571,12 @@ class html_requests(Fetcher): | ||||
|         # For example - some sites don't tell us it's utf-8, but return utf-8 content | ||||
|         # This seems to not occur when using webdriver/selenium, it seems to detect the text encoding more reliably. | ||||
|         # https://github.com/psf/requests/issues/1604 good info about requests encoding detection | ||||
|         if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'): | ||||
|             encoding = chardet.detect(r.content)['encoding'] | ||||
|             if encoding: | ||||
|                 r.encoding = encoding | ||||
|         if not is_binary: | ||||
|             # Don't run this for PDF (and requests identified as binary) takes a _long_ time | ||||
|             if not r.headers.get('content-type') or not 'charset=' in r.headers.get('content-type'): | ||||
|                 encoding = chardet.detect(r.content)['encoding'] | ||||
|                 if encoding: | ||||
|                     r.encoding = encoding | ||||
|  | ||||
|         if not r.content or not len(r.content): | ||||
|             raise EmptyReply(url=url, status_code=r.status_code) | ||||
| @@ -560,8 +588,14 @@ class html_requests(Fetcher): | ||||
|             raise Non200ErrorCodeReceived(url=url, status_code=r.status_code, page_html=r.text) | ||||
|  | ||||
|         self.status_code = r.status_code | ||||
|         self.content = r.text | ||||
|         if is_binary: | ||||
|             # Binary files just return their checksum until we add something smarter | ||||
|             self.content = hashlib.md5(r.content).hexdigest() | ||||
|         else: | ||||
|             self.content = r.text | ||||
|  | ||||
|         self.headers = r.headers | ||||
|         self.raw_content = r.content | ||||
|  | ||||
|  | ||||
| # Decide which is the 'real' HTML webdriver, this is more a system wide config | ||||
|   | ||||
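The new `is_binary` branch skips the slow `chardet` pass and stores only an MD5 digest of the body, so change detection for PDFs and other binary responses reduces to comparing checksums between runs. A standalone sketch of that idea (the URL and helper name are illustrative):

```python
import hashlib
import requests

def binary_fingerprint(url: str) -> str:
    """Fetch a binary resource and return a digest that stands in for its content."""
    r = requests.get(url, timeout=45)
    r.raise_for_status()
    return hashlib.md5(r.content).hexdigest()

previous = binary_fingerprint("https://example.com/report.pdf")
# ... on a later check ...
changed = binary_fingerprint("https://example.com/report.pdf") != previous
print("changed" if changed else "unchanged")
```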
| @@ -1,14 +0,0 @@ | ||||
| FROM python:3.8-slim | ||||
|  | ||||
| # https://stackoverflow.com/questions/58701233/docker-logs-erroneously-appears-empty-until-container-stops | ||||
| ENV PYTHONUNBUFFERED=1 | ||||
|  | ||||
| WORKDIR /app | ||||
|  | ||||
| RUN [ ! -d "/datastore" ] && mkdir /datastore | ||||
|  | ||||
| COPY sleep.py / | ||||
| CMD [ "python", "/sleep.py" ] | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -1,7 +0,0 @@ | ||||
| import time | ||||
|  | ||||
| print ("Sleep loop, you should run your script from the console") | ||||
|  | ||||
| while True:  | ||||
|     # Wait for 5 seconds | ||||
|     time.sleep(2) | ||||
| @@ -1,11 +1,13 @@ | ||||
| import hashlib | ||||
| import json | ||||
| import logging | ||||
| import os | ||||
| import re | ||||
| import time | ||||
| import urllib3 | ||||
|  | ||||
| from changedetectionio import content_fetcher, html_tools | ||||
| from changedetectionio.blueprint.price_data_follower import PRICE_DATA_TRACK_ACCEPT, PRICE_DATA_TRACK_REJECT | ||||
| from copy import deepcopy | ||||
|  | ||||
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | ||||
|  | ||||
| @@ -14,6 +16,10 @@ class FilterNotFoundInResponse(ValueError): | ||||
|     def __init__(self, msg): | ||||
|         ValueError.__init__(self, msg) | ||||
|  | ||||
| class PDFToHTMLToolNotFound(ValueError): | ||||
|     def __init__(self, msg): | ||||
|         ValueError.__init__(self, msg) | ||||
|  | ||||
|  | ||||
| # Some common stuff here that can be moved to a base class | ||||
| # (set_proxy_from_list) | ||||
| @@ -38,8 +44,7 @@ class perform_site_check(): | ||||
|  | ||||
|         return regex | ||||
|  | ||||
|     def run(self, uuid): | ||||
|         from copy import deepcopy | ||||
|     def run(self, uuid, skip_when_checksum_same=True): | ||||
|         changed_detected = False | ||||
|         screenshot = False  # as bytes | ||||
|         stripped_text_from_html = "" | ||||
| @@ -86,7 +91,7 @@ class perform_site_check(): | ||||
|             is_source = True | ||||
|  | ||||
|         # Pluggable content fetcher | ||||
|         prefer_backend = watch.get('fetch_backend') | ||||
|         prefer_backend = watch.get_fetch_backend | ||||
|         if hasattr(content_fetcher, prefer_backend): | ||||
|             klass = getattr(content_fetcher, prefer_backend) | ||||
|         else: | ||||
| @@ -116,12 +121,26 @@ class perform_site_check(): | ||||
|         if watch.get('webdriver_js_execute_code') is not None and watch.get('webdriver_js_execute_code').strip(): | ||||
|             fetcher.webdriver_js_execute_code = watch.get('webdriver_js_execute_code') | ||||
|  | ||||
|         fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters')) | ||||
|         # requests for PDF's, images etc should be passwd the is_binary flag | ||||
|         is_binary = watch.is_pdf | ||||
|  | ||||
|         fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch.get('include_filters'), is_binary=is_binary) | ||||
|         fetcher.quit() | ||||
|  | ||||
|         self.screenshot = fetcher.screenshot | ||||
|         self.xpath_data = fetcher.xpath_data | ||||
|  | ||||
|         # Track the content type | ||||
|         update_obj['content_type'] = fetcher.headers.get('Content-Type', '') | ||||
|  | ||||
|         # Watches added automatically in the queue manager will skip if its the same checksum as the previous run | ||||
|         # Saves a lot of CPU | ||||
|         update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest() | ||||
|         if skip_when_checksum_same: | ||||
|             if update_obj['previous_md5_before_filters'] == watch.get('previous_md5_before_filters'): | ||||
|                 raise content_fetcher.checksumFromPreviousCheckWasTheSame() | ||||
|  | ||||
|  | ||||
|         # Fetching complete, now filters | ||||
|         # @todo move to class / maybe inside of fetcher abstract base? | ||||
|  | ||||
| @@ -140,7 +159,32 @@ class perform_site_check(): | ||||
|             is_html = False | ||||
|             is_json = False | ||||
|  | ||||
|         include_filters_rule = watch.get('include_filters', []) | ||||
|         if watch.is_pdf or 'application/pdf' in fetcher.headers.get('Content-Type', '').lower(): | ||||
|             from shutil import which | ||||
|             tool = os.getenv("PDF_TO_HTML_TOOL", "pdftohtml") | ||||
|             if not which(tool): | ||||
|                 raise PDFToHTMLToolNotFound("Command-line `{}` tool was not found in system PATH, was it installed?".format(tool)) | ||||
|  | ||||
|             import subprocess | ||||
|             proc = subprocess.Popen( | ||||
|                 [tool, '-stdout', '-', '-s', 'out.pdf', '-i'], | ||||
|                 stdout=subprocess.PIPE, | ||||
|                 stdin=subprocess.PIPE) | ||||
|             proc.stdin.write(fetcher.raw_content) | ||||
|             proc.stdin.close() | ||||
|             fetcher.content = proc.stdout.read().decode('utf-8') | ||||
|             proc.wait(timeout=60) | ||||
|  | ||||
|             # Add a little metadata so we know if the file changes (like if an image changes, but the text is the same | ||||
|             # @todo may cause problems with non-UTF8? | ||||
|             metadata = "<p>Added by changedetection.io: Document checksum - {} Filesize - {} bytes</p>".format( | ||||
|                 hashlib.md5(fetcher.raw_content).hexdigest().upper(), | ||||
|                 len(fetcher.content)) | ||||
|  | ||||
|             fetcher.content = fetcher.content.replace('</body>', metadata + '</body>') | ||||
|  | ||||
|  | ||||
|         include_filters_rule = deepcopy(watch.get('include_filters', [])) | ||||
|         # include_filters_rule = watch['include_filters'] | ||||
|         subtractive_selectors = watch.get( | ||||
|             "subtractive_selectors", [] | ||||
| @@ -148,6 +192,10 @@ class perform_site_check(): | ||||
|             "global_subtractive_selectors", [] | ||||
|         ) | ||||
|  | ||||
|         # Inject a virtual LD+JSON price tracker rule | ||||
|         if watch.get('track_ldjson_price_data', '') == PRICE_DATA_TRACK_ACCEPT: | ||||
|             include_filters_rule.append(html_tools.LD_JSON_PRODUCT_OFFER_SELECTOR) | ||||
|  | ||||
|         has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip()) | ||||
|         has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip()) | ||||
|  | ||||
| @@ -155,6 +203,14 @@ class perform_site_check(): | ||||
|             include_filters_rule.append("json:$") | ||||
|             has_filter_rule = True | ||||
|  | ||||
|         if is_json: | ||||
|             # Sort the JSON so we dont get false alerts when the content is just re-ordered | ||||
|             try: | ||||
|                 fetcher.content = json.dumps(json.loads(fetcher.content), sort_keys=True) | ||||
|             except Exception as e: | ||||
|                 # Might have just been a snippet, or otherwise bad JSON, continue | ||||
|                 pass | ||||
|  | ||||
|         if has_filter_rule: | ||||
|             json_filter_prefixes = ['json:', 'jq:'] | ||||
|             for filter in include_filters_rule: | ||||
| @@ -162,6 +218,8 @@ class perform_site_check(): | ||||
|                     stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter) | ||||
|                     is_html = False | ||||
|  | ||||
|  | ||||
|  | ||||
|         if is_html or is_source: | ||||
|  | ||||
|             # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text | ||||
| @@ -173,9 +231,13 @@ class perform_site_check(): | ||||
|                 # Don't run get_text or xpath/css filters on plaintext | ||||
|                 stripped_text_from_html = html_content | ||||
|             else: | ||||
|                 # Does it have some ld+json price data? used for easier monitoring | ||||
|                 update_obj['has_ldjson_price_data'] = html_tools.has_ldjson_product_info(fetcher.content) | ||||
|  | ||||
|                 # Then we assume HTML | ||||
|                 if has_filter_rule: | ||||
|                     html_content = "" | ||||
|  | ||||
|                     for filter_rule in include_filters_rule: | ||||
|                         # For HTML/XML we offer xpath as an option, just start a regular xPath "/.." | ||||
|                         if filter_rule[0] == '/' or filter_rule.startswith('xpath:'): | ||||
|   | ||||
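The PDF branch above pipes the raw bytes into `pdftohtml` (supplied by the `poppler-utils` package added to the Dockerfile) and appends a checksum/filesize footer so that changes to embedded images still register even when the text is unchanged. A rough wrapper around the same invocation, assuming `pdftohtml` is on PATH (the function name is illustrative):

```python
import hashlib
import subprocess

def pdf_bytes_to_html(pdf_bytes: bytes) -> str:
    """Convert a PDF (as bytes) to HTML text via poppler's pdftohtml, using the same flags as the diff above."""
    proc = subprocess.run(
        ["pdftohtml", "-stdout", "-", "-s", "out.pdf", "-i"],
        input=pdf_bytes,
        stdout=subprocess.PIPE,
        timeout=60,
        check=True,
    )
    html = proc.stdout.decode("utf-8")
    # Footer so a change in embedded images (with identical text) still shows up as a diff
    footer = "<p>Document checksum - {} Filesize - {} bytes</p>".format(
        hashlib.md5(pdf_bytes).hexdigest().upper(), len(pdf_bytes))
    return html.replace("</body>", footer + "</body>")
```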
| @@ -426,6 +426,13 @@ class watchForm(commonSettingsForm): | ||||
|         return result | ||||
|  | ||||
|  | ||||
| class SingleExtraProxy(Form): | ||||
|  | ||||
|     # maybe better to set some <script>var.. | ||||
|     proxy_name = StringField('Name', [validators.Optional()], render_kw={"placeholder": "Name"}) | ||||
|     proxy_url = StringField('Proxy URL', [validators.Optional()], render_kw={"placeholder": "http://user:pass@...:3128", "size":50}) | ||||
|     # @todo do the validation here instead | ||||
|  | ||||
| # datastore.data['settings']['requests'].. | ||||
| class globalSettingsRequestForm(Form): | ||||
|     time_between_check = FormField(TimeBetweenCheckForm) | ||||
| @@ -433,6 +440,15 @@ class globalSettingsRequestForm(Form): | ||||
|     jitter_seconds = IntegerField('Random jitter seconds ± check', | ||||
|                                   render_kw={"style": "width: 5em;"}, | ||||
|                                   validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
|     extra_proxies = FieldList(FormField(SingleExtraProxy), min_entries=5) | ||||
|  | ||||
|     def validate_extra_proxies(self, extra_validators=None): | ||||
|         for e in self.data['extra_proxies']: | ||||
|             if e.get('proxy_name') or e.get('proxy_url'): | ||||
|                 if not e.get('proxy_name','').strip() or not e.get('proxy_url','').strip(): | ||||
|                     self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.') | ||||
|                     return False | ||||
|  | ||||
|  | ||||
| # datastore.data['settings']['application'].. | ||||
| class globalSettingsApplicationForm(commonSettingsForm): | ||||
|   | ||||
| @@ -10,6 +10,10 @@ import re | ||||
| # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis | ||||
| TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>" | ||||
|  | ||||
| # 'price' , 'lowPrice', 'highPrice' are usually under here | ||||
| # all of those may or may not appear on different websites | ||||
| LD_JSON_PRODUCT_OFFER_SELECTOR = "json:$..offers" | ||||
|  | ||||
| class JSONNotFound(ValueError): | ||||
|     def __init__(self, msg): | ||||
|         ValueError.__init__(self, msg) | ||||
| @@ -127,8 +131,10 @@ def _get_stripped_text_from_json_match(match): | ||||
|  | ||||
|     return stripped_text_from_html | ||||
|  | ||||
| def extract_json_as_string(content, json_filter): | ||||
|  | ||||
| # content - json | ||||
| # json_filter - ie json:$..price | ||||
| # ensure_is_ldjson_info_type - str "product", optional, "@type == product" (I dont know how to do that as a json selector) | ||||
| def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None): | ||||
|     stripped_text_from_html = False | ||||
|  | ||||
|     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson> | ||||
| @@ -139,7 +145,12 @@ def extract_json_as_string(content, json_filter): | ||||
|         # Foreach <script json></script> blob.. just return the first that matches json_filter | ||||
|         s = [] | ||||
|         soup = BeautifulSoup(content, 'html.parser') | ||||
|         bs_result = soup.findAll('script') | ||||
|  | ||||
|         if ensure_is_ldjson_info_type: | ||||
|             bs_result = soup.findAll('script', {"type": "application/ld+json"}) | ||||
|         else: | ||||
|             bs_result = soup.findAll('script') | ||||
|  | ||||
|  | ||||
|         if not bs_result: | ||||
|             raise JSONNotFound("No parsable JSON found in this document") | ||||
| @@ -156,7 +167,14 @@ def extract_json_as_string(content, json_filter): | ||||
|                 continue | ||||
|             else: | ||||
|                 stripped_text_from_html = _parse_json(json_data, json_filter) | ||||
|                 if stripped_text_from_html: | ||||
|                 if ensure_is_ldjson_info_type: | ||||
|                     # Could sometimes be list, string or something else random | ||||
|                     if isinstance(json_data, dict): | ||||
|                         # If it has LD JSON 'key' @type, and @type is 'product', and something was found for the search | ||||
|                         # (Some sites have multiple of the same ld+json @type='product', but some have the review part, some have the 'price' part) | ||||
|                         if json_data.get('@type', False) and json_data.get('@type','').lower() == ensure_is_ldjson_info_type.lower() and stripped_text_from_html: | ||||
|                             break | ||||
|                 elif stripped_text_from_html: | ||||
|                     break | ||||
|  | ||||
|     if not stripped_text_from_html: | ||||
| @@ -243,6 +261,18 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str: | ||||
|  | ||||
|     return text_content | ||||
|  | ||||
|  | ||||
| # Does LD+JSON exist with a @type=='product' and a .price set anywhere? | ||||
| def has_ldjson_product_info(content): | ||||
|     try: | ||||
|         pricing_data = extract_json_as_string(content=content, json_filter=LD_JSON_PRODUCT_OFFER_SELECTOR, ensure_is_ldjson_info_type="product") | ||||
|     except JSONNotFound as e: | ||||
|         # Totally fine | ||||
|         return False | ||||
|     return bool(pricing_data) | ||||
|  | ||||
|  | ||||
| def workarounds_for_obfuscations(content): | ||||
|     """ | ||||
|     Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis | ||||
|   | ||||
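To make the flow above concrete, here is a rough standalone sketch of the same idea: collect only the <script type="application/ld+json"> blobs, keep dict blobs whose @type is 'product', and check that an offers block can be matched. The real code goes through extract_json_as_string()/_parse_json(); jsonpath_ng below is just an illustrative stand-in, not necessarily what the project uses.

import json
from bs4 import BeautifulSoup
from jsonpath_ng import parse  # assumption: any JSONPath engine would do here

def sketch_has_ldjson_product_info(html):
    offers = parse('$..offers')  # the equivalent of LD_JSON_PRODUCT_OFFER_SELECTOR
    for tag in BeautifulSoup(html, 'html.parser').find_all('script', {"type": "application/ld+json"}):
        try:
            data = json.loads(tag.string or '')
        except json.JSONDecodeError:
            continue
        # Only dict blobs count, and only when @type is 'product' (case-insensitive)
        ld_type = data.get('@type') if isinstance(data, dict) else None
        if isinstance(ld_type, str) and ld_type.lower() == 'product' and offers.find(data):
            return True
    return False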
| @@ -15,11 +15,12 @@ class model(dict): | ||||
|                 'headers': { | ||||
|                 }, | ||||
|                 'requests': { | ||||
|                     'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds | ||||
|                     'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None}, | ||||
|                     'extra_proxies': [], # Configurable extra proxies via the UI | ||||
|                     'jitter_seconds': 0, | ||||
|                     'proxy': None, # Preferred proxy connection | ||||
|                     'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None}, | ||||
|                     'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds | ||||
|                     'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")),  # Number of threads, lower is better for slow connections | ||||
|                     'proxy': None # Preferred proxy connection | ||||
|                 }, | ||||
|                 'application': { | ||||
|                     'api_access_token_enabled': True, | ||||
| @@ -27,7 +28,6 @@ class model(dict): | ||||
|                     'base_url' : None, | ||||
|                     'extract_title_as_title': False, | ||||
|                     'empty_pages_are_a_change': False, | ||||
|                     'css_dark_mode': False, | ||||
|                     'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), | ||||
|                     'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT, | ||||
|                     'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum | ||||
|   | ||||
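For reference, the request defaults above are resolved from environment variables at start-up, so an operator can override them without touching the code; a minimal sketch using the same names as the diff:

from os import getenv

# Minimal sketch: the same getenv() pattern as the defaults above.
requests_defaults = {
    'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),   # seconds
    'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")),   # checker threads
    'extra_proxies': [],  # new: proxies configurable via the UI
}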
| @@ -14,49 +14,52 @@ from changedetectionio.notification import ( | ||||
|  | ||||
| class model(dict): | ||||
|     __newest_history_key = None | ||||
|     __history_n=0 | ||||
|     __history_n = 0 | ||||
|     __base_config = { | ||||
|             #'history': {},  # Dict of timestamp and output stripped filename (removed) | ||||
|             #'newest_history_key': 0, (removed, taken from history.txt index) | ||||
|             'body': None, | ||||
|             'check_unique_lines': False, # On change-detected, compare against all history if its something new | ||||
|             'check_count': 0, | ||||
|             'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. | ||||
|             'extract_text': [],  # Extract text by regex after filters | ||||
|             'extract_title_as_title': False, | ||||
|             'fetch_backend': None, | ||||
|             'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), | ||||
|             'headers': {},  # Extra headers to send | ||||
|             'ignore_text': [],  # List of text to ignore when calculating the comparison checksum | ||||
|             'include_filters': [], | ||||
|             'last_checked': 0, | ||||
|             'last_error': False, | ||||
|             'last_viewed': 0,  # history key value of the last viewed via the [diff] link | ||||
|             'method': 'GET', | ||||
|              # Custom notification content | ||||
|             'notification_body': None, | ||||
|             'notification_format': default_notification_format_for_watch, | ||||
|             'notification_muted': False, | ||||
|             'notification_title': None, | ||||
|             'notification_screenshot': False, # Include the latest screenshot if available and supported by the apprise URL | ||||
|             'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise) | ||||
|             'paused': False, | ||||
|             'previous_md5': False, | ||||
|             'proxy': None, # Preferred proxy connection | ||||
|             'subtractive_selectors': [], | ||||
|             'tag': None, | ||||
|             'text_should_not_be_present': [], # Text that should not present | ||||
|             # Re #110, so then if this is set to None, we know to use the default value instead | ||||
|             # Requires setting to None on submit if it's the same as the default | ||||
|             # Should be all None by default, so we use the system default in this case. | ||||
|             'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, | ||||
|             'title': None, | ||||
|             'trigger_text': [],  # List of text or regex to wait for until a change is detected | ||||
|             'url': None, | ||||
|             'uuid': str(uuid.uuid4()), | ||||
|             'webdriver_delay': None, | ||||
|             'webdriver_js_execute_code': None, # Run before change-detection | ||||
|         } | ||||
|         # 'history': {},  # Dict of timestamp and output stripped filename (removed) | ||||
|         # 'newest_history_key': 0, (removed, taken from history.txt index) | ||||
|         'body': None, | ||||
|         'check_unique_lines': False,  # On change-detected, compare against all history if it's something new | ||||
|         'check_count': 0, | ||||
|         'consecutive_filter_failures': 0,  # Every time the CSS/xPath filter cannot be located, reset when all is fine. | ||||
|         'extract_text': [],  # Extract text by regex after filters | ||||
|         'extract_title_as_title': False, | ||||
|         'fetch_backend': None, | ||||
|         'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), | ||||
|         'has_ldjson_price_data': None, | ||||
|         'track_ldjson_price_data': None, | ||||
|         'headers': {},  # Extra headers to send | ||||
|         'ignore_text': [],  # List of text to ignore when calculating the comparison checksum | ||||
|         'include_filters': [], | ||||
|         'last_checked': 0, | ||||
|         'last_error': False, | ||||
|         'last_viewed': 0,  # history key value of the last viewed via the [diff] link | ||||
|         'method': 'GET', | ||||
|         # Custom notification content | ||||
|         'notification_body': None, | ||||
|         'notification_format': default_notification_format_for_watch, | ||||
|         'notification_muted': False, | ||||
|         'notification_title': None, | ||||
|         'notification_screenshot': False,  # Include the latest screenshot if available and supported by the apprise URL | ||||
|         'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise) | ||||
|         'paused': False, | ||||
|         'previous_md5': False, | ||||
|         'previous_md5_before_filters': False,  # Used for skipping changedetection entirely | ||||
|         'proxy': None,  # Preferred proxy connection | ||||
|         'subtractive_selectors': [], | ||||
|         'tag': None, | ||||
|         'text_should_not_be_present': [],  # Text that should not be present | ||||
|         # Re #110, so then if this is set to None, we know to use the default value instead | ||||
|         # Requires setting to None on submit if it's the same as the default | ||||
|         # Should be all None by default, so we use the system default in this case. | ||||
|         'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, | ||||
|         'title': None, | ||||
|         'trigger_text': [],  # List of text or regex to wait for until a change is detected | ||||
|         'url': None, | ||||
|         'uuid': str(uuid.uuid4()), | ||||
|         'webdriver_delay': None, | ||||
|         'webdriver_js_execute_code': None,  # Run before change-detection | ||||
|     } | ||||
|     jitter_seconds = 0 | ||||
|  | ||||
|     def __init__(self, *arg, **kw): | ||||
| @@ -111,6 +114,24 @@ class model(dict): | ||||
|  | ||||
|         return ready_url | ||||
|  | ||||
|     @property | ||||
|     def get_fetch_backend(self): | ||||
|         """ | ||||
|         Like just using the `fetch_backend` key, but with some extra logic applied | ||||
|         :return: | ||||
|         """ | ||||
|         # Maybe also if is_image etc? | ||||
|         # This is because Chrome/Playwright won't render the PDF in the browser, so we just fetch it and use pdf2html to extract the text. | ||||
|         if self.is_pdf: | ||||
|             return 'html_requests' | ||||
|  | ||||
|         return self.get('fetch_backend') | ||||
|  | ||||
|     @property | ||||
|     def is_pdf(self): | ||||
|         # content_type field is set later (after the first fetch) | ||||
|         return '.pdf' in self.get('url', '').lower() or 'pdf' in self.get('content_type', '').lower() | ||||
|  | ||||
|     @property | ||||
|     def label(self): | ||||
|         # Used for sorting | ||||
|   | ||||
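A small self-contained sketch of the two new properties, just to show the fallback behaviour; `Watch` here is a stand-in for the model(dict) class above:

class Watch(dict):
    @property
    def is_pdf(self):
        # Same heuristic as above: the URL points at a PDF, or the server said so
        return '.pdf' in self.get('url', '').lower() or 'pdf' in self.get('content_type', '').lower()

    @property
    def get_fetch_backend(self):
        # Chrome/Playwright won't render a PDF, so fall back to plain requests + pdf2html
        if self.is_pdf:
            return 'html_requests'
        return self.get('fetch_backend')

w = Watch(url='https://example.com/catalogue.pdf', content_type='', fetch_backend='html_webdriver')
print(w.get_fetch_backend)  # 'html_requests', even though the watch prefers html_webdriver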
changedetectionio/queuedWatchMetaData.py (new file, 10 lines)
							| @@ -0,0 +1,10 @@ | ||||
| from dataclasses import dataclass, field | ||||
| from typing import Any | ||||
|  | ||||
| # So that we can queue some metadata in `item` | ||||
| # https://docs.python.org/3/library/queue.html#queue.PriorityQueue | ||||
| # | ||||
| @dataclass(order=True) | ||||
| class PrioritizedItem: | ||||
|     priority: int | ||||
|     item: Any=field(compare=False) | ||||
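A minimal usage sketch: because `item` is excluded from comparison, two queued entries with the same priority never try to compare their payload dicts, which is exactly what queue.PriorityQueue needs.

from queue import PriorityQueue
from changedetectionio.queuedWatchMetaData import PrioritizedItem

q = PriorityQueue()
q.put(PrioritizedItem(priority=5, item={'uuid': 'watch-a'}))
q.put(PrioritizedItem(priority=1, item={'uuid': 'watch-b'}))
print(q.get().item['uuid'])  # 'watch-b' - the lowest priority value is dequeued first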
| @@ -1,3 +1,6 @@ | ||||
| // Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com) | ||||
| // All rights reserved. | ||||
|  | ||||
| // @file Scrape the page looking for elements of concern (%ELEMENTS%) | ||||
| // http://matatk.agrip.org.uk/tests/position-and-width/ | ||||
| // https://stackoverflow.com/questions/26813480/when-is-element-getboundingclientrect-guaranteed-to-be-updated-accurate | ||||
| @@ -81,8 +84,16 @@ var bbox; | ||||
| for (var i = 0; i < elements.length; i++) { | ||||
|     bbox = elements[i].getBoundingClientRect(); | ||||
|  | ||||
|     // Forget really small ones | ||||
|     if (bbox['width'] < 10 && bbox['height'] < 10) { | ||||
|     // Exclude items that are not interactable or visible | ||||
|     if(elements[i].style.opacity === "0") { | ||||
|         continue | ||||
|     } | ||||
|     if(elements[i].style.display === "none" || elements[i].style.pointerEvents === "none" ) { | ||||
|         continue | ||||
|     } | ||||
|  | ||||
|     // Skip really small ones, and where width or height ==0 | ||||
|     if (bbox['width'] * bbox['height'] < 100) { | ||||
|         continue; | ||||
|     } | ||||
|  | ||||
| @@ -138,7 +149,6 @@ for (var i = 0; i < elements.length; i++) { | ||||
|  | ||||
| } | ||||
|  | ||||
|  | ||||
| // Inject the current one set in the include_filters, which may be a CSS rule | ||||
| // used for displaying the current one in VisualSelector, where its not one we generated. | ||||
| if (include_filters.length) { | ||||
| @@ -166,10 +176,23 @@ if (include_filters.length) { | ||||
|         } | ||||
|  | ||||
|         if (q) { | ||||
|             bbox = q.getBoundingClientRect(); | ||||
|             console.log("xpath_element_scraper: Got filter element, scroll from top was "+scroll_y) | ||||
|         } else { | ||||
|             console.log("xpath_element_scraper: filter element "+f+" was not found"); | ||||
|             // #1231 - In the case an XPath attribute filter is applied, we have to traverse up and find the owning element. | ||||
|             if (q.hasOwnProperty('getBoundingClientRect')) { | ||||
|                 bbox = q.getBoundingClientRect(); | ||||
|                 console.log("xpath_element_scraper: Got filter element, scroll from top was " + scroll_y) | ||||
|             } else { | ||||
|                 try { | ||||
|                     // Try and see we can find its ownerElement | ||||
|                     bbox = q.ownerElement.getBoundingClientRect(); | ||||
|                     console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) | ||||
|                 } catch (e) { | ||||
|                     console.log("xpath_element_scraper: error looking up ownerElement") | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|          | ||||
|         if(!q) { | ||||
|             console.log("xpath_element_scraper: filter element " + f + " was not found"); | ||||
|         } | ||||
|  | ||||
|         if (bbox && bbox['width'] > 0 && bbox['height'] > 0) { | ||||
| @@ -184,5 +207,9 @@ if (include_filters.length) { | ||||
|     } | ||||
| } | ||||
|  | ||||
| // Sort the elements so the smallest one comes first; in other words, we find the smallest one matching in that area | ||||
| // so that we don't select the wrapping element by mistake and end up unable to select what we want | ||||
| size_pos.sort((a, b) => (a.width*a.height > b.width*b.height) ? 1 : -1) | ||||
|  | ||||
| // Window.width required for proper scaling in the frontend | ||||
| return {'size_pos': size_pos, 'browser_width': window.innerWidth}; | ||||
|   | ||||
| @@ -1,104 +0,0 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
|  | ||||
| # live_server will throw errors even with live_server_scope=function if I have the live_server setup in different functions | ||||
| # and I like to restart the server for each test (and have the test cleanup after each test) | ||||
| # merge request welcome :) | ||||
|  | ||||
|  | ||||
| # exit when any command fails | ||||
| set -e | ||||
|  | ||||
| SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||||
|  | ||||
| find tests/test_*py -type f|while read test_name | ||||
| do | ||||
|   echo "TEST RUNNING $test_name" | ||||
|   pytest $test_name | ||||
| done | ||||
|  | ||||
| echo "RUNNING WITH BASE_URL SET" | ||||
|  | ||||
| # Now re-run some tests with BASE_URL enabled | ||||
| # Re #65 - Ability to include a link back to the installation, in the notification. | ||||
| export BASE_URL="https://really-unique-domain.io" | ||||
| pytest tests/test_notification.py | ||||
|  | ||||
|  | ||||
| # Re-run with HIDE_REFERER set - could affect login | ||||
| export HIDE_REFERER=True | ||||
| pytest tests/test_access_control.py | ||||
|  | ||||
|  | ||||
| # Now for the selenium and playwright/browserless fetchers | ||||
| # Note - this is not UI functional tests - just checking that each one can fetch the content | ||||
|  | ||||
| echo "TESTING WEBDRIVER FETCH > SELENIUM/WEBDRIVER..." | ||||
| docker run -d --name $$-test_selenium  -p 4444:4444 --rm --shm-size="2g"  selenium/standalone-chrome-debug:3.141.59 | ||||
| # takes a while to spin up | ||||
| sleep 5 | ||||
| export WEBDRIVER_URL=http://localhost:4444/wd/hub | ||||
| pytest tests/fetchers/test_content.py | ||||
| pytest tests/test_errorhandling.py | ||||
| unset WEBDRIVER_URL | ||||
| docker kill $$-test_selenium | ||||
|  | ||||
| echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..." | ||||
| # Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt | ||||
| PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+") | ||||
| echo "using $PLAYWRIGHT_VERSION" | ||||
| pip3 install "$PLAYWRIGHT_VERSION" | ||||
| docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.53-chrome-stable | ||||
| # takes a while to spin up | ||||
| sleep 5 | ||||
| export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000 | ||||
| pytest tests/fetchers/test_content.py | ||||
| pytest tests/test_errorhandling.py | ||||
| pytest tests/visualselector/test_fetch_data.py | ||||
|  | ||||
| unset PLAYWRIGHT_DRIVER_URL | ||||
| docker kill $$-test_browserless | ||||
|  | ||||
| # Test proxy list handling, starting two squids on different ports | ||||
| # Each squid adds a different header to the response, which is the main thing we test for. | ||||
| docker run -d --name $$-squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3128:3128 ubuntu/squid:4.13-21.10_edge | ||||
| docker run -d --name $$-squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3129:3128 ubuntu/squid:4.13-21.10_edge | ||||
|  | ||||
|  | ||||
| # So, basic HTTP as env var test | ||||
| export HTTP_PROXY=http://localhost:3128 | ||||
| export HTTPS_PROXY=http://localhost:3128 | ||||
| pytest tests/proxy_list/test_proxy.py | ||||
| docker logs $$-squid-one 2>/dev/null|grep one.changedetection.io | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see a request to one.changedetection.io in the squid logs (while checking env vars HTTP_PROXY/HTTPS_PROXY)" | ||||
| fi | ||||
| unset HTTP_PROXY | ||||
| unset HTTPS_PROXY | ||||
|  | ||||
|  | ||||
| # 2nd test actually choose the preferred proxy from proxies.json | ||||
| cp tests/proxy_list/proxies.json-example ./test-datastore/proxies.json | ||||
| # Makes a watch use a preferred proxy | ||||
| pytest tests/proxy_list/test_multiple_proxy.py | ||||
|  | ||||
| # Should be a request in the default "first" squid | ||||
| docker logs $$-squid-one 2>/dev/null|grep chosen.changedetection.io | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)" | ||||
| fi | ||||
|  | ||||
| # And one in the 'second' squid (user selects this as preferred) | ||||
| docker logs $$-squid-two 2>/dev/null|grep chosen.changedetection.io | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)" | ||||
| fi | ||||
|  | ||||
| # @todo - test system override proxy selection and watch defaults, setup a 3rd squid? | ||||
| docker kill $$-squid-one | ||||
| docker kill $$-squid-two | ||||
|  | ||||
|  | ||||
changedetectionio/run_basic_tests.sh (new executable file, 30 lines)
							| @@ -0,0 +1,30 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
|  | ||||
| # live_server will throw errors even with live_server_scope=function if I have the live_server setup in different functions | ||||
| # and I like to restart the server for each test (and have the test cleanup after each test) | ||||
| # merge request welcome :) | ||||
|  | ||||
|  | ||||
| # exit when any command fails | ||||
| set -e | ||||
|  | ||||
| SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||||
|  | ||||
| find tests/test_*py -type f|while read test_name | ||||
| do | ||||
|   echo "TEST RUNNING $test_name" | ||||
|   pytest $test_name | ||||
| done | ||||
|  | ||||
| echo "RUNNING WITH BASE_URL SET" | ||||
|  | ||||
| # Now re-run some tests with BASE_URL enabled | ||||
| # Re #65 - Ability to include a link back to the installation, in the notification. | ||||
| export BASE_URL="https://really-unique-domain.io" | ||||
| pytest tests/test_notification.py | ||||
|  | ||||
|  | ||||
| # Re-run with HIDE_REFERER set - could affect login | ||||
| export HIDE_REFERER=True | ||||
| pytest tests/test_access_control.py | ||||
changedetectionio/run_proxy_tests.sh (new executable file, 61 lines)
							| @@ -0,0 +1,61 @@ | ||||
| #!/bin/bash | ||||
|  | ||||
| # exit when any command fails | ||||
| set -e | ||||
|  | ||||
| # Test proxy list handling, starting two squids on different ports | ||||
| # Each squid adds a different header to the response, which is the main thing we test for. | ||||
| docker run --network changedet-network -d --name squid-one --hostname squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf ubuntu/squid:4.13-21.10_edge | ||||
| docker run --network changedet-network -d --name squid-two --hostname squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf ubuntu/squid:4.13-21.10_edge | ||||
|  | ||||
| # Used for configuring a custom proxy URL via the UI | ||||
| docker run --network changedet-network -d \ | ||||
|   --name squid-custom \ | ||||
|   --hostname squid-custom \ | ||||
|   --rm \ | ||||
|   -v `pwd`/tests/proxy_list/squid-auth.conf:/etc/squid/conf.d/debian.conf \ | ||||
|   -v `pwd`/tests/proxy_list/squid-passwords.txt:/etc/squid3/passwords \ | ||||
|   ubuntu/squid:4.13-21.10_edge | ||||
|  | ||||
|  | ||||
| ## 2nd test actually choose the preferred proxy from proxies.json | ||||
|  | ||||
| docker run --network changedet-network \ | ||||
|   -v `pwd`/tests/proxy_list/proxies.json-example:/app/changedetectionio/test-datastore/proxies.json \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_multiple_proxy.py' | ||||
|  | ||||
|  | ||||
| ## Should be a request in the default "first" squid | ||||
| docker logs squid-one 2>/dev/null|grep chosen.changedetection.io | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy - squid one)" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| # And one in the 'second' squid (user selects this as preferred) | ||||
| docker logs squid-two 2>/dev/null|grep chosen.changedetection.io | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy - squid two)" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
|  | ||||
| # Test the UI configurable proxies | ||||
|  | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_select_custom_proxy.py' | ||||
|  | ||||
|  | ||||
| # Should see a request for one.changedetection.io in there | ||||
| docker logs squid-custom 2>/dev/null|grep "TCP_TUNNEL.200.*changedetection.io" | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see a valid request to changedetection.io in the squid logs (while checking the UI-configured custom proxy - squid-custom)" | ||||
|   exit 1 | ||||
| fi | ||||
|  | ||||
| docker kill squid-one squid-two squid-custom | ||||
changedetectionio/static/images/pdf-icon.svg (new file, 9 lines, 5.0 KiB)
							| @@ -0,0 +1,9 @@ | ||||
| <?xml version="1.0" encoding="UTF-8" standalone="no"?> | ||||
| <svg xmlns="http://www.w3.org/2000/svg" width="75.320129mm" height="92.604164mm" viewBox="0 0 75.320129 92.604164"> | ||||
|   <g transform="translate(53.548057 -183.975276) scale(1.4843)"> | ||||
|     <path fill="#ff2116" d="M-29.632812 123.94727c-3.551967 0-6.44336 2.89347-6.44336 6.44531v49.49804c0 3.55185 2.891393 6.44532 6.44336 6.44532H8.2167969c3.5519661 0 6.4433591-2.89335 6.4433591-6.44532v-40.70117s.101353-1.19181-.416015-2.35156c-.484969-1.08711-1.275391-1.84375-1.275391-1.84375a1.0584391 1.0584391 0 0 0-.0059-.008l-9.3906254-9.21094a1.0584391 1.0584391 0 0 0-.015625-.0156s-.8017392-.76344-1.9902344-1.27344c-1.39939552-.6005-2.8417968-.53711-2.8417968-.53711l.021484-.002z" color="#000" font-family="sans-serif" overflow="visible" paint-order="markers fill stroke" style="line-height:normal;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;text-transform:none;text-orientation:mixed;white-space:normal;shape-padding:0;isolation:auto;mix-blend-mode:normal;solid-color:#000000;solid-opacity:1"/> | ||||
|     <path fill="#f5f5f5" d="M-29.632812 126.06445h28.3789058a1.0584391 1.0584391 0 0 0 .021484 0s1.13480448.011 1.96484378.36719c.79889772.34282 1.36536982.86176 1.36914062.86524.0000125.00001.00391.004.00391.004l9.3671868 9.18945s.564354.59582.837891 1.20899c.220779.49491.234375 1.40039.234375 1.40039a1.0584391 1.0584391 0 0 0-.002.0449v40.74609c0 2.41592-1.910258 4.32813-4.3261717 4.32813H-29.632812c-2.415914 0-4.326172-1.91209-4.326172-4.32813v-49.49804c0-2.41603 1.910258-4.32813 4.326172-4.32813z" color="#000" font-family="sans-serif" overflow="visible" paint-order="markers fill stroke" style="line-height:normal;font-variant-ligatures:normal;font-variant-position:normal;font-variant-caps:normal;font-variant-numeric:normal;font-variant-alternates:normal;font-feature-settings:normal;text-indent:0;text-align:start;text-decoration-line:none;text-decoration-style:solid;text-decoration-color:#000000;text-transform:none;text-orientation:mixed;white-space:normal;shape-padding:0;isolation:auto;mix-blend-mode:normal;solid-color:#000000;solid-opacity:1"/> | ||||
|     <path fill="#ff2116" d="M-23.40766 161.09299c-1.45669-1.45669.11934-3.45839 4.39648-5.58397l2.69124-1.33743 1.04845-2.29399c.57665-1.26169 1.43729-3.32036 1.91254-4.5748l.8641-2.28082-.59546-1.68793c-.73217-2.07547-.99326-5.19438-.52872-6.31588.62923-1.51909 2.69029-1.36323 3.50626.26515.63727 1.27176.57212 3.57488-.18329 6.47946l-.6193 2.38125.5455.92604c.30003.50932 1.1764 1.71867 1.9475 2.68743l1.44924 1.80272 1.8033728-.23533c5.72900399-.74758 7.6912472.523 7.6912472 2.34476 0 2.29921-4.4984914 2.48899-8.2760865-.16423-.8499666-.59698-1.4336605-1.19001-1.4336605-1.19001s-2.3665326.48178-3.531704.79583c-1.202707.32417-1.80274.52719-3.564509 1.12186 0 0-.61814.89767-1.02094 1.55026-1.49858 2.4279-3.24833 4.43998-4.49793 5.1723-1.3991.81993-2.86584.87582-3.60433.13733zm2.28605-.81668c.81883-.50607 2.47616-2.46625 3.62341-4.28553l.46449-.73658-2.11497 1.06339c-3.26655 1.64239-4.76093 3.19033-3.98386 4.12664.43653.52598.95874.48237 2.01093-.16792zm21.21809-5.95578c.80089-.56097.68463-1.69142-.22082-2.1472-.70466-.35471-1.2726074-.42759-3.1031574-.40057-1.1249.0767-2.9337647.3034-3.2403347.37237 0 0 .993716.68678 1.434896.93922.58731.33544 2.0145161.95811 3.0565161 1.27706 1.02785.31461 1.6224.28144 2.0729-.0409zm-8.53152-3.54594c-.4847-.50952-1.30889-1.57296-1.83152-2.3632-.68353-.89643-1.02629-1.52887-1.02629-1.52887s-.4996 1.60694-.90948 2.57394l-1.27876 3.16076-.37075.71695s1.971043-.64627 2.97389-.90822c1.0621668-.27744 3.21787-.70134 3.21787-.70134zm-2.74938-11.02573c.12363-1.0375.1761-2.07346-.15724-2.59587-.9246-1.01077-2.04057-.16787-1.85154 2.23517.0636.8084.26443 2.19033.53292 3.04209l.48817 1.54863.34358-1.16638c.18897-.64151.47882-2.02015.64411-3.06364z"/> | ||||
|     <path fill="#2c2c2c" d="M-20.930423 167.83862h2.364986q1.133514 0 1.840213.2169.706698.20991 1.189489.9446.482795.72769.482795 1.75625 0 .94459-.391832 1.6233-.391833.67871-1.056548.97958-.65772.30087-2.02913.30087h-.818651v3.72941h-1.581322zm1.581322 1.22447v3.33058h.783664q1.049552 0 1.44838-.39184.405826-.39183.405826-1.27345 0-.65772-.265887-1.06355-.265884-.41282-.587747-.50378-.314866-.098-1.000572-.098zm5.50664-1.22447h2.148082q1.560333 0 2.4909318.55276.9375993.55276 1.4133973 1.6443.482791 1.09153.482791 2.42096 0 1.3994-.4338151 2.49793-.4268149 1.09153-1.3154348 1.76324-.8816233.67172-2.5189212.67172h-2.267031zm1.581326 1.26645v7.018h.657715q1.378411 0 2.001144-.9516.6227329-.95858.6227329-2.5539 0-3.5125-2.6238769-3.5125zm6.4722254-1.26645h5.30372941v1.26645H-4.2075842v2.85478h2.9807225v1.26646h-2.9807225v4.16322h-1.5813254z" font-family="Franklin Gothic Medium Cond" letter-spacing="0" style="line-height:125%;-inkscape-font-specification:'Franklin Gothic Medium Cond'" word-spacing="4.26000023"/> | ||||
|   </g> | ||||
| </svg> | ||||
changedetectionio/static/images/price-tag-icon.svg (new file, 2 lines, 1.7 KiB)
							| @@ -0,0 +1,2 @@ | ||||
| <?xml version="1.0" encoding="UTF-8"?> | ||||
| <svg width="83.39" height="89.648" enable-background="new 0 0 122.406 122.881" version="1.1" viewBox="0 0 83.39 89.648" xml:space="preserve" xmlns="http://www.w3.org/2000/svg"><g transform="translate(5e-4 -33.234)"><path d="m44.239 42.946-39.111 39.896 34.908 34.91 39.09-39.876-1.149-34.931zm-0.91791 42.273c0.979-0.979 1.507-1.99 1.577-3.027 0.077-1.043-0.248-2.424-0.967-4.135-0.725-1.717-1.348-3.346-1.87-4.885s-0.814-3.014-0.897-4.432c-0.07-1.42 0.134-2.768 0.624-4.045 0.477-1.279 1.348-2.545 2.607-3.804 2.099-2.099 4.535-3.123 7.314-3.065 2.773 0.063 5.457 1.158 8.04 3.294l2.881 3.034c1.946 2.607 2.799 5.33 2.557 8.166-0.235 2.83-1.532 5.426-3.893 7.785l-6.296-6.297c1.291-1.291 2.035-2.531 2.238-3.727 0.191-1.197-0.165-2.252-1.081-3.168-0.821-0.82-1.717-1.195-2.69-1.139-0.967 0.064-1.908 0.547-2.817 1.457-0.922 0.922-1.393 1.914-1.412 2.977s0.306 2.416 0.973 4.064c0.661 1.652 1.24 3.25 1.736 4.801 0.496 1.553 0.782 3.035 0.858 4.445 0.076 1.426-0.127 2.787-0.591 4.104-0.477 1.316-1.336 2.596-2.588 3.848-2.125 2.125-4.522 3.186-7.212 3.18s-5.311-1.063-7.855-3.16l-3.747 3.746-2.964-2.965 3.766-3.764c-2.423-2.996-3.568-5.998-3.447-9.02 0.127-3.014 1.476-5.813 4.045-8.383l6.278 6.277c-1.412 1.412-2.175 2.799-2.277 4.16-0.108 1.367 0.414 2.627 1.571 3.783 0.839 0.84 1.755 1.26 2.741 1.242 0.985-0.017 1.92-0.47 2.798-1.347zm21.127-46.435h17.457c-0.0269 2.2368 0.69936 16.025 0.69936 16.025l0.785 23.858c0.019 0.609-0.221 1.164-0.619 1.564l5e-3 4e-3 -41.236 42.022c-0.82213 0.8378-2.175 0.83-3.004 0l-37.913-37.91c-0.83-0.83-0.83-2.176 0-3.006l41.236-42.021c0.39287-0.42671 1.502-0.53568 1.502-0.53568zm18.011 11.59c-59.392-29.687-29.696-14.843 0 0z"/></g></svg> | ||||
| @@ -1,4 +1,5 @@ | ||||
| // Horrible proof of concept code :) | ||||
| // Copyright (C) 2021 Leigh Morresi (dgtlmoon@gmail.com) | ||||
| // All rights reserved. | ||||
| // yes - this is really a hack, if you are a front-ender and want to help, please get in touch! | ||||
|  | ||||
| $(document).ready(function () { | ||||
| @@ -177,9 +178,10 @@ $(document).ready(function () { | ||||
|             // Basically, find the 'deepest' (most specific) element | ||||
|             var found = 0; | ||||
|             ctx.fillStyle = 'rgba(205,0,0,0.35)'; | ||||
|             for (var i = selector_data['size_pos'].length; i !== 0; i--) { | ||||
|             // Will be sorted by smallest width*height first | ||||
|             for (var i = 0; i < selector_data['size_pos'].length; i++) { | ||||
|                 // draw all of them? let them choose somehow? | ||||
|                 var sel = selector_data['size_pos'][i - 1]; | ||||
|                 var sel = selector_data['size_pos'][i]; | ||||
|                 // If we are in a bounding-box | ||||
|                 if (e.offsetY > sel.top * y_scale && e.offsetY < sel.top * y_scale + sel.height * y_scale | ||||
|                     && | ||||
| @@ -195,7 +197,7 @@ $(document).ready(function () { | ||||
|                     // no need to keep digging | ||||
|                     // @todo or, O to go out/up, I to go in | ||||
|                     // or double click to go up/out the selector? | ||||
|                     current_selected_i = i - 1; | ||||
|                     current_selected_i = i; | ||||
|                     found += 1; | ||||
|                     break; | ||||
|                 } | ||||
|   | ||||
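The net effect of the two changes (the scraper sorting candidates by area ascending, and this loop walking that list from the start) is a plain "smallest box containing the click wins" rule. A rough Python sketch of that rule, purely as an illustration of the JavaScript above; the field names are assumed from the bounding boxes collected by the scraper:

def pick_selector(size_pos, x, y):
    # Smallest-area-first, so the first hit is the most specific element, not its wrapper
    for s in sorted(size_pos, key=lambda s: s['width'] * s['height']):
        within_x = s['left'] <= x <= s['left'] + s['width']
        within_y = s['top'] <= y <= s['top'] + s['height']
        if within_x and within_y:
            return s
    return None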
changedetectionio/static/styles/.dockerignore (new file, 3 lines)
							| @@ -0,0 +1,3 @@ | ||||
| node_modules | ||||
| package-lock.json | ||||
|  | ||||
| @@ -0,0 +1,17 @@ | ||||
| ul#requests-extra_proxies { | ||||
|   list-style: none; | ||||
|   /* tidy up the table to look more "inline" */ | ||||
|   li { | ||||
|     > label { | ||||
|       display: none; | ||||
|     } | ||||
|  | ||||
|   } | ||||
|   /* each proxy entry is a `table` */ | ||||
|   table { | ||||
|     tr { | ||||
|       display: inline; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
| @@ -2,10 +2,11 @@ | ||||
|  * -- BASE STYLES -- | ||||
|  */ | ||||
|  | ||||
| @import "parts/_variables"; | ||||
| @import "parts/_spinners"; | ||||
| @import "parts/_browser-steps"; | ||||
| @import "parts/_arrows"; | ||||
| @import "parts/_browser-steps"; | ||||
| @import "parts/_extra_proxies"; | ||||
| @import "parts/_spinners"; | ||||
| @import "parts/_variables"; | ||||
|  | ||||
| body { | ||||
|   color: var(--color-text); | ||||
| @@ -22,6 +23,13 @@ body { | ||||
|   width: 1px; | ||||
| } | ||||
|  | ||||
| // Row icons like chrome, pdf, share, etc | ||||
| .status-icon { | ||||
|   display: inline-block; | ||||
|   height: 1rem; | ||||
|   vertical-align: middle; | ||||
| } | ||||
|  | ||||
| .pure-table-even { | ||||
|   background: var(--color-background); | ||||
| } | ||||
| @@ -1009,3 +1017,30 @@ ul { | ||||
|   border-radius: 5px; | ||||
|   color: var(--color-warning); | ||||
| } | ||||
|  | ||||
| /* automatic price following helpers */ | ||||
| .tracking-ldjson-price-data { | ||||
|   background-color: var(--color-background-button-green); | ||||
|   color: #000; | ||||
|   padding: 3px; | ||||
|   border-radius: 3px; | ||||
|   white-space: nowrap; | ||||
| } | ||||
|  | ||||
| .ldjson-price-track-offer { | ||||
|   a.pure-button { | ||||
|     border-radius: 3px; | ||||
|     padding: 3px; | ||||
|     background-color: var(--color-background-button-green); | ||||
|   } | ||||
|  | ||||
|   font-weight: bold; | ||||
|   font-style: italic; | ||||
| } | ||||
|  | ||||
| .price-follow-tag-icon { | ||||
|   display: inline-block; | ||||
|   height: 0.8rem; | ||||
|   vertical-align: middle; | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,139 @@ | ||||
| /* | ||||
|  * -- BASE STYLES -- | ||||
|  */ | ||||
| .arrow { | ||||
|   border: solid #1b98f8; | ||||
|   border-width: 0 2px 2px 0; | ||||
|   display: inline-block; | ||||
|   padding: 3px; } | ||||
|   .arrow.right { | ||||
|     transform: rotate(-45deg); | ||||
|     -webkit-transform: rotate(-45deg); } | ||||
|   .arrow.left { | ||||
|     transform: rotate(135deg); | ||||
|     -webkit-transform: rotate(135deg); } | ||||
|   .arrow.up, .arrow.asc { | ||||
|     transform: rotate(-135deg); | ||||
|     -webkit-transform: rotate(-135deg); } | ||||
|   .arrow.down, .arrow.desc { | ||||
|     transform: rotate(45deg); | ||||
|     -webkit-transform: rotate(45deg); } | ||||
|  | ||||
| #browser_steps { | ||||
|   /* convert rows to horizontal cells */ } | ||||
|   #browser_steps th { | ||||
|     display: none; } | ||||
|   #browser_steps li { | ||||
|     list-style: decimal; | ||||
|     padding: 5px; } | ||||
|     #browser_steps li:not(:first-child):hover { | ||||
|       opacity: 1.0; } | ||||
|     #browser_steps li .control { | ||||
|       padding-left: 5px; | ||||
|       padding-right: 5px; } | ||||
|       #browser_steps li .control a { | ||||
|         font-size: 70%; } | ||||
|     #browser_steps li.empty { | ||||
|       padding: 0px; | ||||
|       opacity: 0.35; } | ||||
|       #browser_steps li.empty .control { | ||||
|         display: none; } | ||||
|     #browser_steps li:hover { | ||||
|       background: #eee; } | ||||
|     #browser_steps li > label { | ||||
|       display: none; } | ||||
|  | ||||
| #browser-steps-fieldlist { | ||||
|   height: 100%; | ||||
|   overflow-y: scroll; } | ||||
|  | ||||
| #browser-steps .flex-wrapper { | ||||
|   display: flex; | ||||
|   flex-flow: row; | ||||
|   height: 600px; | ||||
|   /*@todo make this dynamic */ } | ||||
|  | ||||
| /*  this is duplicate :( */ | ||||
| #browsersteps-selector-wrapper { | ||||
|   height: 100%; | ||||
|   width: 100%; | ||||
|   overflow-y: scroll; | ||||
|   position: relative; | ||||
|   /* nice tall skinny one */ } | ||||
|   #browsersteps-selector-wrapper > img { | ||||
|     position: absolute; | ||||
|     max-width: 100%; } | ||||
|   #browsersteps-selector-wrapper > canvas { | ||||
|     position: relative; | ||||
|     max-width: 100%; } | ||||
|     #browsersteps-selector-wrapper > canvas:hover { | ||||
|       cursor: pointer; } | ||||
|   #browsersteps-selector-wrapper .loader { | ||||
|     position: absolute; | ||||
|     left: 50%; | ||||
|     top: 50%; | ||||
|     transform: translate(-50%, -50%); | ||||
|     margin-left: -40px; | ||||
|     z-index: 100; | ||||
|     max-width: 350px; | ||||
|     text-align: center; } | ||||
|   #browsersteps-selector-wrapper .spinner, #browsersteps-selector-wrapper .spinner:after { | ||||
|     width: 80px; | ||||
|     height: 80px; | ||||
|     font-size: 3px; } | ||||
|   #browsersteps-selector-wrapper #browsersteps-click-start { | ||||
|     color: var(--color-grey-400); } | ||||
|     #browsersteps-selector-wrapper #browsersteps-click-start:hover { | ||||
|       cursor: pointer; } | ||||
|  | ||||
| ul#requests-extra_proxies { | ||||
|   list-style: none; | ||||
|   /* tidy up the table to look more "inline" */ | ||||
|   /* each proxy entry is a `table` */ } | ||||
|   ul#requests-extra_proxies li > label { | ||||
|     display: none; } | ||||
|   ul#requests-extra_proxies table tr { | ||||
|     display: inline; } | ||||
|  | ||||
| /* spinner */ | ||||
| .spinner, | ||||
| .spinner:after { | ||||
|   border-radius: 50%; | ||||
|   width: 10px; | ||||
|   height: 10px; } | ||||
|  | ||||
| .spinner { | ||||
|   margin: 0px auto; | ||||
|   font-size: 3px; | ||||
|   vertical-align: middle; | ||||
|   display: inline-block; | ||||
|   text-indent: -9999em; | ||||
|   border-top: 1.1em solid rgba(38, 104, 237, 0.2); | ||||
|   border-right: 1.1em solid rgba(38, 104, 237, 0.2); | ||||
|   border-bottom: 1.1em solid rgba(38, 104, 237, 0.2); | ||||
|   border-left: 1.1em solid #2668ed; | ||||
|   -webkit-transform: translateZ(0); | ||||
|   -ms-transform: translateZ(0); | ||||
|   transform: translateZ(0); | ||||
|   -webkit-animation: load8 1.1s infinite linear; | ||||
|   animation: load8 1.1s infinite linear; } | ||||
|  | ||||
| @-webkit-keyframes load8 { | ||||
|   0% { | ||||
|     -webkit-transform: rotate(0deg); | ||||
|     transform: rotate(0deg); } | ||||
|   100% { | ||||
|     -webkit-transform: rotate(360deg); | ||||
|     transform: rotate(360deg); } } | ||||
|  | ||||
| @keyframes load8 { | ||||
|   0% { | ||||
|     -webkit-transform: rotate(0deg); | ||||
|     transform: rotate(0deg); } | ||||
|   100% { | ||||
|     -webkit-transform: rotate(360deg); | ||||
|     transform: rotate(360deg); } } | ||||
|  | ||||
| /** | ||||
|  * CSS custom properties (aka variables). | ||||
|  */ | ||||
| @@ -138,130 +271,6 @@ html[data-darkmode="true"] { | ||||
|     html[data-darkmode="true"] .watch-table .unviewed.error { | ||||
|       color: var(--color-watch-table-error); } | ||||
|  | ||||
| /* spinner */ | ||||
| .spinner, | ||||
| .spinner:after { | ||||
|   border-radius: 50%; | ||||
|   width: 10px; | ||||
|   height: 10px; } | ||||
|  | ||||
| .spinner { | ||||
|   margin: 0px auto; | ||||
|   font-size: 3px; | ||||
|   vertical-align: middle; | ||||
|   display: inline-block; | ||||
|   text-indent: -9999em; | ||||
|   border-top: 1.1em solid rgba(38, 104, 237, 0.2); | ||||
|   border-right: 1.1em solid rgba(38, 104, 237, 0.2); | ||||
|   border-bottom: 1.1em solid rgba(38, 104, 237, 0.2); | ||||
|   border-left: 1.1em solid #2668ed; | ||||
|   -webkit-transform: translateZ(0); | ||||
|   -ms-transform: translateZ(0); | ||||
|   transform: translateZ(0); | ||||
|   -webkit-animation: load8 1.1s infinite linear; | ||||
|   animation: load8 1.1s infinite linear; } | ||||
|  | ||||
| @-webkit-keyframes load8 { | ||||
|   0% { | ||||
|     -webkit-transform: rotate(0deg); | ||||
|     transform: rotate(0deg); } | ||||
|   100% { | ||||
|     -webkit-transform: rotate(360deg); | ||||
|     transform: rotate(360deg); } } | ||||
|  | ||||
| @keyframes load8 { | ||||
|   0% { | ||||
|     -webkit-transform: rotate(0deg); | ||||
|     transform: rotate(0deg); } | ||||
|   100% { | ||||
|     -webkit-transform: rotate(360deg); | ||||
|     transform: rotate(360deg); } } | ||||
|  | ||||
| #browser_steps { | ||||
|   /* convert rows to horizontal cells */ } | ||||
|   #browser_steps th { | ||||
|     display: none; } | ||||
|   #browser_steps li { | ||||
|     list-style: decimal; | ||||
|     padding: 5px; } | ||||
|     #browser_steps li:not(:first-child):hover { | ||||
|       opacity: 1.0; } | ||||
|     #browser_steps li .control { | ||||
|       padding-left: 5px; | ||||
|       padding-right: 5px; } | ||||
|       #browser_steps li .control a { | ||||
|         font-size: 70%; } | ||||
|     #browser_steps li.empty { | ||||
|       padding: 0px; | ||||
|       opacity: 0.35; } | ||||
|       #browser_steps li.empty .control { | ||||
|         display: none; } | ||||
|     #browser_steps li:hover { | ||||
|       background: #eee; } | ||||
|     #browser_steps li > label { | ||||
|       display: none; } | ||||
|  | ||||
| #browser-steps-fieldlist { | ||||
|   height: 100%; | ||||
|   overflow-y: scroll; } | ||||
|  | ||||
| #browser-steps .flex-wrapper { | ||||
|   display: flex; | ||||
|   flex-flow: row; | ||||
|   height: 600px; | ||||
|   /*@todo make this dynamic */ } | ||||
|  | ||||
| /*  this is duplicate :( */ | ||||
| #browsersteps-selector-wrapper { | ||||
|   height: 100%; | ||||
|   width: 100%; | ||||
|   overflow-y: scroll; | ||||
|   position: relative; | ||||
|   /* nice tall skinny one */ } | ||||
|   #browsersteps-selector-wrapper > img { | ||||
|     position: absolute; | ||||
|     max-width: 100%; } | ||||
|   #browsersteps-selector-wrapper > canvas { | ||||
|     position: relative; | ||||
|     max-width: 100%; } | ||||
|     #browsersteps-selector-wrapper > canvas:hover { | ||||
|       cursor: pointer; } | ||||
|   #browsersteps-selector-wrapper .loader { | ||||
|     position: absolute; | ||||
|     left: 50%; | ||||
|     top: 50%; | ||||
|     transform: translate(-50%, -50%); | ||||
|     margin-left: -40px; | ||||
|     z-index: 100; | ||||
|     max-width: 350px; | ||||
|     text-align: center; } | ||||
|   #browsersteps-selector-wrapper .spinner, #browsersteps-selector-wrapper .spinner:after { | ||||
|     width: 80px; | ||||
|     height: 80px; | ||||
|     font-size: 3px; } | ||||
|   #browsersteps-selector-wrapper #browsersteps-click-start { | ||||
|     color: var(--color-grey-400); } | ||||
|     #browsersteps-selector-wrapper #browsersteps-click-start:hover { | ||||
|       cursor: pointer; } | ||||
|  | ||||
| .arrow { | ||||
|   border: solid #1b98f8; | ||||
|   border-width: 0 2px 2px 0; | ||||
|   display: inline-block; | ||||
|   padding: 3px; } | ||||
|   .arrow.right { | ||||
|     transform: rotate(-45deg); | ||||
|     -webkit-transform: rotate(-45deg); } | ||||
|   .arrow.left { | ||||
|     transform: rotate(135deg); | ||||
|     -webkit-transform: rotate(135deg); } | ||||
|   .arrow.up, .arrow.asc { | ||||
|     transform: rotate(-135deg); | ||||
|     -webkit-transform: rotate(-135deg); } | ||||
|   .arrow.down, .arrow.desc { | ||||
|     transform: rotate(45deg); | ||||
|     -webkit-transform: rotate(45deg); } | ||||
|  | ||||
| body { | ||||
|   color: var(--color-text); | ||||
|   background: var(--color-background-page); } | ||||
| @@ -275,6 +284,11 @@ body { | ||||
|   white-space: nowrap; | ||||
|   width: 1px; } | ||||
|  | ||||
| .status-icon { | ||||
|   display: inline-block; | ||||
|   height: 1rem; | ||||
|   vertical-align: middle; } | ||||
|  | ||||
| .pure-table-even { | ||||
|   background: var(--color-background); } | ||||
|  | ||||
| @@ -945,3 +959,24 @@ ul { | ||||
|     display: inline; | ||||
|     height: 26px; | ||||
|     vertical-align: middle; } | ||||
|  | ||||
| /* automatic price following helpers */ | ||||
| .tracking-ldjson-price-data { | ||||
|   background-color: var(--color-background-button-green); | ||||
|   color: #000; | ||||
|   padding: 3px; | ||||
|   border-radius: 3px; | ||||
|   white-space: nowrap; } | ||||
|  | ||||
| .ldjson-price-track-offer { | ||||
|   font-weight: bold; | ||||
|   font-style: italic; } | ||||
|   .ldjson-price-track-offer a.pure-button { | ||||
|     border-radius: 3px; | ||||
|     padding: 3px; | ||||
|     background-color: var(--color-background-button-green); } | ||||
|  | ||||
| .price-follow-tag-icon { | ||||
|   display: inline-block; | ||||
|   height: 0.8rem; | ||||
|   vertical-align: middle; } | ||||
|   | ||||
| @@ -36,7 +36,6 @@ class ChangeDetectionStore: | ||||
|         self.datastore_path = datastore_path | ||||
|         self.json_store_path = "{}/url-watches.json".format(self.datastore_path) | ||||
|         self.needs_write = False | ||||
|         self.proxy_list = None | ||||
|         self.start_time = time.time() | ||||
|         self.stop_thread = False | ||||
|         # Base definition for all watchers | ||||
| @@ -116,11 +115,6 @@ class ChangeDetectionStore: | ||||
|             secret = secrets.token_hex(16) | ||||
|             self.__data['settings']['application']['api_access_token'] = secret | ||||
|  | ||||
|         # Proxy list support - available as a selection in settings when text file is imported | ||||
|         proxy_list_file = "{}/proxies.json".format(self.datastore_path) | ||||
|         if path.isfile(proxy_list_file): | ||||
|             self.import_proxy_list(proxy_list_file) | ||||
|  | ||||
|         # Bump the update version by running updates | ||||
|         self.run_updates() | ||||
|  | ||||
| @@ -250,12 +244,15 @@ class ChangeDetectionStore: | ||||
|     def clear_watch_history(self, uuid): | ||||
|         import pathlib | ||||
|  | ||||
|         self.__data['watching'][uuid].update( | ||||
|             {'last_checked': 0, | ||||
|              'last_viewed': 0, | ||||
|              'previous_md5': False, | ||||
|              'last_notification_error': False, | ||||
|              'last_error': False}) | ||||
|         self.__data['watching'][uuid].update({ | ||||
|                 'last_checked': 0, | ||||
|                 'has_ldjson_price_data': None, | ||||
|                 'last_error': False, | ||||
|                 'last_notification_error': False, | ||||
|                 'last_viewed': 0, | ||||
|                 'previous_md5': False, | ||||
|                 'track_ldjson_price_data': None, | ||||
|             }) | ||||
|  | ||||
|         # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc | ||||
|         for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"): | ||||
| @@ -460,10 +457,30 @@ class ChangeDetectionStore: | ||||
|                     print ("Removing",item) | ||||
|                     unlink(item) | ||||
|  | ||||
|     def import_proxy_list(self, filename): | ||||
|         with open(filename) as f: | ||||
|             self.proxy_list = json.load(f) | ||||
|             print ("Registered proxy list", list(self.proxy_list.keys())) | ||||
|     @property | ||||
|     def proxy_list(self): | ||||
|         proxy_list = {} | ||||
|         proxy_list_file = os.path.join(self.datastore_path, 'proxies.json') | ||||
|  | ||||
|         # Load from external config file | ||||
|         if path.isfile(proxy_list_file): | ||||
|             with open("{}/proxies.json".format(self.datastore_path)) as f: | ||||
|                 proxy_list = json.load(f) | ||||
|  | ||||
|         # Mapping from UI config if available | ||||
|         extras = self.data['settings']['requests'].get('extra_proxies') | ||||
|         if extras: | ||||
|             i = 0 | ||||
|             for proxy in extras: | ||||
|                 i += 1 | ||||
|                 if proxy.get('proxy_name') and proxy.get('proxy_url'): | ||||
|                     k = "ui-" + str(i) + proxy.get('proxy_name') | ||||
|                     proxy_list[k] = {'label': proxy.get('proxy_name'), 'url': proxy.get('proxy_url')} | ||||
|  | ||||
|  | ||||
|         return proxy_list if len(proxy_list) else None | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|     def get_preferred_proxy_for_watch(self, uuid): | ||||
| @@ -473,11 +490,10 @@ class ChangeDetectionStore: | ||||
|         :return: proxy "key" id | ||||
|         """ | ||||
|  | ||||
|         proxy_id = None | ||||
|         if self.proxy_list is None: | ||||
|             return None | ||||
|  | ||||
|         # If its a valid one | ||||
|         # If it's a valid one | ||||
|         watch = self.data['watching'].get(uuid) | ||||
|  | ||||
|         if watch.get('proxy') and watch.get('proxy') in list(self.proxy_list.keys()): | ||||
| @@ -490,8 +506,9 @@ class ChangeDetectionStore: | ||||
|             if self.proxy_list.get(system_proxy_id): | ||||
|                 return system_proxy_id | ||||
|  | ||||
|         # Fallback - Did not resolve anything, use the first available | ||||
|         if system_proxy_id is None: | ||||
|  | ||||
|         # Fallback - Did not resolve anything, or it doesn't exist, so use the first available | ||||
|         if system_proxy_id is None or not self.proxy_list.get(system_proxy_id): | ||||
|             first_default = list(self.proxy_list)[0] | ||||
|             return first_default | ||||
|  | ||||
|   | ||||
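Putting the two sources together, the proxy_list property now yields one mapping containing the proxies.json entries plus any UI-configured extras keyed as "ui-<index><name>" (with the counter fixed to actually increment). A rough sketch of the merge, with illustrative values:

proxies_json = {
    "proxy-one": {"label": "Proxy One", "url": "http://squid-one:3128"},
}
extras = [{'proxy_name': 'Squid custom', 'proxy_url': 'http://user:pass@squid-custom:3128'}]

proxy_list = dict(proxies_json)
for i, proxy in enumerate(extras):
    if proxy.get('proxy_name') and proxy.get('proxy_url'):
        proxy_list["ui-" + str(i) + proxy['proxy_name']] = {
            'label': proxy['proxy_name'],
            'url': proxy['proxy_url'],
        }

print(list(proxy_list))  # ['proxy-one', 'ui-0Squid custom']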
| @@ -21,6 +21,7 @@ | ||||
|             <li class="tab"><a href="#fetching">Fetching</a></li> | ||||
|             <li class="tab"><a href="#filters">Global Filters</a></li> | ||||
|             <li class="tab"><a href="#api">API</a></li> | ||||
|             <li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li> | ||||
|         </ul> | ||||
|     </div> | ||||
|     <div class="box-wrap inner"> | ||||
| @@ -170,14 +171,29 @@ nav | ||||
|                     </div> | ||||
|                 </div> | ||||
|             </div> | ||||
|             <div class="tab-pane-inner" id="proxies"> | ||||
|  | ||||
|                 <p><strong>Tip</strong>: You can connect to websites using <a href="https://brightdata.grsm.io/n0r16zf7eivq">BrightData</a> proxies, their service <strong>WebUnlocker</strong> will solve most CAPTCHAs, whilst their <strong>Residential Proxies</strong> may help to avoid CAPTCHA altogether. </p> | ||||
|                 <p>It may be easier to try <strong>WebUnlocker</strong> first, WebUnlocker also supports country selection.</p> | ||||
|                 <p> | ||||
|                     When you have <a href="https://brightdata.grsm.io/n0r16zf7eivq">registered</a> and enabled the required services, visit the <a href="https://brightdata.com/cp/api_example?">API example page</a>, select <strong>Python</strong>, set the country you wish to use, then copy and paste the example URL below.<br/> | ||||
|                     The Proxy URL with BrightData should start with <code>http://brd-customer...</code> | ||||
|                 </p> | ||||
|  | ||||
|                 <p>When you sign up using <a href="https://brightdata.grsm.io/n0r16zf7eivq">https://brightdata.grsm.io/n0r16zf7eivq</a> BrightData will match any first deposit up to $150</p> | ||||
|  | ||||
|  | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_field(form.requests.form.extra_proxies) }} | ||||
|                     <span class="pure-form-message-inline">"Name" will be used for selecting the proxy in the Watch Edit settings</span> | ||||
|                 </div> | ||||
|             </div> | ||||
|             <div id="actions"> | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_button(form.save_button) }} | ||||
|                     <a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a> | ||||
|                     <a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-cancel">Clear Snapshot History</a> | ||||
|                 </div> | ||||
|  | ||||
|             </div> | ||||
|         </form> | ||||
|     </div> | ||||
|   | ||||
| @@ -32,6 +32,7 @@ | ||||
|         <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="unpause">UnPause</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="mute">Mute</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="font-size: 70%"  name="op" value="unmute">UnMute</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="recheck">Recheck</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="font-size: 70%" name="op" value="notification-default">Use default notification</button> | ||||
|         <button class="pure-button button-secondary button-xsmall" style="background: #dd4242; font-size: 70%" name="op" value="delete">Delete</button> | ||||
|     </div> | ||||
| @@ -88,16 +89,31 @@ | ||||
|                 </td> | ||||
|                 <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}} | ||||
|                     <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a> | ||||
|                     <a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="icon icon-spread" /></a> | ||||
|  | ||||
|                     {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %} | ||||
|                     <a class="link-spread" href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img class="status-icon icon icon-spread" src="{{url_for('static_content', group='images', filename='spread.svg')}}" title="Create a link to share watch config with others" /></a>
|  | ||||
|                     {%if watch.get_fetch_backend == "html_webdriver" %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" title="Using a chrome browser" />{% endif %} | ||||
|                     {%if watch.is_pdf  %}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" title="Converting PDF to text" />{% endif %} | ||||
|                     {% if watch.last_error is defined and watch.last_error != False %} | ||||
|                     <div class="fetch-error">{{ watch.last_error }}</div> | ||||
|                     <div class="fetch-error">{{ watch.last_error }} | ||||
|  | ||||
|                         {% if '403' in watch.last_error %} | ||||
|                             {% if has_proxies %} | ||||
|                                 <a href="{{ url_for('settings_page', uuid=watch.uuid) }}#proxies">Try other proxies/location</a>  | ||||
|                             {% endif %} | ||||
|                             <a href="{{ url_for('settings_page', uuid=watch.uuid) }}#proxies">Try adding external proxies/locations</a> | ||||
|                          | ||||
|                         {% endif %} | ||||
|                     </div> | ||||
|                     {% endif %} | ||||
|                     {% if watch.last_notification_error is defined and watch.last_notification_error != False %} | ||||
|                     <div class="fetch-error notification-error"><a href="{{url_for('notification_logs')}}">{{ watch.last_notification_error }}</a></div> | ||||
|                     {% endif %} | ||||
|                     {% if watch['has_ldjson_price_data'] and not watch['track_ldjson_price_data']  %} | ||||
|                     <div class="ldjson-price-track-offer">Embedded price data detected, follow only price data? <a href="{{url_for('price_data_follower.accept', uuid=watch.uuid)}}" class="pure-button button-xsmall">Yes</a> <a href="{{url_for('price_data_follower.reject', uuid=watch.uuid)}}" class="">No</a></div> | ||||
|                     {% endif %} | ||||
|                     {% if watch['track_ldjson_price_data'] == 'accepted' %} | ||||
|                     <span class="tracking-ldjson-price-data" title="Automatically following embedded price information"><img src="{{url_for('static_content', group='images', filename='price-tag-icon.svg')}}"  class="status-icon price-follow-tag-icon"/> Price</span> | ||||
|                     {% endif %} | ||||
|                     {% if not active_tag %} | ||||
|                     <span class="watch-tag-list">{{ watch.tag}}</span> | ||||
|                     {% endif %} | ||||
|   | ||||
| @@ -1,10 +1,10 @@ | ||||
| { | ||||
|   "proxy-one": { | ||||
|     "label": "One", | ||||
|     "url": "http://127.0.0.1:3128" | ||||
|     "label": "Proxy One", | ||||
|     "url": "http://squid-one:3128" | ||||
|   }, | ||||
|   "proxy-two": { | ||||
|     "label": "two", | ||||
|     "url": "http://127.0.0.1:3129" | ||||
|     "label": "Proxy Two", | ||||
|     "url": "http://squid-two:3128" | ||||
|   } | ||||
| } | ||||
| 
changedetectionio/tests/proxy_list/squid-auth.conf (new file, 48 lines)
| @@ -0,0 +1,48 @@
| acl localnet src 0.0.0.1-0.255.255.255  # RFC 1122 "this" network (LAN) | ||||
| acl localnet src 10.0.0.0/8             # RFC 1918 local private network (LAN) | ||||
| acl localnet src 100.64.0.0/10          # RFC 6598 shared address space (CGN) | ||||
| acl localnet src 169.254.0.0/16         # RFC 3927 link-local (directly plugged) machines | ||||
| acl localnet src 172.16.0.0/12          # RFC 1918 local private network (LAN) | ||||
| acl localnet src 192.168.0.0/16         # RFC 1918 local private network (LAN) | ||||
| acl localnet src fc00::/7               # RFC 4193 local private network range | ||||
| acl localnet src fe80::/10              # RFC 4291 link-local (directly plugged) machines | ||||
| acl localnet src 159.65.224.174 | ||||
| acl SSL_ports port 443 | ||||
| acl Safe_ports port 80          # http | ||||
| acl Safe_ports port 21          # ftp | ||||
| acl Safe_ports port 443         # https | ||||
| acl Safe_ports port 70          # gopher | ||||
| acl Safe_ports port 210         # wais | ||||
| acl Safe_ports port 1025-65535  # unregistered ports | ||||
| acl Safe_ports port 280         # http-mgmt | ||||
| acl Safe_ports port 488         # gss-http | ||||
| acl Safe_ports port 591         # filemaker | ||||
| acl Safe_ports port 777         # multiling http | ||||
| acl CONNECT method CONNECT | ||||
|  | ||||
| http_access deny !Safe_ports | ||||
| http_access deny CONNECT !SSL_ports | ||||
| #http_access allow localhost manager | ||||
| http_access deny manager | ||||
| #http_access allow localhost | ||||
| #http_access allow localnet | ||||
|  | ||||
| auth_param basic program /usr/lib/squid3/basic_ncsa_auth /etc/squid3/passwords | ||||
| auth_param basic realm proxy | ||||
| acl authenticated proxy_auth REQUIRED | ||||
| http_access allow authenticated | ||||
| http_access deny all | ||||
|  | ||||
|  | ||||
| http_port 3128 | ||||
| coredump_dir /var/spool/squid | ||||
| refresh_pattern ^ftp:           1440    20%     10080 | ||||
| refresh_pattern ^gopher:        1440    0%      1440 | ||||
| refresh_pattern -i (/cgi-bin/|\?) 0     0%      0 | ||||
| refresh_pattern \/(Packages|Sources)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims | ||||
| refresh_pattern \/Release(|\.gpg)$ 0 0% 0 refresh-ims | ||||
| refresh_pattern \/InRelease$ 0 0% 0 refresh-ims | ||||
| refresh_pattern \/(Translation-.*)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims | ||||
| refresh_pattern .               0       20%     4320 | ||||
| logfile_rotate 0 | ||||
| 
changedetectionio/tests/proxy_list/squid-passwords.txt (new file, 1 line)
| @@ -0,0 +1 @@
| test:$apr1$xvhFolTA$E/kz5/Rw1ewcyaSUdwqZs. | ||||
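The single line above is an htpasswd-style entry (user `test`, APR1/MD5 hash) consumed by squid's `basic_ncsa_auth` helper configured in squid-auth.conf. As a rough illustration, and assuming the `passlib` package is available (it is not a dependency of this project), an equivalent entry could be generated and verified like this:

```python
# Sketch assuming passlib is installed (pip install passlib); not used by the project itself.
from passlib.hash import apr_md5_crypt

# "awesome" matches the password referenced in the custom proxy test below.
entry_hash = apr_md5_crypt.hash("awesome")
print(f"test:{entry_hash}")                         # htpasswd-style line for squid-passwords.txt
assert apr_md5_crypt.verify("awesome", entry_hash)  # what basic_ncsa_auth effectively checks
```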
| @@ -0,0 +1,50 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks | ||||
|  | ||||
| # Just make a request; we will grep the docker logs to see it actually got called
| def test_select_custom(client, live_server): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     # Goto settings, add our custom one | ||||
|     res = client.post( | ||||
|         url_for("settings_page"), | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-ignore_whitespace": "y", | ||||
|             "application-fetch_backend": "html_requests", | ||||
|             "requests-extra_proxies-0-proxy_name": "custom-test-proxy", | ||||
|             # test:awesome is set in tests/proxy_list/squid-passwords.txt | ||||
|             "requests-extra_proxies-0-proxy_url": "http://test:awesome@squid-custom:3128", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         # Because the full URL won't show in squid/proxy logs when it's SSL'd,
|         # use plain HTTP or a specific domain name here
|         data={"urls": "https://changedetection.io/CHANGELOG.txt"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'Proxy Authentication Required' not in res.data | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     # We should see something via proxy | ||||
|     assert b'<div class=""> - 0.' in res.data | ||||
|  | ||||
|     # | ||||
|     # Now we should see the request in the container logs for "squid-squid-custom" because it will be the only default | ||||
| 
changedetectionio/tests/test.pdf (new binary file, not shown)

changedetectionio/tests/test_automatic_follow_ldjson_price.py (new file, 146 lines)
| @@ -0,0 +1,146 @@
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup, extract_UUID_from_client, extract_api_key_from_UI | ||||
|  | ||||
| def set_response_with_ldjson(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text</br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      </br> | ||||
|      So let's see what happens.  </br> | ||||
|      <div class="sametext">Some text thats the same</div> | ||||
|      <div class="changetext">Some text that will change</div> | ||||
|      <script type="application/ld+json"> | ||||
|         { | ||||
|            "@context":"https://schema.org/", | ||||
|            "@type":"Product", | ||||
|            "@id":"https://www.some-virtual-phone-shop.com/celular-iphone-14/p", | ||||
|            "name":"Celular Iphone 14 Pro Max 256Gb E Sim A16 Bionic", | ||||
|            "brand":{ | ||||
|               "@type":"Brand", | ||||
|               "name":"APPLE" | ||||
|            }, | ||||
|            "image":"https://www.some-virtual-phone-shop.com/15509426/image.jpg", | ||||
|            "description":"You dont need it", | ||||
|            "mpn":"111111", | ||||
|            "sku":"22222", | ||||
|            "offers":{ | ||||
|               "@type":"AggregateOffer", | ||||
|               "lowPrice":8097000, | ||||
|               "highPrice":8099900, | ||||
|               "priceCurrency":"COP", | ||||
|               "offers":[ | ||||
|                  { | ||||
|                     "@type":"Offer", | ||||
|                     "price":8097000, | ||||
|                     "priceCurrency":"COP", | ||||
|                     "availability":"http://schema.org/InStock", | ||||
|                     "sku":"102375961", | ||||
|                     "itemCondition":"http://schema.org/NewCondition", | ||||
|                     "seller":{ | ||||
|                        "@type":"Organization", | ||||
|                        "name":"ajax" | ||||
|                     } | ||||
|                  } | ||||
|               ], | ||||
|               "offerCount":1 | ||||
|            } | ||||
|         } | ||||
|        </script> | ||||
|      </body> | ||||
|      </html> | ||||
| """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
| def set_response_without_ldjson(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text</br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      </br> | ||||
|      So let's see what happens.  </br> | ||||
|      <div class="sametext">Some text thats the same</div> | ||||
|      <div class="changetext">Some text that will change</div>      | ||||
|      </body> | ||||
|      </html> | ||||
| """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
| # Actually only really used by the distill.io importer, but could be handy too
| def test_check_ldjson_price_autodetect(client, live_server): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     set_response_with_ldjson() | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(3) | ||||
|  | ||||
|     # Should get a notice that it's available | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'ldjson-price-track-offer' in res.data | ||||
|  | ||||
|     # Accept it | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|  | ||||
|     client.get(url_for('price_data_follower.accept', uuid=uuid, follow_redirects=True)) | ||||
|     time.sleep(2) | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(2) | ||||
|     # Offer should be gone | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'Embedded price data' not in res.data | ||||
|     assert b'tracking-ldjson-price-data' in res.data | ||||
|  | ||||
|     # and the last snapshot (via the API) should be just the price
|     api_key = extract_api_key_from_UI(client) | ||||
|     res = client.get( | ||||
|         url_for("watchsinglehistory", uuid=uuid, timestamp='latest'), | ||||
|         headers={'x-api-key': api_key}, | ||||
|     ) | ||||
|  | ||||
|     # Should see this (don't know where the whitespace came from)
|     assert b'"highPrice": 8099900' in res.data | ||||
|     # And not this, because it's not the ld+json
|     assert b"So let's see what happens" not in res.data | ||||
|  | ||||
|     client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|  | ||||
|     ########################################################################################## | ||||
|     # And we shouldn't see the offer
|     set_response_without_ldjson() | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(3) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'ldjson-price-track-offer' not in res.data | ||||
|      | ||||
|     ########################################################################################## | ||||
|     client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
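The fixture and assertions above revolve around schema.org Product data embedded as `<script type="application/ld+json">`. The following is only a rough sketch of how such price data can be located in a page; it is not the project's actual detection code.

```python
# Rough sketch, not the project's implementation: find schema.org Product offers
# embedded as <script type="application/ld+json"> and pull out the price fields.
import json
from lxml import html


def extract_ldjson_prices(page_html: str) -> list:
    tree = html.fromstring(page_html)
    results = []
    for node in tree.xpath('//script[@type="application/ld+json"]'):
        try:
            data = json.loads(node.text_content())
        except json.JSONDecodeError:
            continue
        offers = data.get("offers")
        if data.get("@type") == "Product" and isinstance(offers, dict):
            results.append({
                "lowPrice": offers.get("lowPrice"),
                "highPrice": offers.get("highPrice"),
                "currency": offers.get("priceCurrency"),
            })
    return results
```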
| @@ -1,8 +1,7 @@ | ||||
| import os | ||||
| import time | ||||
| import re | ||||
| from flask import url_for | ||||
| from .util import set_original_response, live_server_setup | ||||
| from .util import set_original_response, live_server_setup, extract_UUID_from_client | ||||
| from changedetectionio.model import App | ||||
|  | ||||
|  | ||||
| @@ -121,6 +120,10 @@ def run_filter_test(client, content_filter): | ||||
|         notification = f.read() | ||||
|     assert not 'CSS/xPath filter was not present in the page' in notification | ||||
|  | ||||
|     # Re #1247 - All tokens got replaced | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|     assert uuid in notification | ||||
|  | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("form_delete", uuid="all"), | ||||
|   | ||||
| @@ -394,6 +394,48 @@ def check_json_ext_filter(json_filter, client, live_server): | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| def test_ignore_json_order(client, live_server): | ||||
|     # A change in order shouldn't trigger a notification | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write('{"hello" : 123, "world": 123}') | ||||
|  | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', content_type="application/json", _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     time.sleep(2) | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write('{"world" : 123, "hello": 123}') | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(2) | ||||
|  | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|  | ||||
|     # Just to be sure it still works | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write('{"world" : 123, "hello": 124}') | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(2) | ||||
|  | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| def test_check_jsonpath_ext_filter(client, live_server): | ||||
|     check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server) | ||||
|  | ||||
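A one-line intuition for why `test_ignore_json_order` above passes: if JSON snapshots are parsed and re-serialised with sorted keys before being compared, a pure key-order change produces identical text while a changed value still differs. A minimal sketch of that idea (not necessarily the project's exact normalisation):

```python
# Sketch of order-insensitive JSON comparison; not a verbatim copy of the project's code.
import json


def normalised(doc: str) -> str:
    return json.dumps(json.loads(doc), sort_keys=True, indent=2)


# Key order alone changes nothing...
assert normalised('{"hello": 123, "world": 123}') == normalised('{"world": 123, "hello": 123}')
# ...but a changed value still shows up.
assert normalised('{"hello": 123, "world": 123}') != normalised('{"world": 123, "hello": 124}')
```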
| 
changedetectionio/tests/test_pdf.py (new file, 40 lines)
| @@ -0,0 +1,40 @@
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import set_original_response, set_modified_response, live_server_setup | ||||
|  | ||||
| sleep_time_for_fetch_thread = 3 | ||||
|  | ||||
| # `subtractive_selectors` should still work in `source:` type requests | ||||
| def test_fetch_pdf(client, live_server): | ||||
|     import shutil | ||||
|     shutil.copy("tests/test.pdf", "test-datastore/endpoint-test.pdf") | ||||
|  | ||||
|     live_server_setup(live_server) | ||||
|     test_url = url_for('test_pdf_endpoint', _external=True) | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b'PDF-1.5' not in res.data | ||||
|     assert b'hello world' in res.data | ||||
|  | ||||
|     # So we know if the file changes in other ways | ||||
|     import hashlib | ||||
|     md5 = hashlib.md5(open("test-datastore/endpoint-test.pdf", 'rb').read()).hexdigest().upper() | ||||
|     # We should have one | ||||
|     assert len(md5) >0 | ||||
|     # And it's going to be in the document | ||||
|     assert b'Document checksum - '+bytes(str(md5).encode('utf-8')) in res.data | ||||
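`test_fetch_pdf` above expects the extracted text to end with a `Document checksum - <MD5>` line, which lets a watch pick up byte-level changes even when the extracted text stays identical. A hedged sketch of that convention (the project's real PDF-to-text conversion lives elsewhere and may differ):

```python
# Sketch only: append the "Document checksum - <MD5>" line that the test above asserts on.
# The actual PDF-to-text conversion step is not shown here.
import hashlib


def text_with_checksum(pdf_bytes: bytes, extracted_text: str) -> str:
    md5 = hashlib.md5(pdf_bytes).hexdigest().upper()
    return f"{extracted_text}\nDocument checksum - {md5}"


print(text_with_checksum(b"%PDF-1.5 ...", "hello world"))
```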
| @@ -168,5 +168,15 @@ def live_server_setup(live_server): | ||||
|     def test_return_query(): | ||||
|         return request.query_string | ||||
|  | ||||
|  | ||||
|     @live_server.app.route('/endpoint-test.pdf') | ||||
|     def test_pdf_endpoint(): | ||||
|  | ||||
|         # Tried using a global var here but didn't seem to work, so reading from a file instead. | ||||
|         with open("test-datastore/endpoint-test.pdf", "rb") as f: | ||||
|             resp = make_response(f.read(), 200) | ||||
|             resp.headers['Content-Type'] = 'application/pdf' | ||||
|             return resp | ||||
|  | ||||
|     live_server.start() | ||||
|  | ||||
|   | ||||
| @@ -4,6 +4,7 @@ import queue | ||||
| import time | ||||
|  | ||||
| from changedetectionio import content_fetcher | ||||
| from changedetectionio import queuedWatchMetaData | ||||
| from changedetectionio.fetch_site_status import FilterNotFoundInResponse | ||||
|  | ||||
| # A single update worker | ||||
| @@ -92,7 +93,7 @@ class update_worker(threading.Thread): | ||||
|             return | ||||
|  | ||||
|         n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|                     'notification_body': "Your configured CSS/xPath filters of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( | ||||
|                     'notification_body': "Your configured CSS/xPath filters of '{}' for {{{{watch_url}}}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{{{base_url}}}}/edit/{{{{watch_uuid}}}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( | ||||
|                         ", ".join(watch['include_filters']), | ||||
|                         threshold), | ||||
|                     'notification_format': 'text'} | ||||
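The quadrupled braces in the hunk above are the fix itself: `str.format()` collapses `{{` to `{`, so `{{{{watch_url}}}}` in the source survives formatting as the literal `{{watch_url}}` token and reaches the notification layer intact, rather than being reduced to `{watch_url}`. A tiny demonstration:

```python
# Demonstrates the brace escaping used above: {{{{...}}}} survives .format() as {{...}},
# leaving the {{watch_url}} token intact for later substitution by the notification code.
template = "Filters '{}' for {{{{watch_url}}}} did not appear after {} attempts"
print(template.format("div.price", 6))
# -> Filters 'div.price' for {{watch_url}} did not appear after 6 attempts
```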
| @@ -157,11 +158,12 @@ class update_worker(threading.Thread): | ||||
|         while not self.app.config.exit.is_set(): | ||||
|  | ||||
|             try: | ||||
|                 priority, uuid = self.q.get(block=False) | ||||
|                 queued_item_data = self.q.get(block=False) | ||||
|             except queue.Empty: | ||||
|                 pass | ||||
|  | ||||
|             else: | ||||
|                 uuid = queued_item_data.item.get('uuid') | ||||
|                 self.current_uuid = uuid | ||||
|  | ||||
|                 if uuid in list(self.datastore.data['watching'].keys()): | ||||
| @@ -171,11 +173,11 @@ class update_worker(threading.Thread): | ||||
|                     update_obj= {} | ||||
|                     xpath_data = False | ||||
|                     process_changedetection_results = True | ||||
|                     print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url'])) | ||||
|                     print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url'])) | ||||
|                     now = time.time() | ||||
|  | ||||
|                     try: | ||||
|                         changed_detected, update_obj, contents = update_handler.run(uuid) | ||||
|                         changed_detected, update_obj, contents = update_handler.run(uuid, skip_when_checksum_same=queued_item_data.item.get('skip_when_checksum_same')) | ||||
|                         # Re #342 | ||||
|                         # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. | ||||
|                         # We then convert/.decode('utf-8') for the notification etc | ||||
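The worker now pulls a single queued object carrying both the priority and a metadata dict (the `uuid` plus the new `skip_when_checksum_same` flag). A hedged guess at the shape of that object from `queuedWatchMetaData`, shown only to make the attribute accesses above readable; the real class may differ.

```python
# Hedged sketch: approximate shape inferred from the attribute accesses above,
# not a verbatim copy of changedetectionio/queuedWatchMetaData.py.
from dataclasses import dataclass, field


@dataclass(order=True)
class PrioritizedItem:
    priority: int
    item: dict = field(compare=False, default_factory=dict)


# Producer side (sketch): the scheduler would enqueue something like
# q.put(PrioritizedItem(priority=1, item={'uuid': uuid, 'skip_when_checksum_same': True}))
```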
| @@ -241,6 +243,10 @@ class update_worker(threading.Thread): | ||||
|  | ||||
|                         process_changedetection_results = True | ||||
|  | ||||
|                     except content_fetcher.checksumFromPreviousCheckWasTheSame as e: | ||||
|                         # Yes fine, so nothing to do
|                         pass | ||||
|  | ||||
|                     except content_fetcher.BrowserStepsStepTimout as e: | ||||
|  | ||||
|                         if not self.datastore.data['watching'].get(uuid): | ||||
|   | ||||
| @@ -1,2 +0,0 @@ | ||||
| pytest ~=6.2 | ||||
| pytest-flask ~=1.2 | ||||
| @@ -29,8 +29,9 @@ apprise~=1.2.0 | ||||
| # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315 | ||||
| paho-mqtt | ||||
|  | ||||
| # Pinned version of cryptography otherwise | ||||
| # ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly | ||||
| # This mainly affects some ARM builds, which unlike the other builds ignores "ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1" | ||||
| # so without this pinning, the newer versions on ARM will forcefully try to build rust, which results in "rust compiler not found" | ||||
| # (introduced once apprise became a dep) | ||||
| cryptography~=3.4 | ||||
|  | ||||
| # Used for CSS filtering | ||||
| @@ -58,3 +59,7 @@ jq~=1.3 ;python_version >= "3.8" and sys_platform == "linux" | ||||
| # Any current modern version, required so far for screenshot PNG->JPEG conversion but will be used more in the future | ||||
| pillow | ||||
| # playwright is installed at Dockerfile build time because it's not available on all platforms | ||||
|  | ||||
| # Include pytest, so if there's a support issue we can ask them to run these tests on their setup
| pytest ~=6.2 | ||||
| pytest-flask ~=1.2 | ||||