mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-30 22:27:52 +00:00 
			
		
		
		
	Compare commits
	
		
			67 Commits
		
	
	
		
			proxies-js
			...
			hours-day-
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | b2b8c3f288 | ||
|   | ca3b351bae | ||
|   | 83add91f78 | ||
|   | b7e0f0a5e4 | ||
|   | fedb16c242 | ||
|   | 61f0ac2937 | ||
|   | 2d948ea6d1 | ||
|   | dee0c735e6 | ||
|   | 9fa98f4ec6 | ||
|   | b3b4b5d3f1 | ||
|   | a3f9ac0a6f | ||
|   | fcda5a0818 | ||
|   | 3920e613b9 | ||
|   | d023aa982e | ||
|   | c341baf71b | ||
|   | fca66eb558 | ||
|   | 359fc48fb4 | ||
|   | d0efeb9770 | ||
|   | 3416532cd6 | ||
|   | defc7a340e | ||
|   | c197c062e1 | ||
|   | 77b59809ca | ||
|   | f90b170e68 | ||
|   | c93ca1841c | ||
|   | 57f604dff1 | ||
|   | 8499468749 | ||
|   | 7f6a13ea6c | ||
|   | 9874f0cbc7 | ||
|   | 72834a42fd | ||
|   | 724cb17224 | ||
|   | 4eb4b401a1 | ||
|   | 5d40e16c73 | ||
|   | 492bbce6b6 | ||
|   | 0394a56be5 | ||
|   | 7839551d6b | ||
|   | 9c5588c791 | ||
|   | 5a43a350de | ||
|   | 3c31f023ce | ||
|   | 4cbcc59461 | ||
|   | 4be0260381 | ||
|   | 957a3c1c16 | ||
|   | 85897e0bf9 | ||
|   | 63095f70ea | ||
|   | 8d5b0b5576 | ||
|   | 1b077abd93 | ||
|   | 32ea1a8721 | ||
|   | fff32cef0d | ||
|   | 8fb146f3e4 | ||
|   | 770b0faa45 | ||
|   | f6faa90340 | ||
|   | 669fd3ae0b | ||
|   | 17d37fb626 | ||
|   | dfa7fc3a81 | ||
|   | cd467df97a | ||
|   | 71bc2fed82 | ||
|   | 738fcfe01c | ||
|   | 3ebb2ab9ba | ||
|   | ac98bc9144 | ||
|   | 3705ce6681 | ||
|   | f7ea99412f | ||
|   | d4715e2bc8 | ||
|   | 8567a83c47 | ||
|   | 77fdf59ae3 | ||
|   | 0e194aa4b4 | ||
|   | 2ba55bb477 | ||
|   | 4c759490da | ||
|   | 58a52c1f60 | 
							
								
								
									
										31
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										31
									
								
								.github/test/Dockerfile-alpine
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,31 @@ | ||||
| # Taken from https://github.com/linuxserver/docker-changedetection.io/blob/main/Dockerfile | ||||
| # Test that we can still build on Alpine (musl modified libc https://musl.libc.org/) | ||||
| # Some packages won't install via PyPI because they don't have a wheel available under this architecture. | ||||
|  | ||||
| FROM ghcr.io/linuxserver/baseimage-alpine:3.16 | ||||
| ENV PYTHONUNBUFFERED=1 | ||||
|  | ||||
| COPY requirements.txt /requirements.txt | ||||
|  | ||||
| RUN \ | ||||
|   apk add --update --no-cache --virtual=build-dependencies \ | ||||
|     cargo \ | ||||
|     g++ \ | ||||
|     gcc \ | ||||
|     libc-dev \ | ||||
|     libffi-dev \ | ||||
|     libxslt-dev \ | ||||
|     make \ | ||||
|     openssl-dev \ | ||||
|     py3-wheel \ | ||||
|     python3-dev \ | ||||
|     zlib-dev && \ | ||||
|   apk add --update --no-cache \ | ||||
|     libxslt \ | ||||
|     python3 \ | ||||
|     py3-pip && \ | ||||
|   echo "**** pip3 install test of changedetection.io ****" && \ | ||||
|   pip3 install -U pip wheel setuptools && \ | ||||
|   pip3 install -U --no-cache-dir --find-links https://wheel-index.linuxserver.io/alpine-3.16/ -r /requirements.txt && \ | ||||
|   apk del --purge \ | ||||
|     build-dependencies | ||||
							
								
								
									
										66
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							
							
						
						
									
										66
									
								
								.github/workflows/test-container-build.yml
									
									
									
									
										vendored
									
									
										Normal file
									
								
							| @@ -0,0 +1,66 @@ | ||||
| name: ChangeDetection.io Container Build Test | ||||
|  | ||||
| # Triggers the workflow on push or pull request events | ||||
|  | ||||
| # This line doesn't work, even though it is the documented one | ||||
| #on: [push, pull_request] | ||||
|  | ||||
| on: | ||||
|   push: | ||||
|     paths: | ||||
|       - requirements.txt | ||||
|       - Dockerfile | ||||
|  | ||||
|   pull_request: | ||||
|     paths: | ||||
|       - requirements.txt | ||||
|       - Dockerfile | ||||
|  | ||||
|   # Changes to requirements.txt packages and Dockerfile may or may not always be compatible with arm etc, so worth testing | ||||
|   # @todo: some kind of path filter for requirements.txt and Dockerfile | ||||
| jobs: | ||||
|   test-container-build: | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|         - uses: actions/checkout@v2 | ||||
|         - name: Set up Python 3.9 | ||||
|           uses: actions/setup-python@v2 | ||||
|           with: | ||||
|             python-version: 3.9 | ||||
|  | ||||
|         # Just test that the build works, some libraries won't compile on ARM/rPi etc | ||||
|         - name: Set up QEMU | ||||
|           uses: docker/setup-qemu-action@v1 | ||||
|           with: | ||||
|             image: tonistiigi/binfmt:latest | ||||
|             platforms: all | ||||
|  | ||||
|         - name: Set up Docker Buildx | ||||
|           id: buildx | ||||
|           uses: docker/setup-buildx-action@v1 | ||||
|           with: | ||||
|             install: true | ||||
|             version: latest | ||||
|             driver-opts: image=moby/buildkit:master | ||||
|  | ||||
|         # https://github.com/dgtlmoon/changedetection.io/pull/1067 | ||||
|         # Check we can still build under alpine/musl | ||||
|         - name: Test that the docker containers can build (musl via alpine check) | ||||
|           id: docker_build_musl | ||||
|           uses: docker/build-push-action@v2 | ||||
|           with: | ||||
|             context: ./ | ||||
|             file: ./.github/test/Dockerfile-alpine | ||||
|             platforms: linux/amd64,linux/arm64 | ||||
|  | ||||
|         - name: Test that the docker containers can build | ||||
|           id: docker_build | ||||
|           uses: docker/build-push-action@v2 | ||||
|           # https://github.com/docker/build-push-action#customizing | ||||
|           with: | ||||
|             context: ./ | ||||
|             file: ./Dockerfile | ||||
|             platforms: linux/arm/v7,linux/arm/v6,linux/amd64,linux/arm64, | ||||
|             cache-from: type=local,src=/tmp/.buildx-cache | ||||
|             cache-to: type=local,dest=/tmp/.buildx-cache | ||||
|  | ||||
							
								
								
									
										12
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										12
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							| @@ -1,28 +1,25 @@ | ||||
| name: ChangeDetection.io Test | ||||
| name: ChangeDetection.io App Test | ||||
|  | ||||
| # Triggers the workflow on push or pull request events | ||||
| on: [push, pull_request] | ||||
|  | ||||
| jobs: | ||||
|   test-build: | ||||
|   test-application: | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|  | ||||
|       - uses: actions/checkout@v2 | ||||
|       - name: Set up Python 3.9 | ||||
|         uses: actions/setup-python@v2 | ||||
|         with: | ||||
|           python-version: 3.9 | ||||
|  | ||||
|       - name: Show env vars | ||||
|         run: set | ||||
|  | ||||
|       - name: Install dependencies | ||||
|         run: | | ||||
|           python -m pip install --upgrade pip | ||||
|           pip install flake8 pytest | ||||
|           if [ -f requirements.txt ]; then pip install -r requirements.txt; fi | ||||
|           if [ -f requirements-dev.txt ]; then pip install -r requirements-dev.txt; fi | ||||
|  | ||||
|       - name: Lint with flake8 | ||||
|         run: | | ||||
|           # stop the build if there are Python syntax errors or undefined names | ||||
| @@ -39,7 +36,4 @@ jobs: | ||||
|           # Each test is totally isolated and performs its own cleanup/reset | ||||
|           cd changedetectionio; ./run_all_tests.sh | ||||
|  | ||||
|       # https://github.com/docker/build-push-action/blob/master/docs/advanced/test-before-push.md ? | ||||
|       # https://github.com/docker/buildx/issues/59 ? Needs to be one platform? | ||||
|  | ||||
|       # https://github.com/docker/buildx/issues/495#issuecomment-918925854 | ||||
|   | ||||
| @@ -6,7 +6,7 @@ Otherwise, it's always best to PR into the `dev` branch. | ||||
|  | ||||
| Please be sure that all new functionality has a matching test! | ||||
|  | ||||
| Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notifications.py` for example | ||||
| Use `pytest` to validate/test, you can run the existing tests as `pytest tests/test_notification.py` for example | ||||
|  | ||||
| ``` | ||||
| pip3 install -r requirements-dev | ||||
|   | ||||
							
								
								
									
										13
									
								
								Dockerfile
									
									
									
									
									
								
							
							
						
						
									
										13
									
								
								Dockerfile
									
									
									
									
									
								
							| @@ -5,13 +5,14 @@ FROM python:3.8-slim as builder | ||||
| ARG CRYPTOGRAPHY_DONT_BUILD_RUST=1 | ||||
|  | ||||
| RUN apt-get update && apt-get install -y --no-install-recommends \ | ||||
|     libssl-dev \ | ||||
|     libffi-dev \ | ||||
|     g++ \ | ||||
|     gcc \ | ||||
|     libc-dev \ | ||||
|     libffi-dev \ | ||||
|     libssl-dev \ | ||||
|     libxslt-dev \ | ||||
|     zlib1g-dev \ | ||||
|     g++ | ||||
|     make \ | ||||
|     zlib1g-dev | ||||
|  | ||||
| RUN mkdir /install | ||||
| WORKDIR /install | ||||
| @@ -22,7 +23,8 @@ RUN pip install --target=/dependencies -r /requirements.txt | ||||
|  | ||||
| # Playwright is an alternative to Selenium | ||||
| # Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing | ||||
| RUN pip install --target=/dependencies playwright~=1.24 \ | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/1067 also musl/alpine (not supported) | ||||
| RUN pip install --target=/dependencies playwright~=1.26 \ | ||||
|     || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled." | ||||
|  | ||||
| # Final image stage | ||||
| @@ -58,6 +60,7 @@ EXPOSE 5000 | ||||
|  | ||||
| # The actual flask app | ||||
| COPY changedetectionio /app/changedetectionio | ||||
|  | ||||
| # The eventlet server wrapper | ||||
| COPY changedetection.py /app/changedetection.py | ||||
|  | ||||
|   | ||||
| @@ -2,6 +2,7 @@ recursive-include changedetectionio/api * | ||||
| recursive-include changedetectionio/templates * | ||||
| recursive-include changedetectionio/static * | ||||
| recursive-include changedetectionio/model * | ||||
| recursive-include changedetectionio/tests * | ||||
| include changedetection.py | ||||
| global-exclude *.pyc | ||||
| global-exclude node_modules | ||||
|   | ||||
| @@ -33,7 +33,7 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W | ||||
| #### Key Features | ||||
|  | ||||
| - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions! | ||||
| - Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules | ||||
| - Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq | ||||
| - Switch between fast non-JS and Chrome JS based "fetchers" | ||||
| - Easily specify how often a site should be checked | ||||
| - Execute JS before extracting text (Good for logging in, see examples in the UI!) | ||||
|   | ||||
							
								
								
									
										44
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										44
									
								
								README.md
									
									
									
									
									
								
							| @@ -1,6 +1,7 @@ | ||||
| ## Web Site Change Detection, Monitoring and Notification. | ||||
|  | ||||
| Live your data-life pro-actively, track website content changes and receive notifications via Discord, Email, Slack, Telegram and 70+ more | ||||
| _Live your data-life pro-actively, Detect website changes and perform meaningful actions, trigger notifications via Discord, Email, Slack, Telegram, API calls and many more._ | ||||
|  | ||||
|  | ||||
| [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring"  title="Self-hosted web page change monitoring"  />](https://lemonade.changedetection.io/start?src=github) | ||||
|  | ||||
| @@ -8,15 +9,16 @@ Live your data-life pro-actively, track website content changes and receive noti | ||||
|  | ||||
|  | ||||
|  | ||||
| Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more | ||||
|  | ||||
| [**Don't have time? Let us host it for you! try our $6.99/month subscription - use our proxies and support!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_ | ||||
|  | ||||
| - Chrome browser included. | ||||
| - Super fast, no registration needed setup. | ||||
| - Start watching and receiving change notifications instantly. | ||||
|  | ||||
|  | ||||
| - Automatic Updates, Automatic Backups, No Heroku "paused application", don't miss a change! | ||||
| - Javascript browser included | ||||
| - Unlimited checks and watches! | ||||
| Easily see what changed, examine by word, line, or individual character. | ||||
|  | ||||
| <img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference "  title="Self-hosted web page change monitoring context difference " /> | ||||
|  | ||||
|  | ||||
| #### Example use cases | ||||
| @@ -44,22 +46,18 @@ _Need an actual Chrome runner with Javascript support? We support fetching via W | ||||
| #### Key Features | ||||
|  | ||||
| - Lots of trigger filters, such as "Trigger on text", "Remove text by selector", "Ignore text", "Extract text", also using regular-expressions! | ||||
| - Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JsonPath rules | ||||
| - Target elements with xPath and CSS Selectors, Easily monitor complex JSON with JSONPath or jq | ||||
| - Switch between fast non-JS and Chrome JS based "fetchers" | ||||
| - Easily specify how often a site should be checked | ||||
| - Execute JS before extracting text (Good for logging in, see examples in the UI!) | ||||
| - Override Request Headers, Specify `POST` or `GET` and other methods | ||||
| - Use the "Visual Selector" to help target specific elements | ||||
| - Configurable [proxy per watch](https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration) | ||||
|  | ||||
| We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) global proxy services, Bright Data will match any first deposit up to $100 using our signup link. | ||||
|  | ||||
| ## Screenshots | ||||
|  | ||||
| ### Examine differences in content. | ||||
|  | ||||
| Easily see what changed, examine by word, line, or individual character. | ||||
|  | ||||
| <img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot-diff.png" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference "  title="Self-hosted web page change monitoring context difference " /> | ||||
|  | ||||
| Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/ | ||||
|  | ||||
| ### Filter by elements using the Visual Selector tool. | ||||
| @@ -122,8 +120,8 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io | ||||
|  | ||||
|  | ||||
| ## Filters | ||||
| XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools. | ||||
|  | ||||
| XPath, JSONPath, jq, and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.  | ||||
| (We support LXML `re:test`, `re:math` and `re:replace`.) | ||||
|  | ||||
| ## Notifications | ||||
| @@ -152,7 +150,7 @@ Now you can also customise your notification content! | ||||
|  | ||||
| ## JSON API Monitoring | ||||
|  | ||||
| Detect changes and monitor data in JSON API's by using the built-in JSONPath selectors as a filter / selector. | ||||
| Detect changes and monitor data in JSON API's by using either JSONPath or jq to filter, parse, and restructure JSON as needed. | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -160,9 +158,17 @@ This will re-parse the JSON and apply formatting to the text, making it super ea | ||||
|  | ||||
|  | ||||
|  | ||||
| ### JSONPath or jq? | ||||
|  | ||||
| For more complex parsing, filtering, and modifying of JSON data, jq is recommended due to the built-in operators and functions. Refer to the [documentation](https://stedolan.github.io/jq/manual/) for more specific information on jq. | ||||
|  | ||||
| One big advantage of `jq` is that you can use logic in your JSON filter, such as filters to only show items that have a value greater than/less than etc. | ||||
|  | ||||
| See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/JSON-Selector-Filter-help for more information and examples | ||||
|  | ||||
| ### Parse JSON embedded in HTML! | ||||
|  | ||||
| When you enable a `json:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.  | ||||
| When you enable a `json:` or `jq:` filter, you can even automatically extract and parse embedded JSON inside a HTML page! Amazingly handy for sites that build content based on JSON, such as many e-commerce websites.  | ||||
|  | ||||
| ``` | ||||
| <html> | ||||
| @@ -172,11 +178,11 @@ When you enable a `json:` filter, you can even automatically extract and parse e | ||||
| </script> | ||||
| ```   | ||||
|  | ||||
| `json:$.price` would give `23.50`, or you can extract the whole structure | ||||
| `json:$.price` or `jq:.price` would give `23.50`, or you can extract the whole structure | ||||
|  | ||||
| ## Proxy configuration | ||||
| ## Proxy Configuration | ||||
|  | ||||
| See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration | ||||
| See the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration , we also support using [BrightData proxy services where possible]( https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support) | ||||
|  | ||||
| ## Raspberry Pi support? | ||||
|  | ||||
|   | ||||
| @@ -33,7 +33,7 @@ from flask_wtf import CSRFProtect | ||||
| from changedetectionio import html_tools | ||||
| from changedetectionio.api import api_v1 | ||||
|  | ||||
| __version__ = '0.39.19.1' | ||||
| __version__ = '0.39.21.1' | ||||
|  | ||||
| datastore = None | ||||
|  | ||||
| @@ -194,7 +194,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     watch_api.add_resource(api_v1.Watch, '/api/v1/watch/<string:uuid>', | ||||
|                            resource_class_kwargs={'datastore': datastore, 'update_q': update_q}) | ||||
|  | ||||
|  | ||||
|     watch_api.add_resource(api_v1.SystemInfo, '/api/v1/systeminfo', | ||||
|                            resource_class_kwargs={'datastore': datastore, 'update_q': update_q}) | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -547,6 +548,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         # Defaults for proxy choice | ||||
|         if datastore.proxy_list is not None:  # When enabled | ||||
|             # @todo | ||||
|             # Radio needs '' not None, or in case the chosen one no longer exists | ||||
|             if default['proxy'] is None or not any(default['proxy'] in tup for tup in datastore.proxy_list): | ||||
|                 default['proxy'] = '' | ||||
| @@ -560,7 +562,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead | ||||
|             del form.proxy | ||||
|         else: | ||||
|             form.proxy.choices = [('', 'Default')] + datastore.proxy_list | ||||
|             form.proxy.choices = [('', 'Default')] | ||||
|             for p in datastore.proxy_list: | ||||
|                 form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label']))) | ||||
|  | ||||
|         if request.method == 'POST' and form.validate(): | ||||
|             extra_update_obj = {} | ||||
| @@ -568,16 +572,6 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             if request.args.get('unpause_on_save'): | ||||
|                 extra_update_obj['paused'] = False | ||||
|  | ||||
|             # Re #110, if they submit the same as the default value, set it to None, so we continue to follow the default | ||||
|             # Assume we use the default value, unless something relevant is different, then use the form value | ||||
|             # values could be None, 0 etc. | ||||
|             # Set to None unless the next for: says that something is different | ||||
|             extra_update_obj['time_between_check'] = dict.fromkeys(form.time_between_check.data) | ||||
|             for k, v in form.time_between_check.data.items(): | ||||
|                 if v and v != datastore.data['settings']['requests']['time_between_check'][k]: | ||||
|                     extra_update_obj['time_between_check'] = form.time_between_check.data | ||||
|                     using_default_check_time = False | ||||
|                     break | ||||
|  | ||||
|             # Use the default if its the same as system wide | ||||
|             if form.fetch_backend.data == datastore.data['settings']['application']['fetch_backend']: | ||||
| @@ -594,7 +588,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                     extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid) | ||||
|  | ||||
|             # Reset the previous_md5 so we process a new snapshot including stripping ignore text. | ||||
|             if form.css_filter.data.strip() != datastore.data['watching'][uuid]['css_filter']: | ||||
|             if form.include_filters.data != datastore.data['watching'][uuid].get('include_filters', []): | ||||
|                 if len(datastore.data['watching'][uuid].history): | ||||
|                     extra_update_obj['previous_md5'] = get_current_checksum_include_ignore_text(uuid=uuid) | ||||
|  | ||||
| @@ -632,20 +626,27 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # Only works reliably with Playwright | ||||
|             visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and default['fetch_backend'] == 'html_webdriver' | ||||
|  | ||||
|             # JQ is difficult to install on windows and must be manually added (outside requirements.txt) | ||||
|             jq_support = True | ||||
|             try: | ||||
|                 import jq | ||||
|             except ModuleNotFoundError: | ||||
|                 jq_support = False | ||||
|  | ||||
|             output = render_template("edit.html", | ||||
|                                      uuid=uuid, | ||||
|                                      watch=datastore.data['watching'][uuid], | ||||
|                                      form=form, | ||||
|                                      has_empty_checktime=using_default_check_time, | ||||
|                                      has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False, | ||||
|                                      using_global_webdriver_wait=default['webdriver_delay'] is None, | ||||
|                                      current_base_url=datastore.data['settings']['application']['base_url'], | ||||
|                                      emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), | ||||
|                                      form=form, | ||||
|                                      has_default_notification_urls=True if len(datastore.data['settings']['application']['notification_urls']) else False, | ||||
|                                      has_empty_checktime=using_default_check_time, | ||||
|                                      jq_support=jq_support, | ||||
|                                      playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False), | ||||
|                                      settings_application=datastore.data['settings']['application'], | ||||
|                                      using_global_webdriver_wait=default['webdriver_delay'] is None, | ||||
|                                      uuid=uuid, | ||||
|                                      visualselector_data_is_ready=visualselector_data_is_ready, | ||||
|                                      visualselector_enabled=visualselector_enabled, | ||||
|                                      playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False) | ||||
|                                      watch=datastore.data['watching'][uuid], | ||||
|                                      ) | ||||
|  | ||||
|         return output | ||||
| @@ -657,15 +658,16 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         default = deepcopy(datastore.data['settings']) | ||||
|         if datastore.proxy_list is not None: | ||||
|             available_proxies = list(datastore.proxy_list.keys()) | ||||
|             # When enabled | ||||
|             system_proxy = datastore.data['settings']['requests']['proxy'] | ||||
|             # In the case it doesn't exist anymore | ||||
|             if not any([system_proxy in tup for tup in datastore.proxy_list]): | ||||
|             if not system_proxy in available_proxies: | ||||
|                 system_proxy = None | ||||
|  | ||||
|             default['requests']['proxy'] = system_proxy if system_proxy is not None else datastore.proxy_list[0][0] | ||||
|             default['requests']['proxy'] = system_proxy if system_proxy is not None else available_proxies[0] | ||||
|             # Used by the form handler to keep or remove the proxy settings | ||||
|             default['proxy_list'] = datastore.proxy_list | ||||
|             default['proxy_list'] = available_proxies[0] | ||||
|  | ||||
|  | ||||
|         # Don't use form.data on POST so that it doesn't override the checkbox status from the POST status | ||||
| @@ -680,7 +682,10 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # @todo - Couldn't get setattr() etc dynamic addition working, so remove it instead | ||||
|             del form.requests.form.proxy | ||||
|         else: | ||||
|             form.requests.form.proxy.choices = datastore.proxy_list | ||||
|             form.requests.form.proxy.choices = [] | ||||
|             for p in datastore.proxy_list: | ||||
|                 form.requests.form.proxy.choices.append(tuple((p, datastore.proxy_list[p]['label']))) | ||||
|  | ||||
|  | ||||
|         if request.method == 'POST': | ||||
|             # Password unset is a GET, but we can lock the session to a salted env password to always need the password | ||||
| @@ -716,13 +721,19 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             else: | ||||
|                 flash("An error occurred, please see below.", "error") | ||||
|  | ||||
|         import datetime | ||||
|         datetime = datetime.datetime.now(pytz.timezone(datastore.data['settings']['application'].get('timezone'))) | ||||
|  | ||||
|         output = render_template("settings.html", | ||||
|                                  form=form, | ||||
|                                  current_base_url = datastore.data['settings']['application']['base_url'], | ||||
|                                  hide_remove_pass=os.getenv("SALTED_PASS", False), | ||||
|                                  api_key=datastore.data['settings']['application'].get('api_access_token'), | ||||
|                                  current_base_url=datastore.data['settings']['application']['base_url'], | ||||
|                                  datetime=str(datetime), | ||||
|                                  emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), | ||||
|                                  settings_application=datastore.data['settings']['application']) | ||||
|                                  form=form, | ||||
|                                  hide_remove_pass=os.getenv("SALTED_PASS", False), | ||||
|                                  settings_application=datastore.data['settings']['application'], | ||||
|                                  timezone=datastore.data['settings']['application'].get('timezone') | ||||
|                                  ) | ||||
|  | ||||
|         return output | ||||
|  | ||||
| @@ -801,8 +812,10 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         newest_file = history[dates[-1]] | ||||
|  | ||||
|         # Read as binary and force decode as UTF-8 | ||||
|         # Windows may fail decode in python if we just use 'r' mode (chardet decode exception) | ||||
|         try: | ||||
|             with open(newest_file, 'r') as f: | ||||
|             with open(newest_file, 'r', encoding='utf-8', errors='ignore') as f: | ||||
|                 newest_version_file_contents = f.read() | ||||
|         except Exception as e: | ||||
|             newest_version_file_contents = "Unable to read {}.\n".format(newest_file) | ||||
| @@ -815,7 +828,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             previous_file = history[dates[-2]] | ||||
|  | ||||
|         try: | ||||
|             with open(previous_file, 'r') as f: | ||||
|             with open(previous_file, 'r', encoding='utf-8', errors='ignore') as f: | ||||
|                 previous_version_file_contents = f.read() | ||||
|         except Exception as e: | ||||
|             previous_version_file_contents = "Unable to read {}.\n".format(previous_file) | ||||
| @@ -892,7 +905,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         timestamp = list(watch.history.keys())[-1] | ||||
|         filename = watch.history[timestamp] | ||||
|         try: | ||||
|             with open(filename, 'r') as f: | ||||
|             with open(filename, 'r', encoding='utf-8', errors='ignore') as f: | ||||
|                 tmp = f.readlines() | ||||
|  | ||||
|                 # Get what needs to be highlighted | ||||
| @@ -967,9 +980,6 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         # create a ZipFile object | ||||
|         backupname = "changedetection-backup-{}.zip".format(int(time.time())) | ||||
|  | ||||
|         # We only care about UUIDS from the current index file | ||||
|         uuids = list(datastore.data['watching'].keys()) | ||||
|         backup_filepath = os.path.join(datastore_o.datastore_path, backupname) | ||||
|  | ||||
|         with zipfile.ZipFile(backup_filepath, "w", | ||||
| @@ -985,12 +995,12 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             # Add the flask app secret | ||||
|             zipObj.write(os.path.join(datastore_o.datastore_path, "secret.txt"), arcname="secret.txt") | ||||
|  | ||||
|             # Add any snapshot data we find, use the full path to access the file, but make the file 'relative' in the Zip. | ||||
|             for txt_file_path in Path(datastore_o.datastore_path).rglob('*.txt'): | ||||
|                 parent_p = txt_file_path.parent | ||||
|                 if parent_p.name in uuids: | ||||
|                     zipObj.write(txt_file_path, | ||||
|                                  arcname=str(txt_file_path).replace(datastore_o.datastore_path, ''), | ||||
|             # Add any data in the watch data directory. | ||||
|             for uuid, w in datastore.data['watching'].items(): | ||||
|                 for f in Path(w.watch_data_dir).glob('*'): | ||||
|                     zipObj.write(f, | ||||
|                                  # Use the full path to access the file, but make the file 'relative' in the Zip. | ||||
|                                  arcname=os.path.join(f.parts[-2], f.parts[-1]), | ||||
|                                  compress_type=zipfile.ZIP_DEFLATED, | ||||
|                                  compresslevel=8) | ||||
|  | ||||
| @@ -1292,8 +1302,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|     threading.Thread(target=notification_runner).start() | ||||
|  | ||||
|     # Check for new release version, but not when running in test/build | ||||
|     if not os.getenv("GITHUB_REF", False): | ||||
|     # Check for new release version, but not when running in test/build or pytest | ||||
|     if not os.getenv("GITHUB_REF", False) and not config.get('disable_checkver') == True: | ||||
|         threading.Thread(target=check_for_new_version).start() | ||||
|  | ||||
|     return app | ||||
| @@ -1368,6 +1378,8 @@ def ticker_thread_check_time_launch_checks(): | ||||
|     import random | ||||
|     from changedetectionio import update_worker | ||||
|  | ||||
|     proxy_last_called_time = {} | ||||
|  | ||||
|     recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20)) | ||||
|     print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds) | ||||
|  | ||||
| @@ -1428,10 +1440,34 @@ def ticker_thread_check_time_launch_checks(): | ||||
|                 if watch.jitter_seconds == 0: | ||||
|                     watch.jitter_seconds = random.uniform(-abs(jitter), jitter) | ||||
|  | ||||
|  | ||||
|             seconds_since_last_recheck = now - watch['last_checked'] | ||||
|  | ||||
|             if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds: | ||||
|  | ||||
|                 if not watch.is_schedule_permitted: | ||||
|                     # Skip if the schedule (day of week and time) isn't permitted | ||||
|                     continue | ||||
|  | ||||
|                 if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]: | ||||
|                     # Proxies can be set to have a limit on seconds between which they can be called | ||||
|                     watch_proxy = datastore.get_preferred_proxy_for_watch(uuid=uuid) | ||||
|                     if watch_proxy and watch_proxy in list(datastore.proxy_list.keys()): | ||||
|                         # Proxy may also have some threshold minimum | ||||
|                         proxy_list_reuse_time_minimum = int(datastore.proxy_list.get(watch_proxy, {}).get('reuse_time_minimum', 0)) | ||||
|                         if proxy_list_reuse_time_minimum: | ||||
|                             proxy_last_used_time = proxy_last_called_time.get(watch_proxy, 0) | ||||
|                             time_since_proxy_used = int(time.time() - proxy_last_used_time) | ||||
|                             if time_since_proxy_used < proxy_list_reuse_time_minimum: | ||||
|                                 # Not enough time difference reached, skip this watch | ||||
|                                 print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid, | ||||
|                                                                                                                          watch_proxy, | ||||
|                                                                                                                          time_since_proxy_used, | ||||
|                                                                                                                          proxy_list_reuse_time_minimum)) | ||||
|                                 continue | ||||
|                             else: | ||||
|                                 # Record the last used time | ||||
|                                 proxy_last_called_time[watch_proxy] = int(time.time()) | ||||
|  | ||||
|                     # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it. | ||||
|                     priority = int(time.time()) | ||||
|                     print( | ||||
|   | ||||
| @@ -122,3 +122,37 @@ class CreateWatch(Resource): | ||||
|             return {'status': "OK"}, 200 | ||||
|  | ||||
|         return list, 200 | ||||
|  | ||||
| class SystemInfo(Resource): | ||||
|     def __init__(self, **kwargs): | ||||
|         # datastore is a black box dependency | ||||
|         self.datastore = kwargs['datastore'] | ||||
|         self.update_q = kwargs['update_q'] | ||||
|  | ||||
|     @auth.check_token | ||||
|     def get(self): | ||||
|         import time | ||||
|         overdue_watches = [] | ||||
|  | ||||
|         # Check all watches and report which have not been checked but should have been | ||||
|  | ||||
|         for uuid, watch in self.datastore.data.get('watching', {}).items(): | ||||
|             # see if now - last_checked is greater than the time that should have been | ||||
|             # this is not super accurate (maybe they just edited it) but better than nothing | ||||
|             t = watch.threshold_seconds() | ||||
|             if not t: | ||||
|                 # Use the system wide default | ||||
|                 t = self.datastore.threshold_seconds | ||||
|  | ||||
|             time_since_check = time.time() - watch.get('last_checked') | ||||
|  | ||||
|             # Allow 5 minutes of grace time before we decide it's overdue | ||||
|             if time_since_check - (5 * 60) > t: | ||||
|                 overdue_watches.append(uuid) | ||||
|  | ||||
|         return { | ||||
|                    'queue_size': self.update_q.qsize(), | ||||
|                    'overdue_watches': overdue_watches, | ||||
|                    'uptime': round(time.time() - self.datastore.start_time, 2), | ||||
|                    'watch_count': len(self.datastore.data.get('watching', {})) | ||||
|                }, 200 | ||||
|   | ||||
| @@ -102,6 +102,14 @@ def main(): | ||||
|                     has_password=datastore.data['settings']['application']['password'] != False | ||||
|                     ) | ||||
|  | ||||
|     # Monitored websites will not receive a Referer header when a user clicks on an outgoing link. | ||||
|     # @Note: Incompatible with password login (and maybe other features) for now, submit a PR! | ||||
|     @app.after_request | ||||
|     def hide_referrer(response): | ||||
|         if os.getenv("HIDE_REFERER", False): | ||||
|             response.headers["Referrer-Policy"] = "no-referrer" | ||||
|         return response | ||||
|  | ||||
|     # Proxy sub-directory support | ||||
|     # Set environment var USE_X_SETTINGS=1 on this script | ||||
|     # And then in your proxy_pass settings | ||||
|   | ||||
| @@ -164,16 +164,16 @@ class Fetcher(): | ||||
|                 } | ||||
|  | ||||
|  | ||||
|                 // inject the current one set in the css_filter, which may be a CSS rule | ||||
|                 // inject the current one set in the include_filters, which may be a CSS rule | ||||
|                 // used for displaying the current one in VisualSelector, where its not one we generated. | ||||
|                 if (css_filter.length) { | ||||
|                 if (include_filters.length) { | ||||
|                    q=false;                    | ||||
|                    try { | ||||
|                        // is it xpath? | ||||
|                        if (css_filter.startsWith('/') || css_filter.startsWith('xpath:')) { | ||||
|                          q=document.evaluate(css_filter.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; | ||||
|                        if (include_filters.startsWith('/') || include_filters.startsWith('xpath:')) { | ||||
|                          q=document.evaluate(include_filters.replace('xpath:',''), document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue; | ||||
|                        } else { | ||||
|                          q=document.querySelector(css_filter); | ||||
|                          q=document.querySelector(include_filters); | ||||
|                        }                        | ||||
|                    } catch (e) { | ||||
|                     // Maybe catch DOMException and alert?  | ||||
| @@ -186,7 +186,7 @@ class Fetcher(): | ||||
|                                     | ||||
|                    if (bbox && bbox['width'] >0 && bbox['height']>0) {                        | ||||
|                        size_pos.push({ | ||||
|                            xpath: css_filter, | ||||
|                            xpath: include_filters, | ||||
|                            width: bbox['width'],  | ||||
|                            height: bbox['height'], | ||||
|                            left: bbox['left'], | ||||
| @@ -220,7 +220,7 @@ class Fetcher(): | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_css_filter=None): | ||||
|             current_include_filters=None): | ||||
|         # Should set self.error, self.status_code and self.content | ||||
|         pass | ||||
|  | ||||
| @@ -310,12 +310,13 @@ class base_html_playwright(Fetcher): | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_css_filter=None): | ||||
|             current_include_filters=None): | ||||
|  | ||||
|         from playwright.sync_api import sync_playwright | ||||
|         import playwright._impl._api_types | ||||
|         from playwright._impl._api_types import Error, TimeoutError | ||||
|         response = None | ||||
|  | ||||
|         with sync_playwright() as p: | ||||
|             browser_type = getattr(p, self.browser_type) | ||||
|  | ||||
| @@ -373,8 +374,11 @@ class base_html_playwright(Fetcher): | ||||
|                 print("response object was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             # Bug 2(?) Set the viewport size AFTER loading the page | ||||
|             page.set_viewport_size({"width": 1280, "height": 1024}) | ||||
|  | ||||
|             # Removed browser-set-size, seemed to be needed to make screenshots work reliably in older playwright versions | ||||
|             # Was causing exceptions like 'waiting for page but content is changing' etc | ||||
|             # https://www.browserstack.com/docs/automate/playwright/change-browser-window-size 1280x720 should be the default | ||||
|                          | ||||
|             extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|             time.sleep(extra_wait) | ||||
|  | ||||
| @@ -398,14 +402,21 @@ class base_html_playwright(Fetcher): | ||||
|  | ||||
|                     raise JSActionExceptions(status_code=response.status, screenshot=error_screenshot, message=str(e), url=url) | ||||
|  | ||||
|                 else: | ||||
|                     # JS eval was run, now we also wait some time if possible to let the page settle | ||||
|                     if self.render_extract_delay: | ||||
|                         page.wait_for_timeout(self.render_extract_delay * 1000) | ||||
|  | ||||
|             page.wait_for_timeout(500) | ||||
|  | ||||
|             self.content = page.content() | ||||
|             self.status_code = response.status | ||||
|             self.headers = response.all_headers() | ||||
|  | ||||
|             if current_css_filter is not None: | ||||
|                 page.evaluate("var css_filter={}".format(json.dumps(current_css_filter))) | ||||
|             if current_include_filters is not None: | ||||
|                 page.evaluate("var include_filters={}".format(json.dumps(current_include_filters))) | ||||
|             else: | ||||
|                 page.evaluate("var css_filter=''") | ||||
|                 page.evaluate("var include_filters=''") | ||||
|  | ||||
|             self.xpath_data = page.evaluate("async () => {" + self.xpath_element_js + "}") | ||||
|  | ||||
| @@ -486,7 +497,7 @@ class base_html_webdriver(Fetcher): | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_css_filter=None): | ||||
|             current_include_filters=None): | ||||
|  | ||||
|         from selenium import webdriver | ||||
|         from selenium.webdriver.common.desired_capabilities import DesiredCapabilities | ||||
| @@ -514,8 +525,6 @@ class base_html_webdriver(Fetcher): | ||||
|             # Selenium doesn't automatically wait for actions as well as Playwright, so wait again | ||||
|             self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|         self.screenshot = self.driver.get_screenshot_as_png() | ||||
|  | ||||
|         # @todo - how to check this? is it possible? | ||||
|         self.status_code = 200 | ||||
|         # @todo somehow we should try to get this working for WebDriver | ||||
| @@ -526,6 +535,8 @@ class base_html_webdriver(Fetcher): | ||||
|         self.content = self.driver.page_source | ||||
|         self.headers = {} | ||||
|  | ||||
|         self.screenshot = self.driver.get_screenshot_as_png() | ||||
|  | ||||
|     # Does the connection to the webdriver work? run a test connection. | ||||
|     def is_ready(self): | ||||
|         from selenium import webdriver | ||||
| @@ -562,7 +573,12 @@ class html_requests(Fetcher): | ||||
|             request_body, | ||||
|             request_method, | ||||
|             ignore_status_codes=False, | ||||
|             current_css_filter=None): | ||||
|             current_include_filters=None): | ||||
|  | ||||
|         # Make requests use a more modern looking user-agent | ||||
|         if not 'User-Agent' in request_headers: | ||||
|             request_headers['User-Agent'] = os.getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", | ||||
|                                                       'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36') | ||||
|  | ||||
|         proxies = {} | ||||
|  | ||||
|   | ||||
| @@ -10,6 +10,12 @@ from changedetectionio import content_fetcher, html_tools | ||||
| urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | ||||
|  | ||||
|  | ||||
| class FilterNotFoundInResponse(ValueError): | ||||
|     def __init__(self, msg): | ||||
|         ValueError.__init__(self, msg) | ||||
|  | ||||
|  | ||||
|  | ||||
| # Some common stuff here that can be moved to a base class | ||||
| # (set_proxy_from_list) | ||||
| class perform_site_check(): | ||||
| @@ -20,34 +26,6 @@ class perform_site_check(): | ||||
|         super().__init__(*args, **kwargs) | ||||
|         self.datastore = datastore | ||||
|  | ||||
|     # If there was a proxy list enabled, figure out what proxy_args/which proxy to use | ||||
|     # if watch.proxy use that | ||||
|     # fetcher.proxy_override = watch.proxy or main config proxy | ||||
|     # Allows override the proxy on a per-request basis | ||||
|     # ALWAYS use the first one is nothing selected | ||||
|  | ||||
|     def set_proxy_from_list(self, watch): | ||||
|         proxy_args = None | ||||
|         if self.datastore.proxy_list is None: | ||||
|             return None | ||||
|  | ||||
|         # If its a valid one | ||||
|         if any([watch['proxy'] in p for p in self.datastore.proxy_list]): | ||||
|             proxy_args = watch['proxy'] | ||||
|  | ||||
|         # not valid (including None), try the system one | ||||
|         else: | ||||
|             system_proxy = self.datastore.data['settings']['requests']['proxy'] | ||||
|             # Is not None and exists | ||||
|             if any([system_proxy in p for p in self.datastore.proxy_list]): | ||||
|                 proxy_args = system_proxy | ||||
|  | ||||
|         # Fallback - Did not resolve anything, use the first available | ||||
|         if proxy_args is None: | ||||
|             proxy_args = self.datastore.proxy_list[0][0] | ||||
|  | ||||
|         return proxy_args | ||||
|  | ||||
|     # Doesn't look like python supports forward slash auto enclosure in re.findall | ||||
|     # So convert it to inline flag "foobar(?i)" type configuration | ||||
|     def forward_slash_enclosed_regex_to_options(self, regex): | ||||
| @@ -68,6 +46,8 @@ class perform_site_check(): | ||||
|         stripped_text_from_html = "" | ||||
|  | ||||
|         watch = self.datastore.data['watching'].get(uuid) | ||||
|         if not watch: | ||||
|             return | ||||
|  | ||||
|         # Protect against file:// access | ||||
|         if re.search(r'^file', watch['url'], re.IGNORECASE) and not os.getenv('ALLOW_FILE_URI', False): | ||||
| @@ -90,8 +70,10 @@ class perform_site_check(): | ||||
|         if 'Accept-Encoding' in request_headers and "br" in request_headers['Accept-Encoding']: | ||||
|             request_headers['Accept-Encoding'] = request_headers['Accept-Encoding'].replace(', br', '') | ||||
|  | ||||
|         timeout = self.datastore.data['settings']['requests']['timeout'] | ||||
|         url = watch.get('url') | ||||
|         timeout = self.datastore.data['settings']['requests'].get('timeout') | ||||
|  | ||||
|         url = watch.link | ||||
|  | ||||
|         request_body = self.datastore.data['watching'][uuid].get('body') | ||||
|         request_method = self.datastore.data['watching'][uuid].get('method') | ||||
|         ignore_status_codes = self.datastore.data['watching'][uuid].get('ignore_status_codes', False) | ||||
| @@ -110,9 +92,13 @@ class perform_site_check(): | ||||
|             # If the klass doesnt exist, just use a default | ||||
|             klass = getattr(content_fetcher, "html_requests") | ||||
|  | ||||
|         proxy_id = self.datastore.get_preferred_proxy_for_watch(uuid=uuid) | ||||
|         proxy_url = None | ||||
|         if proxy_id: | ||||
|             proxy_url = self.datastore.proxy_list.get(proxy_id).get('url') | ||||
|             print ("UUID {} Using proxy {}".format(uuid, proxy_url)) | ||||
|  | ||||
|         proxy_args = self.set_proxy_from_list(watch) | ||||
|         fetcher = klass(proxy_override=proxy_args) | ||||
|         fetcher = klass(proxy_override=proxy_url) | ||||
|  | ||||
|         # Configurable per-watch or global extra delay before extracting text (for webDriver types) | ||||
|         system_webdriver_delay = self.datastore.data['settings']['application'].get('webdriver_delay', None) | ||||
| @@ -124,7 +110,7 @@ class perform_site_check(): | ||||
|         if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip(): | ||||
|             fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code'] | ||||
|  | ||||
|         fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter']) | ||||
|         fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['include_filters']) | ||||
|         fetcher.quit() | ||||
|  | ||||
|         self.screenshot = fetcher.screenshot | ||||
| @@ -148,24 +134,26 @@ class perform_site_check(): | ||||
|             is_html = False | ||||
|             is_json = False | ||||
|  | ||||
|         css_filter_rule = watch['css_filter'] | ||||
|         include_filters_rule = watch['include_filters'] | ||||
|         subtractive_selectors = watch.get( | ||||
|             "subtractive_selectors", [] | ||||
|         ) + self.datastore.data["settings"]["application"].get( | ||||
|             "global_subtractive_selectors", [] | ||||
|         ) | ||||
|  | ||||
|         has_filter_rule = css_filter_rule and len(css_filter_rule.strip()) | ||||
|         has_filter_rule = include_filters_rule and len("".join(include_filters_rule).strip()) | ||||
|         has_subtractive_selectors = subtractive_selectors and len(subtractive_selectors[0].strip()) | ||||
|  | ||||
|         if is_json and not has_filter_rule: | ||||
|             css_filter_rule = "json:$" | ||||
|             include_filters_rule.append("json:$") | ||||
|             has_filter_rule = True | ||||
|  | ||||
|         if has_filter_rule: | ||||
|             if 'json:' in css_filter_rule: | ||||
|                 stripped_text_from_html = html_tools.extract_json_as_string(content=fetcher.content, jsonpath_filter=css_filter_rule) | ||||
|                 is_html = False | ||||
|             json_filter_prefixes = ['json:', 'jq:'] | ||||
|             for filter in include_filters_rule: | ||||
|                 if any(prefix in filter for prefix in json_filter_prefixes): | ||||
|                     stripped_text_from_html += html_tools.extract_json_as_string(content=fetcher.content, json_filter=filter) | ||||
|                     is_html = False | ||||
|  | ||||
|         if is_html or is_source: | ||||
|              | ||||
| @@ -180,18 +168,28 @@ class perform_site_check(): | ||||
|             else: | ||||
|                 # Then we assume HTML | ||||
|                 if has_filter_rule: | ||||
|                     # For HTML/XML we offer xpath as an option, just start a regular xPath "/.." | ||||
|                     if css_filter_rule[0] == '/' or css_filter_rule.startswith('xpath:'): | ||||
|                         html_content = html_tools.xpath_filter(xpath_filter=css_filter_rule.replace('xpath:', ''), | ||||
|                                                                html_content=fetcher.content) | ||||
|                     else: | ||||
|                         # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text | ||||
|                         html_content = html_tools.css_filter(css_filter=css_filter_rule, html_content=fetcher.content) | ||||
|                     html_content = "" | ||||
|                     for filter_rule in include_filters_rule: | ||||
|                         # For HTML/XML we offer xpath as an option, just start a regular xPath "/.." | ||||
|                         if filter_rule[0] == '/' or filter_rule.startswith('xpath:'): | ||||
|                             html_content += html_tools.xpath_filter(xpath_filter=filter_rule.replace('xpath:', ''), | ||||
|                                                                     html_content=fetcher.content, | ||||
|                                                                     append_pretty_line_formatting=not is_source) | ||||
|                         else: | ||||
|                             # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text | ||||
|                             html_content += html_tools.include_filters(include_filters=filter_rule, | ||||
|                                                                   html_content=fetcher.content, | ||||
|                                                                   append_pretty_line_formatting=not is_source) | ||||
|  | ||||
|                     if not html_content.strip(): | ||||
|                         raise FilterNotFoundInResponse(include_filters_rule) | ||||
|  | ||||
|                 if has_subtractive_selectors: | ||||
|                     html_content = html_tools.element_removal(subtractive_selectors, html_content) | ||||
|  | ||||
|                 if not is_source: | ||||
|                 if is_source: | ||||
|                     stripped_text_from_html = html_content | ||||
|                 else: | ||||
|                     # extract text | ||||
|                     stripped_text_from_html = \ | ||||
|                         html_tools.html_to_text( | ||||
| @@ -201,12 +199,6 @@ class perform_site_check(): | ||||
|                                 "render_anchor_tag_content", False) | ||||
|                         ) | ||||
|  | ||||
|                 elif is_source: | ||||
|                     stripped_text_from_html = html_content | ||||
|  | ||||
|             # Re #340 - return the content before the 'ignore text' was applied | ||||
|             text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') | ||||
|  | ||||
|         # Re #340 - return the content before the 'ignore text' was applied | ||||
|         text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,5 @@ | ||||
| import re | ||||
|  | ||||
| import pytz | ||||
| from wtforms import ( | ||||
|     BooleanField, | ||||
|     Field, | ||||
| @@ -8,9 +8,11 @@ from wtforms import ( | ||||
|     PasswordField, | ||||
|     RadioField, | ||||
|     SelectField, | ||||
|     SelectMultipleField, | ||||
|     StringField, | ||||
|     SubmitField, | ||||
|     TextAreaField, | ||||
|     TimeField, | ||||
|     fields, | ||||
|     validators, | ||||
|     widgets, | ||||
| @@ -97,6 +99,44 @@ class TimeBetweenCheckForm(Form): | ||||
|     seconds = IntegerField('Seconds', validators=[validators.Optional(), validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
|     # @todo add total seconds minimum validatior = minimum_seconds_recheck_time | ||||
|  | ||||
| class MultiCheckboxDayOfWeekField(SelectMultipleField): | ||||
|     # Multi-select field rendered as a list of checkboxes instead of a <select multiple> box | ||||
|     # Outer container: a list widget, with each option's label placed after its input | ||||
|     widget = widgets.ListWidget(prefix_label=False) | ||||
|     # Each individual option is drawn as a checkbox input | ||||
|     option_widget = widgets.CheckboxInput() | ||||
|  | ||||
| class TimeScheduleCheckLimitForm(Form): | ||||
|     # @todo must be a better python way todo this c/i list | ||||
|     c=[] | ||||
|     i=0 | ||||
|     for d in ['Mon', 'Tue', 'Wed', 'Thu', 'Fri', 'Sat', 'Sun']: | ||||
|         c.append((i, d)) | ||||
|         i+=1 | ||||
|     day_of_week = MultiCheckboxDayOfWeekField('',coerce=int, choices=c) | ||||
|     from_time = TimeField('From', validators=[validators.Optional()]) | ||||
|     until_time = TimeField('Until', validators=[validators.Optional()]) | ||||
|  | ||||
|     def validate(self, **kwargs): | ||||
|         if not super().validate(): | ||||
|             return False | ||||
|  | ||||
|         result = True | ||||
|  | ||||
|         f = self.data.get('from_time') | ||||
|         u = self.data.get('until_time') | ||||
|         if f and u: | ||||
|             import time | ||||
|             f = time.strptime(str(f), '%H:%M:%S') | ||||
|             u = time.strptime(str(u), '%H:%M:%S') | ||||
|             if f >= u: | ||||
|                 #@todo doesnt present | ||||
|                 self.from_time.errors.append('From time must be LESS than the until/end time') | ||||
|                 result = False | ||||
|  | ||||
|         if len(self.data.get('day_of_week', [])) == 0: | ||||
|             self.day_of_week.errors.append('No day selected') | ||||
|             result = False | ||||
|  | ||||
|         return result | ||||
|  | ||||
| # Separated by  key:value | ||||
| class StringDictKeyValue(StringField): | ||||
|     widget = widgets.TextArea() | ||||
| @@ -303,6 +343,25 @@ class ValidateCSSJSONXPATHInput(object): | ||||
|  | ||||
|                 # Re #265 - maybe in the future fetch the page and offer a | ||||
|                 # warning/notice that its possible the rule doesnt yet match anything? | ||||
|                 if not self.allow_json: | ||||
|                     raise ValidationError("jq not permitted in this field!") | ||||
|  | ||||
|             if 'jq:' in line: | ||||
|                 try: | ||||
|                     import jq | ||||
|                 except ModuleNotFoundError: | ||||
|                     # `jq` requires full compilation in windows and so isn't generally available | ||||
|                     raise ValidationError("jq not support not found") | ||||
|  | ||||
|                 input = line.replace('jq:', '') | ||||
|  | ||||
|                 try: | ||||
|                     jq.compile(input) | ||||
|                 except (ValueError) as e: | ||||
|                     message = field.gettext('\'%s\' is not a valid jq expression. (%s)') | ||||
|                     raise ValidationError(message % (input, str(e))) | ||||
|                 except: | ||||
|                     raise ValidationError("A system-error occurred when validating your jq expression") | ||||
|  | ||||
|  | ||||
| class quickWatchForm(Form): | ||||
| @@ -328,27 +387,22 @@ class watchForm(commonSettingsForm): | ||||
|     url = fields.URLField('URL', validators=[validateURL()]) | ||||
|     tag = StringField('Group tag', [validators.Optional()], default='') | ||||
|  | ||||
|     time_between_check = FormField(TimeBetweenCheckForm) | ||||
|  | ||||
|     css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()], default='') | ||||
|  | ||||
|     subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) | ||||
|  | ||||
|     extract_text = StringListField('Extract text', [ValidateListRegex()]) | ||||
|  | ||||
|     title = StringField('Title', default='') | ||||
|  | ||||
|     ignore_text = StringListField('Ignore text', [ValidateListRegex()]) | ||||
|     headers = StringDictKeyValue('Request headers') | ||||
|     body = TextAreaField('Request body', [validators.Optional()]) | ||||
|     method = SelectField('Request method', choices=valid_method, default=default_method) | ||||
|     ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False) | ||||
|     check_unique_lines = BooleanField('Only trigger when new lines appear', default=False) | ||||
|     trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) | ||||
|     extract_text = StringListField('Extract text', [ValidateListRegex()]) | ||||
|     headers = StringDictKeyValue('Request headers') | ||||
|     ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False) | ||||
|     ignore_text = StringListField('Ignore text', [ValidateListRegex()]) | ||||
|     include_filters = StringListField('CSS/JSONPath/JQ/XPath Filters', [ValidateCSSJSONXPATHInput()], default='') | ||||
|     method = SelectField('Request method', choices=valid_method, default=default_method) | ||||
|     subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) | ||||
|     text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()]) | ||||
|  | ||||
|     time_between_check = FormField(TimeBetweenCheckForm) | ||||
|     time_schedule_check_limit = FormField(TimeScheduleCheckLimitForm) | ||||
|     time_use_system_default = BooleanField('Use system/default check time', default=False, validators=[validators.Optional()]) | ||||
|     title = StringField('Title', default='') | ||||
|     trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) | ||||
|     webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()]) | ||||
|  | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|     proxy = RadioField('Proxy') | ||||
| @@ -370,10 +424,10 @@ class watchForm(commonSettingsForm): | ||||
|  | ||||
|         return result | ||||
|  | ||||
|  | ||||
| # datastore.data['settings']['requests'].. | ||||
| class globalSettingsRequestForm(Form): | ||||
|     time_between_check = FormField(TimeBetweenCheckForm) | ||||
|     time_schedule_check_limit = FormField(TimeScheduleCheckLimitForm) | ||||
|     proxy = RadioField('Proxy') | ||||
|     jitter_seconds = IntegerField('Random jitter seconds ± check', | ||||
|                                   render_kw={"style": "width: 5em;"}, | ||||
| @@ -382,21 +436,21 @@ class globalSettingsRequestForm(Form): | ||||
| # datastore.data['settings']['application'].. | ||||
| class globalSettingsApplicationForm(commonSettingsForm): | ||||
|  | ||||
|     base_url = StringField('Base URL', validators=[validators.Optional()]) | ||||
|     global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) | ||||
|     global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) | ||||
|     ignore_whitespace = BooleanField('Ignore whitespace') | ||||
|     removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False) | ||||
|     render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) | ||||
|     fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()]) | ||||
|     password = SaltyPasswordField() | ||||
|  | ||||
|     base_url = StringField('Base URL', validators=[validators.Optional()]) | ||||
|     empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False) | ||||
|     fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification', | ||||
|                                                                   render_kw={"style": "width: 5em;"}, | ||||
|                                                                   validators=[validators.NumberRange(min=0, | ||||
|                                                                                                      message="Should contain zero or more attempts")]) | ||||
|     global_ignore_text = StringListField('Ignore Text', [ValidateListRegex()]) | ||||
|     global_subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) | ||||
|     ignore_whitespace = BooleanField('Ignore whitespace') | ||||
|     password = SaltyPasswordField() | ||||
|     removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) | ||||
|     timezone = SelectField('Timezone', choices=pytz.all_timezones) | ||||
|  | ||||
|  | ||||
| class globalSettingsForm(Form): | ||||
|   | ||||
| @@ -1,32 +1,36 @@ | ||||
| import json | ||||
| from typing import List | ||||
|  | ||||
| from bs4 import BeautifulSoup | ||||
| from jsonpath_ng.ext import parse | ||||
| import re | ||||
| from inscriptis import get_text | ||||
| from inscriptis.model.config import ParserConfig | ||||
| from jsonpath_ng.ext import parse | ||||
| from typing import List | ||||
| import json | ||||
| import re | ||||
|  | ||||
| class FilterNotFoundInResponse(ValueError): | ||||
|     def __init__(self, msg): | ||||
|         ValueError.__init__(self, msg) | ||||
| # HTML added to be sure each result matching a filter (.example) gets converted to a new line by Inscriptis | ||||
| TEXT_FILTER_LIST_LINE_SUFFIX = "<br/>" | ||||
|  | ||||
| class JSONNotFound(ValueError): | ||||
|     def __init__(self, msg): | ||||
|         ValueError.__init__(self, msg) | ||||
|  | ||||
|  | ||||
|          | ||||
| # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches | ||||
| def css_filter(css_filter, html_content): | ||||
| def include_filters(include_filters, html_content, append_pretty_line_formatting=False): | ||||
|     soup = BeautifulSoup(html_content, "html.parser") | ||||
|     html_block = "" | ||||
|     r = soup.select(css_filter, separator="") | ||||
|     if len(html_content) > 0 and len(r) == 0: | ||||
|         raise FilterNotFoundInResponse(css_filter) | ||||
|     for item in r: | ||||
|         html_block += str(item) | ||||
|     r = soup.select(include_filters, separator="") | ||||
|  | ||||
|     return html_block + "\n" | ||||
|     for element in r: | ||||
|         # When there's more than 1 match, then add the suffix to separate each line | ||||
|         # And where the matched result doesn't include something that will cause Inscriptis to add a newline | ||||
|         # (This way each 'match' reliably has a new-line in the diff) | ||||
|         # Divs are converted to 4 whitespaces by inscriptis | ||||
|         if append_pretty_line_formatting and len(html_block) and not element.name in (['br', 'hr', 'div', 'p']): | ||||
|             html_block += TEXT_FILTER_LIST_LINE_SUFFIX | ||||
|  | ||||
|         html_block += str(element) | ||||
|  | ||||
|     return html_block | ||||
|  | ||||
| def subtractive_css_selector(css_selector, html_content): | ||||
|     soup = BeautifulSoup(html_content, "html.parser") | ||||
| @@ -42,25 +46,29 @@ def element_removal(selectors: List[str], html_content): | ||||
|  | ||||
|  | ||||
| # Return str Utf-8 of matched rules | ||||
| def xpath_filter(xpath_filter, html_content): | ||||
| def xpath_filter(xpath_filter, html_content, append_pretty_line_formatting=False): | ||||
|     from lxml import etree, html | ||||
|  | ||||
|     tree = html.fromstring(bytes(html_content, encoding='utf-8')) | ||||
|     html_block = "" | ||||
|  | ||||
|     r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}) | ||||
|     if len(html_content) > 0 and len(r) == 0: | ||||
|         raise FilterNotFoundInResponse(xpath_filter) | ||||
|  | ||||
|     # @note: //title/text() won't work where <title> contains CDATA.. | ||||
|  | ||||
|     for element in r: | ||||
|         # When there's more than 1 match, then add the suffix to separate each line | ||||
|         # And where the matched result doesn't include something that will cause Inscriptis to add a newline | ||||
|         # (This way each 'match' reliably has a new-line in the diff) | ||||
|         # Divs are converted to 4 whitespaces by inscriptis | ||||
|         if append_pretty_line_formatting and len(html_block) and (not hasattr( element, 'tag' ) or not element.tag in (['br', 'hr', 'div', 'p'])): | ||||
|             html_block += TEXT_FILTER_LIST_LINE_SUFFIX | ||||
|  | ||||
|         if type(element) == etree._ElementStringResult: | ||||
|             html_block += str(element) + "<br/>" | ||||
|             html_block += str(element) | ||||
|         elif type(element) == etree._ElementUnicodeResult: | ||||
|             html_block += str(element) + "<br/>" | ||||
|             html_block += str(element) | ||||
|         else: | ||||
|             html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>" | ||||
|             html_block += etree.tostring(element, pretty_print=True).decode('utf-8') | ||||
|  | ||||
|     return html_block | ||||
|  | ||||
| @@ -79,19 +87,35 @@ def extract_element(find='title', html_content=''): | ||||
|     return element_text | ||||
|  | ||||
| # | ||||
| def _parse_json(json_data, jsonpath_filter): | ||||
|     s=[] | ||||
|     jsonpath_expression = parse(jsonpath_filter.replace('json:', '')) | ||||
|     match = jsonpath_expression.find(json_data) | ||||
| def _parse_json(json_data, json_filter): | ||||
|     if 'json:' in json_filter: | ||||
|         jsonpath_expression = parse(json_filter.replace('json:', '')) | ||||
|         match = jsonpath_expression.find(json_data) | ||||
|         return _get_stripped_text_from_json_match(match) | ||||
|  | ||||
|     if 'jq:' in json_filter: | ||||
|  | ||||
|         try: | ||||
|             import jq | ||||
|         except ModuleNotFoundError: | ||||
|             # `jq` requires full compilation in windows and so isn't generally available | ||||
|             raise Exception("jq not support not found") | ||||
|  | ||||
|         jq_expression = jq.compile(json_filter.replace('jq:', '')) | ||||
|         match = jq_expression.input(json_data).all() | ||||
|  | ||||
|         return _get_stripped_text_from_json_match(match) | ||||
|  | ||||
| def _get_stripped_text_from_json_match(match): | ||||
|     s = [] | ||||
|     # More than one result, we will return it as a JSON list. | ||||
|     if len(match) > 1: | ||||
|         for i in match: | ||||
|             s.append(i.value) | ||||
|             s.append(i.value if hasattr(i, 'value') else i) | ||||
|  | ||||
|     # Single value, use just the value, as it could be later used in a token in notifications. | ||||
|     if len(match) == 1: | ||||
|         s = match[0].value | ||||
|         s = match[0].value if hasattr(match[0], 'value') else match[0] | ||||
|  | ||||
|     # Re #257 - Better handling where it does not exist, in the case the original 's' value was False.. | ||||
|     if not match: | ||||
| @@ -103,16 +127,16 @@ def _parse_json(json_data, jsonpath_filter): | ||||
|  | ||||
|     return stripped_text_from_html | ||||
|  | ||||
| def extract_json_as_string(content, jsonpath_filter): | ||||
| def extract_json_as_string(content, json_filter): | ||||
|  | ||||
|     stripped_text_from_html = False | ||||
|  | ||||
|     # Try to parse/filter out the JSON, if we get some parser error, then maybe it's embedded <script type=ldjson> | ||||
|     try: | ||||
|         stripped_text_from_html = _parse_json(json.loads(content), jsonpath_filter) | ||||
|         stripped_text_from_html = _parse_json(json.loads(content), json_filter) | ||||
|     except json.JSONDecodeError: | ||||
|  | ||||
|         # Foreach <script json></script> blob.. just return the first that matches jsonpath_filter | ||||
|         # Foreach <script json></script> blob.. just return the first that matches json_filter | ||||
|         s = [] | ||||
|         soup = BeautifulSoup(content, 'html.parser') | ||||
|         bs_result = soup.findAll('script') | ||||
| @@ -131,7 +155,7 @@ def extract_json_as_string(content, jsonpath_filter): | ||||
|                 # Just skip it | ||||
|                 continue | ||||
|             else: | ||||
|                 stripped_text_from_html = _parse_json(json_data, jsonpath_filter) | ||||
|                 stripped_text_from_html = _parse_json(json_data, json_filter) | ||||
|                 if stripped_text_from_html: | ||||
|                     break | ||||
|  | ||||
|   | ||||
| @@ -103,12 +103,12 @@ class import_distill_io_json(Importer): | ||||
|                     pass | ||||
|                 except IndexError: | ||||
|                     pass | ||||
|  | ||||
|                 extras['include_filters'] = [] | ||||
|                 try: | ||||
|                     extras['css_filter'] = d_config['selections'][0]['frames'][0]['includes'][0]['expr'] | ||||
|                     if d_config['selections'][0]['frames'][0]['includes'][0]['type'] == 'xpath': | ||||
|                         extras['css_filter'] = 'xpath:' + extras['css_filter'] | ||||
|  | ||||
|                         extras['include_filters'].append('xpath:' + d_config['selections'][0]['frames'][0]['includes'][0]['expr']) | ||||
|                     else: | ||||
|                         extras['include_filters'].append(d_config['selections'][0]['frames'][0]['includes'][0]['expr']) | ||||
|                 except KeyError: | ||||
|                     pass | ||||
|                 except IndexError: | ||||
|   | ||||
| @@ -13,37 +13,35 @@ class model(dict): | ||||
|             'watching': {}, | ||||
|             'settings': { | ||||
|                 'headers': { | ||||
|                     'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'), | ||||
|                     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', | ||||
|                     'Accept-Encoding': 'gzip, deflate',  # No support for brotli in python requests yet. | ||||
|                     'Accept-Language': 'en-GB,en-US;q=0.9,en;' | ||||
|                 }, | ||||
|                 'requests': { | ||||
|                     'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds | ||||
|                     'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None}, | ||||
|                     'time_schedule_check_limit': {'day_of_week': [0, 1, 2, 3, 4, 5, 6], 'time_from': '', 'time_until': ''}, | ||||
|                     'jitter_seconds': 0, | ||||
|                     'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")),  # Number of threads, lower is better for slow connections | ||||
|                     'proxy': None # Preferred proxy connection | ||||
|                 }, | ||||
|                 'application': { | ||||
|                     # Custom notification content | ||||
|                     'api_access_token_enabled': True, | ||||
|                     'password': False, | ||||
|                     'base_url' : None, | ||||
|                     'extract_title_as_title': False, | ||||
|                     'empty_pages_are_a_change': False, | ||||
|                     'extract_title_as_title': False, | ||||
|                     'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), | ||||
|                     'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT, | ||||
|                     'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum | ||||
|                     'global_subtractive_selectors': [], | ||||
|                     'ignore_whitespace': True, | ||||
|                     'render_anchor_tag_content': False, | ||||
|                     'notification_urls': [], # Apprise URL list | ||||
|                     # Custom notification content | ||||
|                     'notification_title': default_notification_title, | ||||
|                     'notification_body': default_notification_body, | ||||
|                     'notification_format': default_notification_format, | ||||
|                     'notification_title': default_notification_title, | ||||
|                     'notification_urls': [], # Apprise URL list | ||||
|                     'password': False, | ||||
|                     'render_anchor_tag_content': False, | ||||
|                     'schema_version' : 0, | ||||
|                     'webdriver_delay': None  # Extra delay in seconds before extracting text | ||||
|                     'timezone': 'UTC', | ||||
|                     'webdriver_delay': None,  # Extra delay in seconds before extracting text | ||||
|                 } | ||||
|             } | ||||
|         } | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| import os | ||||
| import uuid as uuid_builder | ||||
| from distutils.util import strtobool | ||||
| import logging | ||||
| import os | ||||
| import time | ||||
| import uuid | ||||
|  | ||||
| minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60)) | ||||
| mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} | ||||
| @@ -14,42 +16,44 @@ class model(dict): | ||||
|     __newest_history_key = None | ||||
|     __history_n=0 | ||||
|     __base_config = { | ||||
|             'url': None, | ||||
|             'tag': None, | ||||
|             'last_checked': 0, | ||||
|             'paused': False, | ||||
|             'last_viewed': 0,  # history key value of the last viewed via the [diff] link | ||||
|             #'newest_history_key': 0, | ||||
|             'title': None, | ||||
|             'previous_md5': False, | ||||
|             'uuid': str(uuid_builder.uuid4()), | ||||
|             'headers': {},  # Extra headers to send | ||||
|             #'history': {},  # Dict of timestamp and output stripped filename (removed) | ||||
|             #'newest_history_key': 0, (removed, taken from history.txt index) | ||||
|             'body': None, | ||||
|             'method': 'GET', | ||||
|             #'history': {},  # Dict of timestamp and output stripped filename | ||||
|             'check_unique_lines': False, # On change-detected, compare against all history if its something new | ||||
|             'check_count': 0, | ||||
|             'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. | ||||
|             'extract_text': [],  # Extract text by regex after filters | ||||
|             'extract_title_as_title': False, | ||||
|             'fetch_backend': None, | ||||
|             'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), | ||||
|             'headers': {},  # Extra headers to send | ||||
|             'ignore_text': [],  # List of text to ignore when calculating the comparison checksum | ||||
|             # Custom notification content | ||||
|             'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise) | ||||
|             'notification_title': None, | ||||
|             'include_filters': [], | ||||
|             'last_checked': 0, | ||||
|             'last_error': False, | ||||
|             'last_viewed': 0,  # history key value of the last viewed via the [diff] link | ||||
|             'method': 'GET', | ||||
|              # Custom notification content | ||||
|             'notification_body': None, | ||||
|             'notification_format': default_notification_format_for_watch, | ||||
|             'notification_muted': False, | ||||
|             'css_filter': '', | ||||
|             'last_error': False, | ||||
|             'extract_text': [],  # Extract text by regex after filters | ||||
|             'subtractive_selectors': [], | ||||
|             'trigger_text': [],  # List of text or regex to wait for until a change is detected | ||||
|             'text_should_not_be_present': [], # Text that should not present | ||||
|             'fetch_backend': None, | ||||
|             'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), | ||||
|             'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. | ||||
|             'extract_title_as_title': False, | ||||
|             'check_unique_lines': False, # On change-detected, compare against all history if its something new | ||||
|             'notification_title': None, | ||||
|             'notification_urls': [],  # List of URLs to add to the notification Queue (Usually AppRise) | ||||
|             'paused': False, | ||||
|             'previous_md5': False, | ||||
|             'proxy': None, # Preferred proxy connection | ||||
|             'subtractive_selectors': [], | ||||
|             'tag': None, | ||||
|             'text_should_not_be_present': [], # Text that should not present | ||||
|             # Re #110, so then if this is set to None, we know to use the default value instead | ||||
|             # Requires setting to None on submit if it's the same as the default | ||||
|             # Should be all None by default, so we use the system default in this case. | ||||
|             'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, | ||||
|             'time_schedule_check_limit': {'day_of_week': [0, 1, 2, 3, 4, 5, 6], 'time_from': '', 'time_until': ''}, | ||||
|             'title': None, | ||||
|             'trigger_text': [],  # List of text or regex to wait for until a change is detected | ||||
|             'url': None, | ||||
|             'uuid': str(uuid.uuid4()), | ||||
|             'webdriver_delay': None, | ||||
|             'webdriver_js_execute_code': None, # Run before change-detection | ||||
|         } | ||||
| @@ -60,7 +64,7 @@ class model(dict): | ||||
|         self.update(self.__base_config) | ||||
|         self.__datastore_path = kw['datastore_path'] | ||||
|  | ||||
|         self['uuid'] = str(uuid_builder.uuid4()) | ||||
|         self['uuid'] = str(uuid.uuid4()) | ||||
|  | ||||
|         del kw['datastore_path'] | ||||
|  | ||||
| @@ -82,10 +86,19 @@ class model(dict): | ||||
|         return False | ||||
|  | ||||
|     def ensure_data_dir_exists(self): | ||||
|         target_path = os.path.join(self.__datastore_path, self['uuid']) | ||||
|         if not os.path.isdir(target_path): | ||||
|             print ("> Creating data dir {}".format(target_path)) | ||||
|             os.mkdir(target_path) | ||||
|         if not os.path.isdir(self.watch_data_dir): | ||||
|             print ("> Creating data dir {}".format(self.watch_data_dir)) | ||||
|             os.mkdir(self.watch_data_dir) | ||||
|  | ||||
|     @property | ||||
|     def link(self): | ||||
|         url = self.get('url', '') | ||||
|         if '{%' in url or '{{' in url: | ||||
|             from jinja2 import Environment | ||||
|             # Jinja2 available in URLs along with https://pypi.org/project/jinja2-time/ | ||||
|             jinja2_env = Environment(extensions=['jinja2_time.TimeExtension']) | ||||
|             return str(jinja2_env.from_string(url).render()) | ||||
|         return url | ||||
|  | ||||
|     @property | ||||
|     def label(self): | ||||
| @@ -109,16 +122,40 @@ class model(dict): | ||||
|  | ||||
|     @property | ||||
|     def history(self): | ||||
|         """History index is just a text file as a list | ||||
|             {watch-uuid}/history.txt | ||||
|  | ||||
|             contains a list like | ||||
|  | ||||
|             {epoch-time},{filename}\n | ||||
|  | ||||
|             We read in this list as the history information | ||||
|  | ||||
|         """ | ||||
|         tmp_history = {} | ||||
|         import logging | ||||
|         import time | ||||
|  | ||||
|         # Read the history file as a dict | ||||
|         fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt") | ||||
|         fname = os.path.join(self.watch_data_dir, "history.txt") | ||||
|         if os.path.isfile(fname): | ||||
|             logging.debug("Reading history index " + str(time.time())) | ||||
|             with open(fname, "r") as f: | ||||
|                 tmp_history = dict(i.strip().split(',', 2) for i in f.readlines()) | ||||
|                 for i in f.readlines(): | ||||
|                     if ',' in i: | ||||
|                         k, v = i.strip().split(',', 2) | ||||
|  | ||||
|                         # The index history could contain a relative path, so we need to make the fullpath | ||||
|                         # so that python can read it | ||||
|                         if not '/' in v and not '\'' in v: | ||||
|                             v = os.path.join(self.watch_data_dir, v) | ||||
|                         else: | ||||
|                             # It's possible that they moved the datadir on older versions | ||||
|                             # So the snapshot exists but is in a different path | ||||
|                             snapshot_fname = v.split('/')[-1] | ||||
|                             proposed_new_path = os.path.join(self.watch_data_dir, snapshot_fname) | ||||
|                             if not os.path.exists(v) and os.path.exists(proposed_new_path): | ||||
|                                 v = proposed_new_path | ||||
|  | ||||
|                         tmp_history[k] = v | ||||
|  | ||||
|         if len(tmp_history): | ||||
|             self.__newest_history_key = list(tmp_history.keys())[-1] | ||||
| @@ -129,7 +166,7 @@ class model(dict): | ||||
|  | ||||
|     @property | ||||
|     def has_history(self): | ||||
|         fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt") | ||||
|         fname = os.path.join(self.watch_data_dir, "history.txt") | ||||
|         return os.path.isfile(fname) | ||||
|  | ||||
|     # Returns the newest key, but if there's only 1 record, then it's counted as not being new, so return 0. | ||||
| @@ -148,31 +185,33 @@ class model(dict): | ||||
|     # Save some text file to the appropriate path and bump the history | ||||
|     # result_obj from fetch_site_status.run() | ||||
|     def save_history_text(self, contents, timestamp): | ||||
|         import uuid | ||||
|         import logging | ||||
|  | ||||
|         output_path = "{}/{}".format(self.__datastore_path, self['uuid']) | ||||
|  | ||||
|         self.ensure_data_dir_exists() | ||||
|  | ||||
|         snapshot_fname = "{}/{}.stripped.txt".format(output_path, uuid.uuid4()) | ||||
|         logging.debug("Saving history text {}".format(snapshot_fname)) | ||||
|         # Small hack so that we sleep just enough to allow 1 second between history snapshots | ||||
|         # this is because history.txt indexes/keys snapshots by epoch seconds and we don't want duplicate keys | ||||
|         if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key): | ||||
|             time.sleep(timestamp - self.__newest_history_key) | ||||
|  | ||||
|         with open(snapshot_fname, 'wb') as f: | ||||
|         snapshot_fname = "{}.txt".format(str(uuid.uuid4())) | ||||
|  | ||||
|         # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading | ||||
|         # most sites are utf-8 and some are even broken utf-8 | ||||
|         with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f: | ||||
|             f.write(contents) | ||||
|             f.close() | ||||
|  | ||||
|         # Append to index | ||||
|         # @todo check last char was \n | ||||
|         index_fname = "{}/history.txt".format(output_path) | ||||
|         index_fname = os.path.join(self.watch_data_dir, "history.txt") | ||||
|         with open(index_fname, 'a') as f: | ||||
|             f.write("{},{}\n".format(timestamp, snapshot_fname)) | ||||
|             f.close() | ||||
|  | ||||
|         self.__newest_history_key = timestamp | ||||
|         self.__history_n+=1 | ||||
|         self.__history_n += 1 | ||||
|  | ||||
|         #@todo bump static cache of the last timestamp so we dont need to examine the file to set a proper ''viewed'' status | ||||
|         # @todo bump static cache of the last timestamp so we don't need to examine the file to set a proper 'viewed' status | ||||
|         return snapshot_fname | ||||
|  | ||||
|     @property | ||||
| @@ -190,6 +229,11 @@ class model(dict): | ||||
|                 seconds += x * n | ||||
|         return seconds | ||||
|  | ||||
|     def is_schedule_permitted(self): | ||||
|         """According to the current day of week and time, is this watch queueable?""" | ||||
|  | ||||
|         return True | ||||
|  | ||||
|     # Iterate over all history texts and see if something new exists | ||||
|     def lines_contain_something_unique_compared_to_history(self, lines: list): | ||||
|         local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) | ||||
| @@ -205,14 +249,14 @@ class model(dict): | ||||
|         return not local_lines.issubset(existing_history) | ||||
|  | ||||
|     def get_screenshot(self): | ||||
|         fname = os.path.join(self.__datastore_path, self['uuid'], "last-screenshot.png") | ||||
|         fname = os.path.join(self.watch_data_dir, "last-screenshot.png") | ||||
|         if os.path.isfile(fname): | ||||
|             return fname | ||||
|  | ||||
|         return False | ||||
|  | ||||
|     def __get_file_ctime(self, filename): | ||||
|         fname = os.path.join(self.__datastore_path, self['uuid'], filename) | ||||
|         fname = os.path.join(self.watch_data_dir, filename) | ||||
|         if os.path.isfile(fname): | ||||
|             return int(os.path.getmtime(fname)) | ||||
|         return False | ||||
| @@ -237,9 +281,14 @@ class model(dict): | ||||
|     def snapshot_error_screenshot_ctime(self): | ||||
|         return self.__get_file_ctime('last-error-screenshot.png') | ||||
|  | ||||
|     @property | ||||
|     def watch_data_dir(self): | ||||
|         # The base dir of the watch data | ||||
|         return os.path.join(self.__datastore_path, self['uuid']) | ||||
|      | ||||
|     def get_error_text(self): | ||||
|         """Return the text saved from a previous request that resulted in a non-200 error""" | ||||
|         fname = os.path.join(self.__datastore_path, self['uuid'], "last-error.txt") | ||||
|         fname = os.path.join(self.watch_data_dir, "last-error.txt") | ||||
|         if os.path.isfile(fname): | ||||
|             with open(fname, 'r') as f: | ||||
|                 return f.read() | ||||
| @@ -247,7 +296,7 @@ class model(dict): | ||||
|  | ||||
|     def get_error_snapshot(self): | ||||
|         """Return path to the screenshot that resulted in a non-200 error""" | ||||
|         fname = os.path.join(self.__datastore_path, self['uuid'], "last-error-screenshot.png") | ||||
|         fname = os.path.join(self.watch_data_dir, "last-error-screenshot.png") | ||||
|         if os.path.isfile(fname): | ||||
|             return fname | ||||
|         return False | ||||
|   | ||||
| @@ -9,6 +9,8 @@ | ||||
| # exit when any command fails | ||||
| set -e | ||||
|  | ||||
| SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd ) | ||||
|  | ||||
| find tests/test_*py -type f|while read test_name | ||||
| do | ||||
|   echo "TEST RUNNING $test_name" | ||||
| @@ -22,7 +24,6 @@ echo "RUNNING WITH BASE_URL SET" | ||||
| export BASE_URL="https://really-unique-domain.io" | ||||
| pytest tests/test_notification.py | ||||
|  | ||||
|  | ||||
| # Now for the selenium and playwright/browserless fetchers | ||||
| # Note - these are not UI functional tests - just checking that each one can fetch the content | ||||
|  | ||||
| @@ -38,7 +39,9 @@ docker kill $$-test_selenium | ||||
|  | ||||
| echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..." | ||||
| # Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt | ||||
| pip3 install playwright~=1.24 | ||||
| PLAYWRIGHT_VERSION=$(grep -i -E "RUN pip install.+" "$SCRIPT_DIR/../Dockerfile" | grep --only-matching -i -E "playwright[=><~+]+[0-9\.]+") | ||||
| echo "using $PLAYWRIGHT_VERSION" | ||||
| pip3 install "$PLAYWRIGHT_VERSION" | ||||
| docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm  -p 3000:3000  --shm-size="2g"  browserless/chrome:1.53-chrome-stable | ||||
| # takes a while to spin up | ||||
| sleep 5 | ||||
| @@ -48,4 +51,48 @@ pytest tests/test_errorhandling.py | ||||
| pytest tests/visualselector/test_fetch_data.py | ||||
|  | ||||
| unset PLAYWRIGHT_DRIVER_URL | ||||
| docker kill $$-test_browserless | ||||
| docker kill $$-test_browserless | ||||
|  | ||||
| # Test proxy list handling, starting two squids on different ports | ||||
| # Each squid adds a different header to the response, which is the main thing we test for. | ||||
| docker run -d --name $$-squid-one --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3128:3128 ubuntu/squid:4.13-21.10_edge | ||||
| docker run -d --name $$-squid-two --rm -v `pwd`/tests/proxy_list/squid.conf:/etc/squid/conf.d/debian.conf -p 3129:3128 ubuntu/squid:4.13-21.10_edge | ||||
|  | ||||
|  | ||||
| # So, basic HTTP as env var test | ||||
| export HTTP_PROXY=http://localhost:3128 | ||||
| export HTTPS_PROXY=http://localhost:3128 | ||||
| pytest tests/proxy_list/test_proxy.py | ||||
| docker logs $$-squid-one 2>/dev/null|grep one.changedetection.io | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see a request to one.changedetection.io in the squid logs (while checking env vars HTTP_PROXY/HTTPS_PROXY)" | ||||
| fi | ||||
| unset HTTP_PROXY | ||||
| unset HTTPS_PROXY | ||||
|  | ||||
|  | ||||
| # 2nd test actually choose the preferred proxy from proxies.json | ||||
| cp tests/proxy_list/proxies.json-example ./test-datastore/proxies.json | ||||
| # Makes a watch use a preferred proxy | ||||
| pytest tests/proxy_list/test_multiple_proxy.py | ||||
|  | ||||
| # Should be a request in the default "first" squid | ||||
| docker logs $$-squid-one 2>/dev/null|grep chosen.changedetection.io | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)" | ||||
| fi | ||||
|  | ||||
| # And one in the 'second' squid (user selects this as preferred) | ||||
| docker logs $$-squid-two 2>/dev/null|grep chosen.changedetection.io | ||||
| if [ $? -ne 0 ] | ||||
| then | ||||
|   echo "Did not see a request to chosen.changedetection.io in the squid logs (while checking preferred proxy)" | ||||
| fi | ||||
|  | ||||
| # @todo - test system override proxy selection and watch defaults, setup a 3rd squid? | ||||
| docker kill $$-squid-one | ||||
| docker kill $$-squid-two | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -50,7 +50,7 @@ $(document).ready(function() { | ||||
|         state_clicked=false; | ||||
|         ctx.clearRect(0, 0, c.width, c.height); | ||||
|         xctx.clearRect(0, 0, c.width, c.height); | ||||
|         $("#css_filter").val(''); | ||||
|         $("#include_filters").val(''); | ||||
|     }); | ||||
|  | ||||
|  | ||||
| @@ -68,7 +68,7 @@ $(document).ready(function() { | ||||
|                xctx = c.getContext("2d"); | ||||
|                 // redline highlight context | ||||
|                ctx = c.getContext("2d"); | ||||
|                current_default_xpath =$("#css_filter").val(); | ||||
|                current_default_xpath =$("#include_filters").val(); | ||||
|                fetch_data(); | ||||
|                $('#selector-canvas').off("mousemove mousedown"); | ||||
|                // screenshot_url defined in the edit.html template | ||||
| @@ -205,9 +205,9 @@ $(document).ready(function() { | ||||
|         var sel = selector_data['size_pos'][current_selected_i]; | ||||
|         if (sel[0] == '/') { | ||||
|         // @todo - not sure just checking / is right | ||||
|             $("#css_filter").val('xpath:'+sel.xpath); | ||||
|             $("#include_filters").val('xpath:'+sel.xpath); | ||||
|         } else { | ||||
|             $("#css_filter").val(sel.xpath); | ||||
|             $("#include_filters").val(sel.xpath); | ||||
|         } | ||||
|         xctx.fillStyle = 'rgba(205,205,205,0.95)'; | ||||
|         xctx.strokeStyle = 'rgba(225,0,0,0.9)'; | ||||
|   | ||||
| @@ -132,7 +132,7 @@ body:after, body:before { | ||||
|  | ||||
| .fetch-error { | ||||
|   padding-top: 1em; | ||||
|   font-size: 60%; | ||||
|   font-size: 80%; | ||||
|   max-width: 400px; | ||||
|   display: block; } | ||||
|  | ||||
| @@ -480,6 +480,22 @@ ul { | ||||
|   .time-check-widget tr input[type="number"] { | ||||
|     width: 5em; } | ||||
|  | ||||
| .pure-control-group table label { | ||||
|   color: #333; | ||||
|   font-weight: normal; } | ||||
|  | ||||
| .time-schedule-check-limit-widget tr { | ||||
|   display: inline-block; } | ||||
|  | ||||
| .time-schedule-check-limit-widget li { | ||||
|   text-decoration: none; } | ||||
|  | ||||
| .time-schedule-check-limit-widget ul { | ||||
|   padding-left: 0px; } | ||||
|   .time-schedule-check-limit-widget ul li { | ||||
|     display: inline-block; | ||||
|     width: 3em; } | ||||
|  | ||||
| #selector-wrapper { | ||||
|   height: 600px; | ||||
|   overflow-y: scroll; | ||||
|   | ||||
| @@ -156,7 +156,7 @@ body:after, body:before { | ||||
|  | ||||
| .fetch-error { | ||||
|   padding-top: 1em; | ||||
|   font-size: 60%; | ||||
|   font-size: 80%; | ||||
|   max-width: 400px; | ||||
|   display: block; | ||||
| } | ||||
| @@ -677,6 +677,29 @@ ul { | ||||
|         } | ||||
|     } | ||||
| } | ||||
| .pure-control-group table  label { | ||||
|   color: #333; | ||||
|   font-weight: normal; | ||||
| } | ||||
|  | ||||
| .time-schedule-check-limit-widget { | ||||
|   tr { | ||||
|     display: inline-block; | ||||
|   } | ||||
|  | ||||
|   li { | ||||
|     text-decoration: none; | ||||
|   } | ||||
|  | ||||
|   ul { | ||||
|     padding-left: 0px; | ||||
|     li { | ||||
|       display: inline-block; | ||||
|       width: 3em; | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| #selector-wrapper { | ||||
|  height: 600px; | ||||
| @@ -803,4 +826,4 @@ ul { | ||||
|   padding: 0.5rem; | ||||
|   border-radius: 5px; | ||||
|   color: #ff3300; | ||||
| } | ||||
| } | ||||
|   | ||||
| @@ -27,17 +27,18 @@ class ChangeDetectionStore: | ||||
|     # For when we edit, we should write to disk | ||||
|     needs_write_urgent = False | ||||
|  | ||||
|     __version_check = True | ||||
|  | ||||
|     def __init__(self, datastore_path="/datastore", include_default_watches=True, version_tag="0.0.0"): | ||||
|         # Should only be active for docker | ||||
|         # logging.basicConfig(filename='/dev/stdout', level=logging.INFO) | ||||
|         self.needs_write = False | ||||
|         self.__data = App.model() | ||||
|         self.datastore_path = datastore_path | ||||
|         self.json_store_path = "{}/url-watches.json".format(self.datastore_path) | ||||
|         self.needs_write = False | ||||
|         self.proxy_list = None | ||||
|         self.start_time = time.time() | ||||
|         self.stop_thread = False | ||||
|  | ||||
|         self.__data = App.model() | ||||
|  | ||||
|         # Base definition for all watchers | ||||
|         # deepcopy part of #569 - not sure why its needed exactly | ||||
|         self.generic_definition = deepcopy(Watch.model(datastore_path = datastore_path, default={})) | ||||
| @@ -81,10 +82,13 @@ class ChangeDetectionStore: | ||||
|         except (FileNotFoundError, json.decoder.JSONDecodeError): | ||||
|             if include_default_watches: | ||||
|                 print("Creating JSON store at", self.datastore_path) | ||||
|                 self.add_watch(url='https://news.ycombinator.com/', | ||||
|                                tag='Tech news', | ||||
|                                extras={'fetch_backend': 'html_requests'}) | ||||
|  | ||||
|                 self.add_watch(url='http://www.quotationspage.com/random.php', tag='test') | ||||
|                 self.add_watch(url='https://news.ycombinator.com/', tag='Tech news') | ||||
|                 self.add_watch(url='https://changedetection.io/CHANGELOG.txt', tag='changedetection.io') | ||||
|                 self.add_watch(url='https://changedetection.io/CHANGELOG.txt', | ||||
|                                tag='changedetection.io', | ||||
|                                extras={'fetch_backend': 'html_requests'}) | ||||
|  | ||||
|         self.__data['version_tag'] = version_tag | ||||
|  | ||||
| @@ -113,9 +117,7 @@ class ChangeDetectionStore: | ||||
|             self.__data['settings']['application']['api_access_token'] = secret | ||||
|  | ||||
|         # Proxy list support - available as a selection in settings when text file is imported | ||||
|         # CSV list | ||||
|         # "name, address", or just "name" | ||||
|         proxy_list_file = "{}/proxies.txt".format(self.datastore_path) | ||||
|         proxy_list_file = "{}/proxies.json".format(self.datastore_path) | ||||
|         if path.isfile(proxy_list_file): | ||||
|             self.import_proxy_list(proxy_list_file) | ||||
|  | ||||
| @@ -270,7 +272,7 @@ class ChangeDetectionStore: | ||||
|             extras = {} | ||||
|         # should always be str | ||||
|         if tag is None or not tag: | ||||
|             tag='' | ||||
|             tag = '' | ||||
|  | ||||
|         # In case these are copied across, assume it's a reference and deepcopy() | ||||
|         apply_extras = deepcopy(extras) | ||||
| @@ -285,17 +287,31 @@ class ChangeDetectionStore: | ||||
|                 res = r.json() | ||||
|  | ||||
|                 # List of permissible attributes we accept from the wild internet | ||||
|                 for k in ['url', 'tag', | ||||
|                           'paused', 'title', | ||||
|                           'previous_md5', 'headers', | ||||
|                           'body', 'method', | ||||
|                           'ignore_text', 'css_filter', | ||||
|                           'subtractive_selectors', 'trigger_text', | ||||
|                           'extract_title_as_title', 'extract_text', | ||||
|                           'text_should_not_be_present', | ||||
|                           'webdriver_js_execute_code']: | ||||
|                 for k in [ | ||||
|                     'body', | ||||
|                     'css_filter', | ||||
|                     'extract_text', | ||||
|                     'extract_title_as_title', | ||||
|                     'headers', | ||||
|                     'ignore_text', | ||||
|                     'include_filters', | ||||
|                     'method', | ||||
|                     'paused', | ||||
|                     'previous_md5', | ||||
|                     'subtractive_selectors', | ||||
|                     'tag', | ||||
|                     'text_should_not_be_present', | ||||
|                     'title', | ||||
|                     'trigger_text', | ||||
|                     'webdriver_js_execute_code', | ||||
|                     'url', | ||||
|                 ]: | ||||
|                     if res.get(k): | ||||
|                         apply_extras[k] = res[k] | ||||
|                         if k != 'css_filter': | ||||
|                             apply_extras[k] = res[k] | ||||
|                         else: | ||||
|                             # We renamed the field and made it a list | ||||
|                             apply_extras['include_filters'] = [res['css_filter']] | ||||
|  | ||||
|             except Exception as e: | ||||
|                 logging.error("Error fetching metadata for shared watch link", url, str(e)) | ||||
| @@ -318,12 +334,13 @@ class ChangeDetectionStore: | ||||
|                     del apply_extras[k] | ||||
|  | ||||
|             new_watch.update(apply_extras) | ||||
|             self.__data['watching'][new_uuid]=new_watch | ||||
|             self.__data['watching'][new_uuid] = new_watch | ||||
|  | ||||
|         self.__data['watching'][new_uuid].ensure_data_dir_exists() | ||||
|  | ||||
|         if write_to_disk_now: | ||||
|             self.sync_to_json() | ||||
|  | ||||
|         return new_uuid | ||||
|  | ||||
|     def visualselector_data_is_ready(self, watch_uuid): | ||||
| @@ -437,20 +454,42 @@ class ChangeDetectionStore: | ||||
|                     unlink(item) | ||||
|  | ||||
|     def import_proxy_list(self, filename): | ||||
|         import csv | ||||
|         with open(filename, newline='') as f: | ||||
|             reader = csv.reader(f, skipinitialspace=True) | ||||
|             # @todo This loop can could be improved | ||||
|             l = [] | ||||
|             for row in reader: | ||||
|                 if len(row): | ||||
|                     if len(row)>=2: | ||||
|                         l.append(tuple(row[:2])) | ||||
|                     else: | ||||
|                         l.append(tuple([row[0], row[0]])) | ||||
|             self.proxy_list = l if len(l) else None | ||||
|         with open(filename) as f: | ||||
|             self.proxy_list = json.load(f) | ||||
|             print ("Registered proxy list", list(self.proxy_list.keys())) | ||||
|  | ||||
|  | ||||
|     def get_preferred_proxy_for_watch(self, uuid): | ||||
|         """ | ||||
|         Returns the preferred proxy by ID key | ||||
|         :param uuid: UUID | ||||
|         :return: proxy "key" id | ||||
|         """ | ||||
|  | ||||
|         proxy_id = None | ||||
|         if self.proxy_list is None: | ||||
|             return None | ||||
|  | ||||
|         # If its a valid one | ||||
|         watch = self.data['watching'].get(uuid) | ||||
|  | ||||
|         if watch.get('proxy') and watch.get('proxy') in list(self.proxy_list.keys()): | ||||
|             return watch.get('proxy') | ||||
|  | ||||
|         # not valid (including None), try the system one | ||||
|         else: | ||||
|             system_proxy_id = self.data['settings']['requests'].get('proxy') | ||||
|             # Is not None and exists | ||||
|             if self.proxy_list.get(system_proxy_id): | ||||
|                 return system_proxy_id | ||||
|  | ||||
|         # Fallback - Did not resolve anything, use the first available | ||||
|         if system_proxy_id is None: | ||||
|             first_default = list(self.proxy_list)[0] | ||||
|             return first_default | ||||
|  | ||||
|         return None | ||||
|  | ||||
|     # Run all updates | ||||
|     # IMPORTANT - Each update could be run even when they have a new install and the schema is correct | ||||
|     #             Therefore - each `update_n` should be very careful about checking if it needs to actually run | ||||
| @@ -557,3 +596,22 @@ class ChangeDetectionStore: | ||||
|                 continue | ||||
|         return | ||||
|  | ||||
|  | ||||
|     # We incorrectly used common header overrides that should only apply to Requests | ||||
|     # These are now handled in content_fetcher::html_requests and shouldn't be passed to Playwright/Selenium | ||||
|     def update_7(self): | ||||
|         # These were hard-coded in early versions | ||||
|         for v in ['User-Agent', 'Accept', 'Accept-Encoding', 'Accept-Language']: | ||||
|             if self.data['settings']['headers'].get(v): | ||||
|                 del self.data['settings']['headers'][v] | ||||
|  | ||||
|     # Convert filters to a list of filters css_filter -> include_filters | ||||
|     def update_8(self): | ||||
|         for uuid, watch in self.data['watching'].items(): | ||||
|             try: | ||||
|                 existing_filter = watch.get('css_filter', '') | ||||
|                 if existing_filter: | ||||
|                     watch['include_filters'] = [existing_filter] | ||||
|             except: | ||||
|                 continue | ||||
|         return | ||||
| @@ -40,7 +40,8 @@ | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }} | ||||
|                         <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span> | ||||
|                         <span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br/> | ||||
|                         <span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br/> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.title, class="m-d") }} | ||||
| @@ -50,14 +51,15 @@ | ||||
|                         <span class="pure-form-message-inline">Organisational tag/group name used in the main listing page</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.time_use_system_default) }} | ||||
|                         <div style="opacity: 0.5"> | ||||
|                         {{ render_field(form.time_between_check, class="time-check-widget") }} | ||||
|                         {{ render_field(form.time_schedule_check_limit, class="time-schedule-check-limit-widget") }} | ||||
| @todo - add 'use default' checkbox | ||||
|                         </div> | ||||
|                         {% if has_empty_checktime %} | ||||
|                         <span class="pure-form-message-inline">Currently using the <a | ||||
|                                 href="{{ url_for('settings_page', uuid=uuid) }}">default global settings</a>, change to another value if you want to be specific.</span> | ||||
|                         {% else %} | ||||
|                         <span class="pure-form-message-inline">Set to blank to use the <a | ||||
|                                 href="{{ url_for('settings_page', uuid=uuid) }}">default global settings</a>.</span> | ||||
|                         {% endif %} | ||||
|  | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.extract_title_as_title) }} | ||||
| @@ -77,6 +79,7 @@ | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                             <p>Use the <strong>Basic</strong> method (default) where your watched site doesn't need Javascript to render.</p> | ||||
|                             <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p> | ||||
|                             Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a> | ||||
|                         </span> | ||||
|                     </div> | ||||
|                 {% if form.proxy %} | ||||
| @@ -172,19 +175,29 @@ User-Agent: wonderbra 1.0") }} | ||||
|                         </div> | ||||
|                     </fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% set field = render_field(form.css_filter, | ||||
|                             placeholder=".class-name or #some-id, or other CSS selector rule.", | ||||
|                         {% set field = render_field(form.include_filters, | ||||
|                             rows=5, | ||||
|                             placeholder="#example | ||||
| xpath://body/div/span[contains(@class, 'example-class')]", | ||||
|                             class="m-d") | ||||
|                         %} | ||||
|                         {{ field }} | ||||
|                         {% if '/text()' in  field %} | ||||
|                           <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br/> | ||||
|                         {% endif %} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                         <span class="pure-form-message-inline">One rule per line, <i>any</i> rules that matches will be used.<br/> | ||||
|                     <ul> | ||||
|                         <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> | ||||
|                         <li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required,  <a | ||||
|                                 href="https://jsonpath.com/" target="new">test your JSONPath here</a></li> | ||||
|                         <li>JSON - Limit text to this JSON rule, using either <a href="https://pypi.org/project/jsonpath-ng/" target="new">JSONPath</a> or <a href="https://stedolan.github.io/jq/" target="new">jq</a> (if installed). | ||||
|                             <ul> | ||||
|                                 <li>JSONPath: Prefix with <code>json:</code>, use <code>json:$</code> to force re-formatting if required,  <a href="https://jsonpath.com/" target="new">test your JSONPath here</a>.</li> | ||||
|                                 {% if jq_support %} | ||||
|                                 <li>jq: Prefix with <code>jq:</code> and <a href="https://jqplay.org/" target="new">test your jq here</a>. Using <a href="https://stedolan.github.io/jq/" target="new">jq</a> allows for complex filtering and processing of JSON data with built-in functions, regex, filtering, and more. See examples and documentation <a href="https://stedolan.github.io/jq/manual/" target="new">here</a>.</li> | ||||
|                                 {% else %} | ||||
|                                 <li>jq support not installed</li> | ||||
|                                 {% endif %} | ||||
|                             </ul> | ||||
|                         </li> | ||||
|                         <li>XPath - Limit text to this XPath rule, simply start with a forward-slash, | ||||
|                             <ul> | ||||
|                                 <li>Example:  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a | ||||
| @@ -193,7 +206,7 @@ User-Agent: wonderbra 1.0") }} | ||||
|                             </ul> | ||||
|                             </li> | ||||
|                     </ul> | ||||
|                     Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a | ||||
|                     Please be sure that you thoroughly understand how to write CSS, JSONPath, XPath{% if jq_support %}, or jq selector{%endif%} rules before filing an issue on GitHub! <a | ||||
|                                 href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/> | ||||
|                 </span> | ||||
|                     </div> | ||||
|   | ||||
| @@ -21,6 +21,7 @@ | ||||
|             <li class="tab"><a href="#fetching">Fetching</a></li> | ||||
|             <li class="tab"><a href="#filters">Global Filters</a></li> | ||||
|             <li class="tab"><a href="#api">API</a></li> | ||||
|             <li class="tab"><a href="#date-time">Date & Time</a></li> | ||||
|         </ul> | ||||
|     </div> | ||||
|     <div class="box-wrap inner"> | ||||
| @@ -30,6 +31,7 @@ | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.requests.form.time_between_check, class="time-check-widget") }} | ||||
|                         {{ render_field(form.requests.form.time_schedule_check_limit, class="time-schedule-check-limit-widget") }} | ||||
|                         <span class="pure-form-message-inline">Default time for all watches, when the watch does not have a specific time setting.</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
| @@ -91,7 +93,6 @@ | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|             </div> | ||||
|  | ||||
|             <div class="tab-pane-inner" id="fetching"> | ||||
|                 <div class="pure-control-group inline-radio"> | ||||
|                     {{ render_field(form.application.form.fetch_backend, class="fetch-backend") }} | ||||
| @@ -99,6 +100,8 @@ | ||||
|                         <p>Use the <strong>Basic</strong> method (default) where your watched sites don't need Javascript to render.</p> | ||||
|                         <p>The <strong>Chrome/Javascript</strong> method requires a network connection to a running WebDriver+Chrome server, set by the ENV var 'WEBDRIVER_URL'. </p> | ||||
|                     </span> | ||||
|                     <br/> | ||||
|                     Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using BrightData Proxies, find out more here.</a> | ||||
|                 </div> | ||||
|                 <fieldset class="pure-group" id="webdriver-override-options"> | ||||
|                     <div class="pure-form-message-inline"> | ||||
| @@ -168,6 +171,19 @@ nav | ||||
|                     </div> | ||||
|                 </div> | ||||
|             </div> | ||||
|             <div class="tab-pane-inner" id="date-time"> | ||||
|                 <fieldset> | ||||
|                     <div class="field-group"> | ||||
|                         {{ render_field(form.application.form.timezone) }} | ||||
|                     </div> | ||||
|                     <div class="field-group"> | ||||
|                         <p> | ||||
|                         <label>Local time</label> {{ datetime }}<br/> | ||||
|                         <label>Configured timezone:</label> {{ timezone }}<br/> | ||||
|                         </p> | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|             </div> | ||||
|  | ||||
|             <div id="actions"> | ||||
|                 <div class="pure-control-group"> | ||||
|   | ||||
| @@ -87,7 +87,7 @@ | ||||
|                     <a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a> | ||||
|                 </td> | ||||
|                 <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}} | ||||
|                     <a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a> | ||||
|                     <a class="external" target="_blank" rel="noopener" href="{{ watch.link.replace('source:','') }}"></a> | ||||
|                     <a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a> | ||||
|  | ||||
|                     {%if watch.fetch_backend == "html_webdriver" %}<img style="height: 1em; display:inline-block;" src="{{url_for('static_content', group='images', filename='Google-Chrome-icon.png')}}" />{% endif %} | ||||
|   | ||||
| @@ -41,7 +41,7 @@ def app(request): | ||||
|  | ||||
|     cleanup(datastore_path) | ||||
|  | ||||
|     app_config = {'datastore_path': datastore_path} | ||||
|     app_config = {'datastore_path': datastore_path, 'disable_checkver' : True} | ||||
|     cleanup(app_config['datastore_path']) | ||||
|     datastore = store.ChangeDetectionStore(datastore_path=app_config['datastore_path'], include_default_watches=False) | ||||
|     app = changedetection_app(app_config, datastore) | ||||
|   | ||||
							
								
								
									
										2
									
								
								changedetectionio/tests/proxy_list/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										2
									
								
								changedetectionio/tests/proxy_list/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,2 @@ | ||||
| """Tests for the app.""" | ||||
|  | ||||
							
								
								
									
										14
									
								
								changedetectionio/tests/proxy_list/conftest.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										14
									
								
								changedetectionio/tests/proxy_list/conftest.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,14 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| from .. import conftest | ||||
|  | ||||
| #def pytest_addoption(parser): | ||||
| #    parser.addoption("--url_suffix", action="store", default="identifier for request") | ||||
|  | ||||
|  | ||||
| #def pytest_generate_tests(metafunc): | ||||
| #    # This is called for every test. Only get/set command line arguments | ||||
| #    # if the argument is specified in the list of test "fixturenames". | ||||
| #    option_value = metafunc.config.option.url_suffix | ||||
| #    if 'url_suffix' in metafunc.fixturenames and option_value is not None: | ||||
| #        metafunc.parametrize("url_suffix", [option_value]) | ||||
							
								
								
									
										10
									
								
								changedetectionio/tests/proxy_list/proxies.json-example
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										10
									
								
								changedetectionio/tests/proxy_list/proxies.json-example
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,10 @@ | ||||
| { | ||||
|   "proxy-one": { | ||||
|     "label": "One", | ||||
|     "url": "http://127.0.0.1:3128" | ||||
|   }, | ||||
|   "proxy-two": { | ||||
|     "label": "two", | ||||
|     "url": "http://127.0.0.1:3129" | ||||
|   } | ||||
| } | ||||
							
								
								
									
										41
									
								
								changedetectionio/tests/proxy_list/squid.conf
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										41
									
								
								changedetectionio/tests/proxy_list/squid.conf
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,41 @@ | ||||
| acl localnet src 0.0.0.1-0.255.255.255  # RFC 1122 "this" network (LAN) | ||||
| acl localnet src 10.0.0.0/8             # RFC 1918 local private network (LAN) | ||||
| acl localnet src 100.64.0.0/10          # RFC 6598 shared address space (CGN) | ||||
| acl localnet src 169.254.0.0/16         # RFC 3927 link-local (directly plugged) machines | ||||
| acl localnet src 172.16.0.0/12          # RFC 1918 local private network (LAN) | ||||
| acl localnet src 192.168.0.0/16         # RFC 1918 local private network (LAN) | ||||
| acl localnet src fc00::/7               # RFC 4193 local private network range | ||||
| acl localnet src fe80::/10              # RFC 4291 link-local (directly plugged) machines | ||||
| acl localnet src 159.65.224.174 | ||||
| acl SSL_ports port 443 | ||||
| acl Safe_ports port 80          # http | ||||
| acl Safe_ports port 21          # ftp | ||||
| acl Safe_ports port 443         # https | ||||
| acl Safe_ports port 70          # gopher | ||||
| acl Safe_ports port 210         # wais | ||||
| acl Safe_ports port 1025-65535  # unregistered ports | ||||
| acl Safe_ports port 280         # http-mgmt | ||||
| acl Safe_ports port 488         # gss-http | ||||
| acl Safe_ports port 591         # filemaker | ||||
| acl Safe_ports port 777         # multiling http | ||||
| acl CONNECT method CONNECT | ||||
|  | ||||
| http_access deny !Safe_ports | ||||
| http_access deny CONNECT !SSL_ports | ||||
| http_access allow localhost manager | ||||
| http_access deny manager | ||||
| http_access allow localhost | ||||
| http_access allow localnet | ||||
| http_access deny all | ||||
| http_port 3128 | ||||
| coredump_dir /var/spool/squid | ||||
| refresh_pattern ^ftp:           1440    20%     10080 | ||||
| refresh_pattern ^gopher:        1440    0%      1440 | ||||
| refresh_pattern -i (/cgi-bin/|\?) 0     0%      0 | ||||
| refresh_pattern \/(Packages|Sources)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims | ||||
| refresh_pattern \/Release(|\.gpg)$ 0 0% 0 refresh-ims | ||||
| refresh_pattern \/InRelease$ 0 0% 0 refresh-ims | ||||
| refresh_pattern \/(Translation-.*)(|\.bz2|\.gz|\.xz)$ 0 0% 0 refresh-ims | ||||
| refresh_pattern .               0       20%     4320 | ||||
| logfile_rotate 0 | ||||
|  | ||||
							
								
								
									
										38
									
								
								changedetectionio/tests/proxy_list/test_multiple_proxy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										38
									
								
								changedetectionio/tests/proxy_list/test_multiple_proxy.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,38 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from ..util import live_server_setup | ||||
|  | ||||
| def test_preferred_proxy(client, live_server): | ||||
|     time.sleep(1) | ||||
|     live_server_setup(live_server) | ||||
|     time.sleep(1) | ||||
|     url = "http://chosen.changedetection.io" | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         # Because a URL won't show in squid/proxy logs due to it being SSLed | ||||
|         # Use plain HTTP or a specific domain-name here | ||||
|         data={"urls": url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     time.sleep(2) | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={ | ||||
|                 "include_filters": "", | ||||
|                 "fetch_backend": "html_requests", | ||||
|                 "headers": "", | ||||
|                 "proxy": "proxy-two", | ||||
|                 "tag": "", | ||||
|                 "url": url, | ||||
|               }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     time.sleep(2) | ||||
|     # Now the request should appear in the second-squid logs | ||||
							
								
								
									
										19
									
								
								changedetectionio/tests/proxy_list/test_proxy.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										19
									
								
								changedetectionio/tests/proxy_list/test_proxy.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,19 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client | ||||
|  | ||||
| # just make a request, we will grep in the docker logs to see it actually got called | ||||
| def test_check_basic_change_detection_functionality(client, live_server): | ||||
|     live_server_setup(live_server) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         # Because a URL won't show in squid/proxy logs due to it being SSLed | ||||
|         # Use plain HTTP or a specific domain-name here | ||||
|         data={"urls": "http://one.changedetection.io"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(3) | ||||
| @@ -147,6 +147,16 @@ def test_api_simple(client, live_server): | ||||
|     # @todo how to handle None/default global values? | ||||
|     assert watch['history_n'] == 2, "Found replacement history section, which is in its own API" | ||||
|  | ||||
|     # basic systeminfo check | ||||
|     res = client.get( | ||||
|         url_for("systeminfo"), | ||||
|         headers={'x-api-key': api_key}, | ||||
|     ) | ||||
|     info = json.loads(res.data) | ||||
|     assert info.get('watch_count') == 1 | ||||
|     assert info.get('uptime') > 0.5 | ||||
|  | ||||
|  | ||||
|     # Finally delete the watch | ||||
|     res = client.delete( | ||||
|         url_for("watch", uuid=watch_uuid), | ||||
|   | ||||
| @@ -23,7 +23,7 @@ def test_basic_auth(client, live_server): | ||||
|     # Check form validation | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters": "", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|   | ||||
| @@ -3,7 +3,7 @@ | ||||
| import time | ||||
| from flask import url_for | ||||
| from urllib.request import urlopen | ||||
| from .util import set_original_response, set_modified_response, live_server_setup | ||||
| from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks | ||||
|  | ||||
| sleep_time_for_fetch_thread = 3 | ||||
|  | ||||
| @@ -36,7 +36,7 @@ def test_check_basic_change_detection_functionality(client, live_server): | ||||
|         client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|         # Give the thread time to pick it up | ||||
|         time.sleep(sleep_time_for_fetch_thread) | ||||
|         wait_for_all_checks(client) | ||||
|  | ||||
|         # It should report nothing found (no new 'unviewed' class) | ||||
|         res = client.get(url_for("index")) | ||||
| @@ -69,7 +69,7 @@ def test_check_basic_change_detection_functionality(client, live_server): | ||||
|     res = client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     assert b'1 watches are queued for rechecking.' in res.data | ||||
|  | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Now something should be ready, indicated by having a 'unviewed' class | ||||
|     res = client.get(url_for("index")) | ||||
| @@ -98,14 +98,14 @@ def test_check_basic_change_detection_functionality(client, live_server): | ||||
|     assert b'which has this one new line' in res.data | ||||
|     assert b'Which is across multiple lines' not in res.data | ||||
|  | ||||
|     time.sleep(2) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Do this a few times.. ensures we don't accidentally set the status | ||||
|     for n in range(2): | ||||
|         client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|         # Give the thread time to pick it up | ||||
|         time.sleep(sleep_time_for_fetch_thread) | ||||
|         wait_for_all_checks(client) | ||||
|  | ||||
|         # It should report nothing found (no new 'unviewed' class) | ||||
|         res = client.get(url_for("index")) | ||||
| @@ -125,7 +125,7 @@ def test_check_basic_change_detection_functionality(client, live_server): | ||||
|     ) | ||||
|  | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|   | ||||
| @@ -1,18 +1,31 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from .util import set_original_response, set_modified_response, live_server_setup | ||||
| from flask import url_for | ||||
| from urllib.request import urlopen | ||||
| from . util import set_original_response, set_modified_response, live_server_setup | ||||
| from zipfile import ZipFile | ||||
| import re | ||||
| import time | ||||
|  | ||||
|  | ||||
| def test_backup(client, live_server): | ||||
|  | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     set_original_response() | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": url_for('test_endpoint', _external=True)}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(3) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("get_backup"), | ||||
|         follow_redirects=True | ||||
| @@ -20,6 +33,19 @@ def test_backup(client, live_server): | ||||
|  | ||||
|     # Should get the right zip content type | ||||
|     assert res.content_type == "application/zip" | ||||
|  | ||||
|     # Should be PK/ZIP stream | ||||
|     assert res.data.count(b'PK') >= 2 | ||||
|  | ||||
|     # ZipFile from buffer seems non-obvious, just save it instead | ||||
|     with open("download.zip", 'wb') as f: | ||||
|         f.write(res.data) | ||||
|  | ||||
|     zip = ZipFile('download.zip') | ||||
|     l = zip.namelist() | ||||
|     uuid4hex = re.compile('^[a-f0-9]{8}-?[a-f0-9]{4}-?4[a-f0-9]{3}-?[89ab][a-f0-9]{3}-?[a-f0-9]{12}.*txt', re.I) | ||||
|     newlist = list(filter(uuid4hex.match, l))  # Read Note below | ||||
|  | ||||
|     # Should be two txt files in the archive (history and the snapshot) | ||||
|     assert len(newlist) == 2 | ||||
|  | ||||
|   | ||||
| @@ -46,22 +46,23 @@ def set_modified_response(): | ||||
|  | ||||
|  | ||||
| # Test that the CSS extraction works as we expect; what matters here is the correct placement of newlines (\n) | ||||
| def test_css_filter_output(): | ||||
|     from changedetectionio import fetch_site_status | ||||
| def test_include_filters_output(): | ||||
|     from inscriptis import get_text | ||||
|  | ||||
|     # Check text with sub-parts renders correctly | ||||
|     content = """<html> <body><div id="thingthing" >  Some really <b>bold</b> text  </div> </body> </html>""" | ||||
|     html_blob = css_filter(css_filter="#thingthing", html_content=content) | ||||
|     html_blob = include_filters(include_filters="#thingthing", html_content=content) | ||||
|     text = get_text(html_blob) | ||||
|     assert text == "  Some really bold text" | ||||
|  | ||||
|     content = """<html> <body> | ||||
|     <p>foo bar blah</p> | ||||
|     <div class="parts">Block A</div> <div class="parts">Block B</div></body>  | ||||
|     <DIV class="parts">Block A</DiV> <div class="parts">Block B</DIV></body>  | ||||
|     </html> | ||||
| """ | ||||
|     html_blob = css_filter(css_filter=".parts", html_content=content) | ||||
|  | ||||
|     # in xPath this would be //*[@class='parts'] | ||||
|     html_blob = include_filters(include_filters=".parts", html_content=content) | ||||
|     text = get_text(html_blob) | ||||
|  | ||||
|     # Divs are converted to 4 whitespaces by inscriptis | ||||
| @@ -69,10 +70,10 @@ def test_css_filter_output(): | ||||
|  | ||||
|  | ||||
| # Tests the whole stack works with the CSS Filter | ||||
| def test_check_markup_css_filter_restriction(client, live_server): | ||||
| def test_check_markup_include_filters_restriction(client, live_server): | ||||
|     sleep_time_for_fetch_thread = 3 | ||||
|  | ||||
|     css_filter = "#sametext" | ||||
|     include_filters = "#sametext" | ||||
|  | ||||
|     set_original_response() | ||||
|  | ||||
| @@ -98,7 +99,7 @@ def test_check_markup_css_filter_restriction(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| @@ -107,7 +108,7 @@ def test_check_markup_css_filter_restriction(client, live_server): | ||||
|     res = client.get( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|     ) | ||||
|     assert bytes(css_filter.encode('utf-8')) in res.data | ||||
|     assert bytes(include_filters.encode('utf-8')) in res.data | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
| @@ -126,3 +127,58 @@ def test_check_markup_css_filter_restriction(client, live_server): | ||||
|     # Because it should be looking at only that 'sametext' id | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|  | ||||
| # Tests the whole stack works with multiple include filters (a CSS selector and an xPath rule combined) | ||||
| def test_check_multiple_filters(client, live_server): | ||||
|     sleep_time_for_fetch_thread = 3 | ||||
|  | ||||
|     include_filters = "#blob-a\r\nxpath://*[contains(@id,'blob-b')]" | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("""<html><body> | ||||
|      <div id="blob-a">Blob A</div> | ||||
|      <div id="blob-b">Blob B</div> | ||||
|      <div id="blob-c">Blob C</div> | ||||
|      </body> | ||||
|      </html> | ||||
|     """) | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Go to the edit page and set our include filters | ||||
|     # (one CSS selector and one xPath rule, separated by a newline) | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"include_filters": include_filters, | ||||
|               "url": test_url, | ||||
|               "tag": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # Only the two blobs should be here | ||||
|     assert b"Blob A" in res.data # CSS was ok | ||||
|     assert b"Blob B" in res.data # xPath was ok | ||||
|     assert b"Blob C" not in res.data # Should not be included | ||||
|   | ||||
| @@ -88,7 +88,7 @@ def test_check_filter_multiline(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": '', | ||||
|         data={"include_filters": '', | ||||
|               'extract_text': '/something.+?6 billion.+?lines/si', | ||||
|               "url": test_url, | ||||
|               "tag": "", | ||||
| @@ -116,7 +116,7 @@ def test_check_filter_multiline(client, live_server): | ||||
|  | ||||
| def test_check_filter_and_regex_extract(client, live_server): | ||||
|     sleep_time_for_fetch_thread = 3 | ||||
|     css_filter = ".changetext" | ||||
|     include_filters = ".changetext" | ||||
|  | ||||
|     set_original_response() | ||||
|  | ||||
| @@ -143,7 +143,7 @@ def test_check_filter_and_regex_extract(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": css_filter, | ||||
|         data={"include_filters": include_filters, | ||||
|               'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i', | ||||
|               "url": test_url, | ||||
|               "tag": "", | ||||
|   | ||||
| @@ -92,7 +92,7 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se | ||||
|         "tag": "my tag", | ||||
|         "title": "my title", | ||||
|         "headers": "", | ||||
|         "css_filter": '.ticket-available', | ||||
|         "include_filters": '.ticket-available', | ||||
|         "fetch_backend": "html_requests"}) | ||||
|  | ||||
|     res = client.post( | ||||
|   | ||||
| @@ -76,7 +76,7 @@ def run_filter_test(client, content_filter): | ||||
|         "title": "my title", | ||||
|         "headers": "", | ||||
|         "filter_failure_notification_send": 'y', | ||||
|         "css_filter": content_filter, | ||||
|         "include_filters": content_filter, | ||||
|         "fetch_backend": "html_requests"}) | ||||
|  | ||||
|     res = client.post( | ||||
| @@ -95,7 +95,7 @@ def run_filter_test(client, content_filter): | ||||
|         time.sleep(3) | ||||
|  | ||||
|     # We should see something in the frontend | ||||
|     assert b'Warning, filter' in res.data | ||||
|     assert b'Warning, no filters were found' in res.data | ||||
|  | ||||
|     # Now it should exist and contain our "filter not found" alert | ||||
|     assert os.path.isfile("test-datastore/notification.txt") | ||||
| @@ -131,7 +131,7 @@ def run_filter_test(client, content_filter): | ||||
| def test_setup(live_server): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
| def test_check_css_filter_failure_notification(client, live_server): | ||||
| def test_check_include_filters_failure_notification(client, live_server): | ||||
|     set_original_response() | ||||
|     time.sleep(1) | ||||
|     run_filter_test(client, '#nope-doesnt-exist') | ||||
|   | ||||
							
								
								
									
										33
									
								
								changedetectionio/tests/test_jinja2.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										33
									
								
								changedetectionio/tests/test_jinja2.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,33 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup | ||||
|  | ||||
|  | ||||
| # Jinja2 expressions in the URL query (here `{% now %}`) should be rendered, so the preview shows an actual date | ||||
| def test_jinja2_in_url_query(client, live_server): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_return_query', _external=True) | ||||
|  | ||||
|     # because url_for() will URL-encode the var, but we don't here | ||||
|     full_url = "{}?{}".format(test_url, | ||||
|                               "date={% now 'Europe/Berlin', '%Y' %}.{% now 'Europe/Berlin', '%m' %}.{% now 'Europe/Berlin', '%d' %}", ) | ||||
|     res = client.post( | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": full_url, "tag": "test"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Watch added" in res.data | ||||
|     time.sleep(3) | ||||
|     # The preview should contain the rendered date, whose year starts with '2' | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b'date=2' in res.data | ||||
| @@ -2,10 +2,15 @@ | ||||
| # coding=utf-8 | ||||
| 
 | ||||
| import time | ||||
| from flask import url_for | ||||
| from flask import url_for, escape | ||||
| from . util import live_server_setup | ||||
| import pytest | ||||
| jq_support = True | ||||
| 
 | ||||
| try: | ||||
|     import jq | ||||
| except ModuleNotFoundError: | ||||
|     jq_support = False | ||||
| 
 | ||||
| def test_setup(live_server): | ||||
|     live_server_setup(live_server) | ||||
| @@ -36,16 +41,28 @@ and it can also be repeated | ||||
|     from .. import html_tools | ||||
| 
 | ||||
|     # See that we can find the second <script> one, which is not broken, and matches our filter | ||||
|     text = html_tools.extract_json_as_string(content, "$.offers.price") | ||||
|     text = html_tools.extract_json_as_string(content, "json:$.offers.price") | ||||
|     assert text == "23.5" | ||||
| 
 | ||||
|     text = html_tools.extract_json_as_string('{"id":5}', "$.id") | ||||
|     # also check for jq | ||||
|     if jq_support: | ||||
|         text = html_tools.extract_json_as_string(content, "jq:.offers.price") | ||||
|         assert text == "23.5" | ||||
| 
 | ||||
|         text = html_tools.extract_json_as_string('{"id":5}', "jq:.id") | ||||
|         assert text == "5" | ||||
| 
 | ||||
|     text = html_tools.extract_json_as_string('{"id":5}', "json:$.id") | ||||
|     assert text == "5" | ||||
| 
 | ||||
|     # When nothing at all is found, it should throw JSONNotFound | ||||
|     # Which is caught and shown to the user in the watch-overview table | ||||
|     with pytest.raises(html_tools.JSONNotFound) as e_info: | ||||
|         html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "$.id") | ||||
|         html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "json:$.id") | ||||
| 
 | ||||
|     if jq_support: | ||||
|         with pytest.raises(html_tools.JSONNotFound) as e_info: | ||||
|             html_tools.extract_json_as_string('COMPLETE GIBBERISH, NO JSON!', "jq:.id") | ||||
| 
 | ||||
| def set_original_ext_response(): | ||||
|     data = """ | ||||
| @@ -66,6 +83,7 @@ def set_original_ext_response(): | ||||
| 
 | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(data) | ||||
|     return None | ||||
| 
 | ||||
| def set_modified_ext_response(): | ||||
|     data = """ | ||||
| @@ -86,6 +104,7 @@ def set_modified_ext_response(): | ||||
| 
 | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(data) | ||||
|     return None | ||||
| 
 | ||||
| def set_original_response(): | ||||
|     test_return_data = """ | ||||
| @@ -113,7 +132,7 @@ def set_original_response(): | ||||
|     return None | ||||
| 
 | ||||
| 
 | ||||
| def set_response_with_html(): | ||||
| def set_json_response_with_html(): | ||||
|     test_return_data = """ | ||||
|     { | ||||
|       "test": [ | ||||
| @@ -157,7 +176,7 @@ def set_modified_response(): | ||||
| def test_check_json_without_filter(client, live_server): | ||||
|     # Request a JSON document from an application/json source containing HTML | ||||
|     # and be sure it doesn't get chewed up by inscriptis | ||||
|     set_response_with_html() | ||||
|     set_json_response_with_html() | ||||
| 
 | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
| @@ -170,9 +189,6 @@ def test_check_json_without_filter(client, live_server): | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| 
 | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
| 
 | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
| 
 | ||||
| @@ -181,13 +197,14 @@ def test_check_json_without_filter(client, live_server): | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| 
 | ||||
|     # Should still see '"html": "<b>"' | ||||
|     assert b'"<b>' in res.data | ||||
|     assert res.data.count(b'{\n') >= 2 | ||||
| 
 | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
| 
 | ||||
| def test_check_json_filter(client, live_server): | ||||
|     json_filter = 'json:boss.name' | ||||
| 
 | ||||
| def check_json_filter(json_filter, client, live_server): | ||||
|     set_original_response() | ||||
| 
 | ||||
|     # Give the endpoint time to spin up | ||||
| @@ -202,9 +219,6 @@ def test_check_json_filter(client, live_server): | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
| 
 | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
| 
 | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
| 
 | ||||
| @@ -212,7 +226,7 @@ def test_check_json_filter(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": json_filter, | ||||
|         data={"include_filters": json_filter, | ||||
|               "url": test_url, | ||||
|               "tag": "", | ||||
|               "headers": "", | ||||
| @@ -226,10 +240,7 @@ def test_check_json_filter(client, live_server): | ||||
|     res = client.get( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|     ) | ||||
|     assert bytes(json_filter.encode('utf-8')) in res.data | ||||
| 
 | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     assert bytes(escape(json_filter).encode('utf-8')) in res.data | ||||
| 
 | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
| @@ -252,10 +263,17 @@ def test_check_json_filter(client, live_server): | ||||
|     # And #462 - check we see the proper utf-8 string there | ||||
|     assert "Örnsköldsvik".encode('utf-8') in res.data | ||||
| 
 | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
| 
 | ||||
| def test_check_json_filter_bool_val(client, live_server): | ||||
|     json_filter = "json:$['available']" | ||||
| def test_check_jsonpath_filter(client, live_server): | ||||
|     check_json_filter('json:boss.name', client, live_server) | ||||
| 
 | ||||
| def test_check_jq_filter(client, live_server): | ||||
|     if jq_support: | ||||
|         check_json_filter('jq:.boss.name', client, live_server) | ||||
| 
 | ||||
| def check_json_filter_bool_val(json_filter, client, live_server): | ||||
|     set_original_response() | ||||
| 
 | ||||
|     # Give the endpoint time to spin up | ||||
| @@ -275,7 +293,7 @@ def test_check_json_filter_bool_val(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": json_filter, | ||||
|         data={"include_filters": json_filter, | ||||
|               "url": test_url, | ||||
|               "tag": "", | ||||
|               "headers": "", | ||||
| @@ -285,11 +303,6 @@ def test_check_json_filter_bool_val(client, live_server): | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| 
 | ||||
|     time.sleep(3) | ||||
| 
 | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
| 
 | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
|     #  Make a change | ||||
| @@ -304,14 +317,22 @@ def test_check_json_filter_bool_val(client, live_server): | ||||
|     # But the change should be there, though it's hard to test the change was detected because it will show old and new versions | ||||
|     assert b'false' in res.data | ||||
| 
 | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
| 
 | ||||
| def test_check_jsonpath_filter_bool_val(client, live_server): | ||||
|     check_json_filter_bool_val("json:$['available']", client, live_server) | ||||
| 
 | ||||
| def test_check_jq_filter_bool_val(client, live_server): | ||||
|     if jq_support: | ||||
|         check_json_filter_bool_val("jq:.available", client, live_server) | ||||
| 
 | ||||
| # Re #265 - Extended JSON selector test | ||||
| # Stuff to consider here | ||||
| # - Selector should be allowed to return empty when it doesn't match (people might wait for some condition) | ||||
| # - The 'diff' tab could show the old and new content | ||||
| # - Form should let us enter a selector that doesn't (yet) match anything | ||||
| def test_check_json_ext_filter(client, live_server): | ||||
|     json_filter = 'json:$[?(@.status==Sold)]' | ||||
| 
 | ||||
| def check_json_ext_filter(json_filter, client, live_server): | ||||
|     set_original_ext_response() | ||||
| 
 | ||||
|     # Give the endpoint time to spin up | ||||
| @@ -326,9 +347,6 @@ def test_check_json_ext_filter(client, live_server): | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
| 
 | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
| 
 | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
| 
 | ||||
| @@ -336,7 +354,7 @@ def test_check_json_ext_filter(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": json_filter, | ||||
|         data={"include_filters": json_filter, | ||||
|               "url": test_url, | ||||
|               "tag": "", | ||||
|               "headers": "", | ||||
| @@ -350,10 +368,7 @@ def test_check_json_ext_filter(client, live_server): | ||||
|     res = client.get( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|     ) | ||||
|     assert bytes(json_filter.encode('utf-8')) in res.data | ||||
| 
 | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     assert bytes(escape(json_filter).encode('utf-8')) in res.data | ||||
| 
 | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
| @@ -376,3 +391,12 @@ def test_check_json_ext_filter(client, live_server): | ||||
|     assert b'ForSale' not in res.data | ||||
|     assert b'Sold' in res.data | ||||
| 
 | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
| 
 | ||||
| def test_check_jsonpath_ext_filter(client, live_server): | ||||
|     check_json_ext_filter('json:$[?(@.status==Sold)]', client, live_server) | ||||
| 
 | ||||
| def test_check_jq_ext_filter(client, live_server): | ||||
|     if jq_support: | ||||
|         check_json_ext_filter('jq:.[] | select(.status | contains("Sold"))', client, live_server) | ||||
| @@ -14,7 +14,7 @@ def test_share_watch(client, live_server): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     css_filter = ".nice-filter" | ||||
|     include_filters = ".nice-filter" | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
| @@ -29,7 +29,7 @@ def test_share_watch(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": css_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters": include_filters, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| @@ -37,7 +37,7 @@ def test_share_watch(client, live_server): | ||||
|     res = client.get( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|     ) | ||||
|     assert bytes(css_filter.encode('utf-8')) in res.data | ||||
|     assert bytes(include_filters.encode('utf-8')) in res.data | ||||
|  | ||||
|     # click share the link | ||||
|     res = client.get( | ||||
| @@ -73,4 +73,8 @@ def test_share_watch(client, live_server): | ||||
|     res = client.get( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|     ) | ||||
|     assert bytes(css_filter.encode('utf-8')) in res.data | ||||
|     assert bytes(include_filters.encode('utf-8')) in res.data | ||||
|  | ||||
|     # Check it saved the URL | ||||
|     res = client.get(url_for("index")) | ||||
|     assert bytes(test_url.encode('utf-8')) in res.data | ||||
|   | ||||
| @@ -57,10 +57,9 @@ def test_check_basic_change_detection_functionality_source(client, live_server): | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
| # `subtractive_selectors` should still work in `source:` type requests | ||||
| def test_check_ignore_elements(client, live_server): | ||||
|     set_original_response() | ||||
|  | ||||
|     time.sleep(2) | ||||
|     test_url = 'source:'+url_for('test_endpoint', _external=True) | ||||
|     # Add our URL to the import page | ||||
| @@ -77,9 +76,9 @@ def test_check_ignore_elements(client, live_server): | ||||
|     ##################### | ||||
|     # We want <span> and <p> ONLY, but ignore span with .foobar-detection | ||||
|  | ||||
|     res = client.post( | ||||
|     client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters": 'span,p', "url": test_url, "tag": "", "subtractive_selectors": ".foobar-detection", 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
| @@ -89,7 +88,6 @@ def test_check_ignore_elements(client, live_server): | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b'foobar-detection' not in res.data | ||||
|     assert b'<br' not in res.data | ||||
|     assert b'<p' in res.data | ||||
| @@ -49,7 +49,7 @@ def test_trigger_regex_functionality_with_filter(client, live_server): | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"trigger_text": "/cool.stuff/", | ||||
|               "url": test_url, | ||||
|               "css_filter": '#in-here', | ||||
|               "include_filters": '#in-here', | ||||
|               "fetch_backend": "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|   | ||||
| @@ -22,7 +22,7 @@ def test_check_watch_field_storage(client, live_server): | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={ "notification_urls": "json://127.0.0.1:30000\r\njson://128.0.0.1\r\n", | ||||
|                "time_between_check-minutes": 126, | ||||
|                "css_filter" : ".fooclass", | ||||
|                "include_filters" : ".fooclass", | ||||
|                "title" : "My title", | ||||
|                "ignore_text" : "ignore this", | ||||
|                "url": test_url, | ||||
|   | ||||
| @@ -89,7 +89,7 @@ def test_check_xpath_filter_utf8(client, live_server): | ||||
|     time.sleep(1) | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| @@ -143,7 +143,7 @@ def test_check_xpath_text_function_utf8(client, live_server): | ||||
|     time.sleep(1) | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| @@ -182,9 +182,6 @@ def test_check_markup_xpath_filter_restriction(client, live_server): | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
| @@ -192,7 +189,7 @@ def test_check_markup_xpath_filter_restriction(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters": xpath_filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
| @@ -230,10 +227,11 @@ def test_xpath_validation(client, live_server): | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(2) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters": "/something horrible", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"is not a valid XPath expression" in res.data | ||||
| @@ -242,7 +240,7 @@ def test_xpath_validation(client, live_server): | ||||
|  | ||||
|  | ||||
| # actually only really used by the distill.io importer, but could be handy too | ||||
| def test_check_with_prefix_css_filter(client, live_server): | ||||
| def test_check_with_prefix_include_filters(client, live_server): | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| @@ -263,7 +261,7 @@ def test_check_with_prefix_css_filter(client, live_server): | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter":  "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         data={"include_filters":  "xpath://*[contains(@class, 'sametext')]", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|   | ||||
| @@ -86,6 +86,7 @@ def extract_UUID_from_client(client): | ||||
| def wait_for_all_checks(client): | ||||
|     # Loop waiting until done.. | ||||
|     attempt=0 | ||||
|     time.sleep(0.1) | ||||
|     while attempt < 60: | ||||
|         time.sleep(1) | ||||
|         res = client.get(url_for("index")) | ||||
| @@ -159,5 +160,10 @@ def live_server_setup(live_server): | ||||
|         ret = " ".join([auth.username, auth.password, auth.type]) | ||||
|         return ret | ||||
|  | ||||
|     # Just return some GET var | ||||
|     @live_server.app.route('/test-return-query', methods=['GET']) | ||||
|     def test_return_query(): | ||||
|         return request.query_string | ||||
|  | ||||
|     live_server.start() | ||||
|  | ||||
|   | ||||
| @@ -13,9 +13,9 @@ def test_visual_selector_content_ready(client, live_server): | ||||
|     live_server_setup(live_server) | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page, maybe better to use something we control? | ||||
|     # We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket | ||||
|     test_url = 'https://news.ycombinator.com' | ||||
|     # Add our URL to the import page, because the docker container (playwright/selenium) won't be able to connect to our usual test URL | ||||
|     test_url = "https://changedetection.io/ci-test/test-runjs.html" | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'}, | ||||
| @@ -25,13 +25,27 @@ def test_visual_selector_content_ready(client, live_server): | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first", unpause_on_save=1), | ||||
|         data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"}, | ||||
|         data={ | ||||
|               "url": test_url, | ||||
|               "tag": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_webdriver", | ||||
|               'webdriver_js_execute_code': 'document.querySelector("button[name=test-button]").click();' | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"unpaused" in res.data | ||||
|     time.sleep(1) | ||||
|     wait_for_all_checks(client) | ||||
|     uuid = extract_UUID_from_client(client) | ||||
|  | ||||
|     # Check the JS execute code before extract worked | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b'I smell JavaScript' in res.data | ||||
|  | ||||
|     assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist" | ||||
|     assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist" | ||||
|  | ||||
|   | ||||
| @@ -4,7 +4,7 @@ import queue | ||||
| import time | ||||
|  | ||||
| from changedetectionio import content_fetcher | ||||
| from changedetectionio.html_tools import FilterNotFoundInResponse | ||||
| from changedetectionio.fetch_site_status import FilterNotFoundInResponse | ||||
|  | ||||
| # A single update worker | ||||
| # | ||||
| @@ -91,8 +91,8 @@ class update_worker(threading.Thread): | ||||
|             return | ||||
|  | ||||
|         n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|                     'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( | ||||
|                         watch['css_filter'], | ||||
|                     'notification_body': "Your configured CSS/xPath filters of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( | ||||
|                         ", ".join(watch['include_filters']), | ||||
|                         threshold), | ||||
|                     'notification_format': 'text'} | ||||
|  | ||||
| @@ -189,7 +189,7 @@ class update_worker(threading.Thread): | ||||
|                         if not self.datastore.data['watching'].get(uuid): | ||||
|                             continue | ||||
|  | ||||
|                         err_text = "Warning, filter '{}' not found".format(str(e)) | ||||
|                         err_text = "Warning, no filters were found, no change detection ran." | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            # So that we get a trigger when the content is added again | ||||
|                                                                            'previous_md5': ''}) | ||||
| @@ -282,10 +282,12 @@ class update_worker(threading.Thread): | ||||
|                             self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e)) | ||||
|                             self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) | ||||
|  | ||||
|  | ||||
|                     # Always record that we at least tried | ||||
|                     count = self.datastore.data['watching'][uuid].get('check_count', 0) + 1 | ||||
|                     self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), | ||||
|                                                                        'last_checked': round(time.time())}) | ||||
|                                                                        'last_checked': round(time.time()), | ||||
|                                                                        'check_count': count | ||||
|                                                                        }) | ||||
|  | ||||
|                     # Always save the screenshot if it's available | ||||
|                     if update_handler.screenshot: | ||||
|   | ||||
| @@ -6,6 +6,8 @@ services: | ||||
|       hostname: changedetection | ||||
|       volumes: | ||||
|         - changedetection-data:/datastore | ||||
| # Configurable proxy list support, see https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#proxy-list-support | ||||
| #        - ./proxies.json:/datastore/proxies.json | ||||
|  | ||||
|   #    environment: | ||||
|   #        Default listening port, can also be changed with the -p option | ||||
| @@ -43,6 +45,9 @@ services: | ||||
|   #        Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;` | ||||
|   #        More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory | ||||
|   #      - USE_X_SETTINGS=1 | ||||
|   # | ||||
|   #        Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname. | ||||
|   #      - HIDE_REFERER=true | ||||
|  | ||||
|       # Comment out ports: when using behind a reverse proxy, enable networks: etc. | ||||
|       ports: | ||||
|   | ||||
							
								
								
									
										
											BIN
										
									
								
								docs/proxy-example.jpg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										
											BIN
										
									
								
								docs/proxy-example.jpg
									
									
									
									
									
										Normal file
									
								
							
										
											Binary file not shown.
										
									
								
							| After Width: | Height: | Size: 46 KiB | 
| @@ -1,31 +1,36 @@ | ||||
| flask~= 2.0 | ||||
| flask~=2.0 | ||||
| flask_wtf | ||||
| eventlet>=0.31.0 | ||||
| validators | ||||
| timeago ~=1.0 | ||||
| inscriptis ~= 2.2 | ||||
| feedgen ~= 0.9 | ||||
| flask-login ~= 0.5 | ||||
| timeago~=1.0 | ||||
| inscriptis~=2.2 | ||||
| feedgen~=0.9 | ||||
| flask-login~=0.5 | ||||
| flask_restful | ||||
| pytz | ||||
|  | ||||
| # Set these versions together to avoid a RequestsDependencyWarning | ||||
| requests[socks] ~= 2.26 | ||||
| urllib3 > 1.26 | ||||
| chardet > 2.3.0 | ||||
| # >= 2.26 also adds Brotli support if brotli is installed | ||||
| brotli~=1.0 | ||||
| requests[socks] ~=2.28 | ||||
|  | ||||
| wtforms ~= 3.0 | ||||
| jsonpath-ng ~= 1.5.3 | ||||
| urllib3>1.26 | ||||
| chardet>2.3.0 | ||||
|  | ||||
| wtforms~=3.0 | ||||
| jsonpath-ng~=1.5.3 | ||||
|  | ||||
| # jq not available on Windows so must be installed manually | ||||
|  | ||||
| # Notification library | ||||
| apprise ~= 1.0.0 | ||||
| apprise~=1.1.0 | ||||
|  | ||||
| # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315 | ||||
| paho-mqtt | ||||
|  | ||||
| # Pinned version of cryptography otherwise | ||||
| # ERROR: Could not build wheels for cryptography which use PEP 517 and cannot be installed directly | ||||
| cryptography ~= 3.4 | ||||
| cryptography~=3.4 | ||||
|  | ||||
| # Used for CSS filtering | ||||
| bs4 | ||||
| @@ -34,11 +39,20 @@ bs4 | ||||
| lxml | ||||
|  | ||||
| # 3.141 was missing socksVersion, 3.150 was not in pypi, so we try 4.1.0 | ||||
| selenium ~= 4.1.0 | ||||
| selenium~=4.1.0 | ||||
|  | ||||
| # https://stackoverflow.com/questions/71652965/importerror-cannot-import-name-safe-str-cmp-from-werkzeug-security/71653849#71653849 | ||||
| # ImportError: cannot import name 'safe_str_cmp' from 'werkzeug.security' | ||||
| # need to revisit flask login versions | ||||
| werkzeug ~= 2.0.0 | ||||
| werkzeug~=2.0.0 | ||||
|  | ||||
| # Templating, so far just in the URLs but in the future can be for the notifications also | ||||
| jinja2~=3.1 | ||||
| jinja2-time | ||||
|  | ||||
| # https://peps.python.org/pep-0508/#environment-markers | ||||
| # https://github.com/dgtlmoon/changedetection.io/pull/1009 | ||||
| jq~=1.3 ;python_version >= "3.8" and sys_platform == "linux" | ||||
|  | ||||
| # playwright is installed at Dockerfile build time because it's not available on all platforms | ||||
|  | ||||
|   | ||||
		Reference in New Issue
	
	Block a user