mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 14:47:21 +00:00 
			
		
		
		
	Compare commits
	
		
			83 Commits
		
	
	
		
			bug/RSS-fe
			...
			ui-tweaks
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | c0dc18b849 | ||
|   | a1c3107cd6 | ||
|   | 8fef3ff4ab | ||
|   | baa25c9f9e | ||
|   | 488699b7d4 | ||
|   | cf3a1ee3e3 | ||
|   | daae43e9f9 | ||
|   | cdeedaa65c | ||
|   | 3c9d2ded38 | ||
|   | 9f4364a130 | ||
|   | 5bd9eaf99d | ||
|   | b1c51c0a65 | ||
|   | 232bd92389 | ||
|   | e6173357a9 | ||
|   | f2b8888aff | ||
|   | 9c46f175f9 | ||
|   | 1f27865fdf | ||
|   | faa42d75e0 | ||
|   | 3b6e6d85bb | ||
|   | 30d6a272ce | ||
|   | 291700554e | ||
|   | a82fad7059 | ||
|   | c2fe5ae0d1 | ||
|   | 5beefdb7cc | ||
|   | 872bbba71c | ||
|   | d578de1a35 | ||
|   | cdc104be10 | ||
|   | dd0eeca056 | ||
|   | a95468be08 | ||
|   | ace44d0e00 | ||
|   | ebb8b88621 | ||
|   | 12fc2200de | ||
|   | 52d3d375ba | ||
|   | 08117089e6 | ||
|   | 2ba3a6d53f | ||
|   | 2f636553a9 | ||
|   | 0bde48b282 | ||
|   | fae1164c0b | ||
|   | 169c293143 | ||
|   | 46cb5cff66 | ||
|   | 05584ea886 | ||
|   | 176a591357 | ||
|   | 15569f9592 | ||
|   | 5f9e475fe0 | ||
|   | 34b8784f50 | ||
|   | 2b054ced8c | ||
|   | 6553980cd5 | ||
|   | 7c12c47204 | ||
|   | dbd9b470d7 | ||
|   | 83555a9991 | ||
|   | 5bfdb28bd2 | ||
|   | 31a6a6717b | ||
|   | 7da32f9ac3 | ||
|   | bb732d3d2e | ||
|   | 485e55f9ed | ||
|   | 601a20ea49 | ||
|   | 76996b9eb8 | ||
|   | fba2b1a39d | ||
|   | 4a91505af5 | ||
|   | 4841c79b4c | ||
|   | 2ba00d2e1d | ||
|   | 19c96f4bdd | ||
|   | 82b900fbf4 | ||
|   | 358a365303 | ||
|   | a07ca4b136 | ||
|   | ba8cf2c8cf | ||
|   | 3106b6688e | ||
|   | 2c83845dac | ||
|   | 111266d6fa | ||
|   | ead610151f | ||
|   | 7e1e763989 | ||
|   | 327cc4af34 | ||
|   | 6008ff516e | ||
|   | cdcf4b353f | ||
|   | 1ab70f8e86 | ||
|   | 8227c012a7 | ||
|   | c113d5fb24 | ||
|   | 8c8d4066d7 | ||
|   | 277dc9e1c1 | ||
|   | fc0fd1ce9d | ||
|   | bd6127728a | ||
|   | 4101ae00c6 | ||
|   | 62f14df3cb | 
							
								
								
									
										2
									
								
								.github/ISSUE_TEMPLATE/bug_report.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ISSUE_TEMPLATE/bug_report.md
									
									
									
									
										vendored
									
									
								
							| @@ -21,7 +21,7 @@ Steps to reproduce the behavior: | ||||
| 3. Scroll down to '....' | ||||
| 4. See error | ||||
|  | ||||
| ! ALWAYS INCLUDE AN EXAMPLE URL WHERE IT IS POSSIBLE TO RE-CREATE THE ISSUE ! | ||||
| ! ALWAYS INCLUDE AN EXAMPLE URL WHERE IT IS POSSIBLE TO RE-CREATE THE ISSUE - USE THE 'SHARE WATCH' FEATURE AND PASTE IN THE SHARE-LINK! | ||||
|  | ||||
| **Expected behavior** | ||||
| A clear and concise description of what you expected to happen. | ||||
|   | ||||
							
								
								
									
										15
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										15
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							| @@ -85,8 +85,8 @@ jobs: | ||||
|           version: latest | ||||
|           driver-opts: image=moby/buildkit:master | ||||
|  | ||||
|       # master always builds :latest | ||||
|       - name: Build and push :latest | ||||
|       # master branch -> :dev container tag | ||||
|       - name: Build and push :dev | ||||
|         id: docker_build | ||||
|         if: ${{ github.ref }} == "refs/heads/master" | ||||
|         uses: docker/build-push-action@v2 | ||||
| @@ -95,12 +95,12 @@ jobs: | ||||
|           file: ./Dockerfile | ||||
|           push: true | ||||
|           tags: | | ||||
|             ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest,ghcr.io/${{ github.repository }}:latest | ||||
|             ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7 | ||||
|           cache-from: type=local,src=/tmp/.buildx-cache | ||||
|           cache-to: type=local,dest=/tmp/.buildx-cache | ||||
|  | ||||
|       # A new tagged release is required, which builds :tag | ||||
|       # A new tagged release is required, which builds :tag and :latest | ||||
|       - name: Build and push :tag | ||||
|         id: docker_build_tag_release | ||||
|         if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.') | ||||
| @@ -110,7 +110,10 @@ jobs: | ||||
|           file: ./Dockerfile | ||||
|           push: true | ||||
|           tags: | | ||||
|             ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }},ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }} | ||||
|             ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }} | ||||
|             ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }} | ||||
|             ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest | ||||
|             ghcr.io/dgtlmoon/changedetection.io:latest | ||||
|           platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7 | ||||
|           cache-from: type=local,src=/tmp/.buildx-cache | ||||
|           cache-to: type=local,dest=/tmp/.buildx-cache | ||||
| @@ -125,5 +128,3 @@ jobs: | ||||
|           key: ${{ runner.os }}-buildx-${{ github.sha }} | ||||
|           restore-keys: | | ||||
|             ${{ runner.os }}-buildx- | ||||
|  | ||||
|  | ||||
|   | ||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -8,5 +8,6 @@ __pycache__ | ||||
| build | ||||
| dist | ||||
| venv | ||||
| test-datastore | ||||
| *.egg-info* | ||||
| .vscode/settings.json | ||||
|   | ||||
							
								
								
									
										17
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								README.md
									
									
									
									
									
								
							| @@ -3,14 +3,16 @@ | ||||
|  | ||||
|  | ||||
|  | ||||
| ## Self-Hosted, Open Source, Change Monitoring of Web Pages | ||||
| ## Web Site Change Detection, Monitoring and Notification - Self-Hosted or SaaS. | ||||
|  | ||||
| _Know when web pages change! Stay ontop of new information!_  | ||||
| _Know when web pages change! Stay on top of new information! Get notifications when important website content changes._  | ||||
|  | ||||
| Live your data-life *pro-actively* instead of *re-actively*. | ||||
|  | ||||
| Free, Open-source web page monitoring, notification and change detection. Don't have time? [**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start) | ||||
|  | ||||
| [](https://discord.gg/XJZy7QK3ja) [ ](https://www.youtube.com/channel/UCbS09q1TRf0o4N2t-WA3emQ) [](https://www.linkedin.com/company/changedetection-io/) | ||||
|  | ||||
|  | ||||
| [<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring"  title="Self-hosted web page change monitoring"  />](https://lemonade.changedetection.io/start) | ||||
|  | ||||
| @@ -33,6 +35,7 @@ Free, Open-source web page monitoring, notification and change detection. Don't | ||||
| - New software releases, security advisories when you're not on their mailing list. | ||||
| - Festivals with changes | ||||
| - Realestate listing changes | ||||
| - Know when your favourite whiskey is on sale, or other special deals are announced before anyone else | ||||
| - COVID related news from government websites | ||||
| - University/organisation news from their website | ||||
| - Detect and monitor changes in JSON API responses  | ||||
| @@ -56,9 +59,9 @@ Easily see what changed, examine by word, line, or individual character. | ||||
|  | ||||
| Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/ | ||||
|  | ||||
| ### Target elements with the Visual Selector tool. | ||||
| ### Filter by elements using the Visual Selector tool. | ||||
|  | ||||
| Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (available also as part of our subscription service) | ||||
| Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (included as part of our subscription service) | ||||
|  | ||||
| <img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/visualselector-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference "  title="Self-hosted web page change monitoring context difference " /> | ||||
|  | ||||
| @@ -67,14 +70,18 @@ Available when connected to a <a href="https://github.com/dgtlmoon/changedetecti | ||||
| ### Docker | ||||
|  | ||||
| With Docker Compose, just clone this repository and run: | ||||
|  | ||||
| ```bash | ||||
| $ docker-compose up -d | ||||
| ``` | ||||
|  | ||||
| Docker standalone | ||||
| ```bash | ||||
| $ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io | ||||
| ``` | ||||
|  | ||||
| `:latest` tag is our latest stable release, `:dev` tag is our bleeding edge `master` branch. | ||||
|  | ||||
| ### Windows | ||||
|  | ||||
| See the install instructions at the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows | ||||
| @@ -114,7 +121,7 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io | ||||
| ## Filters | ||||
| XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools. | ||||
|  | ||||
| (We support LXML re:test, re:math and re:replace.) | ||||
| (We support LXML `re:test`, `re:match` and `re:replace`.) | ||||
|  | ||||
| ## Notifications | ||||
|  | ||||
|   | ||||
							
								
								
									
										1
									
								
								changedetectionio/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								changedetectionio/.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -1 +1,2 @@ | ||||
| test-datastore | ||||
| package-lock.json | ||||
|   | ||||
| @@ -20,6 +20,7 @@ from copy import deepcopy | ||||
| from threading import Event | ||||
|  | ||||
| import flask_login | ||||
| import logging | ||||
| import pytz | ||||
| import timeago | ||||
| from feedgen.feed import FeedGenerator | ||||
| @@ -43,7 +44,7 @@ from flask_wtf import CSRFProtect | ||||
| from changedetectionio import html_tools | ||||
| from changedetectionio.api import api_v1 | ||||
|  | ||||
| __version__ = '0.39.14' | ||||
| __version__ = '0.39.17.1' | ||||
|  | ||||
| datastore = None | ||||
|  | ||||
| @@ -53,7 +54,7 @@ ticker_thread = None | ||||
|  | ||||
| extra_stylesheets = [] | ||||
|  | ||||
| update_q = queue.Queue() | ||||
| update_q = queue.PriorityQueue() | ||||
|  | ||||
| notification_q = queue.Queue() | ||||
|  | ||||
| @@ -107,7 +108,7 @@ def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"): | ||||
|     # Worker thread tells us which UUID it is currently processing. | ||||
|     for t in running_update_threads: | ||||
|         if t.current_uuid == watch_obj['uuid']: | ||||
|             return "Checking now.." | ||||
|             return '<span class="loader"></span><span> Checking now</span>' | ||||
|  | ||||
|     if watch_obj['last_checked'] == 0: | ||||
|         return 'Not yet' | ||||
| @@ -297,7 +298,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         # Sort by last_changed and add the uuid which is usually the key.. | ||||
|         sorted_watches = [] | ||||
|  | ||||
|         # @todo needs a .itemsWithTag() or something | ||||
|         # @todo needs a .itemsWithTag() or something - then we can use that in Jinja2 and throw this away | ||||
|         for uuid, watch in datastore.data['watching'].items(): | ||||
|  | ||||
|             if limit_tag != None: | ||||
| @@ -351,7 +352,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 latest_fname = watch.history[dates[-1]] | ||||
|  | ||||
|                 html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="</br>") | ||||
|                 fe.description(description="<![CDATA[<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff)) | ||||
|                 fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff), | ||||
|                            type='CDATA') | ||||
|  | ||||
|                 fe.guid(guid, permalink=False) | ||||
|                 dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key)) | ||||
| @@ -359,7 +361,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 fe.pubDate(dt) | ||||
|  | ||||
|         response = make_response(fg.rss_str()) | ||||
|         response.headers.set('Content-Type', 'application/rss+xml') | ||||
|         response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8') | ||||
|         return response | ||||
|  | ||||
|     @app.route("/", methods=['GET']) | ||||
| @@ -368,20 +370,20 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         from changedetectionio import forms | ||||
|  | ||||
|         limit_tag = request.args.get('tag') | ||||
|         pause_uuid = request.args.get('pause') | ||||
|  | ||||
|         # Redirect for the old rss path which used the /?rss=true | ||||
|         if request.args.get('rss'): | ||||
|             return redirect(url_for('rss', tag=limit_tag)) | ||||
|  | ||||
|         if pause_uuid: | ||||
|             try: | ||||
|                 datastore.data['watching'][pause_uuid]['paused'] ^= True | ||||
|                 datastore.needs_write = True | ||||
|         op = request.args.get('op') | ||||
|         if op: | ||||
|             uuid = request.args.get('uuid') | ||||
|             if op == 'pause': | ||||
|                 datastore.data['watching'][uuid]['paused'] ^= True | ||||
|             elif op == 'mute': | ||||
|                 datastore.data['watching'][uuid]['notification_muted'] ^= True | ||||
|  | ||||
|                 return redirect(url_for('index', tag = limit_tag)) | ||||
|             except KeyError: | ||||
|                 pass | ||||
|             datastore.needs_write = True | ||||
|             return redirect(url_for('index', tag = limit_tag)) | ||||
|  | ||||
|         # Sort by last_changed and add the uuid which is usually the key.. | ||||
|         sorted_watches = [] | ||||
| @@ -401,12 +403,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 watch['uuid'] = uuid | ||||
|                 sorted_watches.append(watch) | ||||
|  | ||||
|         sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True) | ||||
|  | ||||
|         existing_tags = datastore.get_all_tags() | ||||
|  | ||||
|         form = forms.quickWatchForm(request.form) | ||||
|  | ||||
|         output = render_template("watch-overview.html", | ||||
|                                  form=form, | ||||
|                                  watches=sorted_watches, | ||||
| @@ -417,7 +416,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                                  # Don't link to hosting when we're on the hosting environment | ||||
|                                  hosted_sticky=os.getenv("SALTED_PASS", False) == False, | ||||
|                                  guid=datastore.data['app_guid'], | ||||
|                                  queued_uuids=update_q.queue) | ||||
|                                  queued_uuids=[uuid for p,uuid in update_q.queue]) | ||||
|  | ||||
|  | ||||
|         if session.get('share-link'): | ||||
| @@ -431,7 +430,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     def ajax_callback_send_notification_test(): | ||||
|  | ||||
|         import apprise | ||||
|         apobj = apprise.Apprise() | ||||
|         from .apprise_asset import asset | ||||
|         apobj = apprise.Apprise(asset=asset) | ||||
|  | ||||
|  | ||||
|         # validate URLS | ||||
|         if not len(request.form['notification_urls'].strip()): | ||||
| @@ -456,25 +457,39 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         return 'OK' | ||||
|  | ||||
|     @app.route("/scrub", methods=['GET', 'POST']) | ||||
|  | ||||
|     @app.route("/clear_history/<string:uuid>", methods=['GET']) | ||||
|     @login_required | ||||
|     def scrub_page(): | ||||
|     def clear_watch_history(uuid): | ||||
|         try: | ||||
|             datastore.clear_watch_history(uuid) | ||||
|         except KeyError: | ||||
|             flash('Watch not found', 'error') | ||||
|         else: | ||||
|             flash("Cleared snapshot history for watch {}".format(uuid)) | ||||
|  | ||||
|         return redirect(url_for('index')) | ||||
|  | ||||
|     @app.route("/clear_history", methods=['GET', 'POST']) | ||||
|     @login_required | ||||
|     def clear_all_history(): | ||||
|  | ||||
|         if request.method == 'POST': | ||||
|             confirmtext = request.form.get('confirmtext') | ||||
|  | ||||
|             if confirmtext == 'scrub': | ||||
|             if confirmtext == 'clear': | ||||
|                 changes_removed = 0 | ||||
|                 for uuid in datastore.data['watching'].keys(): | ||||
|                     datastore.scrub_watch(uuid) | ||||
|                     datastore.clear_watch_history(uuid) | ||||
|                     #TODO: KeyError not checked, as it is above | ||||
|  | ||||
|                 flash("Cleared all snapshot history") | ||||
|                 flash("Cleared snapshot history for all watches") | ||||
|             else: | ||||
|                 flash('Incorrect confirmation text.', 'error') | ||||
|  | ||||
|             return redirect(url_for('index')) | ||||
|  | ||||
|         output = render_template("scrub.html") | ||||
|         output = render_template("clear_all_history.html") | ||||
|         return output | ||||
|  | ||||
|  | ||||
| @@ -564,6 +579,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         if request.method == 'POST' and form.validate(): | ||||
|             extra_update_obj = {} | ||||
|  | ||||
|             if request.args.get('unpause_on_save'): | ||||
|                 extra_update_obj['paused'] = False | ||||
|  | ||||
|             # Re #110, if they submit the same as the default value, set it to None, so we continue to follow the default | ||||
|             # Assume we use the default value, unless something relevant is different, then use the form value | ||||
|             # values could be None, 0 etc. | ||||
| @@ -603,24 +621,23 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             datastore.data['watching'][uuid].update(form.data) | ||||
|             datastore.data['watching'][uuid].update(extra_update_obj) | ||||
|  | ||||
|             flash("Updated watch.") | ||||
|             if request.args.get('unpause_on_save'): | ||||
|                 flash("Updated watch - unpaused!.") | ||||
|             else: | ||||
|                 flash("Updated watch.") | ||||
|  | ||||
|             # Re #286 - We wait for syncing new data to disk in another thread every 60 seconds | ||||
|             # But in the case something is added we should save straight away | ||||
|             datastore.needs_write_urgent = True | ||||
|  | ||||
|             # Queue the watch for immediate recheck | ||||
|             update_q.put(uuid) | ||||
|             # Queue the watch for immediate recheck, with a higher priority | ||||
|             update_q.put((1, uuid)) | ||||
|  | ||||
|             # Diff page [edit] link should go back to diff page | ||||
|             if request.args.get("next") and request.args.get("next") == 'diff' and not form.save_and_preview_button.data: | ||||
|             if request.args.get("next") and request.args.get("next") == 'diff': | ||||
|                 return redirect(url_for('diff_history_page', uuid=uuid)) | ||||
|             else: | ||||
|                 if form.save_and_preview_button.data: | ||||
|                     flash('You may need to reload this page to see the new content.') | ||||
|                     return redirect(url_for('preview_page', uuid=uuid)) | ||||
|                 else: | ||||
|                     return redirect(url_for('index')) | ||||
|  | ||||
|             return redirect(url_for('index')) | ||||
|  | ||||
|         else: | ||||
|             if request.method == 'POST' and not form.validate(): | ||||
| @@ -641,7 +658,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                                      current_base_url=datastore.data['settings']['application']['base_url'], | ||||
|                                      emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False), | ||||
|                                      visualselector_data_is_ready=visualselector_data_is_ready, | ||||
|                                      visualselector_enabled=visualselector_enabled | ||||
|                                      visualselector_enabled=visualselector_enabled, | ||||
|                                      playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False) | ||||
|                                      ) | ||||
|  | ||||
|         return output | ||||
| @@ -723,7 +741,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 importer = import_url_list() | ||||
|                 importer.run(data=request.values.get('urls'), flash=flash, datastore=datastore) | ||||
|                 for uuid in importer.new_uuids: | ||||
|                     update_q.put(uuid) | ||||
|                     update_q.put((1, uuid)) | ||||
|  | ||||
|                 if len(importer.remaining_data) == 0: | ||||
|                     return redirect(url_for('index')) | ||||
| @@ -736,7 +754,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                 d_importer = import_distill_io_json() | ||||
|                 d_importer.run(data=request.values.get('distill-io'), flash=flash, datastore=datastore) | ||||
|                 for uuid in d_importer.new_uuids: | ||||
|                     update_q.put(uuid) | ||||
|                     update_q.put((1, uuid)) | ||||
|  | ||||
|  | ||||
|  | ||||
| @@ -807,18 +825,25 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         screenshot_url = datastore.get_screenshot(uuid) | ||||
|  | ||||
|         output = render_template("diff.html", watch_a=watch, | ||||
|         system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' | ||||
|  | ||||
|         is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( | ||||
|                     watch.get('fetch_backend', None) is None and system_uses_webdriver) else False | ||||
|  | ||||
|         output = render_template("diff.html", | ||||
|                                  watch_a=watch, | ||||
|                                  newest=newest_version_file_contents, | ||||
|                                  previous=previous_version_file_contents, | ||||
|                                  extra_stylesheets=extra_stylesheets, | ||||
|                                  versions=dates[1:], | ||||
|                                  versions=dates[:-1], # All except current/last | ||||
|                                  uuid=uuid, | ||||
|                                  newest_version_timestamp=dates[-1], | ||||
|                                  current_previous_version=str(previous_version), | ||||
|                                  current_diff_url=watch['url'], | ||||
|                                  extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']), | ||||
|                                  left_sticky=True, | ||||
|                                  screenshot=screenshot_url) | ||||
|                                  screenshot=screenshot_url, | ||||
|                                  is_html_webdriver=is_html_webdriver) | ||||
|  | ||||
|         return output | ||||
|  | ||||
| @@ -833,6 +858,12 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         if uuid == 'first': | ||||
|             uuid = list(datastore.data['watching'].keys()).pop() | ||||
|  | ||||
|         # Normally you would never reach this, because the 'preview' button is not available when there's no history | ||||
|         # However they may try to clear snapshots and reload the page | ||||
|         if datastore.data['watching'][uuid].history_n == 0: | ||||
|             flash("Preview unavailable - No fetch/check completed or triggers not reached", "error") | ||||
|             return redirect(url_for('index')) | ||||
|  | ||||
|         extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] | ||||
|  | ||||
|         try: | ||||
| @@ -841,44 +872,47 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             flash("No history found for the specified link, bad link?", "error") | ||||
|             return redirect(url_for('index')) | ||||
|  | ||||
|         if watch.history_n >0: | ||||
|             timestamps = sorted(watch.history.keys(), key=lambda x: int(x)) | ||||
|             filename = watch.history[timestamps[-1]] | ||||
|             try: | ||||
|                 with open(filename, 'r') as f: | ||||
|                     tmp = f.readlines() | ||||
|  | ||||
|                     # Get what needs to be highlighted | ||||
|                     ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text'] | ||||
|         timestamp = list(watch.history.keys())[-1] | ||||
|         filename = watch.history[timestamp] | ||||
|         try: | ||||
|             with open(filename, 'r') as f: | ||||
|                 tmp = f.readlines() | ||||
|  | ||||
|                     # .readlines will keep the \n, but we will parse it here again, in the future tidy this up | ||||
|                     ignored_line_numbers = html_tools.strip_ignore_text(content="".join(tmp), | ||||
|                                                                         wordlist=ignore_rules, | ||||
|                                                                         mode='line numbers' | ||||
|                                                                         ) | ||||
|                 # Get what needs to be highlighted | ||||
|                 ignore_rules = watch.get('ignore_text', []) + datastore.data['settings']['application']['global_ignore_text'] | ||||
|  | ||||
|                     trigger_line_numbers = html_tools.strip_ignore_text(content="".join(tmp), | ||||
|                                                                         wordlist=watch['trigger_text'], | ||||
|                                                                         mode='line numbers' | ||||
|                                                                         ) | ||||
|                     # Prepare the classes and lines used in the template | ||||
|                     i=0 | ||||
|                     for l in tmp: | ||||
|                         classes=[] | ||||
|                         i+=1 | ||||
|                         if i in ignored_line_numbers: | ||||
|                             classes.append('ignored') | ||||
|                         if i in trigger_line_numbers: | ||||
|                             classes.append('triggered') | ||||
|                         content.append({'line': l, 'classes': ' '.join(classes)}) | ||||
|                 # .readlines will keep the \n, but we will parse it here again, in the future tidy this up | ||||
|                 ignored_line_numbers = html_tools.strip_ignore_text(content="".join(tmp), | ||||
|                                                                     wordlist=ignore_rules, | ||||
|                                                                     mode='line numbers' | ||||
|                                                                     ) | ||||
|  | ||||
|                 trigger_line_numbers = html_tools.strip_ignore_text(content="".join(tmp), | ||||
|                                                                     wordlist=watch['trigger_text'], | ||||
|                                                                     mode='line numbers' | ||||
|                                                                     ) | ||||
|                 # Prepare the classes and lines used in the template | ||||
|                 i=0 | ||||
|                 for l in tmp: | ||||
|                     classes=[] | ||||
|                     i+=1 | ||||
|                     if i in ignored_line_numbers: | ||||
|                         classes.append('ignored') | ||||
|                     if i in trigger_line_numbers: | ||||
|                         classes.append('triggered') | ||||
|                     content.append({'line': l, 'classes': ' '.join(classes)}) | ||||
|  | ||||
|         except Exception as e: | ||||
|             content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''}) | ||||
|  | ||||
|             except Exception as e: | ||||
|                 content.append({'line': "File doesnt exist or unable to read file {}".format(filename), 'classes': ''}) | ||||
|         else: | ||||
|             content.append({'line': "No history found", 'classes': ''}) | ||||
|  | ||||
|         screenshot_url = datastore.get_screenshot(uuid) | ||||
|         system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' | ||||
|  | ||||
|         is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( | ||||
|                 watch.get('fetch_backend', None) is None and system_uses_webdriver) else False | ||||
|  | ||||
|         output = render_template("preview.html", | ||||
|                                  content=content, | ||||
|                                  extra_stylesheets=extra_stylesheets, | ||||
| @@ -887,8 +921,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|                                  current_diff_url=watch['url'], | ||||
|                                  screenshot=screenshot_url, | ||||
|                                  watch=watch, | ||||
|                                  uuid=uuid) | ||||
|          | ||||
|                                  uuid=uuid, | ||||
|                                  is_html_webdriver=is_html_webdriver) | ||||
|  | ||||
|         return output | ||||
|  | ||||
|     @app.route("/settings/notification-logs", methods=['GET']) | ||||
| @@ -896,7 +931,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|     def notification_logs(): | ||||
|         global notification_debug_log | ||||
|         output = render_template("notification-log.html", | ||||
|                                  logs=notification_debug_log if len(notification_debug_log) else ["No errors or warnings detected"]) | ||||
|                                  logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."]) | ||||
|  | ||||
|         return output | ||||
|  | ||||
| @@ -1027,9 +1062,9 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|         except FileNotFoundError: | ||||
|             abort(404) | ||||
|  | ||||
|     @app.route("/api/add", methods=['POST']) | ||||
|     @app.route("/form/add/quickwatch", methods=['POST']) | ||||
|     @login_required | ||||
|     def form_watch_add(): | ||||
|     def form_quick_watch_add(): | ||||
|         from changedetectionio import forms | ||||
|         form = forms.quickWatchForm(request.form) | ||||
|  | ||||
| @@ -1042,13 +1077,19 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             flash('The URL {} already exists'.format(url), "error") | ||||
|             return redirect(url_for('index')) | ||||
|  | ||||
|         # @todo add_watch should throw a custom Exception for validation etc | ||||
|         new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip()) | ||||
|         if new_uuid: | ||||
|         add_paused = request.form.get('edit_and_watch_submit_button') != None | ||||
|         new_uuid = datastore.add_watch(url=url, tag=request.form.get('tag').strip(), extras={'paused': add_paused}) | ||||
|  | ||||
|  | ||||
|         if not add_paused and new_uuid: | ||||
|             # Straight into the queue. | ||||
|             update_q.put(new_uuid) | ||||
|             update_q.put((1, new_uuid)) | ||||
|             flash("Watch added.") | ||||
|  | ||||
|         if add_paused: | ||||
|             flash('Watch added in Paused state, saving will unpause.') | ||||
|             return redirect(url_for('edit_page', uuid=new_uuid, unpause_on_save=1)) | ||||
|  | ||||
|         return redirect(url_for('index')) | ||||
|  | ||||
|  | ||||
| @@ -1079,7 +1120,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             uuid = list(datastore.data['watching'].keys()).pop() | ||||
|  | ||||
|         new_uuid = datastore.clone(uuid) | ||||
|         update_q.put(new_uuid) | ||||
|         update_q.put((5, new_uuid)) | ||||
|         flash('Cloned.') | ||||
|  | ||||
|         return redirect(url_for('index')) | ||||
| @@ -1100,7 +1141,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|         if uuid: | ||||
|             if uuid not in running_uuids: | ||||
|                 update_q.put(uuid) | ||||
|                 update_q.put((1, uuid)) | ||||
|             i = 1 | ||||
|  | ||||
|         elif tag != None: | ||||
| @@ -1108,7 +1149,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             for watch_uuid, watch in datastore.data['watching'].items(): | ||||
|                 if (tag != None and tag in watch['tag']): | ||||
|                     if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: | ||||
|                         update_q.put(watch_uuid) | ||||
|                         update_q.put((1, watch_uuid)) | ||||
|                         i += 1 | ||||
|  | ||||
|         else: | ||||
| @@ -1116,7 +1157,7 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|             for watch_uuid, watch in datastore.data['watching'].items(): | ||||
|  | ||||
|                 if watch_uuid not in running_uuids and not datastore.data['watching'][watch_uuid]['paused']: | ||||
|                     update_q.put(watch_uuid) | ||||
|                     update_q.put((1, watch_uuid)) | ||||
|                     i += 1 | ||||
|         flash("{} watches are queued for rechecking.".format(i)) | ||||
|         return redirect(url_for('index', tag=tag)) | ||||
| @@ -1167,7 +1208,8 @@ def changedetection_app(config=None, datastore_o=None): | ||||
|  | ||||
|  | ||||
|         except Exception as e: | ||||
|             flash("Could not share, something went wrong while communicating with the share server.", 'error') | ||||
|             logging.error("Error sharing -{}".format(str(e))) | ||||
|             flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error') | ||||
|  | ||||
|         # https://changedetection.io/share/VrMv05wpXyQa | ||||
|         # in the browser - should give you a nice info page - wtf | ||||
| @@ -1215,6 +1257,9 @@ def check_for_new_version(): | ||||
|  | ||||
| def notification_runner(): | ||||
|     global notification_debug_log | ||||
|     from datetime import datetime | ||||
|     import json | ||||
|  | ||||
|     while not app.config.exit.is_set(): | ||||
|         try: | ||||
|             # At the moment only one thread runs (single runner) | ||||
| @@ -1223,13 +1268,17 @@ def notification_runner(): | ||||
|             time.sleep(1) | ||||
|  | ||||
|         else: | ||||
|             # Process notifications | ||||
|  | ||||
|             now = datetime.now() | ||||
|             sent_obj = None | ||||
|  | ||||
|             try: | ||||
|                 from changedetectionio import notification | ||||
|                 notification.process_notification(n_object, datastore) | ||||
|  | ||||
|                 sent_obj = notification.process_notification(n_object, datastore) | ||||
|  | ||||
|             except Exception as e: | ||||
|                 print("Watch URL: {}  Error {}".format(n_object['watch_url'], str(e))) | ||||
|                 logging.error("Watch URL: {}  Error {}".format(n_object['watch_url'], str(e))) | ||||
|  | ||||
|                 # UUID wont be present when we submit a 'test' from the global settings | ||||
|                 if 'uuid' in n_object: | ||||
| @@ -1239,14 +1288,18 @@ def notification_runner(): | ||||
|                 log_lines = str(e).splitlines() | ||||
|                 notification_debug_log += log_lines | ||||
|  | ||||
|                 # Trim the log length | ||||
|                 notification_debug_log = notification_debug_log[-100:] | ||||
|  | ||||
|             # Process notifications | ||||
|             notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))] | ||||
|             # Trim the log length | ||||
|             notification_debug_log = notification_debug_log[-100:] | ||||
|  | ||||
| # Thread runner to check every minute, look for new watches to feed into the Queue. | ||||
| def ticker_thread_check_time_launch_checks(): | ||||
|     import random | ||||
|     from changedetectionio import update_worker | ||||
|     import logging | ||||
|  | ||||
|     recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20)) | ||||
|     print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds) | ||||
|  | ||||
|     # Spin up Workers that do the fetching | ||||
|     # Can be overriden by ENV or use the default settings | ||||
| @@ -1279,14 +1332,12 @@ def ticker_thread_check_time_launch_checks(): | ||||
|         while update_q.qsize() >= 2000: | ||||
|             time.sleep(1) | ||||
|  | ||||
|  | ||||
|         recheck_time_system_seconds = int(datastore.threshold_seconds) | ||||
|  | ||||
|         # Check for watches outside of the time threshold to put in the thread queue. | ||||
|         now = time.time() | ||||
|  | ||||
|         recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60)) | ||||
|         recheck_time_system_seconds = datastore.threshold_seconds | ||||
|  | ||||
|         for uuid in watch_uuid_list: | ||||
|  | ||||
|             now = time.time() | ||||
|             watch = datastore.data['watching'].get(uuid) | ||||
|             if not watch: | ||||
|                 logging.error("Watch: {} no longer present.".format(uuid)) | ||||
| @@ -1297,20 +1348,33 @@ def ticker_thread_check_time_launch_checks(): | ||||
|                 continue | ||||
|  | ||||
|             # If they supplied an individual entry minutes to threshold. | ||||
|             threshold = now | ||||
|  | ||||
|             watch_threshold_seconds = watch.threshold_seconds() | ||||
|             if watch_threshold_seconds: | ||||
|                 threshold -= watch_threshold_seconds | ||||
|             else: | ||||
|                 threshold -= recheck_time_system_seconds | ||||
|             threshold = watch_threshold_seconds if watch_threshold_seconds > 0 else recheck_time_system_seconds | ||||
|  | ||||
|             # Yeah, put it in the queue, it's more than time | ||||
|             if watch['last_checked'] <= max(threshold, recheck_time_minimum_seconds): | ||||
|                 if not uuid in running_uuids and uuid not in update_q.queue: | ||||
|                     update_q.put(uuid) | ||||
|             # #580 - Jitter plus/minus amount of time to make the check seem more random to the server | ||||
|             jitter = datastore.data['settings']['requests'].get('jitter_seconds', 0) | ||||
|             if jitter > 0: | ||||
|                 if watch.jitter_seconds == 0: | ||||
|                     watch.jitter_seconds = random.uniform(-abs(jitter), jitter) | ||||
|  | ||||
|         # Wait a few seconds before checking the list again | ||||
|         time.sleep(3) | ||||
|  | ||||
|             seconds_since_last_recheck = now - watch['last_checked'] | ||||
|             if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds: | ||||
|                 if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]: | ||||
|                     print("> Queued watch UUID {} last checked at {} queued at {:0.2f} jitter {:0.2f}s, {:0.2f}s since last checked".format(uuid, | ||||
|                                                                                                          watch['last_checked'], | ||||
|                                                                                                          now, | ||||
|                                                                                                          watch.jitter_seconds, | ||||
|                                                                                                          now - watch['last_checked'])) | ||||
|                     # Into the queue with you | ||||
|                     update_q.put((5, uuid)) | ||||
|  | ||||
|                     # Reset for next time | ||||
|                     watch.jitter_seconds = 0 | ||||
|  | ||||
|         # Wait before checking the list again - saves CPU | ||||
|         time.sleep(1) | ||||
|  | ||||
|         # Should be low so we can break this out in testing | ||||
|         app.config.exit.wait(1) | ||||
|         app.config.exit.wait(1) | ||||
|   | ||||
| @@ -24,7 +24,7 @@ class Watch(Resource): | ||||
|             abort(404, message='No watch exists with the UUID of {}'.format(uuid)) | ||||
|  | ||||
|         if request.args.get('recheck'): | ||||
|             self.update_q.put(uuid) | ||||
|             self.update_q.put((1, uuid)) | ||||
|             return "OK", 200 | ||||
|  | ||||
|         # Return without history, get that via another API call | ||||
| @@ -100,7 +100,7 @@ class CreateWatch(Resource): | ||||
|         extras = {'title': json_data['title'].strip()} if json_data.get('title') else {} | ||||
|  | ||||
|         new_uuid = self.datastore.add_watch(url=json_data['url'].strip(), tag=tag, extras=extras) | ||||
|         self.update_q.put(new_uuid) | ||||
|         self.update_q.put((1, new_uuid)) | ||||
|         return {'uuid': new_uuid}, 201 | ||||
|  | ||||
|     # Return concise list of available watches and some very basic info | ||||
| @@ -118,7 +118,7 @@ class CreateWatch(Resource): | ||||
|  | ||||
|         if request.args.get('recheck_all'): | ||||
|             for uuid in self.datastore.data['watching'].keys(): | ||||
|                 self.update_q.put(uuid) | ||||
|                 self.update_q.put((1, uuid)) | ||||
|             return {'status': "OK"}, 200 | ||||
|  | ||||
|         return list, 200 | ||||
|   | ||||
							
								
								
									
										11
									
								
								changedetectionio/apprise_asset.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										11
									
								
								changedetectionio/apprise_asset.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,11 @@ | ||||
| import apprise | ||||
|  | ||||
| # Create our AppriseAsset and populate it with some of our new values: | ||||
| # https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object | ||||
| asset = apprise.AppriseAsset( | ||||
|    image_url_logo='https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png' | ||||
| ) | ||||
|  | ||||
| asset.app_id = "changedetection.io" | ||||
| asset.app_desc = "ChangeDetection.io best and simplest website monitoring and change detection" | ||||
| asset.app_url = "https://changedetection.io" | ||||
| @@ -35,7 +35,7 @@ def main(): | ||||
|     create_datastore_dir = False | ||||
|  | ||||
|     for opt, arg in opts: | ||||
|         #        if opt == '--purge': | ||||
|         #        if opt == '--clear-all-history': | ||||
|         # Remove history, the actual files you need to delete manually. | ||||
|         #            for uuid, watch in datastore.data['watching'].items(): | ||||
|         #                watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None}) | ||||
|   | ||||
| @@ -46,6 +46,7 @@ class Fetcher(): | ||||
|     headers = None | ||||
|  | ||||
|     fetcher_description = "No description" | ||||
|     webdriver_js_execute_code = None | ||||
|     xpath_element_js = """                | ||||
|                 // Include the getXpath script directly, easier than fetching | ||||
|                 !function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}}); | ||||
| @@ -62,12 +63,12 @@ class Fetcher(): | ||||
|                       break; | ||||
|                     } | ||||
|                     if('' !==r.id) { | ||||
|                       chained_css.unshift("#"+r.id); | ||||
|                       final_selector= chained_css.join('>'); | ||||
|                       chained_css.unshift("#"+CSS.escape(r.id)); | ||||
|                       final_selector= chained_css.join(' > '); | ||||
|                       // Be sure theres only one, some sites have multiples of the same ID tag :-( | ||||
|                       if (window.document.querySelectorAll(final_selector).length ==1 ) { | ||||
|                         return final_selector; | ||||
|                       } | ||||
|                         } | ||||
|                       return null; | ||||
|                     } else { | ||||
|                       chained_css.unshift(r.tagName.toLowerCase()); | ||||
| @@ -175,7 +176,6 @@ class Fetcher(): | ||||
|  | ||||
|     # Will be needed in the future by the VisualSelector, always get this where possible. | ||||
|     screenshot = False | ||||
|     fetcher_description = "No description" | ||||
|     system_http_proxy = os.getenv('HTTP_PROXY') | ||||
|     system_https_proxy = os.getenv('HTTPS_PROXY') | ||||
|  | ||||
| @@ -281,13 +281,14 @@ class base_html_playwright(Fetcher): | ||||
|         from playwright.sync_api import sync_playwright | ||||
|         import playwright._impl._api_types | ||||
|         from playwright._impl._api_types import Error, TimeoutError | ||||
|  | ||||
|         response = None | ||||
|         with sync_playwright() as p: | ||||
|             browser_type = getattr(p, self.browser_type) | ||||
|  | ||||
|             # Seemed to cause a connection Exception even tho I can see it connect | ||||
|             # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000) | ||||
|             browser = browser_type.connect_over_cdp(self.command_executor, timeout=timeout * 1000) | ||||
|             # 60,000 connection timeout only | ||||
|             browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000) | ||||
|  | ||||
|             # Set user agent to prevent Cloudflare from blocking the browser | ||||
|             # Use the default one configured in the App.py model that's passed from fetch_site_status.py | ||||
| @@ -300,21 +301,35 @@ class base_html_playwright(Fetcher): | ||||
|                 accept_downloads=False | ||||
|             ) | ||||
|  | ||||
|             if len(request_headers): | ||||
|                 context.set_extra_http_headers(request_headers) | ||||
|  | ||||
|             page = context.new_page() | ||||
|             try: | ||||
|                # Bug - never set viewport size BEFORE page.goto | ||||
|                 response = page.goto(url, timeout=timeout * 1000, wait_until='commit') | ||||
|                 # Wait_until = commit | ||||
|                 # - `'commit'` - consider operation to be finished when network response is received and the document started loading. | ||||
|                 # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds | ||||
|                 # This seemed to solve nearly all 'TimeoutErrors' | ||||
|                 extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|                 page.wait_for_timeout(extra_wait * 1000) | ||||
|                 page.set_default_navigation_timeout(90000) | ||||
|                 page.set_default_timeout(90000) | ||||
|  | ||||
|                 # Listen for all console events and handle errors | ||||
|                 page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|                 # Bug - never set viewport size BEFORE page.goto | ||||
|  | ||||
|                 # Waits for the next navigation. Using Python context manager | ||||
|                 # prevents a race condition between clicking and waiting for a navigation. | ||||
|                 with page.expect_navigation(): | ||||
|                     response = page.goto(url, wait_until='load') | ||||
|  | ||||
|                 if self.webdriver_js_execute_code is not None: | ||||
|                     page.evaluate(self.webdriver_js_execute_code) | ||||
|  | ||||
|             except playwright._impl._api_types.TimeoutError as e: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|                 # This can be ok, we will try to grab what we could retrieve | ||||
|                 pass | ||||
|             except Exception as e: | ||||
|                 print ("other exception when page.goto") | ||||
|                 print (str(e)) | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise PageUnloadable(url=url, status_code=None) | ||||
| @@ -322,18 +337,22 @@ class base_html_playwright(Fetcher): | ||||
|             if response is None: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             if len(page.content().strip()) == 0: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 print ("response object was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             # Bug 2(?) Set the viewport size AFTER loading the page | ||||
|             page.set_viewport_size({"width": 1280, "height": 1024}) | ||||
|  | ||||
|             self.status_code = response.status | ||||
|             page.set_viewport_size({"width": 1280, "height": 1024})             | ||||
|             extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|             time.sleep(extra_wait) | ||||
|             self.content = page.content() | ||||
|             self.status_code = response.status | ||||
|  | ||||
|             if len(self.content.strip()) == 0: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
|                 print ("Content was empty") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|              | ||||
|             self.headers = response.all_headers() | ||||
|  | ||||
|             if current_css_filter is not None: | ||||
| @@ -346,9 +365,15 @@ class base_html_playwright(Fetcher): | ||||
|             # Bug 3 in Playwright screenshot handling | ||||
|             # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it | ||||
|             # JPEG is better here because the screenshots can be very very large | ||||
|  | ||||
|             # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded | ||||
|             # which will significantly increase the IO size between the server and client, it's recommended to use the lowest | ||||
|             # acceptable screenshot quality here | ||||
|             try: | ||||
|                 page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}) | ||||
|                 self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=92) | ||||
|                 # Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this. | ||||
|                 page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1) | ||||
|                 # The actual screenshot | ||||
|                 self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72))) | ||||
|             except Exception as e: | ||||
|                 context.close() | ||||
|                 browser.close() | ||||
| @@ -428,6 +453,12 @@ class base_html_webdriver(Fetcher): | ||||
|  | ||||
|         self.driver.set_window_size(1280, 1024) | ||||
|         self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|         if self.webdriver_js_execute_code is not None: | ||||
|             self.driver.execute_script(self.webdriver_js_execute_code) | ||||
|             # Selenium doesn't automatically wait for actions as good as Playwright, so wait again | ||||
|             self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|         self.screenshot = self.driver.get_screenshot_as_png() | ||||
|  | ||||
|         # @todo - how to check this? is it possible? | ||||
|   | ||||
| @@ -1,4 +1,5 @@ | ||||
| import hashlib | ||||
| import logging | ||||
| import os | ||||
| import re | ||||
| import time | ||||
| @@ -10,6 +11,7 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning) | ||||
|  | ||||
|  | ||||
| # Some common stuff here that can be moved to a base class | ||||
| # (set_proxy_from_list) | ||||
| class perform_site_check(): | ||||
|  | ||||
|     def __init__(self, *args, datastore, **kwargs): | ||||
| @@ -44,6 +46,20 @@ class perform_site_check(): | ||||
|  | ||||
|         return proxy_args | ||||
|  | ||||
|     # Doesn't look like python supports forward slash auto enclosure in re.findall | ||||
|     # So convert it to inline flag "foobar(?i)" type configuration | ||||
|     def forward_slash_enclosed_regex_to_options(self, regex): | ||||
|         res = re.search(r'^/(.*?)/(\w+)$', regex, re.IGNORECASE) | ||||
|  | ||||
|         if res: | ||||
|             regex = res.group(1) | ||||
|             regex += '(?{})'.format(res.group(2)) | ||||
|         else: | ||||
|             regex += '(?{})'.format('i') | ||||
|  | ||||
|         return regex | ||||
|  | ||||
|  | ||||
|     def run(self, uuid): | ||||
|         timestamp = int(time.time())  # used for storage etc too | ||||
|  | ||||
| @@ -105,6 +121,9 @@ class perform_site_check(): | ||||
|         elif system_webdriver_delay is not None: | ||||
|             fetcher.render_extract_delay = system_webdriver_delay | ||||
|  | ||||
|         if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip(): | ||||
|             fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code'] | ||||
|  | ||||
|         fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter']) | ||||
|         fetcher.quit() | ||||
|  | ||||
| @@ -146,7 +165,9 @@ class perform_site_check(): | ||||
|                 is_html = False | ||||
|  | ||||
|         if is_html or is_source: | ||||
|              | ||||
|             # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text | ||||
|             fetcher.content = html_tools.workarounds_for_obfuscations(fetcher.content) | ||||
|             html_content = fetcher.content | ||||
|  | ||||
|             # If not JSON,  and if it's not text/plain.. | ||||
| @@ -204,34 +225,69 @@ class perform_site_check(): | ||||
|         else: | ||||
|             stripped_text_from_html = stripped_text_from_html.encode('utf8') | ||||
|  | ||||
|         # 615 Extract text by regex | ||||
|         extract_text = watch.get('extract_text', []) | ||||
|         if len(extract_text) > 0: | ||||
|             regex_matched_output = [] | ||||
|             for s_re in extract_text: | ||||
|                 # incase they specified something in '/.../x' | ||||
|                 regex = self.forward_slash_enclosed_regex_to_options(s_re) | ||||
|                 result = re.findall(regex.encode('utf-8'), stripped_text_from_html) | ||||
|  | ||||
|                 for l in result: | ||||
|                     if type(l) is tuple: | ||||
|                         #@todo - some formatter option default (between groups) | ||||
|                         regex_matched_output += list(l) + [b'\n'] | ||||
|                     else: | ||||
|                         # @todo - some formatter option default (between each ungrouped result) | ||||
|                         regex_matched_output += [l] + [b'\n'] | ||||
|  | ||||
|             # Now we will only show what the regex matched | ||||
|             stripped_text_from_html = b'' | ||||
|             text_content_before_ignored_filter = b'' | ||||
|             if regex_matched_output: | ||||
|                 # @todo some formatter for presentation? | ||||
|                 stripped_text_from_html = b''.join(regex_matched_output) | ||||
|                 text_content_before_ignored_filter = stripped_text_from_html | ||||
|  | ||||
|  | ||||
|         # Re #133 - if we should strip whitespaces from triggering the change detected comparison | ||||
|         if self.datastore.data['settings']['application'].get('ignore_whitespace', False): | ||||
|             fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest() | ||||
|         else: | ||||
|             fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest() | ||||
|  | ||||
|         # On the first run of a site, watch['previous_md5'] will be None, set it the current one. | ||||
|         if not watch.get('previous_md5'): | ||||
|             watch['previous_md5'] = fetched_md5 | ||||
|             update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|         blocked_by_not_found_trigger_text = False | ||||
|         ############ Blocking rules, after checksum ################# | ||||
|         blocked = False | ||||
|  | ||||
|         if len(watch['trigger_text']): | ||||
|             # Yeah, lets block first until something matches | ||||
|             blocked_by_not_found_trigger_text = True | ||||
|             # Assume blocked | ||||
|             blocked = True | ||||
|             # Filter and trigger works the same, so reuse it | ||||
|             # It should return the line numbers that match | ||||
|             result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), | ||||
|                                                   wordlist=watch['trigger_text'], | ||||
|                                                   mode="line numbers") | ||||
|             # If it returned any lines that matched.. | ||||
|             # Unblock if the trigger was found | ||||
|             if result: | ||||
|                 blocked_by_not_found_trigger_text = False | ||||
|                 blocked = False | ||||
|  | ||||
|         if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5: | ||||
|  | ||||
|         if len(watch['text_should_not_be_present']): | ||||
|             # If anything matched, then we should block a change from happening | ||||
|             result = html_tools.strip_ignore_text(content=str(stripped_text_from_html), | ||||
|                                                   wordlist=watch['text_should_not_be_present'], | ||||
|                                                   mode="line numbers") | ||||
|             if result: | ||||
|                 blocked = True | ||||
|  | ||||
|         # The main thing that all this at the moment comes down to :) | ||||
|         if watch['previous_md5'] != fetched_md5: | ||||
|             changed_detected = True | ||||
|             update_obj["previous_md5"] = fetched_md5 | ||||
|             update_obj["last_changed"] = timestamp | ||||
|  | ||||
|         # Looks like something changed, but did it match all the rules? | ||||
|         if blocked: | ||||
|             changed_detected = False | ||||
|  | ||||
|         # Extract title as title | ||||
|         if is_html: | ||||
| @@ -239,4 +295,21 @@ class perform_site_check(): | ||||
|                 if not watch['title'] or not len(watch['title']): | ||||
|                     update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content) | ||||
|  | ||||
|         if changed_detected: | ||||
|             if watch.get('check_unique_lines', False): | ||||
|                 has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines()) | ||||
|                 # One or more lines? unsure? | ||||
|                 if not has_unique_lines: | ||||
|                     logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid)) | ||||
|                     changed_detected = False | ||||
|                 else: | ||||
|                     logging.debug("check_unique_lines: UUID {} had unique content".format(uuid)) | ||||
|  | ||||
|         # Always record the new checksum | ||||
|         update_obj["previous_md5"] = fetched_md5 | ||||
|  | ||||
|         # On the first run of a site, watch['previous_md5'] will be None, set it the current one. | ||||
|         if not watch.get('previous_md5'): | ||||
|             watch['previous_md5'] = fetched_md5 | ||||
|  | ||||
|         return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data | ||||
|   | ||||
| @@ -223,7 +223,7 @@ class validateURL(object): | ||||
|         except validators.ValidationFailure: | ||||
|             message = field.gettext('\'%s\' is not a valid URL.' % (field.data.strip())) | ||||
|             raise ValidationError(message) | ||||
|          | ||||
|  | ||||
| class ValidateListRegex(object): | ||||
|     """ | ||||
|     Validates that anything that looks like a regex passes as a regex | ||||
| @@ -308,6 +308,9 @@ class ValidateCSSJSONXPATHInput(object): | ||||
| class quickWatchForm(Form): | ||||
|     url = fields.URLField('URL', validators=[validateURL()]) | ||||
|     tag = StringField('Group tag', [validators.Optional()]) | ||||
|     watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|  | ||||
| # Common to a single watch and the global settings | ||||
| class commonSettingsForm(Form): | ||||
| @@ -330,6 +333,9 @@ class watchForm(commonSettingsForm): | ||||
|     css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()], default='') | ||||
|  | ||||
|     subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)]) | ||||
|  | ||||
|     extract_text = StringListField('Extract text', [ValidateListRegex()]) | ||||
|  | ||||
|     title = StringField('Title', default='') | ||||
|  | ||||
|     ignore_text = StringListField('Ignore text', [ValidateListRegex()]) | ||||
| @@ -337,10 +343,17 @@ class watchForm(commonSettingsForm): | ||||
|     body = TextAreaField('Request body', [validators.Optional()]) | ||||
|     method = SelectField('Request method', choices=valid_method, default=default_method) | ||||
|     ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False) | ||||
|     check_unique_lines = BooleanField('Only trigger when new lines appear', default=False) | ||||
|     trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()]) | ||||
|     text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()]) | ||||
|  | ||||
|     webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()]) | ||||
|  | ||||
|     save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|  | ||||
|     proxy = RadioField('Proxy') | ||||
|     filter_failure_notification_send = BooleanField( | ||||
|         'Send a notification when the filter can no longer be found on the page', default=False) | ||||
|  | ||||
|     def validate(self, **kwargs): | ||||
|         if not super().validate(): | ||||
| @@ -360,7 +373,9 @@ class watchForm(commonSettingsForm): | ||||
| class globalSettingsRequestForm(Form): | ||||
|     time_between_check = FormField(TimeBetweenCheckForm) | ||||
|     proxy = RadioField('Proxy') | ||||
|  | ||||
|     jitter_seconds = IntegerField('Random jitter seconds ± check', | ||||
|                                   render_kw={"style": "width: 5em;"}, | ||||
|                                   validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")]) | ||||
|  | ||||
| # datastore.data['settings']['application'].. | ||||
| class globalSettingsApplicationForm(commonSettingsForm): | ||||
| @@ -377,6 +392,11 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|     api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()]) | ||||
|     password = SaltyPasswordField() | ||||
|  | ||||
|     filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification', | ||||
|                                                                   render_kw={"style": "width: 5em;"}, | ||||
|                                                                   validators=[validators.NumberRange(min=0, | ||||
|                                                                                                      message="Should contain zero or more attempts")]) | ||||
|  | ||||
|  | ||||
| class globalSettingsForm(Form): | ||||
|     # Define these as FormFields/"sub forms", this way it matches the JSON storage | ||||
|   | ||||
| @@ -1,5 +1,4 @@ | ||||
| import json | ||||
| import re | ||||
| from typing import List | ||||
|  | ||||
| from bs4 import BeautifulSoup | ||||
| @@ -8,16 +7,23 @@ import re | ||||
| from inscriptis import get_text | ||||
| from inscriptis.model.config import ParserConfig | ||||
|  | ||||
class FilterNotFoundInResponse(ValueError):
    """Signals that a CSS/XPath filter selected nothing from a non-empty response.

    The message passed in is stored as the exception's only argument.
    """

    def __init__(self, msg):
        super().__init__(msg)
|  | ||||
class JSONNotFound(ValueError):
    """ValueError subclass that carries a single message.

    NOTE(review): presumably raised when expected JSON content cannot be
    located in a response - confirm against the callers.
    """

    def __init__(self, msg):
        super().__init__(msg)
|  | ||||
|  | ||||
| # Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches | ||||
| def css_filter(css_filter, html_content): | ||||
|     soup = BeautifulSoup(html_content, "html.parser") | ||||
|     html_block = "" | ||||
|     for item in soup.select(css_filter, separator=""): | ||||
|     r = soup.select(css_filter, separator="") | ||||
|     if len(html_content) > 0 and len(r) == 0: | ||||
|         raise FilterNotFoundInResponse(css_filter) | ||||
|     for item in r: | ||||
|         html_block += str(item) | ||||
|  | ||||
|     return html_block + "\n" | ||||
| @@ -42,8 +48,19 @@ def xpath_filter(xpath_filter, html_content): | ||||
|     tree = html.fromstring(bytes(html_content, encoding='utf-8')) | ||||
|     html_block = "" | ||||
|  | ||||
|     for item in tree.xpath(xpath_filter.strip(), namespaces={'re':'http://exslt.org/regular-expressions'}): | ||||
|         html_block+= etree.tostring(item, pretty_print=True).decode('utf-8')+"<br/>" | ||||
|     r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'}) | ||||
|     if len(html_content) > 0 and len(r) == 0: | ||||
|         raise FilterNotFoundInResponse(xpath_filter) | ||||
|  | ||||
|     #@note: //title/text() wont work where <title>CDATA.. | ||||
|  | ||||
|     for element in r: | ||||
|         if type(element) == etree._ElementStringResult: | ||||
|             html_block += str(element) + "<br/>" | ||||
|         elif type(element) == etree._ElementUnicodeResult: | ||||
|             html_block += str(element) + "<br/>" | ||||
|         else: | ||||
|             html_block += etree.tostring(element, pretty_print=True).decode('utf-8') + "<br/>" | ||||
|  | ||||
|     return html_block | ||||
|  | ||||
| @@ -202,3 +219,17 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str: | ||||
|  | ||||
|     return text_content | ||||
|  | ||||
def workarounds_for_obfuscations(content):
    """
    Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis
    This could go into its own Pip package in the future, for faster updates

    :param content: HTML text to clean up. Falsy values (None, '') are returned unchanged.
    :return: `content` with every whitespace-only HTML comment (e.g. '<!-- -->') removed.
    """
    if not content:
        return content

    # HomeDepot.com style <span>$<!-- -->90<!-- -->.<!-- -->74</span>
    # https://github.com/weblyzard/inscriptis/issues/45
    # Raw string literal: '\s' inside a plain string is an invalid escape sequence.
    return re.sub(r'<!--\s+-->', '', content)
|   | ||||
| @@ -1,29 +1,28 @@ | ||||
| import collections | ||||
| import os | ||||
|  | ||||
| import uuid as uuid_builder | ||||
|  | ||||
| from os import getenv | ||||
| from changedetectionio.notification import ( | ||||
|     default_notification_body, | ||||
|     default_notification_format, | ||||
|     default_notification_title, | ||||
| ) | ||||
|  | ||||
| _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6 | ||||
|  | ||||
| class model(dict): | ||||
|     base_config = { | ||||
|             'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!", | ||||
|             'watching': {}, | ||||
|             'settings': { | ||||
|                 'headers': { | ||||
|                     'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36', | ||||
|                     'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'), | ||||
|                     'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9', | ||||
|                     'Accept-Encoding': 'gzip, deflate',  # No support for brolti in python requests yet. | ||||
|                     'Accept-Language': 'en-GB,en-US;q=0.9,en;' | ||||
|                 }, | ||||
|                 'requests': { | ||||
|                     'timeout': 15,  # Default 15 seconds | ||||
|                     'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds | ||||
|                     'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None}, | ||||
|                     'workers': 10,  # Number of threads, lower is better for slow connections | ||||
|                     'jitter_seconds': 0, | ||||
|                     'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")),  # Number of threads, lower is better for slow connections | ||||
|                     'proxy': None # Preferred proxy connection | ||||
|                 }, | ||||
|                 'application': { | ||||
| @@ -32,7 +31,8 @@ class model(dict): | ||||
|                     'base_url' : None, | ||||
|                     'extract_title_as_title': False, | ||||
|                     'empty_pages_are_a_change': False, | ||||
|                     'fetch_backend': os.getenv("DEFAULT_FETCH_BACKEND", "html_requests"), | ||||
|                     'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"), | ||||
|                     'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT, | ||||
|                     'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum | ||||
|                     'global_subtractive_selectors': [], | ||||
|                     'ignore_whitespace': True, | ||||
|   | ||||
| @@ -1,7 +1,9 @@ | ||||
| import os | ||||
| import uuid as uuid_builder | ||||
| from distutils.util import strtobool | ||||
|  | ||||
| minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60)) | ||||
| mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} | ||||
|  | ||||
| from changedetectionio.notification import ( | ||||
|     default_notification_body, | ||||
| @@ -13,7 +15,6 @@ from changedetectionio.notification import ( | ||||
| class model(dict): | ||||
|     __newest_history_key = None | ||||
|     __history_n=0 | ||||
|  | ||||
|     __base_config = { | ||||
|             'url': None, | ||||
|             'tag': None, | ||||
| @@ -35,18 +36,26 @@ class model(dict): | ||||
|             'notification_title': default_notification_title, | ||||
|             'notification_body': default_notification_body, | ||||
|             'notification_format': default_notification_format, | ||||
|             'css_filter': "", | ||||
|             'notification_muted': False, | ||||
|             'css_filter': '', | ||||
|             'extract_text': [],  # Extract text by regex after filters | ||||
|             'subtractive_selectors': [], | ||||
|             'trigger_text': [],  # List of text or regex to wait for until a change is detected | ||||
|             'text_should_not_be_present': [], # Text that should not present | ||||
|             'fetch_backend': None, | ||||
|             'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')), | ||||
|             'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine. | ||||
|             'extract_title_as_title': False, | ||||
|             'check_unique_lines': False, # On change-detected, compare against all history if its something new | ||||
|             'proxy': None, # Preferred proxy connection | ||||
|             # Re #110, so then if this is set to None, we know to use the default value instead | ||||
|             # Requires setting to None on submit if it's the same as the default | ||||
|             # Should be all None by default, so we use the system default in this case. | ||||
|             'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None}, | ||||
|             'webdriver_delay': None | ||||
|             'webdriver_delay': None, | ||||
|             'webdriver_js_execute_code': None, # Run before change-detection | ||||
|         } | ||||
|     jitter_seconds = 0 | ||||
|  | ||||
|     def __init__(self, *arg, **kw): | ||||
|         import uuid | ||||
| @@ -84,7 +93,7 @@ class model(dict): | ||||
|         # Read the history file as a dict | ||||
|         fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt") | ||||
|         if os.path.isfile(fname): | ||||
|             logging.debug("Disk IO accessed " + str(time.time())) | ||||
|             logging.debug("Reading history index " + str(time.time())) | ||||
|             with open(fname, "r") as f: | ||||
|                 tmp_history = dict(i.strip().split(',', 2) for i in f.readlines()) | ||||
|  | ||||
| @@ -156,9 +165,22 @@ class model(dict): | ||||
|  | ||||
|     def threshold_seconds(self): | ||||
|         seconds = 0 | ||||
|         mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} | ||||
|         for m, n in mtable.items(): | ||||
|             x = self.get('time_between_check', {}).get(m, None) | ||||
|             if x: | ||||
|                 seconds += x * n | ||||
|         return seconds | ||||
|  | ||||
|     # Iterate over all history texts and see if something new exists | ||||
|     def lines_contain_something_unique_compared_to_history(self, lines=[]): | ||||
|         local_lines = set([l.decode('utf-8').strip().lower() for l in lines]) | ||||
|  | ||||
|         # Compare each lines (set) against each history text file (set) looking for something new.. | ||||
|         existing_history = set({}) | ||||
|         for k, v in self.history.items(): | ||||
|             alist = set([line.decode('utf-8').strip().lower() for line in open(v, 'rb')]) | ||||
|             existing_history = existing_history.union(alist) | ||||
|  | ||||
|         # Check that everything in local_lines(new stuff) already exists in existing_history - it should | ||||
|         # if not, something new happened | ||||
|         return not local_lines.issubset(existing_history) | ||||
|   | ||||
| @@ -34,7 +34,6 @@ def process_notification(n_object, datastore): | ||||
|         valid_notification_formats[default_notification_format], | ||||
|     ) | ||||
|  | ||||
|  | ||||
|     # Insert variables into the notification content | ||||
|     notification_parameters = create_notification_parameters(n_object, datastore) | ||||
|  | ||||
| @@ -48,9 +47,10 @@ def process_notification(n_object, datastore): | ||||
|     # Anything higher than or equal to WARNING (which covers things like Connection errors) | ||||
|     # raise it as an exception | ||||
|     apobjs=[] | ||||
|     sent_objs=[] | ||||
|     from .apprise_asset import asset | ||||
|     for url in n_object['notification_urls']: | ||||
|  | ||||
|         apobj = apprise.Apprise(debug=True) | ||||
|         apobj = apprise.Apprise(debug=True, asset=asset) | ||||
|         url = url.strip() | ||||
|         if len(url): | ||||
|             print(">> Process Notification: AppRise notifying {}".format(url)) | ||||
| @@ -63,23 +63,36 @@ def process_notification(n_object, datastore): | ||||
|  | ||||
|                 # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload | ||||
|                 k = '?' if not '?' in url else '&' | ||||
|                 if not 'avatar_url' in url: | ||||
|                 if not 'avatar_url' in url and not url.startswith('mail'): | ||||
|                     url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png' | ||||
|  | ||||
|                 if url.startswith('tgram://'): | ||||
|                     # Telegram only supports a limit subset of HTML, remove the '<br/>' we place in. | ||||
|                     # re https://github.com/dgtlmoon/changedetection.io/issues/555 | ||||
|                     # @todo re-use an existing library we have already imported to strip all non-allowed tags | ||||
|                     n_body = n_body.replace('<br/>', '\n') | ||||
|                     n_body = n_body.replace('</br>', '\n') | ||||
|                     # real limit is 4096, but minus some for extra metadata | ||||
|                     payload_max_size = 3600 | ||||
|                     body_limit = max(0, payload_max_size - len(n_title)) | ||||
|                     n_title = n_title[0:payload_max_size] | ||||
|                     n_body = n_body[0:body_limit] | ||||
|  | ||||
|                 elif url.startswith('discord://'): | ||||
|                 elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks') or url.startswith('https://discord.com/api'): | ||||
|                     # real limit is 2000, but minus some for extra metadata | ||||
|                     payload_max_size = 1700 | ||||
|                     body_limit = max(0, payload_max_size - len(n_title)) | ||||
|                     n_title = n_title[0:payload_max_size] | ||||
|                     n_body = n_body[0:body_limit] | ||||
|  | ||||
|                 elif url.startswith('mailto'): | ||||
|                     # Apprise will default to HTML, so we need to override it | ||||
|                     # So that whats' generated in n_body is in line with what is going to be sent. | ||||
|                     # https://github.com/caronc/apprise/issues/633#issuecomment-1191449321 | ||||
|                     if not 'format=' in url and (n_format == 'text' or n_format == 'markdown'): | ||||
|                         prefix = '?' if not '?' in url else '&' | ||||
|                         url = "{}{}format={}".format(url, prefix, n_format) | ||||
|  | ||||
|                 apobj.add(url) | ||||
|  | ||||
|                 apobj.notify( | ||||
| @@ -96,6 +109,15 @@ def process_notification(n_object, datastore): | ||||
|                 log_value = logs.getvalue() | ||||
|                 if log_value and 'WARNING' in log_value or 'ERROR' in log_value: | ||||
|                     raise Exception(log_value) | ||||
|                  | ||||
|                 sent_objs.append({'title': n_title, | ||||
|                                   'body': n_body, | ||||
|                                   'url' : url, | ||||
|                                   'body_format': n_format}) | ||||
|  | ||||
|     # Return what was sent for better logging - after the for loop | ||||
|     return sent_objs | ||||
|  | ||||
|  | ||||
| # Notification title + body content parameters get created here. | ||||
| def create_notification_parameters(n_object, datastore): | ||||
|   | ||||
							
								
								
									
										42
									
								
								changedetectionio/static/images/bell-off.svg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										42
									
								
								changedetectionio/static/images/bell-off.svg
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,42 @@ | ||||
| <?xml version="1.0" encoding="UTF-8" standalone="no"?> | ||||
| <svg | ||||
|    width="15" | ||||
|    height="16.363636" | ||||
|    viewBox="0 0 15 16.363636" | ||||
|    version="1.1" | ||||
|    id="svg4" | ||||
|    sodipodi:docname="bell-off.svg" | ||||
|    inkscape:version="1.1.1 (1:1.1+202109281949+c3084ef5ed)" | ||||
|    xmlns:inkscape="http://www.inkscape.org/namespaces/inkscape" | ||||
|    xmlns:sodipodi="http://sodipodi.sourceforge.net/DTD/sodipodi-0.dtd" | ||||
|    xmlns="http://www.w3.org/2000/svg" | ||||
|    xmlns:svg="http://www.w3.org/2000/svg"> | ||||
|   <sodipodi:namedview | ||||
|      id="namedview5" | ||||
|      pagecolor="#ffffff" | ||||
|      bordercolor="#666666" | ||||
|      borderopacity="1.0" | ||||
|      inkscape:pageshadow="2" | ||||
|      inkscape:pageopacity="0.0" | ||||
|      inkscape:pagecheckerboard="0" | ||||
|      showgrid="false" | ||||
|      fit-margin-top="0" | ||||
|      fit-margin-left="0" | ||||
|      fit-margin-right="0" | ||||
|      fit-margin-bottom="0" | ||||
|      inkscape:zoom="28.416667" | ||||
|      inkscape:cx="-0.59824046" | ||||
|      inkscape:cy="12" | ||||
|      inkscape:window-width="1554" | ||||
|      inkscape:window-height="896" | ||||
|      inkscape:window-x="2095" | ||||
|      inkscape:window-y="107" | ||||
|      inkscape:window-maximized="0" | ||||
|      inkscape:current-layer="svg4" /> | ||||
|   <defs | ||||
|      id="defs8" /> | ||||
|   <path | ||||
|      d="m 14.318182,11.762045 v 1.1925 H 5.4102273 L 11.849318,7.1140909 C 12.234545,9.1561364 12.54,11.181818 14.318182,11.762045 Z m -6.7984093,4.601591 c 1.0759091,0 2.0256823,-0.955909 2.0256823,-2.045454 H 5.4545455 c 0,1.089545 0.9879545,2.045454 2.0652272,2.045454 z M 15,2.8622727 0.9177273,15.636136 0,14.627045 l 1.8443182,-1.6725 h -1.1625 v -1.1925 C 4.0070455,10.677273 2.1784091,4.5388636 5.3611364,2.6897727 5.8009091,2.4347727 6.0709091,1.9609091 6.0702273,1.4488636 v -0.00205 C 6.0702273,0.64772727 6.7104545,0 7.5,0 8.2895455,0 8.9297727,0.64772727 8.9297727,1.4468182 v 0.00205 C 8.9290909,1.9602319 9.199773,2.4354591 9.638864,2.6897773 10.364318,3.111141 10.827273,3.7568228 11.1525,4.5129591 L 14.085682,1.8531818 Z M 6.8181818,1.3636364 C 6.8181818,1.74 7.1236364,2.0454545 7.5,2.0454545 7.8763636,2.0454545 8.1818182,1.74 8.1818182,1.3636364 8.1818182,0.98795455 7.8763636,0.68181818 7.5,0.68181818 c -0.3763636,0 -0.6818182,0.30613637 -0.6818182,0.68181822 z" | ||||
|      id="path2" | ||||
|      style="fill:#f8321b;stroke-width:0.681818;fill-opacity:1" /> | ||||
| </svg> | ||||
| After Width: | Height: | Size: 2.1 KiB | 
							
								
								
									
										20
									
								
								changedetectionio/static/images/spread-white.svg
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										20
									
								
								changedetectionio/static/images/spread-white.svg
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,20 @@ | ||||
| <?xml version="1.0" encoding="UTF-8" standalone="no"?> | ||||
| <svg | ||||
|    width="18" | ||||
|    height="19.92" | ||||
|    viewBox="0 0 18 19.92" | ||||
|    version="1.1" | ||||
|    id="svg6" | ||||
|    xmlns="http://www.w3.org/2000/svg" | ||||
|    xmlns:svg="http://www.w3.org/2000/svg"> | ||||
|   <defs | ||||
|      id="defs10" /> | ||||
|   <path | ||||
|      d="M -3,-2 H 21 V 22 H -3 Z" | ||||
|      fill="none" | ||||
|      id="path2" /> | ||||
|   <path | ||||
|      d="m 15,14.08 c -0.76,0 -1.44,0.3 -1.96,0.77 L 5.91,10.7 C 5.96,10.47 6,10.24 6,10 6,9.76 5.96,9.53 5.91,9.3 L 12.96,5.19 C 13.5,5.69 14.21,6 15,6 16.66,6 18,4.66 18,3 18,1.34 16.66,0 15,0 c -1.66,0 -3,1.34 -3,3 0,0.24 0.04,0.47 0.09,0.7 L 5.04,7.81 C 4.5,7.31 3.79,7 3,7 1.34,7 0,8.34 0,10 c 0,1.66 1.34,3 3,3 0.79,0 1.5,-0.31 2.04,-0.81 l 7.12,4.16 c -0.05,0.21 -0.08,0.43 -0.08,0.65 0,1.61 1.31,2.92 2.92,2.92 1.61,0 2.92,-1.31 2.92,-2.92 0,-1.61 -1.31,-2.92 -2.92,-2.92 z" | ||||
|      id="path4" | ||||
|      style="fill:#ffffff;fill-opacity:1" /> | ||||
| </svg> | ||||
| After Width: | Height: | Size: 892 B | 
							
								
								
									
										17
									
								
								changedetectionio/static/js/diff-overview.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										17
									
								
								changedetectionio/static/js/diff-overview.js
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,17 @@ | ||||
$(document).ready(function () {
    // Show or hide the settings panel depending on which tab (URL hash) is active.
    // The screenshot image source is only assigned once the #screenshot tab is in use,
    // so we dont give a slow experience when waiting for the text diff to load.
    function toggle(hash_name) {
        var screenshot_tab_active = (hash_name === '#screenshot');

        if (!screenshot_tab_active) {
            $("#settings").show();
            return;
        }

        $("img#screenshot-img").attr('src', screenshot_url);
        $("#settings").hide();
    }

    // Re-evaluate whenever the hash (tab) changes
    window.addEventListener('hashchange', function (e) {
        toggle(location.hash);
    }, false);

    // And once for the hash the page was loaded with
    toggle(location.hash);
});
| @@ -40,13 +40,19 @@ $(document).ready(function() { | ||||
|     $.ajax({ | ||||
|       type: "POST", | ||||
|       url: notification_base_url, | ||||
|       data : data | ||||
|       data : data, | ||||
|         statusCode: { | ||||
|         400: function() { | ||||
|             // More than likely the CSRF token was lost when the server restarted | ||||
|           alert("There was a problem processing the request, please reload the page."); | ||||
|         } | ||||
|       } | ||||
|     }).done(function(data){ | ||||
|       console.log(data); | ||||
|       alert('Sent'); | ||||
|     }).fail(function(data){ | ||||
|       console.log(data); | ||||
|       alert('Error: '+data.responseJSON.error); | ||||
|       alert('There was an error communicating with the server.'); | ||||
|     }) | ||||
|   }); | ||||
| }); | ||||
|   | ||||
| @@ -49,6 +49,8 @@ $(document).ready(function() { | ||||
|         } | ||||
|         state_clicked=false; | ||||
|         ctx.clearRect(0, 0, c.width, c.height); | ||||
|         xctx.clearRect(0, 0, c.width, c.height); | ||||
|         $("#css_filter").val(''); | ||||
|     }); | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -1,13 +1,30 @@ | ||||
| $(document).ready(function() { | ||||
|     function toggle() { | ||||
|         if ($('input[name="fetch_backend"]:checked').val() != 'html_requests') { | ||||
|             $('#requests-override-options').hide(); | ||||
|         if ($('input[name="fetch_backend"]:checked').val() == 'html_webdriver') { | ||||
|             if(playwright_enabled) { | ||||
|                 // playwright supports headers, so hide everything else | ||||
|                 // See #664 | ||||
|                 $('#requests-override-options #request-method').hide(); | ||||
|                 $('#requests-override-options #request-body').hide(); | ||||
|  | ||||
|                 // @todo connect this one up | ||||
|                 $('#ignore-status-codes-option').hide(); | ||||
|             } else { | ||||
|                 // selenium/webdriver doesnt support anything afaik, hide it all | ||||
|                 $('#requests-override-options').hide(); | ||||
|             } | ||||
|  | ||||
|  | ||||
|             $('#webdriver-override-options').show(); | ||||
|  | ||||
|         } else { | ||||
|  | ||||
|             $('#requests-override-options').show(); | ||||
|             $('#requests-override-options *:hidden').show(); | ||||
|             $('#webdriver-override-options').hide(); | ||||
|         } | ||||
|     } | ||||
|  | ||||
|     $('input[name="fetch_backend"]').click(function (e) { | ||||
|         toggle(); | ||||
|     }); | ||||
|   | ||||
							
								
								
									
										2
									
								
								changedetectionio/static/styles/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								changedetectionio/static/styles/.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -1 +1,3 @@ | ||||
| node_modules | ||||
| package-lock.json | ||||
|  | ||||
|   | ||||
							
								
								
									
										3719
									
								
								changedetectionio/static/styles/package-lock.json
									
									
									
										generated
									
									
									
								
							
							
						
						
									
										3719
									
								
								changedetectionio/static/styles/package-lock.json
									
									
									
										generated
									
									
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							| @@ -1,9 +1,7 @@ | ||||
| /* | ||||
|  * -- BASE STYLES -- | ||||
|  * Most of these are inherited from Base, but I want to change a few. | ||||
|  * nvm use v14.18.1 | ||||
|  * npm install | ||||
|  * npm run build | ||||
|  * nvm use v14.18.1 && npm install && npm run build | ||||
|  * or npm run watch | ||||
|  */ | ||||
| body { | ||||
| @@ -203,13 +201,18 @@ body:after, body:before { | ||||
|   border-radius: 10px; | ||||
|   margin-bottom: 1em; } | ||||
|   #new-watch-form input { | ||||
|     width: auto !important; | ||||
|     display: inline-block; } | ||||
|     display: inline-block; | ||||
|     margin-bottom: 5px; } | ||||
|   #new-watch-form .label { | ||||
|     display: none; } | ||||
|   #new-watch-form legend { | ||||
|     color: #fff; | ||||
|     font-weight: bold; } | ||||
|   #new-watch-form #watch-add-wrapper-zone > div { | ||||
|     display: inline-block; } | ||||
|   @media only screen and (max-width: 760px) { | ||||
|     #new-watch-form #watch-add-wrapper-zone #url { | ||||
|       width: 100%; } } | ||||
|  | ||||
| #diff-col { | ||||
|   padding-left: 40px; } | ||||
| @@ -268,11 +271,15 @@ footer { | ||||
| #new-version-text a { | ||||
|   color: #e07171; } | ||||
|  | ||||
| .paused-state.state-False img { | ||||
|   opacity: 0.2; } | ||||
|  | ||||
| .paused-state.state-False:hover img { | ||||
|   opacity: 0.8; } | ||||
| .watch-controls { | ||||
|   /* default */ } | ||||
|   .watch-controls .state-on img { | ||||
|     opacity: 0.8; } | ||||
|   .watch-controls img { | ||||
|     opacity: 0.2; } | ||||
|   .watch-controls img:hover { | ||||
|     transition: opacity 0.3s; | ||||
|     opacity: 0.8; } | ||||
|  | ||||
| .monospaced-textarea textarea { | ||||
|   width: 100%; | ||||
| @@ -353,6 +360,8 @@ and also iPads specifically. | ||||
|     /* Hide table headers (but not display: none;, for accessibility) */ } | ||||
|     .watch-table thead, .watch-table tbody, .watch-table th, .watch-table td, .watch-table tr { | ||||
|       display: block; } | ||||
|     .watch-table .last-checked > span { | ||||
|       vertical-align: middle; } | ||||
|     .watch-table .last-checked::before { | ||||
|       color: #555; | ||||
|       content: "Last Checked "; } | ||||
| @@ -370,7 +379,8 @@ and also iPads specifically. | ||||
|     .watch-table td { | ||||
|       /* Behave  like a "row" */ | ||||
|       border: none; | ||||
|       border-bottom: 1px solid #eee; } | ||||
|       border-bottom: 1px solid #eee; | ||||
|       vertical-align: middle; } | ||||
|       .watch-table td:before { | ||||
|         /* Top/left values mimic padding */ | ||||
|         top: 6px; | ||||
| @@ -490,3 +500,42 @@ ul { | ||||
|  | ||||
| #api-key-copy { | ||||
|   color: #0078e7; } | ||||
|  | ||||
| /* spinner */ | ||||
| .loader, | ||||
| .loader:after { | ||||
|   border-radius: 50%; | ||||
|   width: 10px; | ||||
|   height: 10px; } | ||||
|  | ||||
| .loader { | ||||
|   margin: 0px auto; | ||||
|   font-size: 3px; | ||||
|   vertical-align: middle; | ||||
|   display: inline-block; | ||||
|   text-indent: -9999em; | ||||
|   border-top: 1.1em solid rgba(38, 104, 237, 0.2); | ||||
|   border-right: 1.1em solid rgba(38, 104, 237, 0.2); | ||||
|   border-bottom: 1.1em solid rgba(38, 104, 237, 0.2); | ||||
|   border-left: 1.1em solid #2668ed; | ||||
|   -webkit-transform: translateZ(0); | ||||
|   -ms-transform: translateZ(0); | ||||
|   transform: translateZ(0); | ||||
|   -webkit-animation: load8 1.1s infinite linear; | ||||
|   animation: load8 1.1s infinite linear; } | ||||
|  | ||||
| @-webkit-keyframes load8 { | ||||
|   0% { | ||||
|     -webkit-transform: rotate(0deg); | ||||
|     transform: rotate(0deg); } | ||||
|   100% { | ||||
|     -webkit-transform: rotate(360deg); | ||||
|     transform: rotate(360deg); } } | ||||
|  | ||||
| @keyframes load8 { | ||||
|   0% { | ||||
|     -webkit-transform: rotate(0deg); | ||||
|     transform: rotate(0deg); } | ||||
|   100% { | ||||
|     -webkit-transform: rotate(360deg); | ||||
|     transform: rotate(360deg); } } | ||||
|   | ||||
| @@ -1,9 +1,7 @@ | ||||
| /* | ||||
|  * -- BASE STYLES -- | ||||
|  * Most of these are inherited from Base, but I want to change a few. | ||||
|  * nvm use v14.18.1 | ||||
|  * npm install | ||||
|  * npm run build | ||||
|  * nvm use v14.18.1 && npm install && npm run build | ||||
|  * or npm run watch | ||||
|  */ | ||||
| body { | ||||
| @@ -269,8 +267,8 @@ body:after, body:before { | ||||
|   border-radius: 10px; | ||||
|   margin-bottom: 1em; | ||||
|   input { | ||||
|     width: auto !important; | ||||
|     display: inline-block; | ||||
|     margin-bottom: 5px; | ||||
|   } | ||||
|   .label { | ||||
|     display: none; | ||||
| @@ -279,6 +277,17 @@ body:after, body:before { | ||||
|     color: #fff; | ||||
|     font-weight: bold; | ||||
|   } | ||||
|  | ||||
|   #watch-add-wrapper-zone { | ||||
|     > div { | ||||
|       display: inline-block; | ||||
|     } | ||||
|     @media only screen and (max-width: 760px) { | ||||
|       #url { | ||||
|         width: 100%; | ||||
|       } | ||||
|     } | ||||
|   } | ||||
| } | ||||
|  | ||||
|  | ||||
| @@ -353,14 +362,25 @@ footer { | ||||
|   color: #e07171; | ||||
| } | ||||
|  | ||||
| .paused-state { | ||||
|   &.state-False img { | ||||
| .watch-controls { | ||||
|   .state-on { | ||||
|     img { | ||||
|       opacity: 0.8; | ||||
|     } | ||||
|   } | ||||
|  | ||||
|   /* default */ | ||||
|   img { | ||||
|     opacity: 0.2; | ||||
|   } | ||||
|  | ||||
|   &.state-False:hover img { | ||||
|     opacity: 0.8; | ||||
|   img { | ||||
|     &:hover { | ||||
|       transition: opacity 0.3s; | ||||
|       opacity: 0.8; | ||||
|     } | ||||
|   } | ||||
|  | ||||
| } | ||||
|  | ||||
| .monospaced-textarea { | ||||
| @@ -487,6 +507,11 @@ and also iPads specifically. | ||||
|       display: block; | ||||
|     } | ||||
|  | ||||
|     .last-checked { | ||||
|       > span { | ||||
|         vertical-align: middle; | ||||
|       } | ||||
|     } | ||||
|     .last-checked::before { | ||||
|       color: #555; | ||||
|       content: "Last Checked "; | ||||
| @@ -517,7 +542,7 @@ and also iPads specifically. | ||||
|       /* Behave  like a "row" */ | ||||
|       border: none; | ||||
|       border-bottom: 1px solid #eee; | ||||
|  | ||||
|       vertical-align: middle; | ||||
|       &:before { | ||||
|         /* Top/left values mimic padding */ | ||||
|         top: 6px; | ||||
| @@ -701,3 +726,48 @@ ul { | ||||
| #api-key-copy { | ||||
|   color: #0078e7; | ||||
| } | ||||
|  | ||||
| /* spinner */ | ||||
| .loader, | ||||
| .loader:after { | ||||
|   border-radius: 50%; | ||||
|   width: 10px; | ||||
|   height: 10px; | ||||
| } | ||||
| .loader { | ||||
|   margin: 0px auto; | ||||
|   font-size: 3px; | ||||
|   vertical-align: middle; | ||||
|   display: inline-block; | ||||
|   text-indent: -9999em; | ||||
|   border-top: 1.1em solid rgba(38,104,237, 0.2); | ||||
|   border-right: 1.1em solid rgba(38,104,237, 0.2); | ||||
|   border-bottom: 1.1em solid rgba(38,104,237, 0.2); | ||||
|   border-left: 1.1em solid #2668ed; | ||||
|   -webkit-transform: translateZ(0); | ||||
|   -ms-transform: translateZ(0); | ||||
|   transform: translateZ(0); | ||||
|   -webkit-animation: load8 1.1s infinite linear; | ||||
|   animation: load8 1.1s infinite linear; | ||||
| } | ||||
| @-webkit-keyframes load8 { | ||||
|   0% { | ||||
|     -webkit-transform: rotate(0deg); | ||||
|     transform: rotate(0deg); | ||||
|   } | ||||
|   100% { | ||||
|     -webkit-transform: rotate(360deg); | ||||
|     transform: rotate(360deg); | ||||
|   } | ||||
| } | ||||
| @keyframes load8 { | ||||
|   0% { | ||||
|     -webkit-transform: rotate(0deg); | ||||
|     transform: rotate(0deg); | ||||
|   } | ||||
|   100% { | ||||
|     -webkit-transform: rotate(360deg); | ||||
|     transform: rotate(360deg); | ||||
|   } | ||||
| } | ||||
|  | ||||
|   | ||||
| @@ -158,13 +158,11 @@ class ChangeDetectionStore: | ||||
|     @property | ||||
|     def threshold_seconds(self): | ||||
|         seconds = 0 | ||||
|         mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7} | ||||
|         minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60)) | ||||
|         for m, n in mtable.items(): | ||||
|         for m, n in Watch.mtable.items(): | ||||
|             x = self.__data['settings']['requests']['time_between_check'].get(m) | ||||
|             if x: | ||||
|                 seconds += x * n | ||||
|         return max(seconds, minimum_seconds_recheck_time) | ||||
|         return seconds | ||||
|  | ||||
|     @property | ||||
|     def has_unviewed(self): | ||||
| @@ -251,15 +249,26 @@ class ChangeDetectionStore: | ||||
|         return self.data['watching'][uuid].get(val) | ||||
|  | ||||
|     # Remove a watch's data but keep the entry (URL etc) | ||||
|     def scrub_watch(self, uuid): | ||||
|     def clear_watch_history(self, uuid): | ||||
|         import pathlib | ||||
|  | ||||
|         self.__data['watching'][uuid].update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': False}) | ||||
|         self.needs_write_urgent = True | ||||
|         self.__data['watching'][uuid].update( | ||||
|             {'last_checked': 0, | ||||
|              'last_changed': 0, | ||||
|              'last_viewed': 0, | ||||
|              'previous_md5': False, | ||||
|              'last_notification_error': False, | ||||
|              'last_error': False}) | ||||
|  | ||||
|         for item in pathlib.Path(self.datastore_path).rglob(uuid+"/*.txt"): | ||||
|         # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc | ||||
|         for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"): | ||||
|             unlink(item) | ||||
|  | ||||
|         # Force the attr to recalculate | ||||
|         bump = self.__data['watching'][uuid].history | ||||
|  | ||||
|         self.needs_write_urgent = True | ||||
|  | ||||
|     def add_watch(self, url, tag="", extras=None, write_to_disk_now=True): | ||||
|  | ||||
|         if extras is None: | ||||
| @@ -280,14 +289,16 @@ class ChangeDetectionStore: | ||||
|                                      headers={'App-Guid': self.__data['app_guid']}) | ||||
|                 res = r.json() | ||||
|  | ||||
|                 # List of permisable stuff we accept from the wild internet | ||||
|                 # List of permissible attributes we accept from the wild internet | ||||
|                 for k in ['url', 'tag', | ||||
|                                    'paused', 'title', | ||||
|                                    'previous_md5', 'headers', | ||||
|                                    'body', 'method', | ||||
|                                    'ignore_text', 'css_filter', | ||||
|                                    'subtractive_selectors', 'trigger_text', | ||||
|                                    'extract_title_as_title']: | ||||
|                           'paused', 'title', | ||||
|                           'previous_md5', 'headers', | ||||
|                           'body', 'method', | ||||
|                           'ignore_text', 'css_filter', | ||||
|                           'subtractive_selectors', 'trigger_text', | ||||
|                           'extract_title_as_title', 'extract_text', | ||||
|                           'text_should_not_be_present', | ||||
|                           'webdriver_js_execute_code']: | ||||
|                     if res.get(k): | ||||
|                         apply_extras[k] = res[k] | ||||
|  | ||||
| @@ -507,3 +518,11 @@ class ChangeDetectionStore: | ||||
|                 # But we should set it back to an empty dict so we don't break if this schema runs on an earlier version. | ||||
|                 # In the distant future we can remove this entirely | ||||
|                 self.data['watching'][uuid]['history'] = {} | ||||
|  | ||||
|     # We incorrectly stored last_changed when there was no change, which then confused the output list table | ||||
|     def update_3(self): | ||||
|         for uuid, watch in self.data['watching'].items(): | ||||
|             # Be sure it's recalculated | ||||
|             p = watch.history | ||||
|             if watch.history_n < 2: | ||||
|                 watch['last_changed'] = 0 | ||||
|   | ||||
| @@ -14,7 +14,7 @@ | ||||
|                                 <li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li> | ||||
|                                 <li><code>discord://</code> only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li> | ||||
|                                 <li><code>tgram://</code> bots cant send messages to other bots, so you should specify chat ID of non-bot user.</li> | ||||
|                                 <li>Go here for <a href="{{url_for('notification_logs')}}">notification debug logs</a></li> | ||||
|                                 <li><code>tgram://</code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li> | ||||
|                               </ul> | ||||
|                             </div> | ||||
|                             <br/> | ||||
| @@ -22,6 +22,7 @@ | ||||
| {% if emailprefix %} | ||||
|                             <a id="add-email-helper" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Add email</a> | ||||
| {% endif %} | ||||
|                             <a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Notification debug logs</a> | ||||
|                         </div> | ||||
|                         <div id="notification-customisation" class="pure-control-group"> | ||||
|                             <div class="pure-control-group"> | ||||
|   | ||||
| @@ -3,22 +3,22 @@ | ||||
| {% block content %} | ||||
| <div class="edit-form"> | ||||
|     <div class="box-wrap inner"> | ||||
|     <form class="pure-form pure-form-stacked" action="{{url_for('scrub_page')}}" method="POST"> | ||||
|     <form class="pure-form pure-form-stacked" action="{{url_for('clear_all_history')}}" method="POST"> | ||||
|         <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/> | ||||
|         <fieldset> | ||||
|             <div class="pure-control-group"> | ||||
|                 This will remove ALL version snapshots/data, but keep your list of URLs. <br/> | ||||
|                 This will remove version history (snapshots) for ALL watches, but keep your list of URLs! <br/> | ||||
|                 You may like to use the <strong>BACKUP</strong> link first.<br/> | ||||
|             </div> | ||||
|             <br/> | ||||
|             <div class="pure-control-group"> | ||||
|                 <label for="confirmtext">Confirmation text</label> | ||||
|                 <input type="text" id="confirmtext" required="" name="confirmtext" value="" size="10"/> | ||||
|                 <span class="pure-form-message-inline">Type in the word <strong>scrub</strong> to confirm that you understand!</span> | ||||
|                 <span class="pure-form-message-inline">Type in the word <strong>clear</strong> to confirm that you understand.</span> | ||||
|             </div> | ||||
|             <br/> | ||||
|             <div class="pure-control-group"> | ||||
|                 <button type="submit" class="pure-button pure-button-primary">Scrub!</button> | ||||
|                 <button type="submit" class="pure-button pure-button-primary">Clear History!</button> | ||||
|             </div> | ||||
|             <br/> | ||||
|             <div class="pure-control-group"> | ||||
| @@ -1,6 +1,11 @@ | ||||
| {% extends 'base.html' %} | ||||
|  | ||||
| {% block content %} | ||||
| <script> | ||||
|     const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}"; | ||||
| </script> | ||||
| <script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script> | ||||
|  | ||||
| <div id="settings"> | ||||
|     <h1>Differences</h1> | ||||
|     <form class="pure-form " action="" method="GET"> | ||||
| @@ -17,7 +22,7 @@ | ||||
|             {% if versions|length >= 1 %} | ||||
|             <label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label> | ||||
|             <select id="diff-version" name="previous_version"> | ||||
|                 {% for version in versions %} | ||||
|                 {% for version in versions|reverse %} | ||||
|                 <option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}> | ||||
|                     {{version}} | ||||
|                 </option> | ||||
| @@ -39,6 +44,7 @@ | ||||
| <div class="tabs"> | ||||
|     <ul> | ||||
|         <li class="tab" id="default-tab"><a href="#text">Text</a></li> | ||||
|         <li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li> | ||||
|     </ul> | ||||
| </div> | ||||
|  | ||||
| @@ -60,6 +66,21 @@ | ||||
|          </table> | ||||
|          Diff algorithm from the amazing <a href="https://github.com/kpdecker/jsdiff">github.com/kpdecker/jsdiff</a> | ||||
|      </div> | ||||
|      <div class="tab-pane-inner" id="screenshot"> | ||||
|          <div class="tip"> | ||||
|              For now, Differences are performed on text, not graphically, only the latest screenshot is available. | ||||
|          </div> | ||||
|          </br> | ||||
|          {% if is_html_webdriver %} | ||||
|            {% if screenshot %} | ||||
|              <img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/> | ||||
|            {% else %} | ||||
|               No screenshot available just yet! Try rechecking the page. | ||||
|            {% endif %} | ||||
|          {% else %} | ||||
|            <strong>Screenshot requires Playwright/WebDriver enabled</strong> | ||||
|          {% endif %} | ||||
|      </div> | ||||
| </div> | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -7,6 +7,7 @@ | ||||
|     const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}"; | ||||
|     const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}"; | ||||
|     const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}"; | ||||
|     const playwright_enabled={% if playwright_enabled %} true {% else %} false {% endif %}; | ||||
|  | ||||
| {% if emailprefix %} | ||||
|     const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}'); | ||||
| @@ -24,7 +25,7 @@ | ||||
|         <ul> | ||||
|             <li class="tab" id="default-tab"><a href="#general">General</a></li> | ||||
|             <li class="tab"><a href="#request">Request</a></li> | ||||
|             <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Selector</a></li> | ||||
|             <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li> | ||||
|             <li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li> | ||||
|             <li class="tab"><a href="#notifications">Notifications</a></li> | ||||
|         </ul> | ||||
| @@ -32,7 +33,7 @@ | ||||
|  | ||||
|     <div class="box-wrap inner"> | ||||
|         <form class="pure-form pure-form-stacked" | ||||
|               action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next') ) }}" method="POST"> | ||||
|               action="{{ url_for('edit_page', uuid=uuid, next = request.args.get('next'), unpause_on_save = request.args.get('unpause_on_save')) }}" method="POST"> | ||||
|              <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/> | ||||
|  | ||||
|             <div class="tab-pane-inner" id="general"> | ||||
| @@ -61,6 +62,12 @@ | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.extract_title_as_title) }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.filter_failure_notification_send) }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                          Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore. | ||||
|                         </span> | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|             </div> | ||||
|  | ||||
| @@ -81,33 +88,39 @@ | ||||
|                     </div> | ||||
|                 {% endif %} | ||||
|                 <fieldset id="webdriver-override-options"> | ||||
|                     <div class="pure-form-message-inline"> | ||||
|                         <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong> | ||||
|                         <br/> | ||||
|                         This will wait <i>n</i> seconds before extracting the text. | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.webdriver_delay) }} | ||||
|                         <div class="pure-form-message-inline"> | ||||
|                             <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong> | ||||
|                             <br/> | ||||
|                             This will wait <i>n</i> seconds before extracting the text. | ||||
|                             {% if using_global_webdriver_wait %} | ||||
|                             <br/><strong>Using the current global default settings</strong> | ||||
|                             {% endif %} | ||||
|                         </div> | ||||
|                     </div> | ||||
|                     {% if using_global_webdriver_wait %} | ||||
|                     <div class="pure-form-message-inline"> | ||||
|                         <strong>Using the current global default settings</strong> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.webdriver_js_execute_code) }} | ||||
|                         <div class="pure-form-message-inline"> | ||||
|                             Run this code before performing change detection, handy for filling in fields and other actions <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Run-JavaScript-before-change-detection">More help and examples here</a> | ||||
|                         </div> | ||||
|                     </div> | ||||
|                     {% endif %} | ||||
|                 </fieldset> | ||||
|                 <fieldset class="pure-group" id="requests-override-options"> | ||||
|                     <div class="pure-form-message-inline"> | ||||
|                         <strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                     {% if not playwright_enabled %} | ||||
|                         <div class="pure-form-message-inline"> | ||||
|                             <strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong> | ||||
|                         </div> | ||||
|                     {% endif %} | ||||
|                     <div class="pure-control-group" id="request-method"> | ||||
|                         {{ render_field(form.method) }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                     <div class="pure-control-group" id="request-headers"> | ||||
| {{ render_field(form.headers, rows=5, placeholder="Example | ||||
| Cookie: foobar | ||||
| User-Agent: wonderbra 1.0") }} | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                     <div class="pure-control-group" id="request-body"> | ||||
|                                         {{ render_field(form.body, rows=5, placeholder="Example | ||||
| { | ||||
|    \"name\":\"John\", | ||||
| @@ -115,7 +128,7 @@ User-Agent: wonderbra 1.0") }} | ||||
|    \"car\":null | ||||
| }") }} | ||||
|                     </div> | ||||
|                     <div> | ||||
|                     <div id="ignore-status-codes-option"> | ||||
|                         {{ render_checkbox_field(form.ignore_status_codes) }} | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
| @@ -143,16 +156,33 @@ User-Agent: wonderbra 1.0") }} | ||||
|                                 </li> | ||||
|                             </ul> | ||||
|                     </div> | ||||
|                     <fieldset> | ||||
|                         <div class="pure-control-group"> | ||||
|                             {{ render_checkbox_field(form.check_unique_lines) }} | ||||
|                             <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span> | ||||
|                         </div> | ||||
|                     </fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.", | ||||
|                         class="m-d") }} | ||||
|                         {% set field = render_field(form.css_filter, | ||||
|                             placeholder=".class-name or #some-id, or other CSS selector rule.", | ||||
|                             class="m-d") | ||||
|                         %} | ||||
|                         {{ field }} | ||||
|                         {% if '/text()' in  field %} | ||||
|                           <span class="pure-form-message-inline"><strong>Note!: //text() function does not work where the <element> contains <![CDATA[]]></strong></span><br/> | ||||
|                         {% endif %} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                     <ul> | ||||
|                         <li>CSS - Limit text to this CSS rule, only text matching this CSS rule is included.</li> | ||||
|                         <li>JSON - Limit text to this JSON rule, using <a href="https://pypi.org/project/jsonpath-ng/">JSONPath</a>, prefix with <code>"json:"</code>, use <code>json:$</code> to force re-formatting if required,  <a | ||||
|                                 href="https://jsonpath.com/" target="new">test your JSONPath here</a></li> | ||||
|                         <li>XPath - Limit text to this XPath rule, simply start with a forward-slash, example  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a | ||||
|                         <li>XPath - Limit text to this XPath rule, simply start with a forward-slash, | ||||
|                             <ul> | ||||
|                                 <li>Example:  <code>//*[contains(@class, 'sametext')]</code> or <code>xpath://*[contains(@class, 'sametext')]</code>, <a | ||||
|                                 href="http://xpather.com/" target="new">test your XPath here</a></li> | ||||
|                                 <li>Example: Get all titles from an RSS feed <code>//title/text()</code></li> | ||||
|                             </ul> | ||||
|                             </li> | ||||
|                     </ul> | ||||
|                     Please be sure that you thoroughly understand how to write CSS or JSONPath, XPath selector rules before filing an issue on GitHub! <a | ||||
|                                 href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br/> | ||||
| @@ -177,7 +207,7 @@ nav | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         <ul> | ||||
|                             <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> | ||||
|                             <li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li> | ||||
|                             <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li> | ||||
|                             <li>Changing this will affect the comparison checksum which may trigger an alert</li> | ||||
|                             <li>Use the preview/show current tab to see ignores</li> | ||||
|                         </ul> | ||||
| @@ -199,11 +229,45 @@ nav | ||||
|                         </span> | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock | ||||
| Sold out | ||||
| Not in stock | ||||
| Unavailable") }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                             <ul> | ||||
|                                 <li>Block change-detection while this text is on the page, all text and regex are tested <i>case-insensitive</i>, good for waiting for when a product is available again</li> | ||||
|                                 <li>Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li> | ||||
|                                 <li>All lines here must not exist (think of each line as "OR")</li> | ||||
|                                 <li>Note: Wrap in forward slash / to use regex  example: <code>/foo\d/</code></li> | ||||
|                             </ul> | ||||
|                         </span> | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|                     <ul> | ||||
|                         <li>Extracts text in the final output (line by line) after other filters using regular expressions; | ||||
|                             <ul> | ||||
|                                 <li>Regular expression ‐ example <code>/reports.+?2022/i</code></li> | ||||
|                                 <li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br/></li> | ||||
|                                 <li>Keyword example ‐ example <code>Out of stock</code></li> | ||||
|                                 <li>Use groups to extract just that text ‐ example <code>/reports.+?(\d+)/i</code> returns a list of years only</li> | ||||
|                             </ul> | ||||
|                         </li> | ||||
|                         <li>One line per regular-expression/ string match</li> | ||||
|                     </ul> | ||||
|                         </span> | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|             </div> | ||||
|  | ||||
|             <div class="tab-pane-inner visual-selector-ui" id="visualselector"> | ||||
|                 <img id="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}"> | ||||
|  | ||||
|                 <strong>Pro-tip:</strong> This tool is only for limiting which elements will be included on a change-detection, not for interacting with browser directly. | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% if visualselector_enabled %} | ||||
| @@ -243,11 +307,11 @@ nav | ||||
|  | ||||
|             <div id="actions"> | ||||
|                 <div class="pure-control-group"> | ||||
|  | ||||
|                       {{ render_button(form.save_button) }} {{ render_button(form.save_and_preview_button) }} | ||||
|  | ||||
|                     {{ render_button(form.save_button) }} | ||||
|                     <a href="{{url_for('form_delete', uuid=uuid)}}" | ||||
|                        class="pure-button button-small button-error ">Delete</a> | ||||
|                     <a href="{{url_for('clear_watch_history', uuid=uuid)}}" | ||||
|                        class="pure-button button-small button-error ">Clear History</a> | ||||
|                     <a href="{{url_for('form_clone', uuid=uuid)}}" | ||||
|                        class="pure-button button-small ">Create Copy</a> | ||||
|                 </div> | ||||
|   | ||||
| @@ -4,7 +4,7 @@ | ||||
| <div class="edit-form"> | ||||
|      <div class="inner"> | ||||
|  | ||||
|          <h4 style="margin-top: 0px;">The following issues were detected when sending notifications</h4> | ||||
|          <h4 style="margin-top: 0px;">Notification debug log</h4> | ||||
|                 <div id="notification-error-log"> | ||||
|                 <ul style="font-size: 80%; margin:0px; padding: 0 0 0 7px"> | ||||
|                 {% for log in logs|reverse %} | ||||
|   | ||||
| @@ -1,6 +1,10 @@ | ||||
| {% extends 'base.html' %} | ||||
|  | ||||
| {% block content %} | ||||
| <script> | ||||
|     const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}"; | ||||
| </script> | ||||
| <script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script> | ||||
|  | ||||
| <div id="settings"> | ||||
|     <h1>Current - {{watch.last_checked|format_timestamp_timeago}}</h1> | ||||
| @@ -10,6 +14,7 @@ | ||||
| <div class="tabs"> | ||||
|     <ul> | ||||
|         <li class="tab" id="default-tab"><a href="#text">Text</a></li> | ||||
|         <li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li> | ||||
|     </ul> | ||||
| </div> | ||||
|  | ||||
| @@ -28,5 +33,20 @@ | ||||
|             </tbody> | ||||
|         </table> | ||||
|     </div> | ||||
|      <div class="tab-pane-inner" id="screenshot"> | ||||
|          <div class="tip"> | ||||
|              For now, Differences are performed on text, not graphically, only the latest screenshot is available. | ||||
|          </div> | ||||
|          </br> | ||||
|          {% if is_html_webdriver %} | ||||
|            {% if screenshot %} | ||||
|              <img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/> | ||||
|            {% else %} | ||||
|               No screenshot available just yet! Try rechecking the page. | ||||
|            {% endif %} | ||||
|          {% else %} | ||||
|            <strong>Screenshot requires Playwright/WebDriver enabled</strong> | ||||
|          {% endif %} | ||||
|      </div> | ||||
| </div> | ||||
| {% endblock %} | ||||
| @@ -32,6 +32,17 @@ | ||||
|                         {{ render_field(form.requests.form.time_between_check, class="time-check-widget") }} | ||||
|                         <span class="pure-form-message-inline">Default time for all watches, when the watch does not have a specific time setting.</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }} | ||||
|                         <span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }} | ||||
|                         <span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification | ||||
|                             <br/> | ||||
|                         Set to <strong>0</strong> to disable | ||||
|                         </span> | ||||
|                     </div> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% if not hide_remove_pass %} | ||||
|                             {% if current_user.is_authenticated %} | ||||
| @@ -143,7 +154,7 @@ nav | ||||
|                         <ul> | ||||
|                             <li>Note: This is applied globally in addition to the per-watch rules.</li> | ||||
|                             <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li> | ||||
|                             <li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li> | ||||
|                             <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li> | ||||
|                             <li>Changing this will affect the comparison checksum which may trigger an alert</li> | ||||
|                             <li>Use the preview/show current tab to see ignores</li> | ||||
|                         </ul> | ||||
| @@ -168,7 +179,7 @@ nav | ||||
|                 <div class="pure-control-group"> | ||||
|                     {{ render_button(form.save_button) }} | ||||
|                     <a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a> | ||||
|                     <a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete History Snapshot Data</a> | ||||
|                     <a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-cancel">Clear Snapshot History</a> | ||||
|                 </div> | ||||
|  | ||||
|             </div> | ||||
|   | ||||
| @@ -1,20 +1,27 @@ | ||||
| {% extends 'base.html' %} | ||||
| {% block content %} | ||||
| {% from '_helpers.jinja' import render_simple_field %} | ||||
| {% from '_helpers.jinja' import render_simple_field, render_field %} | ||||
| <script type="text/javascript" src="{{url_for('static_content', group='js', filename='jquery-3.6.0.min.js')}}"></script> | ||||
| <script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-overview.js')}}" defer></script> | ||||
|  | ||||
| <div class="box"> | ||||
|  | ||||
|     <form class="pure-form" action="{{ url_for('form_watch_add') }}" method="POST" id="new-watch-form"> | ||||
|     <form class="pure-form" action="{{ url_for('form_quick_watch_add') }}" method="POST" id="new-watch-form"> | ||||
|         <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/> | ||||
|         <fieldset> | ||||
|             <legend>Add a new change detection watch</legend> | ||||
|                 {{ render_simple_field(form.url, placeholder="https://...", required=true) }} | ||||
|                 {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }} | ||||
|             <button type="submit" class="pure-button pure-button-primary">Watch</button> | ||||
|             <div id="watch-add-wrapper-zone"> | ||||
|                 <div> | ||||
|                     {{ render_simple_field(form.url, placeholder="https://...", required=true) }} | ||||
|                     {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }} | ||||
|                 </div> | ||||
|                 <div> | ||||
|                     {{ render_simple_field(form.watch_submit_button, title="Watch this URL!" ) }} | ||||
|                     {{ render_simple_field(form.edit_and_watch_submit_button, title="Edit first then Watch") }} | ||||
|                 </div> | ||||
|             </div> | ||||
|         </fieldset> | ||||
|         <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span> | ||||
|         <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span> | ||||
|     </form> | ||||
|     <div> | ||||
|         <a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a> | ||||
| @@ -40,7 +47,7 @@ | ||||
|             <tbody> | ||||
|  | ||||
|  | ||||
|             {% for watch in watches %} | ||||
|             {% for watch in watches|sort(attribute='last_changed', reverse=True) %} | ||||
|             <tr id="{{ watch.uuid }}" | ||||
|                 class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }} | ||||
|                 {% if watch.last_error is defined and watch.last_error != False %}error{% endif %} | ||||
| @@ -49,8 +56,10 @@ | ||||
|                 {% if watch.newest_history_key| int > watch.last_viewed and watch.history_n>=2 %}unviewed{% endif %} | ||||
|                 {% if watch.uuid in queued_uuids %}queued{% endif %}"> | ||||
|                 <td class="inline">{{ loop.index }}</td> | ||||
|                 <td class="inline paused-state state-{{watch.paused}}"><a href="{{url_for('index', pause=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause" title="Pause"/></a></td> | ||||
|  | ||||
|                 <td class="inline watch-controls"> | ||||
|                     <a class="state-{{'on' if watch.paused }}" href="{{url_for('index', op='pause', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='pause.svg')}}" alt="Pause checks" title="Pause checks"/></a> | ||||
|                     <a class="state-{{'on' if watch.notification_muted}}" href="{{url_for('index', op='mute', uuid=watch.uuid, tag=active_tag)}}"><img src="{{url_for('static_content', group='images', filename='bell-off.svg')}}" alt="Mute notifications" title="Mute notifications"/></a> | ||||
|                 </td> | ||||
|                 <td class="title-col inline">{{watch.title if watch.title is not none and watch.title|length > 0 else watch.url}} | ||||
|                     <a class="external" target="_blank" rel="noopener" href="{{ watch.url.replace('source:','') }}"></a> | ||||
|                     <a href="{{url_for('form_share_put_watch', uuid=watch.uuid)}}"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /></a> | ||||
| @@ -67,8 +76,8 @@ | ||||
|                     <span class="watch-tag-list">{{ watch.tag}}</span> | ||||
|                     {% endif %} | ||||
|                 </td> | ||||
|                 <td class="last-checked">{{watch|format_last_checked_time}}</td> | ||||
|                 <td class="last-changed">{% if watch.history_n >=2 and watch.last_changed %} | ||||
|                 <td class="last-checked">{{watch|format_last_checked_time|safe}}</td> | ||||
|                 <td class="last-changed">{% if watch.history_n >=2 and watch.last_changed >0 %} | ||||
|                     {{watch.last_changed|format_timestamp_timeago}} | ||||
|                     {% else %} | ||||
|                     Not yet | ||||
|   | ||||
| @@ -32,6 +32,8 @@ def app(request): | ||||
|     """Create application for the tests.""" | ||||
|     datastore_path = "./test-datastore" | ||||
|  | ||||
|     # So they don't delay in fetching | ||||
|     os.environ["MINIMUM_SECONDS_RECHECK_TIME"] = "0" | ||||
|     try: | ||||
|         os.mkdir(datastore_path) | ||||
|     except FileExistsError: | ||||
|   | ||||
| @@ -95,6 +95,8 @@ def test_api_simple(client, live_server): | ||||
|     assert watch_uuid in json.loads(res.data).keys() | ||||
|     before_recheck_info = json.loads(res.data)[watch_uuid] | ||||
|     assert before_recheck_info['last_checked'] != 0 | ||||
|     #705 `last_changed` should be zero on the first check | ||||
|     assert before_recheck_info['last_changed'] == 0 | ||||
|     assert before_recheck_info['title'] == 'My test URL' | ||||
|  | ||||
|     set_modified_response() | ||||
|   | ||||
| @@ -90,6 +90,14 @@ def test_check_basic_change_detection_functionality(client, live_server): | ||||
|     res = client.get(url_for("diff_history_page", uuid="first")) | ||||
|     assert b'Compare newest' in res.data | ||||
|  | ||||
|     # Check the [preview] pulls the right one | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b'which has this one new line' in res.data | ||||
|     assert b'Which is across multiple lines' not in res.data | ||||
|  | ||||
|     time.sleep(2) | ||||
|  | ||||
|     # Do this a few times.. ensures we don't accidentally set the status | ||||
|   | ||||
							
								
								
									
										137
									
								
								changedetectionio/tests/test_block_while_text_present.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										137
									
								
								changedetectionio/tests/test_block_while_text_present.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,137 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from . util import live_server_setup | ||||
| from changedetectionio import html_tools | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text</br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      </br> | ||||
|      So let's see what happens.  </br> | ||||
|      </body> | ||||
|      </html> | ||||
|  | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def set_modified_original_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some NEW nice initial text</br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      </br> | ||||
|      So let's see what happens.  </br> | ||||
|      <p>new ignore stuff</p> | ||||
|      <p>out of stock</p> | ||||
|      <p>blah</p> | ||||
|      </body> | ||||
|      </html> | ||||
|  | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| # Same as the modified response but WITHOUT the 'out of stock' block text (and with a new 'now on sale' line) | ||||
| def set_modified_response_minus_block_text(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some NEW nice initial text</br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <p>now on sale $2/p> | ||||
|      </br> | ||||
|      So let's see what happens.  </br> | ||||
|      <p>new ignore stuff</p> | ||||
|      <p>blah</p> | ||||
|      </body> | ||||
|      </html> | ||||
|  | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def test_check_block_changedetection_text_NOT_present(client, live_server): | ||||
|     sleep_time_for_fetch_thread = 3 | ||||
|     live_server_setup(live_server) | ||||
|     # Use a mix of case in 'out of stoCk' to prove it works case-insensitive. | ||||
|     ignore_text = "out of stoCk\r\nfoobar" | ||||
|  | ||||
|     set_original_ignore_response() | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # Go to the edit page and set the 'text should not be present' rule | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"text_should_not_be_present": ignore_text, "url": test_url, 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     # Check it saved | ||||
|     res = client.get( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|     ) | ||||
|     assert bytes(ignore_text.encode('utf-8')) in res.data | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
|  | ||||
|     # The page changed, BUT the text is still there, just the rest of it changes, we should not see a change | ||||
|     set_modified_original_ignore_response() | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'/test-endpoint' in res.data | ||||
|  | ||||
|  | ||||
|     # Now we set a change where the text is gone, it should now trigger | ||||
|     set_modified_response_minus_block_text() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
| @@ -28,13 +28,9 @@ def test_error_handler(client, live_server): | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
|  | ||||
|  | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|     assert b'Status Code 403' in res.data | ||||
| @@ -53,9 +49,6 @@ def test_error_text_handler(client, live_server): | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
|  | ||||
|   | ||||
							
								
								
									
										198
									
								
								changedetectionio/tests/test_extract_regex.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										198
									
								
								changedetectionio/tests/test_extract_regex.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,198 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup | ||||
|  | ||||
| from ..html_tools import * | ||||
|  | ||||
|  | ||||
| def set_original_response(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text</br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      </br> | ||||
|      So let's see what happens.  </br> | ||||
|      <div id="sametext">Some text thats the same</div> | ||||
|      <div class="changetext">Some text that will change</div>      | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def set_modified_response(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text</br> | ||||
|      <p>which has this one new line</p> | ||||
|      </br> | ||||
|      So let's see what happens.  </br> | ||||
|      <div id="sametext">Some text thats the same</div> | ||||
|      <div class="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/>  2000 online )</div> | ||||
|      <div class="changetext">SomeCase insensitive 3456</div> | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def set_multiline_response(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|       | ||||
|      <p>Something <br/> | ||||
|         across 6 billion multiple<br/> | ||||
|         lines | ||||
|      </p> | ||||
|       | ||||
|      <div>aaand something lines</div> | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def test_setup(client, live_server): | ||||
|  | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
| def test_check_filter_multiline(client, live_server): | ||||
|  | ||||
|     set_multiline_response() | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     time.sleep(3) | ||||
|  | ||||
|     # Go to the edit page and set the extract_text regex | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": '', | ||||
|               'extract_text': '/something.+?6 billion.+?lines/si', | ||||
|               "url": test_url, | ||||
|               "tag": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_requests" | ||||
|               }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Updated watch." in res.data | ||||
|     time.sleep(3) | ||||
|  | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|  | ||||
|     assert b'<div class="">Something' in res.data | ||||
|     assert b'<div class="">across 6 billion multiple' in res.data | ||||
|     assert b'<div class="">lines' in res.data | ||||
|  | ||||
|     # but the last one, which also says 'lines' shouldnt be here (non-greedy match checking) | ||||
|     assert b'aaand something lines' not in res.data | ||||
|  | ||||
| def test_check_filter_and_regex_extract(client, live_server): | ||||
|     sleep_time_for_fetch_thread = 3 | ||||
|     css_filter = ".changetext" | ||||
|  | ||||
|     set_original_response() | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     time.sleep(1) | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # Go to the edit page, set the CSS filter and the extract_text rules | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": css_filter, | ||||
|               'extract_text': '\d+ online\r\n\d+ guests\r\n/somecase insensitive \d+/i\r\n/somecase insensitive (345\d)/i', | ||||
|               "url": test_url, | ||||
|               "tag": "", | ||||
|               "headers": "", | ||||
|               'fetch_backend': "html_requests" | ||||
|               }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Updated watch." in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     #  Make a change | ||||
|     set_modified_response() | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # It should now be marked 'unviewed' | ||||
|     # Because the content selected by the '.changetext' filter has changed | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|     # Check HTML conversion detected and worked | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # Class will be blank for now because the frontend didnt apply the diff | ||||
|     assert b'<div class="">1000 online' in res.data | ||||
|  | ||||
|     # All regex matching should be here | ||||
|     assert b'<div class="">2000 online' in res.data | ||||
|  | ||||
|     # Both regexs should be here | ||||
|     assert b'<div class="">80 guests' in res.data | ||||
|  | ||||
|     # Regex with flag handling should be here | ||||
|     assert b'<div class="">SomeCase insensitive 3456' in res.data | ||||
|  | ||||
|     # Singular group from /somecase insensitive (345\d)/i | ||||
|     assert b'<div class="">3456' in res.data | ||||
|  | ||||
|     # Regex with multiline flag handling should be here | ||||
|  | ||||
|     # Should not be here | ||||
|     assert b'Some text that did change' not in res.data | ||||
							
								
								
									
										134
									
								
								changedetectionio/tests/test_filter_exist_changes.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										134
									
								
								changedetectionio/tests/test_filter_exist_changes.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,134 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| # https://www.reddit.com/r/selfhosted/comments/wa89kp/comment/ii3a4g7/?context=3 | ||||
| import os | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import set_original_response, live_server_setup | ||||
| from changedetectionio.model import App | ||||
|  | ||||
|  | ||||
| def set_response_without_filter(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text</br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      </br> | ||||
|      So let's see what happens.  </br> | ||||
|      <div id="nope-doesnt-exist">Some text thats the same</div>      | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
|  | ||||
| def set_response_with_filter(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text</br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      </br> | ||||
|      So let's see what happens.  </br> | ||||
|      <div class="ticket-available">Ticket now on sale!</div>      | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
| def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_server): | ||||
| #  Filter knowingly doesn't exist, like someone setting up a known filter to see if some cinema tickets are on sale again | ||||
| #  And the page has that filter available | ||||
| #  Then I should get a notification | ||||
|  | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|     set_response_without_filter() | ||||
|  | ||||
|     # Add our URL via the quick-add watch form | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": test_url, "tag": 'cinema'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     # Give the thread time to pick up the first version | ||||
|     time.sleep(3) | ||||
|  | ||||
|     # Build the notification URL that points at the test notification endpoint | ||||
|     url = url_for('test_notification_endpoint', _external=True) | ||||
|     notification_url = url.replace('http', 'json') | ||||
|  | ||||
|     print(">>>> Notification URL: " + notification_url) | ||||
|  | ||||
|     # Just a regular notification setting, this will be used by the special 'filter not found' notification | ||||
|     notification_form_data = {"notification_urls": notification_url, | ||||
|                               "notification_title": "New ChangeDetection.io Notification - {watch_url}", | ||||
|                               "notification_body": "BASE URL: {base_url}\n" | ||||
|                                                    "Watch URL: {watch_url}\n" | ||||
|                                                    "Watch UUID: {watch_uuid}\n" | ||||
|                                                    "Watch title: {watch_title}\n" | ||||
|                                                    "Watch tag: {watch_tag}\n" | ||||
|                                                    "Preview: {preview_url}\n" | ||||
|                                                    "Diff URL: {diff_url}\n" | ||||
|                                                    "Snapshot: {current_snapshot}\n" | ||||
|                                                    "Diff: {diff}\n" | ||||
|                                                    "Diff Full: {diff_full}\n" | ||||
|                                                    ":-)", | ||||
|                               "notification_format": "Text"} | ||||
|  | ||||
|     notification_form_data.update({ | ||||
|         "url": test_url, | ||||
|         "tag": "my tag", | ||||
|         "title": "my title", | ||||
|         "headers": "", | ||||
|         "css_filter": '.ticket-available', | ||||
|         "fetch_backend": "html_requests"}) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data=notification_form_data, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     time.sleep(3) | ||||
|  | ||||
|     # Shouldn't exist, shouldn't have fired | ||||
|     assert not os.path.isfile("test-datastore/notification.txt") | ||||
|     # Now the filter should exist | ||||
|     set_response_with_filter() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(3) | ||||
|  | ||||
|     assert os.path.isfile("test-datastore/notification.txt") | ||||
|  | ||||
|     with open("test-datastore/notification.txt", 'r') as f: | ||||
|         notification = f.read() | ||||
|  | ||||
|     assert 'Ticket now on sale' in notification | ||||
|     os.unlink("test-datastore/notification.txt") | ||||
|  | ||||
|  | ||||
|     # Test that if it gets removed, then re-added, we get a notification | ||||
|     # Remove the target and re-add it, we should get a new notification | ||||
|     set_response_without_filter() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(3) | ||||
|     assert not os.path.isfile("test-datastore/notification.txt") | ||||
|  | ||||
|     set_response_with_filter() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(3) | ||||
|     assert os.path.isfile("test-datastore/notification.txt") | ||||
|  | ||||
| # Also test that the filter was updated after the first one was requested | ||||
							
								
								
									
										144
									
								
								changedetectionio/tests/test_filter_failure_notification.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										144
									
								
								changedetectionio/tests/test_filter_failure_notification.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,144 @@ | ||||
| import os | ||||
| import time | ||||
| import re | ||||
| from flask import url_for | ||||
| from .util import set_original_response, live_server_setup | ||||
| from changedetectionio.model import App | ||||
|  | ||||
|  | ||||
| def set_response_with_filter(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text</br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      </br> | ||||
|      So let's see what happens.  </br> | ||||
|      <div id="nope-doesnt-exist">Some text thats the same</div>      | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     return None | ||||
|  | ||||
| def run_filter_test(client, content_filter): | ||||
|  | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("form_delete", uuid="all"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     if os.path.isfile("test-datastore/notification.txt"): | ||||
|         os.unlink("test-datastore/notification.txt") | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": test_url, "tag": ''}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Watch added" in res.data | ||||
|  | ||||
|     # Give the thread time to pick up the first version | ||||
|     time.sleep(3) | ||||
|  | ||||
|     # Build the notification URL from the test notification endpoint, | ||||
|     # swapping the 'http' scheme for 'json' | ||||
|     url = url_for('test_notification_endpoint', _external=True) | ||||
|     notification_url = url.replace('http', 'json') | ||||
|  | ||||
|     print(">>>> Notification URL: " + notification_url) | ||||
|  | ||||
|     # Just a regular notification setting, this will be used by the special 'filter not found' notification | ||||
|     notification_form_data = {"notification_urls": notification_url, | ||||
|                               "notification_title": "New ChangeDetection.io Notification - {watch_url}", | ||||
|                               "notification_body": "BASE URL: {base_url}\n" | ||||
|                                                    "Watch URL: {watch_url}\n" | ||||
|                                                    "Watch UUID: {watch_uuid}\n" | ||||
|                                                    "Watch title: {watch_title}\n" | ||||
|                                                    "Watch tag: {watch_tag}\n" | ||||
|                                                    "Preview: {preview_url}\n" | ||||
|                                                    "Diff URL: {diff_url}\n" | ||||
|                                                    "Snapshot: {current_snapshot}\n" | ||||
|                                                    "Diff: {diff}\n" | ||||
|                                                    "Diff Full: {diff_full}\n" | ||||
|                                                    ":-)", | ||||
|                               "notification_format": "Text"} | ||||
|  | ||||
|     notification_form_data.update({ | ||||
|         "url": test_url, | ||||
|         "tag": "my tag", | ||||
|         "title": "my title", | ||||
|         "headers": "", | ||||
|         "filter_failure_notification_send": 'y', | ||||
|         "css_filter": content_filter, | ||||
|         "fetch_backend": "html_requests"}) | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data=notification_form_data, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     time.sleep(3) | ||||
|  | ||||
|     # Now the notification should not exist, because we didn't reach the threshold | ||||
|     assert not os.path.isfile("test-datastore/notification.txt") | ||||
|  | ||||
|     for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT): | ||||
|         res = client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|         time.sleep(3) | ||||
|  | ||||
|     # We should see something in the frontend | ||||
|     assert b'Warning, filter' in res.data | ||||
|  | ||||
|     # Now it should exist and contain our "filter not found" alert | ||||
|     assert os.path.isfile("test-datastore/notification.txt") | ||||
|     notification = False | ||||
|     with open("test-datastore/notification.txt", 'r') as f: | ||||
|         notification = f.read() | ||||
|     assert 'CSS/xPath filter was not present in the page' in notification | ||||
|     assert content_filter.replace('"', '\\"') in notification | ||||
|  | ||||
|     # Remove it and prove that it doesnt trigger when not expected | ||||
|     os.unlink("test-datastore/notification.txt") | ||||
|     set_response_with_filter() | ||||
|  | ||||
|     for i in range(0, App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT): | ||||
|         client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|         time.sleep(3) | ||||
|  | ||||
|     # It should have sent a notification, but.. | ||||
|     assert os.path.isfile("test-datastore/notification.txt") | ||||
|     # but it should not contain the info about the failed filter | ||||
|     with open("test-datastore/notification.txt", 'r') as f: | ||||
|         notification = f.read() | ||||
|     assert not 'CSS/xPath filter was not present in the page' in notification | ||||
|  | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("form_delete", uuid="all"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     os.unlink("test-datastore/notification.txt") | ||||
|  | ||||
|  | ||||
| def test_setup(live_server): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
| def test_check_css_filter_failure_notification(client, live_server): | ||||
|     set_original_response() | ||||
|     time.sleep(1) | ||||
|     run_filter_test(client, '#nope-doesnt-exist') | ||||
|  | ||||
| def test_check_xpath_filter_failure_notification(client, live_server): | ||||
|     set_original_response() | ||||
|     time.sleep(1) | ||||
|     run_filter_test(client, '//*[@id="nope-doesnt-exist"]') | ||||
|  | ||||
| # Test that notification is never sent | ||||
| @@ -36,7 +36,7 @@ def test_check_notification(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("form_watch_add"), | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": test_url, "tag": ''}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -154,6 +154,10 @@ def test_check_notification(client, live_server): | ||||
|     time.sleep(1) | ||||
|     assert os.path.exists("test-datastore/notification.txt") == False | ||||
|  | ||||
|     res = client.get(url_for("notification_logs")) | ||||
|     # be sure we see it in the output log | ||||
|     assert b'New ChangeDetection.io Notification - ' + test_url.encode('utf-8') in res.data | ||||
|  | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("form_delete", uuid="all"), | ||||
| @@ -168,7 +172,7 @@ def test_notification_validation(client, live_server): | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("form_watch_add"), | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": test_url, "tag": 'nice one'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|   | ||||
| @@ -16,7 +16,7 @@ def test_check_notification_error_handling(client, live_server): | ||||
|     # use a different URL so that it doesnt interfere with the actual check until we are ready | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("form_watch_add"), | ||||
|         url_for("form_quick_watch_add"), | ||||
|         data={"url": "https://changedetection.io/CHANGELOG.txt", "tag": ''}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|   | ||||
							
								
								
									
										43
									
								
								changedetectionio/tests/test_obfuscations.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										43
									
								
								changedetectionio/tests/test_obfuscations.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,43 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup | ||||
|  | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      <span>The price is</span><span>$<!-- -->90<!-- -->.<!-- -->74</span> | ||||
|      </body> | ||||
|      </html> | ||||
|  | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def test_obfuscations(client, live_server): | ||||
|     set_original_ignore_response() | ||||
|     live_server_setup(live_server) | ||||
|     time.sleep(1) | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(3) | ||||
|  | ||||
|     # Check HTML conversion detected and worked | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b'$90.74' in res.data | ||||
							
								
								
									
										104
									
								
								changedetectionio/tests/test_unique_lines.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										104
									
								
								changedetectionio/tests/test_unique_lines.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,104 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from .util import live_server_setup | ||||
|  | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
|      <body> | ||||
|      <p>Some initial text</p> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <p>So let's see what happens.</p> | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| # The same but just re-ordered the text | ||||
| def set_modified_swapped_lines(): | ||||
|     # Re-ordered and with some whitespacing, should get stripped() too. | ||||
|     test_return_data = """<html> | ||||
|      <body> | ||||
|      <p>Some initial text</p> | ||||
|      <p>   So let's see what happens.</p> | ||||
|      <p> Which is across multiple lines</p>      | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def set_modified_with_trigger_text_response(): | ||||
|     test_return_data = """<html> | ||||
|      <body> | ||||
|      <p>Some initial text</p> | ||||
|      <p>So let's see what happens.</p> | ||||
|      <p>and a new line!</p> | ||||
|      <p>Which is across multiple lines</p>      | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def test_unique_lines_functionality(client, live_server): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     sleep_time_for_fetch_thread = 3 | ||||
|  | ||||
|     set_original_ignore_response() | ||||
|     # Give the endpoint time to spin up | ||||
|     time.sleep(1) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # Enable the "check unique lines" option on the watch's edit page | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"check_unique_lines": "y", | ||||
|               "url": test_url, | ||||
|               "fetch_backend": "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     assert b'unviewed' not in res.data | ||||
|  | ||||
|     #  Make a change | ||||
|     set_modified_swapped_lines() | ||||
|  | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     # Trigger a check | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|  | ||||
|  | ||||
|     # Now set the content which contains the new text and re-ordered existing text | ||||
|     set_modified_with_trigger_text_response() | ||||
|     client.get(url_for("form_watch_checknow"), follow_redirects=True) | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
| @@ -86,6 +86,7 @@ def test_check_xpath_filter_utf8(client, live_server): | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(1) | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
| @@ -99,6 +100,68 @@ def test_check_xpath_filter_utf8(client, live_server): | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
|  | ||||
| # Handle utf-8 charset replies https://github.com/dgtlmoon/changedetection.io/pull/613 | ||||
| def test_check_xpath_text_function_utf8(client, live_server): | ||||
|     filter='//item/title/text()' | ||||
|  | ||||
|     d='''<?xml version="1.0" encoding="UTF-8"?> | ||||
| <rss xmlns:taxo="http://purl.org/rss/1.0/modules/taxonomy/" xmlns:rdf="http://www.w3.org/1999/02/22-rdf-syntax-ns#" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd" xmlns:dc="http://purl.org/dc/elements/1.1/" version="2.0"> | ||||
| 	<channel> | ||||
| 		<title>rpilocator.com</title> | ||||
| 		<link>https://rpilocator.com</link> | ||||
| 		<description>Find Raspberry Pi Computers in Stock</description> | ||||
| 		<lastBuildDate>Thu, 19 May 2022 23:27:30 GMT</lastBuildDate> | ||||
| 		<image> | ||||
| 			<url>https://rpilocator.com/favicon.png</url> | ||||
| 			<title>rpilocator.com</title> | ||||
| 			<link>https://rpilocator.com/</link> | ||||
| 			<width>32</width> | ||||
| 			<height>32</height> | ||||
| 		</image> | ||||
| 		<item> | ||||
| 			<title>Stock Alert (UK): RPi CM4</title> | ||||
| 			<foo>something else unrelated</foo> | ||||
| 		</item> | ||||
| 		<item> | ||||
| 			<title>Stock Alert (UK): Big monitor</title> | ||||
| 			<foo>something else unrelated</foo> | ||||
| 		</item>		 | ||||
| 	</channel> | ||||
| </rss>''' | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(d) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True, content_type="application/rss+xml;charset=UTF-8") | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|     time.sleep(1) | ||||
|     res = client.post( | ||||
|         url_for("edit_page", uuid="first"), | ||||
|         data={"css_filter": filter, "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|     time.sleep(3) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'Unicode strings with encoding declaration are not supported.' not in res.data | ||||
|  | ||||
|     # The preview should contain the item titles selected by the xpath text() filter | ||||
|     res = client.get( | ||||
|         url_for("preview_page", uuid="first"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b'<div class="">Stock Alert (UK): RPi CM4' in res.data | ||||
|     assert b'<div class="">Stock Alert (UK): Big monitor' in res.data | ||||
|  | ||||
|     res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| def test_check_markup_xpath_filter_restriction(client, live_server): | ||||
|     sleep_time_for_fetch_thread = 3 | ||||
|   | ||||
| @@ -3,6 +3,8 @@ import queue | ||||
| import time | ||||
|  | ||||
| from changedetectionio import content_fetcher | ||||
| from changedetectionio.html_tools import FilterNotFoundInResponse | ||||
|  | ||||
| # A single update worker | ||||
| # | ||||
| # Requests for checking on a single site(watch) from a queue of watches | ||||
| @@ -19,6 +21,92 @@ class update_worker(threading.Thread): | ||||
|         self.datastore = datastore | ||||
|         super().__init__(*args, **kwargs) | ||||
|  | ||||
|     def send_content_changed_notification(self, t, watch_uuid): | ||||
|  | ||||
|         from changedetectionio import diff | ||||
|  | ||||
|         n_object = {} | ||||
|         watch = self.datastore.data['watching'].get(watch_uuid, False) | ||||
|         if not watch: | ||||
|             return | ||||
|  | ||||
|         watch_history = watch.history | ||||
|         dates = list(watch_history.keys()) | ||||
|         # Theoretically it's possible that this could be just 1 long, | ||||
|         # - In the case that the timestamp key was not unique | ||||
|         if len(dates) == 1: | ||||
|             raise ValueError( | ||||
|                 "History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?" | ||||
|             ) | ||||
|  | ||||
|         # Did it have any notification alerts to hit? | ||||
|         if len(watch['notification_urls']): | ||||
|             print(">>> Notifications queued for UUID from watch {}".format(watch_uuid)) | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
|             n_object['notification_title'] = watch['notification_title'] | ||||
|             n_object['notification_body'] = watch['notification_body'] | ||||
|             n_object['notification_format'] = watch['notification_format'] | ||||
|  | ||||
|         # No? maybe there's a global setting, queue them all | ||||
|         elif len(self.datastore.data['settings']['application']['notification_urls']): | ||||
|             print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(watch_uuid)) | ||||
|             n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls'] | ||||
|             n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title'] | ||||
|             n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body'] | ||||
|             n_object['notification_format'] = self.datastore.data['settings']['application']['notification_format'] | ||||
|         else: | ||||
|             print(">>> NO notifications queued, watch and global notification URLs were empty.") | ||||
|  | ||||
|         # Only prepare to notify if the rules above matched | ||||
|         if 'notification_urls' in n_object: | ||||
|             # HTML needs linebreak, but MarkDown and Text can use a linefeed | ||||
|             if n_object['notification_format'] == 'HTML': | ||||
|                 line_feed_sep = "</br>" | ||||
|             else: | ||||
|                 line_feed_sep = "\n" | ||||
|  | ||||
|             snapshot_contents = '' | ||||
|             with open(watch_history[dates[-1]], 'rb') as f: | ||||
|                 snapshot_contents = f.read() | ||||
|  | ||||
|             n_object.update({ | ||||
|                 'watch_url': watch['url'], | ||||
|                 'uuid': watch_uuid, | ||||
|                 'current_snapshot': snapshot_contents.decode('utf-8'), | ||||
|                 'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep), | ||||
|                 'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep) | ||||
|             }) | ||||
|  | ||||
|             self.notification_q.put(n_object) | ||||
|  | ||||
|     def send_filter_failure_notification(self, watch_uuid): | ||||
|  | ||||
|         threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts') | ||||
|         watch = self.datastore.data['watching'].get(watch_uuid, False) | ||||
|         if not watch: | ||||
|             return | ||||
|  | ||||
|         n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page', | ||||
|                     'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format( | ||||
|                         watch['css_filter'], | ||||
|                         threshold), | ||||
|                     'notification_format': 'text'} | ||||
|  | ||||
|         if len(watch['notification_urls']): | ||||
|             n_object['notification_urls'] = watch['notification_urls'] | ||||
|  | ||||
|         elif len(self.datastore.data['settings']['application']['notification_urls']): | ||||
|             n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls'] | ||||
|  | ||||
|         # Only prepare to notify if the rules above matched | ||||
|         if 'notification_urls' in n_object: | ||||
|             n_object.update({ | ||||
|                 'watch_url': watch['url'], | ||||
|                 'uuid': watch_uuid | ||||
|             }) | ||||
|             self.notification_q.put(n_object) | ||||
|             print("Sent filter not found notification for {}".format(watch_uuid)) | ||||
|  | ||||
|     def run(self): | ||||
|         from changedetectionio import fetch_site_status | ||||
|  | ||||
| @@ -27,7 +115,7 @@ class update_worker(threading.Thread): | ||||
|         while not self.app.config.exit.is_set(): | ||||
|  | ||||
|             try: | ||||
|                 uuid = self.q.get(block=False) | ||||
|                 priority, uuid = self.q.get(block=False) | ||||
|             except queue.Empty: | ||||
|                 pass | ||||
|  | ||||
| @@ -35,17 +123,17 @@ class update_worker(threading.Thread): | ||||
|                 self.current_uuid = uuid | ||||
|  | ||||
|                 if uuid in list(self.datastore.data['watching'].keys()): | ||||
|  | ||||
|                     changed_detected = False | ||||
|                     contents = "" | ||||
|                     contents = b'' | ||||
|                     screenshot = False | ||||
|                     update_obj= {} | ||||
|                     xpath_data = False | ||||
|                     process_changedetection_results = True | ||||
|                     print("> Processing UUID {} Priority {} URL {}".format(uuid, priority, self.datastore.data['watching'][uuid]['url'])) | ||||
|                     now = time.time() | ||||
|  | ||||
|                     try: | ||||
|                         changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid) | ||||
|  | ||||
|                         # Re #342 | ||||
|                         # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes. | ||||
|                         # We then convert/.decode('utf-8') for the notification etc | ||||
| @@ -53,30 +141,62 @@ class update_worker(threading.Thread): | ||||
|                             raise Exception("Error - returned data from the fetch handler SHOULD be bytes") | ||||
|                     except PermissionError as e: | ||||
|                         self.app.logger.error("File permission error updating", uuid, str(e)) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetcher.ReplyWithContentButNoText as e: | ||||
|                         # Totally fine, it's by choice - just continue on, nothing more to care about | ||||
|                         # Page had elements/content but no renderable text | ||||
|                         # Backend (not filters) gave zero output | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."}) | ||||
|                         pass | ||||
|                         process_changedetection_results = False | ||||
|  | ||||
|                     except FilterNotFoundInResponse as e: | ||||
|                         err_text = "Warning, filter '{}' not found".format(str(e)) | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            # So that we get a trigger when the content is added again | ||||
|                                                                            'previous_md5': ''}) | ||||
|  | ||||
|                         # Only when enabled, send the notification | ||||
|                         if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False): | ||||
|                             c = self.datastore.data['watching'][uuid].get('consecutive_filter_failures', 5) | ||||
|                             c += 1 | ||||
|                             # Send notification if we reached the threshold? | ||||
|                             threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', | ||||
|                                                                                            0) | ||||
|                             print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c)) | ||||
|                             if threshold > 0 and c >= threshold: | ||||
|                                 if not self.datastore.data['watching'][uuid].get('notification_muted'): | ||||
|                                     self.send_filter_failure_notification(uuid) | ||||
|                                 c = 0 | ||||
|  | ||||
|                             self.datastore.update_watch(uuid=uuid, update_obj={'consecutive_filter_failures': c}) | ||||
|  | ||||
|                         process_changedetection_results = True | ||||
|  | ||||
|                     except content_fetcher.EmptyReply as e: | ||||
|                         # Some kind of custom to-str handler in the exception handler that does this? | ||||
|                         err_text = "EmptyReply: Status Code {}".format(e.status_code) | ||||
|                         err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code) | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            'last_check_status': e.status_code}) | ||||
|                     except content_fetcher.ScreenshotUnavailable as e: | ||||
|                         err_text = "Screenshot unavailable, page did not render fully in the expected time" | ||||
|                         err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'" | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            'last_check_status': e.status_code}) | ||||
|                         process_changedetection_results = False | ||||
|                     except content_fetcher.PageUnloadable as e: | ||||
|                         err_text = "Page request from server didnt respond correctly" | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, | ||||
|                                                                            'last_check_status': e.status_code}) | ||||
|  | ||||
|                     except Exception as e: | ||||
|                         self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e)) | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) | ||||
|  | ||||
|                         # Other serious error | ||||
|                         process_changedetection_results = False | ||||
|                     else: | ||||
|                         # Mark that we never had any failures | ||||
|                         update_obj['consecutive_filter_failures'] = 0 | ||||
|  | ||||
|                     # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc | ||||
|                     if process_changedetection_results: | ||||
|                         try: | ||||
|                             watch = self.datastore.data['watching'][uuid] | ||||
|                             fname = "" # Saved history text filename | ||||
| @@ -84,59 +204,21 @@ class update_worker(threading.Thread): | ||||
|                             # For the FIRST time we check a site, or a change detected, save the snapshot. | ||||
|                             if changed_detected or not watch['last_checked']: | ||||
|                                 # A change was detected | ||||
|                                 fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time()))) | ||||
|                                 watch.save_history_text(contents=contents, timestamp=str(round(time.time()))) | ||||
|  | ||||
|                             # Generally update anything interesting returned | ||||
|                             self.datastore.update_watch(uuid=uuid, update_obj=update_obj) | ||||
|  | ||||
|                             # A change was detected | ||||
|                             if changed_detected: | ||||
|                                 n_object = {} | ||||
|                                 print (">> Change detected in UUID {} - {}".format(uuid, watch['url'])) | ||||
|  | ||||
|                                 # Notifications should only trigger on the second time (first time, we gather the initial snapshot) | ||||
|                                 if watch.history_n >= 2: | ||||
|                                     # Atleast 2, means there really was a change | ||||
|                                     self.datastore.update_watch(uuid=uuid, update_obj={'last_changed': round(now)}) | ||||
|                                     if not self.datastore.data['watching'][uuid].get('notification_muted'): | ||||
|                                         self.send_content_changed_notification(self, watch_uuid=uuid) | ||||
|  | ||||
|                                     dates = list(watch.history.keys()) | ||||
|                                     prev_fname = watch.history[dates[-2]] | ||||
|  | ||||
|  | ||||
|                                     # Did it have any notification alerts to hit? | ||||
|                                     if len(watch['notification_urls']): | ||||
|                                         print(">>> Notifications queued for UUID from watch {}".format(uuid)) | ||||
|                                         n_object['notification_urls'] = watch['notification_urls'] | ||||
|                                         n_object['notification_title'] = watch['notification_title'] | ||||
|                                         n_object['notification_body'] = watch['notification_body'] | ||||
|                                         n_object['notification_format'] = watch['notification_format'] | ||||
|  | ||||
|                                     # No? maybe theres a global setting, queue them all | ||||
|                                     elif len(self.datastore.data['settings']['application']['notification_urls']): | ||||
|                                         print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(uuid)) | ||||
|                                         n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls'] | ||||
|                                         n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title'] | ||||
|                                         n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body'] | ||||
|                                         n_object['notification_format'] = self.datastore.data['settings']['application']['notification_format'] | ||||
|                                     else: | ||||
|                                         print(">>> NO notifications queued, watch and global notification URLs were empty.") | ||||
|  | ||||
|                                     # Only prepare to notify if the rules above matched | ||||
|                                     if 'notification_urls' in n_object: | ||||
|                                         # HTML needs linebreak, but MarkDown and Text can use a linefeed | ||||
|                                         if n_object['notification_format'] == 'HTML': | ||||
|                                             line_feed_sep = "</br>" | ||||
|                                         else: | ||||
|                                             line_feed_sep = "\n" | ||||
|  | ||||
|                                         from changedetectionio import diff | ||||
|                                         n_object.update({ | ||||
|                                             'watch_url': watch['url'], | ||||
|                                             'uuid': uuid, | ||||
|                                             'current_snapshot': contents.decode('utf-8'), | ||||
|                                             'diff': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep), | ||||
|                                             'diff_full': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep) | ||||
|                                         }) | ||||
|  | ||||
|                                         self.notification_q.put(n_object) | ||||
|  | ||||
|                         except Exception as e: | ||||
|                             # Catch everything possible here, so that if a worker crashes, we don't lose it until restart! | ||||
| @@ -144,10 +226,11 @@ class update_worker(threading.Thread): | ||||
|                             self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e)) | ||||
|                             self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)}) | ||||
|  | ||||
|                     finally: | ||||
|  | ||||
|                         # Always record that we atleast tried | ||||
|                         self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3), | ||||
|                                                                            'last_checked': round(time.time())}) | ||||
|  | ||||
|                         # Always save the screenshot if it's available | ||||
|                         if screenshot: | ||||
|                             self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot) | ||||
|   | ||||
| @@ -24,7 +24,7 @@ services: | ||||
|   #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy | ||||
|   # | ||||
|   #       Alternative Playwright URL, do not use "'s or 's! | ||||
|   #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/ | ||||
|   #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true | ||||
|   # | ||||
|   #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password | ||||
|   # | ||||
| @@ -73,6 +73,17 @@ services: | ||||
| #        hostname: playwright-chrome | ||||
| #        image: browserless/chrome | ||||
| #        restart: unless-stopped | ||||
| #        environment: | ||||
| #            - SCREEN_WIDTH=1920 | ||||
| #            - SCREEN_HEIGHT=1024 | ||||
| #            - SCREEN_DEPTH=16 | ||||
| #            - ENABLE_DEBUGGER=false | ||||
| #            - PREBOOT_CHROME=true | ||||
| #            - CONNECTION_TIMEOUT=300000 | ||||
| #            - MAX_CONCURRENT_SESSIONS=10 | ||||
| #            - CHROME_REFRESH_TIME=600000 | ||||
| #            - DEFAULT_BLOCK_ADS=true | ||||
| #            - DEFAULT_STEALTH=true | ||||
|  | ||||
| volumes: | ||||
|   changedetection-data: | ||||
|   | ||||
		Reference in New Issue
	
	Block a user