mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 00:27:48 +00:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			threading-
			...
			bug/RSS-fe
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					114344f950 | 
							
								
								
									
										2
									
								
								.github/ISSUE_TEMPLATE/bug_report.md
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								.github/ISSUE_TEMPLATE/bug_report.md
									
									
									
									
										vendored
									
									
								
							@@ -21,7 +21,7 @@ Steps to reproduce the behavior:
 | 
			
		||||
3. Scroll down to '....'
 | 
			
		||||
4. See error
 | 
			
		||||
 | 
			
		||||
! ALWAYS INCLUDE AN EXAMPLE URL WHERE IT IS POSSIBLE TO RE-CREATE THE ISSUE - USE THE 'SHARE WATCH' FEATURE AND PASTE IN THE SHARE-LINK!
 | 
			
		||||
! ALWAYS INCLUDE AN EXAMPLE URL WHERE IT IS POSSIBLE TO RE-CREATE THE ISSUE !
 | 
			
		||||
 | 
			
		||||
**Expected behavior**
 | 
			
		||||
A clear and concise description of what you expected to happen.
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										15
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										15
									
								
								.github/workflows/containers.yml
									
									
									
									
										vendored
									
									
								
							@@ -85,8 +85,8 @@ jobs:
 | 
			
		||||
          version: latest
 | 
			
		||||
          driver-opts: image=moby/buildkit:master
 | 
			
		||||
 | 
			
		||||
      # master branch -> :dev container tag
 | 
			
		||||
      - name: Build and push :dev
 | 
			
		||||
      # master always builds :latest
 | 
			
		||||
      - name: Build and push :latest
 | 
			
		||||
        id: docker_build
 | 
			
		||||
        if: ${{ github.ref }} == "refs/heads/master"
 | 
			
		||||
        uses: docker/build-push-action@v2
 | 
			
		||||
@@ -95,12 +95,12 @@ jobs:
 | 
			
		||||
          file: ./Dockerfile
 | 
			
		||||
          push: true
 | 
			
		||||
          tags: |
 | 
			
		||||
            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
 | 
			
		||||
            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest,ghcr.io/${{ github.repository }}:latest
 | 
			
		||||
          platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
 | 
			
		||||
          cache-from: type=local,src=/tmp/.buildx-cache
 | 
			
		||||
          cache-to: type=local,dest=/tmp/.buildx-cache
 | 
			
		||||
 | 
			
		||||
      # A new tagged release is required, which builds :tag and :latest
 | 
			
		||||
      # A new tagged release is required, which builds :tag
 | 
			
		||||
      - name: Build and push :tag
 | 
			
		||||
        id: docker_build_tag_release
 | 
			
		||||
        if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
 | 
			
		||||
@@ -110,10 +110,7 @@ jobs:
 | 
			
		||||
          file: ./Dockerfile
 | 
			
		||||
          push: true
 | 
			
		||||
          tags: |
 | 
			
		||||
            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }}
 | 
			
		||||
            ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
 | 
			
		||||
            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest
 | 
			
		||||
            ghcr.io/dgtlmoon/changedetection.io:latest
 | 
			
		||||
            ${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }},ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
 | 
			
		||||
          platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
 | 
			
		||||
          cache-from: type=local,src=/tmp/.buildx-cache
 | 
			
		||||
          cache-to: type=local,dest=/tmp/.buildx-cache
 | 
			
		||||
@@ -128,3 +125,5 @@ jobs:
 | 
			
		||||
          key: ${{ runner.os }}-buildx-${{ github.sha }}
 | 
			
		||||
          restore-keys: |
 | 
			
		||||
            ${{ runner.os }}-buildx-
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -8,6 +8,5 @@ __pycache__
 | 
			
		||||
build
 | 
			
		||||
dist
 | 
			
		||||
venv
 | 
			
		||||
test-datastore
 | 
			
		||||
*.egg-info*
 | 
			
		||||
.vscode/settings.json
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										17
									
								
								README.md
									
									
									
									
									
								
							
							
						
						
									
										17
									
								
								README.md
									
									
									
									
									
								
							@@ -3,16 +3,14 @@
 | 
			
		||||
 | 
			
		||||

 | 
			
		||||
 | 
			
		||||
## Web Site Change Detection, Monitoring and Notification - Self-Hosted or SaaS.
 | 
			
		||||
## Self-Hosted, Open Source, Change Monitoring of Web Pages
 | 
			
		||||
 | 
			
		||||
_Know when web pages change! Stay on top of new information! Get notifications when important website content changes._ 
 | 
			
		||||
_Know when web pages change! Stay on top of new information!_ 
 | 
			
		||||
 | 
			
		||||
Live your data-life *pro-actively* instead of *re-actively*.
 | 
			
		||||
 | 
			
		||||
Free, Open-source web page monitoring, notification and change detection. Don't have time? [**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start)
 | 
			
		||||
 | 
			
		||||
[[ Discord ]](https://discord.com/channels/1000806276256780309/1000806276873334816) [[ YouTube ]](https://www.youtube.com/channel/UCbS09q1TRf0o4N2t-WA3emQ) [[ LinkedIn ]](https://www.linkedin.com/company/changedetection-io/)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring"  title="Self-hosted web page change monitoring"  />](https://lemonade.changedetection.io/start)
 | 
			
		||||
 | 
			
		||||
@@ -35,7 +33,6 @@ Free, Open-source web page monitoring, notification and change detection. Don't
 | 
			
		||||
- New software releases, security advisories when you're not on their mailing list.
 | 
			
		||||
- Festivals with changes
 | 
			
		||||
- Real estate listing changes
 | 
			
		||||
- Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
 | 
			
		||||
- COVID related news from government websites
 | 
			
		||||
- University/organisation news from their website
 | 
			
		||||
- Detect and monitor changes in JSON API responses 
 | 
			
		||||
@@ -59,9 +56,9 @@ Easily see what changed, examine by word, line, or individual character.
 | 
			
		||||
 | 
			
		||||
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
 | 
			
		||||
 | 
			
		||||
### Filter by elements using the Visual Selector tool.
 | 
			
		||||
### Target elements with the Visual Selector tool.
 | 
			
		||||
 | 
			
		||||
Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (included as part of our subscription service)
 | 
			
		||||
Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (available also as part of our subscription service)
 | 
			
		||||
 | 
			
		||||
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/visualselector-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference "  title="Self-hosted web page change monitoring context difference " />
 | 
			
		||||
 | 
			
		||||
@@ -70,18 +67,14 @@ Available when connected to a <a href="https://github.com/dgtlmoon/changedetecti
 | 
			
		||||
### Docker
 | 
			
		||||
 | 
			
		||||
With Docker Compose, just clone this repository and run:
 | 
			
		||||
 | 
			
		||||
```bash
 | 
			
		||||
$ docker-compose up -d
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
Docker standalone
 | 
			
		||||
```bash
 | 
			
		||||
$ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io
 | 
			
		||||
```
 | 
			
		||||
 | 
			
		||||
`:latest` tag is our latest stable release, `:dev` tag is our bleeding edge `master` branch.
 | 
			
		||||
 | 
			
		||||
### Windows
 | 
			
		||||
 | 
			
		||||
See the install instructions at the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
 | 
			
		||||
@@ -121,7 +114,7 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
 | 
			
		||||
## Filters
 | 
			
		||||
XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
 | 
			
		||||
 | 
			
		||||
(We support LXML `re:test`, `re:match` and `re:replace`.)
 | 
			
		||||
(We support LXML re:test, re:match and re:replace.)
 | 
			
		||||
 | 
			
		||||
## Notifications
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -20,7 +20,6 @@ from copy import deepcopy
 | 
			
		||||
from threading import Event
 | 
			
		||||
 | 
			
		||||
import flask_login
 | 
			
		||||
import logging
 | 
			
		||||
import pytz
 | 
			
		||||
import timeago
 | 
			
		||||
from feedgen.feed import FeedGenerator
 | 
			
		||||
@@ -44,7 +43,7 @@ from flask_wtf import CSRFProtect
 | 
			
		||||
from changedetectionio import html_tools
 | 
			
		||||
from changedetectionio.api import api_v1
 | 
			
		||||
 | 
			
		||||
__version__ = '0.39.16'
 | 
			
		||||
__version__ = '0.39.14'
 | 
			
		||||
 | 
			
		||||
datastore = None
 | 
			
		||||
 | 
			
		||||
@@ -105,11 +104,10 @@ def init_app_secret(datastore_path):
 | 
			
		||||
# running or something similar.
 | 
			
		||||
@app.template_filter('format_last_checked_time')
 | 
			
		||||
def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
 | 
			
		||||
 | 
			
		||||
    # Worker thread tells us which UUID it is currently processing.
 | 
			
		||||
    for t in threading.enumerate():
 | 
			
		||||
        if t.name == 'update_worker' and t.current_uuid == watch_obj['uuid']:
 | 
			
		||||
            return '<span class="loader"></span><span> Checking now</span>'
 | 
			
		||||
    for t in running_update_threads:
 | 
			
		||||
        if t.current_uuid == watch_obj['uuid']:
 | 
			
		||||
            return "Checking now.."
 | 
			
		||||
 | 
			
		||||
    if watch_obj['last_checked'] == 0:
 | 
			
		||||
        return 'Not yet'
 | 
			
		||||
@@ -299,7 +297,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
        # Sort by last_changed and add the uuid which is usually the key..
 | 
			
		||||
        sorted_watches = []
 | 
			
		||||
 | 
			
		||||
        # @todo needs a .itemsWithTag() or something - then we can use that in Jinja2 and throw this away
 | 
			
		||||
        # @todo needs a .itemsWithTag() or something
 | 
			
		||||
        for uuid, watch in datastore.data['watching'].items():
 | 
			
		||||
 | 
			
		||||
            if limit_tag != None:
 | 
			
		||||
@@ -353,8 +351,9 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
                latest_fname = watch.history[dates[-1]]
 | 
			
		||||
 | 
			
		||||
                html_diff = diff.render_diff(prev_fname, latest_fname, include_equal=False, line_feed_sep="</br>")
 | 
			
		||||
                fe.content(content="<html><body><h4>{}</h4>{}</body></html>".format(watch_title, html_diff),
 | 
			
		||||
                           type='CDATA')
 | 
			
		||||
                fe.description(description="<![CDATA["
 | 
			
		||||
                                           "<html><body><h4>{}</h4>{}</body></html>"
 | 
			
		||||
                                           "]]>".format(watch_title, html_diff))
 | 
			
		||||
 | 
			
		||||
                fe.guid(guid, permalink=False)
 | 
			
		||||
                dt = datetime.datetime.fromtimestamp(int(watch.newest_history_key))
 | 
			
		||||
@@ -362,7 +361,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
                fe.pubDate(dt)
 | 
			
		||||
 | 
			
		||||
        response = make_response(fg.rss_str())
 | 
			
		||||
        response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
 | 
			
		||||
        response.headers.set('Content-Type', 'application/rss+xml')
 | 
			
		||||
        return response
 | 
			
		||||
 | 
			
		||||
    @app.route("/", methods=['GET'])
 | 
			
		||||
@@ -404,6 +403,8 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
                watch['uuid'] = uuid
 | 
			
		||||
                sorted_watches.append(watch)
 | 
			
		||||
 | 
			
		||||
        sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)
 | 
			
		||||
 | 
			
		||||
        existing_tags = datastore.get_all_tags()
 | 
			
		||||
 | 
			
		||||
        form = forms.quickWatchForm(request.form)
 | 
			
		||||
@@ -432,9 +433,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
    def ajax_callback_send_notification_test():
 | 
			
		||||
 | 
			
		||||
        import apprise
 | 
			
		||||
        from .apprise_asset import asset
 | 
			
		||||
        apobj = apprise.Apprise(asset=asset)
 | 
			
		||||
 | 
			
		||||
        apobj = apprise.Apprise()
 | 
			
		||||
 | 
			
		||||
        # validate URLS
 | 
			
		||||
        if not len(request.form['notification_urls'].strip()):
 | 
			
		||||
@@ -459,39 +458,25 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
 | 
			
		||||
        return 'OK'
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    @app.route("/clear_history/<string:uuid>", methods=['GET'])
 | 
			
		||||
    @app.route("/scrub", methods=['GET', 'POST'])
 | 
			
		||||
    @login_required
 | 
			
		||||
    def clear_watch_history(uuid):
 | 
			
		||||
        try:
 | 
			
		||||
            datastore.clear_watch_history(uuid)
 | 
			
		||||
        except KeyError:
 | 
			
		||||
            flash('Watch not found', 'error')
 | 
			
		||||
        else:
 | 
			
		||||
            flash("Cleared snapshot history for watch {}".format(uuid))
 | 
			
		||||
 | 
			
		||||
        return redirect(url_for('index'))
 | 
			
		||||
 | 
			
		||||
    @app.route("/clear_history", methods=['GET', 'POST'])
 | 
			
		||||
    @login_required
 | 
			
		||||
    def clear_all_history():
 | 
			
		||||
    def scrub_page():
 | 
			
		||||
 | 
			
		||||
        if request.method == 'POST':
 | 
			
		||||
            confirmtext = request.form.get('confirmtext')
 | 
			
		||||
 | 
			
		||||
            if confirmtext == 'clear':
 | 
			
		||||
            if confirmtext == 'scrub':
 | 
			
		||||
                changes_removed = 0
 | 
			
		||||
                for uuid in datastore.data['watching'].keys():
 | 
			
		||||
                    datastore.clear_watch_history(uuid)
 | 
			
		||||
                    #TODO: KeyError not checked, as it is above
 | 
			
		||||
                    datastore.scrub_watch(uuid)
 | 
			
		||||
 | 
			
		||||
                flash("Cleared snapshot history for all watches")
 | 
			
		||||
                flash("Cleared all snapshot history")
 | 
			
		||||
            else:
 | 
			
		||||
                flash('Incorrect confirmation text.', 'error')
 | 
			
		||||
 | 
			
		||||
            return redirect(url_for('index'))
 | 
			
		||||
 | 
			
		||||
        output = render_template("clear_all_history.html")
 | 
			
		||||
        output = render_template("scrub.html")
 | 
			
		||||
        return output
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
@@ -658,8 +643,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
                                     current_base_url=datastore.data['settings']['application']['base_url'],
 | 
			
		||||
                                     emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
 | 
			
		||||
                                     visualselector_data_is_ready=visualselector_data_is_ready,
 | 
			
		||||
                                     visualselector_enabled=visualselector_enabled,
 | 
			
		||||
                                     playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
 | 
			
		||||
                                     visualselector_enabled=visualselector_enabled
 | 
			
		||||
                                     )
 | 
			
		||||
 | 
			
		||||
        return output
 | 
			
		||||
@@ -825,25 +809,18 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
 | 
			
		||||
        screenshot_url = datastore.get_screenshot(uuid)
 | 
			
		||||
 | 
			
		||||
        system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
 | 
			
		||||
 | 
			
		||||
        is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
 | 
			
		||||
                    watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
 | 
			
		||||
 | 
			
		||||
        output = render_template("diff.html",
 | 
			
		||||
                                 watch_a=watch,
 | 
			
		||||
        output = render_template("diff.html", watch_a=watch,
 | 
			
		||||
                                 newest=newest_version_file_contents,
 | 
			
		||||
                                 previous=previous_version_file_contents,
 | 
			
		||||
                                 extra_stylesheets=extra_stylesheets,
 | 
			
		||||
                                 versions=dates[:-1], # All except current/last
 | 
			
		||||
                                 versions=dates[1:],
 | 
			
		||||
                                 uuid=uuid,
 | 
			
		||||
                                 newest_version_timestamp=dates[-1],
 | 
			
		||||
                                 current_previous_version=str(previous_version),
 | 
			
		||||
                                 current_diff_url=watch['url'],
 | 
			
		||||
                                 extra_title=" - Diff - {}".format(watch['title'] if watch['title'] else watch['url']),
 | 
			
		||||
                                 left_sticky=True,
 | 
			
		||||
                                 screenshot=screenshot_url,
 | 
			
		||||
                                 is_html_webdriver=is_html_webdriver)
 | 
			
		||||
                                 screenshot=screenshot_url)
 | 
			
		||||
 | 
			
		||||
        return output
 | 
			
		||||
 | 
			
		||||
@@ -858,12 +835,6 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
        if uuid == 'first':
 | 
			
		||||
            uuid = list(datastore.data['watching'].keys()).pop()
 | 
			
		||||
 | 
			
		||||
        # Normally you would never reach this, because the 'preview' button is not available when there's no history
 | 
			
		||||
        # However they may try to clear snapshots and reload the page
 | 
			
		||||
        if datastore.data['watching'][uuid].history_n == 0:
 | 
			
		||||
            flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
 | 
			
		||||
            return redirect(url_for('index'))
 | 
			
		||||
 | 
			
		||||
        extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
@@ -910,11 +881,6 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
            content.append({'line': "No history found", 'classes': ''})
 | 
			
		||||
 | 
			
		||||
        screenshot_url = datastore.get_screenshot(uuid)
 | 
			
		||||
        system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver'
 | 
			
		||||
 | 
			
		||||
        is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
 | 
			
		||||
                watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
 | 
			
		||||
 | 
			
		||||
        output = render_template("preview.html",
 | 
			
		||||
                                 content=content,
 | 
			
		||||
                                 extra_stylesheets=extra_stylesheets,
 | 
			
		||||
@@ -923,9 +889,8 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
                                 current_diff_url=watch['url'],
 | 
			
		||||
                                 screenshot=screenshot_url,
 | 
			
		||||
                                 watch=watch,
 | 
			
		||||
                                 uuid=uuid,
 | 
			
		||||
                                 is_html_webdriver=is_html_webdriver)
 | 
			
		||||
 | 
			
		||||
                                 uuid=uuid)
 | 
			
		||||
        
 | 
			
		||||
        return output
 | 
			
		||||
 | 
			
		||||
    @app.route("/settings/notification-logs", methods=['GET'])
 | 
			
		||||
@@ -933,7 +898,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
    def notification_logs():
 | 
			
		||||
        global notification_debug_log
 | 
			
		||||
        output = render_template("notification-log.html",
 | 
			
		||||
                                 logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])
 | 
			
		||||
                                 logs=notification_debug_log if len(notification_debug_log) else ["No errors or warnings detected"])
 | 
			
		||||
 | 
			
		||||
        return output
 | 
			
		||||
 | 
			
		||||
@@ -1204,8 +1169,7 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            logging.error("Error sharing -{}".format(str(e)))
 | 
			
		||||
            flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error')
 | 
			
		||||
            flash("Could not share, something went wrong while communicating with the share server.", 'error')
 | 
			
		||||
 | 
			
		||||
        # https://changedetection.io/share/VrMv05wpXyQa
 | 
			
		||||
        # in the browser - should give you a nice info page - wtf
 | 
			
		||||
@@ -1214,7 +1178,6 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
 | 
			
		||||
    # @todo handle ctrl break
 | 
			
		||||
    ticker_thread = threading.Thread(target=ticker_thread_check_time_launch_checks).start()
 | 
			
		||||
    threading.Thread(target=ticker_thread_job_queue_processor).start()
 | 
			
		||||
 | 
			
		||||
    threading.Thread(target=notification_runner).start()
 | 
			
		||||
 | 
			
		||||
@@ -1254,9 +1217,6 @@ def check_for_new_version():
 | 
			
		||||
 | 
			
		||||
def notification_runner():
 | 
			
		||||
    global notification_debug_log
 | 
			
		||||
    from datetime import datetime
 | 
			
		||||
    import json
 | 
			
		||||
 | 
			
		||||
    while not app.config.exit.is_set():
 | 
			
		||||
        try:
 | 
			
		||||
            # At the moment only one thread runs (single runner)
 | 
			
		||||
@@ -1265,17 +1225,13 @@ def notification_runner():
 | 
			
		||||
            time.sleep(1)
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
 | 
			
		||||
            now = datetime.now()
 | 
			
		||||
            sent_obj = None
 | 
			
		||||
 | 
			
		||||
            # Process notifications
 | 
			
		||||
            try:
 | 
			
		||||
                from changedetectionio import notification
 | 
			
		||||
 | 
			
		||||
                sent_obj = notification.process_notification(n_object, datastore)
 | 
			
		||||
                notification.process_notification(n_object, datastore)
 | 
			
		||||
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                logging.error("Watch URL: {}  Error {}".format(n_object['watch_url'], str(e)))
 | 
			
		||||
                print("Watch URL: {}  Error {}".format(n_object['watch_url'], str(e)))
 | 
			
		||||
 | 
			
		||||
                # UUID wont be present when we submit a 'test' from the global settings
 | 
			
		||||
                if 'uuid' in n_object:
 | 
			
		||||
@@ -1285,68 +1241,26 @@ def notification_runner():
 | 
			
		||||
                log_lines = str(e).splitlines()
 | 
			
		||||
                notification_debug_log += log_lines
 | 
			
		||||
 | 
			
		||||
            # Process notifications
 | 
			
		||||
            notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
 | 
			
		||||
            # Trim the log length
 | 
			
		||||
            notification_debug_log = notification_debug_log[-100:]
 | 
			
		||||
 | 
			
		||||
# Check the queue, when a job exists, start a fresh thread of update_worker
 | 
			
		||||
def ticker_thread_job_queue_processor():
 | 
			
		||||
 | 
			
		||||
    from changedetectionio import update_worker
 | 
			
		||||
    n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
 | 
			
		||||
 | 
			
		||||
    while not app.config.exit.is_set():
 | 
			
		||||
        time.sleep(0.3)
 | 
			
		||||
 | 
			
		||||
        # Check that some threads are free
 | 
			
		||||
        running = 0
 | 
			
		||||
        for t in threading.enumerate():
 | 
			
		||||
            if t.name == 'update_worker':
 | 
			
		||||
                running += 1
 | 
			
		||||
 | 
			
		||||
        if running >= n_workers:
 | 
			
		||||
            continue
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            uuid = update_q.get(block=False)
 | 
			
		||||
        except queue.Empty:
 | 
			
		||||
            # Go back to waiting for exit and/or another entry from the queue
 | 
			
		||||
            continue
 | 
			
		||||
        print ("Starting a thread fetch")
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        try:
 | 
			
		||||
            # Launch the update_worker thread that will handle picking items off a queue and sending them off
 | 
			
		||||
            # in the event that playwright or others have a memory leak, this should clean it up better than gc.collect()
 | 
			
		||||
            # (By letting it exit entirely)
 | 
			
		||||
            update_worker.update_worker(update_q, notification_q, app, datastore, uuid).start()
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            print ("Error launching update_worker for UUID {}.".format(uuid))
 | 
			
		||||
            print (str(e))
 | 
			
		||||
 | 
			
		||||
        print ("Running now {}", running)
 | 
			
		||||
                # Trim the log length
 | 
			
		||||
                notification_debug_log = notification_debug_log[-100:]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Thread runner to check every minute, look for new watches to feed into the Queue.
 | 
			
		||||
def ticker_thread_check_time_launch_checks():
 | 
			
		||||
    import random
 | 
			
		||||
    from changedetectionio import update_worker
 | 
			
		||||
    import logging
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
 | 
			
		||||
    print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
 | 
			
		||||
 | 
			
		||||
    # Can go in its own function
 | 
			
		||||
 | 
			
		||||
    # Always maintain the minimum number of threads, each thread will terminate when it has processed exactly 1 queued watch
 | 
			
		||||
    # This is to be totally sure that they don't leak memory
 | 
			
		||||
    # Spin up Workers that do the fetching
 | 
			
		||||
    # Can be overridden by ENV or use the default settings
 | 
			
		||||
 | 
			
		||||
    n_workers = int(os.getenv("FETCH_WORKERS", datastore.data['settings']['requests']['workers']))
 | 
			
		||||
    for _ in range(n_workers):
 | 
			
		||||
        new_worker = update_worker.update_worker(update_q, notification_q, app, datastore)
 | 
			
		||||
        running_update_threads.append(new_worker)
 | 
			
		||||
        new_worker.start()
 | 
			
		||||
 | 
			
		||||
    while not app.config.exit.is_set():
 | 
			
		||||
 | 
			
		||||
        # Update our list of watches by UUID that are currently fetching data, used in the UI
 | 
			
		||||
        # Get a list of watches by UUID that are currently fetching data
 | 
			
		||||
        running_uuids = []
 | 
			
		||||
        for t in running_update_threads:
 | 
			
		||||
            if t.current_uuid:
 | 
			
		||||
@@ -1367,12 +1281,14 @@ def ticker_thread_check_time_launch_checks():
 | 
			
		||||
        while update_q.qsize() >= 2000:
 | 
			
		||||
            time.sleep(1)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        recheck_time_system_seconds = int(datastore.threshold_seconds)
 | 
			
		||||
 | 
			
		||||
        # Check for watches outside of the time threshold to put in the thread queue.
 | 
			
		||||
        now = time.time()
 | 
			
		||||
 | 
			
		||||
        recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
 | 
			
		||||
        recheck_time_system_seconds = datastore.threshold_seconds
 | 
			
		||||
 | 
			
		||||
        for uuid in watch_uuid_list:
 | 
			
		||||
            now = time.time()
 | 
			
		||||
 | 
			
		||||
            watch = datastore.data['watching'].get(uuid)
 | 
			
		||||
            if not watch:
 | 
			
		||||
                logging.error("Watch: {} no longer present.".format(uuid))
 | 
			
		||||
@@ -1383,33 +1299,20 @@ def ticker_thread_check_time_launch_checks():
 | 
			
		||||
                continue
 | 
			
		||||
 | 
			
		||||
            # If they supplied an individual entry minutes to threshold.
 | 
			
		||||
 | 
			
		||||
            threshold = now
 | 
			
		||||
            watch_threshold_seconds = watch.threshold_seconds()
 | 
			
		||||
            threshold = watch_threshold_seconds if watch_threshold_seconds > 0 else recheck_time_system_seconds
 | 
			
		||||
            if watch_threshold_seconds:
 | 
			
		||||
                threshold -= watch_threshold_seconds
 | 
			
		||||
            else:
 | 
			
		||||
                threshold -= recheck_time_system_seconds
 | 
			
		||||
 | 
			
		||||
            # #580 - Jitter plus/minus amount of time to make the check seem more random to the server
 | 
			
		||||
            jitter = datastore.data['settings']['requests'].get('jitter_seconds', 0)
 | 
			
		||||
            if jitter > 0:
 | 
			
		||||
                if watch.jitter_seconds == 0:
 | 
			
		||||
                    watch.jitter_seconds = random.uniform(-abs(jitter), jitter)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
            seconds_since_last_recheck = now - watch['last_checked']
 | 
			
		||||
            if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
 | 
			
		||||
            # Yeah, put it in the queue, it's more than time
 | 
			
		||||
            if watch['last_checked'] <= max(threshold, recheck_time_minimum_seconds):
 | 
			
		||||
                if not uuid in running_uuids and uuid not in update_q.queue:
 | 
			
		||||
                    print("Queued watch UUID {} last checked at {} queued at {:0.2f} jitter {:0.2f}s, {:0.2f}s since last checked".format(uuid,
 | 
			
		||||
                                                                                                         watch['last_checked'],
 | 
			
		||||
                                                                                                         now,
 | 
			
		||||
                                                                                                         watch.jitter_seconds,
 | 
			
		||||
                                                                                                         now - watch['last_checked']))
 | 
			
		||||
                    # Into the queue with you
 | 
			
		||||
                    update_q.put(uuid)
 | 
			
		||||
 | 
			
		||||
                    # Reset for next time
 | 
			
		||||
                    watch.jitter_seconds = 0
 | 
			
		||||
 | 
			
		||||
        # Wait before checking the list again - saves CPU
 | 
			
		||||
        time.sleep(1)
 | 
			
		||||
        # Wait a few seconds before checking the list again
 | 
			
		||||
        time.sleep(3)
 | 
			
		||||
 | 
			
		||||
        # Should be low so we can break this out in testing
 | 
			
		||||
        app.config.exit.wait(1)
 | 
			
		||||
        app.config.exit.wait(1)
 | 
			
		||||
@@ -1,11 +0,0 @@
 | 
			
		||||
import apprise
 | 
			
		||||
 | 
			
		||||
# Create our AppriseAsset and populate it with some of our new values:
 | 
			
		||||
# https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object
 | 
			
		||||
asset = apprise.AppriseAsset(
 | 
			
		||||
   image_url_logo='https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
asset.app_id = "changedetection.io"
 | 
			
		||||
asset.app_desc = "ChangeDetection.io best and simplest website monitoring and change detection"
 | 
			
		||||
asset.app_url = "https://changedetection.io"
 | 
			
		||||
@@ -35,7 +35,7 @@ def main():
 | 
			
		||||
    create_datastore_dir = False
 | 
			
		||||
 | 
			
		||||
    for opt, arg in opts:
 | 
			
		||||
        #        if opt == '--clear-all-history':
 | 
			
		||||
        #        if opt == '--purge':
 | 
			
		||||
        # Remove history, the actual files you need to delete manually.
 | 
			
		||||
        #            for uuid, watch in datastore.data['watching'].items():
 | 
			
		||||
        #                watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})
 | 
			
		||||
 
 | 
			
		||||
@@ -46,7 +46,6 @@ class Fetcher():
 | 
			
		||||
    headers = None
 | 
			
		||||
 | 
			
		||||
    fetcher_description = "No description"
 | 
			
		||||
    webdriver_js_execute_code = None
 | 
			
		||||
    xpath_element_js = """               
 | 
			
		||||
                // Include the getXpath script directly, easier than fetching
 | 
			
		||||
                !function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
 | 
			
		||||
@@ -176,6 +175,7 @@ class Fetcher():
 | 
			
		||||
 | 
			
		||||
    # Will be needed in the future by the VisualSelector, always get this where possible.
 | 
			
		||||
    screenshot = False
 | 
			
		||||
    fetcher_description = "No description"
 | 
			
		||||
    system_http_proxy = os.getenv('HTTP_PROXY')
 | 
			
		||||
    system_https_proxy = os.getenv('HTTPS_PROXY')
 | 
			
		||||
 | 
			
		||||
@@ -281,14 +281,13 @@ class base_html_playwright(Fetcher):
 | 
			
		||||
        from playwright.sync_api import sync_playwright
 | 
			
		||||
        import playwright._impl._api_types
 | 
			
		||||
        from playwright._impl._api_types import Error, TimeoutError
 | 
			
		||||
        response = None
 | 
			
		||||
 | 
			
		||||
        with sync_playwright() as p:
 | 
			
		||||
            browser_type = getattr(p, self.browser_type)
 | 
			
		||||
 | 
			
		||||
            # Seemed to cause a connection Exception even tho I can see it connect
 | 
			
		||||
            # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
 | 
			
		||||
            # 60,000 connection timeout only
 | 
			
		||||
            browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000)
 | 
			
		||||
            browser = browser_type.connect_over_cdp(self.command_executor, timeout=timeout * 1000)
 | 
			
		||||
 | 
			
		||||
            # Set user agent to prevent Cloudflare from blocking the browser
 | 
			
		||||
            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
 | 
			
		||||
@@ -301,35 +300,21 @@ class base_html_playwright(Fetcher):
 | 
			
		||||
                accept_downloads=False
 | 
			
		||||
            )
 | 
			
		||||
 | 
			
		||||
            if len(request_headers):
 | 
			
		||||
                context.set_extra_http_headers(request_headers)
 | 
			
		||||
 | 
			
		||||
            page = context.new_page()
 | 
			
		||||
            try:
 | 
			
		||||
                page.set_default_navigation_timeout(90000)
 | 
			
		||||
                page.set_default_timeout(90000)
 | 
			
		||||
 | 
			
		||||
                # Listen for all console events and handle errors
 | 
			
		||||
                page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
 | 
			
		||||
 | 
			
		||||
                # Bug - never set viewport size BEFORE page.goto
 | 
			
		||||
 | 
			
		||||
                # Waits for the next navigation. Using Python context manager
 | 
			
		||||
                # prevents a race condition between clicking and waiting for a navigation.
 | 
			
		||||
                with page.expect_navigation():
 | 
			
		||||
                    response = page.goto(url, wait_until='load')
 | 
			
		||||
 | 
			
		||||
                if self.webdriver_js_execute_code is not None:
 | 
			
		||||
                    page.evaluate(self.webdriver_js_execute_code)
 | 
			
		||||
 | 
			
		||||
               # Bug - never set viewport size BEFORE page.goto
 | 
			
		||||
                response = page.goto(url, timeout=timeout * 1000, wait_until='commit')
 | 
			
		||||
                # Wait_until = commit
 | 
			
		||||
                # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
 | 
			
		||||
                # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
 | 
			
		||||
                # This seemed to solve nearly all 'TimeoutErrors'
 | 
			
		||||
                extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
 | 
			
		||||
                page.wait_for_timeout(extra_wait * 1000)
 | 
			
		||||
            except playwright._impl._api_types.TimeoutError as e:
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                # This can be ok, we will try to grab what we could retrieve
 | 
			
		||||
                pass
 | 
			
		||||
                raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                print ("other exception when page.goto")
 | 
			
		||||
                print (str(e))
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                raise PageUnloadable(url=url, status_code=None)
 | 
			
		||||
@@ -337,22 +322,18 @@ class base_html_playwright(Fetcher):
 | 
			
		||||
            if response is None:
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                print ("response object was none")
 | 
			
		||||
                raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
 | 
			
		||||
            if len(page.content().strip()) == 0:
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
 | 
			
		||||
            # Bug 2(?) Set the viewport size AFTER loading the page
 | 
			
		||||
            page.set_viewport_size({"width": 1280, "height": 1024})            
 | 
			
		||||
            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
 | 
			
		||||
            time.sleep(extra_wait)
 | 
			
		||||
            self.content = page.content()
 | 
			
		||||
            self.status_code = response.status
 | 
			
		||||
            page.set_viewport_size({"width": 1280, "height": 1024})
 | 
			
		||||
 | 
			
		||||
            if len(self.content.strip()) == 0:
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
                print ("Content was empty")
 | 
			
		||||
                raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
            
 | 
			
		||||
            self.status_code = response.status
 | 
			
		||||
            self.content = page.content()
 | 
			
		||||
            self.headers = response.all_headers()
 | 
			
		||||
 | 
			
		||||
            if current_css_filter is not None:
 | 
			
		||||
@@ -365,15 +346,9 @@ class base_html_playwright(Fetcher):
 | 
			
		||||
            # Bug 3 in Playwright screenshot handling
 | 
			
		||||
            # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
 | 
			
		||||
            # JPEG is better here because the screenshots can be very very large
 | 
			
		||||
 | 
			
		||||
            # Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
 | 
			
		||||
            # which will significantly increase the IO size between the server and client, it's recommended to use the lowest
 | 
			
		||||
            # acceptable screenshot quality here
 | 
			
		||||
            try:
 | 
			
		||||
                # Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
 | 
			
		||||
                page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
 | 
			
		||||
                # The actual screenshot
 | 
			
		||||
                self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
 | 
			
		||||
                page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
 | 
			
		||||
                self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=92)
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                context.close()
 | 
			
		||||
                browser.close()
 | 
			
		||||
@@ -453,12 +428,6 @@ class base_html_webdriver(Fetcher):
 | 
			
		||||
 | 
			
		||||
        self.driver.set_window_size(1280, 1024)
 | 
			
		||||
        self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
 | 
			
		||||
 | 
			
		||||
        if self.webdriver_js_execute_code is not None:
 | 
			
		||||
            self.driver.execute_script(self.webdriver_js_execute_code)
 | 
			
		||||
            # Selenium doesn't automatically wait for actions as good as Playwright, so wait again
 | 
			
		||||
            self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
 | 
			
		||||
 | 
			
		||||
        self.screenshot = self.driver.get_screenshot_as_png()
 | 
			
		||||
 | 
			
		||||
        # @todo - how to check this? is it possible?
 | 
			
		||||
 
 | 
			
		||||
@@ -1,5 +1,4 @@
 | 
			
		||||
import hashlib
 | 
			
		||||
import logging
 | 
			
		||||
import os
 | 
			
		||||
import re
 | 
			
		||||
import time
 | 
			
		||||
@@ -11,7 +10,6 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Some common stuff here that can be moved to a base class
 | 
			
		||||
# (set_proxy_from_list)
 | 
			
		||||
class perform_site_check():
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *args, datastore, **kwargs):
 | 
			
		||||
@@ -46,20 +44,6 @@ class perform_site_check():
 | 
			
		||||
 | 
			
		||||
        return proxy_args
 | 
			
		||||
 | 
			
		||||
    # Doesn't look like python supports forward slash auto enclosure in re.findall
 | 
			
		||||
    # So convert it to inline flag "foobar(?i)" type configuration
 | 
			
		||||
    def forward_slash_enclosed_regex_to_options(self, regex):
 | 
			
		||||
        res = re.search(r'^/(.*?)/(\w+)$', regex, re.IGNORECASE)
 | 
			
		||||
 | 
			
		||||
        if res:
 | 
			
		||||
            regex = res.group(1)
 | 
			
		||||
            regex += '(?{})'.format(res.group(2))
 | 
			
		||||
        else:
 | 
			
		||||
            regex += '(?{})'.format('i')
 | 
			
		||||
 | 
			
		||||
        return regex
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def run(self, uuid):
 | 
			
		||||
        timestamp = int(time.time())  # used for storage etc too
 | 
			
		||||
 | 
			
		||||
@@ -121,9 +105,6 @@ class perform_site_check():
 | 
			
		||||
        elif system_webdriver_delay is not None:
 | 
			
		||||
            fetcher.render_extract_delay = system_webdriver_delay
 | 
			
		||||
 | 
			
		||||
        if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
 | 
			
		||||
            fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']
 | 
			
		||||
 | 
			
		||||
        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter'])
 | 
			
		||||
        fetcher.quit()
 | 
			
		||||
 | 
			
		||||
@@ -165,9 +146,7 @@ class perform_site_check():
 | 
			
		||||
                is_html = False
 | 
			
		||||
 | 
			
		||||
        if is_html or is_source:
 | 
			
		||||
            
 | 
			
		||||
            # CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
 | 
			
		||||
            fetcher.content = html_tools.workarounds_for_obfuscations(fetcher.content)
 | 
			
		||||
            html_content = fetcher.content
 | 
			
		||||
 | 
			
		||||
            # If not JSON,  and if it's not text/plain..
 | 
			
		||||
@@ -225,69 +204,34 @@ class perform_site_check():
 | 
			
		||||
        else:
 | 
			
		||||
            stripped_text_from_html = stripped_text_from_html.encode('utf8')
 | 
			
		||||
 | 
			
		||||
        # 615 Extract text by regex
 | 
			
		||||
        extract_text = watch.get('extract_text', [])
 | 
			
		||||
        if len(extract_text) > 0:
 | 
			
		||||
            regex_matched_output = []
 | 
			
		||||
            for s_re in extract_text:
 | 
			
		||||
                # incase they specified something in '/.../x'
 | 
			
		||||
                regex = self.forward_slash_enclosed_regex_to_options(s_re)
 | 
			
		||||
                result = re.findall(regex.encode('utf-8'), stripped_text_from_html)
 | 
			
		||||
 | 
			
		||||
                for l in result:
 | 
			
		||||
                    if type(l) is tuple:
 | 
			
		||||
                        #@todo - some formatter option default (between groups)
 | 
			
		||||
                        regex_matched_output += list(l) + [b'\n']
 | 
			
		||||
                    else:
 | 
			
		||||
                        # @todo - some formatter option default (between each ungrouped result)
 | 
			
		||||
                        regex_matched_output += [l] + [b'\n']
 | 
			
		||||
 | 
			
		||||
            # Now we will only show what the regex matched
 | 
			
		||||
            stripped_text_from_html = b''
 | 
			
		||||
            text_content_before_ignored_filter = b''
 | 
			
		||||
            if regex_matched_output:
 | 
			
		||||
                # @todo some formatter for presentation?
 | 
			
		||||
                stripped_text_from_html = b''.join(regex_matched_output)
 | 
			
		||||
                text_content_before_ignored_filter = stripped_text_from_html
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        # Re #133 - if we should strip whitespaces from triggering the change detected comparison
 | 
			
		||||
        if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
 | 
			
		||||
            fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
 | 
			
		||||
        else:
 | 
			
		||||
            fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
 | 
			
		||||
 | 
			
		||||
        ############ Blocking rules, after checksum #################
 | 
			
		||||
        blocked = False
 | 
			
		||||
        # On the first run of a site, watch['previous_md5'] will be None, set it the current one.
 | 
			
		||||
        if not watch.get('previous_md5'):
 | 
			
		||||
            watch['previous_md5'] = fetched_md5
 | 
			
		||||
            update_obj["previous_md5"] = fetched_md5
 | 
			
		||||
 | 
			
		||||
        blocked_by_not_found_trigger_text = False
 | 
			
		||||
 | 
			
		||||
        if len(watch['trigger_text']):
 | 
			
		||||
            # Assume blocked
 | 
			
		||||
            blocked = True
 | 
			
		||||
            # Yeah, lets block first until something matches
 | 
			
		||||
            blocked_by_not_found_trigger_text = True
 | 
			
		||||
            # Filter and trigger works the same, so reuse it
 | 
			
		||||
            # It should return the line numbers that match
 | 
			
		||||
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
 | 
			
		||||
                                                  wordlist=watch['trigger_text'],
 | 
			
		||||
                                                  mode="line numbers")
 | 
			
		||||
            # Unblock if the trigger was found
 | 
			
		||||
            # If it returned any lines that matched..
 | 
			
		||||
            if result:
 | 
			
		||||
                blocked = False
 | 
			
		||||
                blocked_by_not_found_trigger_text = False
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        if len(watch['text_should_not_be_present']):
 | 
			
		||||
            # If anything matched, then we should block a change from happening
 | 
			
		||||
            result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
 | 
			
		||||
                                                  wordlist=watch['text_should_not_be_present'],
 | 
			
		||||
                                                  mode="line numbers")
 | 
			
		||||
            if result:
 | 
			
		||||
                blocked = True
 | 
			
		||||
 | 
			
		||||
        # The main thing that all this at the moment comes down to :)
 | 
			
		||||
        if watch['previous_md5'] != fetched_md5:
 | 
			
		||||
        if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
 | 
			
		||||
            changed_detected = True
 | 
			
		||||
 | 
			
		||||
        # Looks like something changed, but did it match all the rules?
 | 
			
		||||
        if blocked:
 | 
			
		||||
            changed_detected = False
 | 
			
		||||
            update_obj["previous_md5"] = fetched_md5
 | 
			
		||||
            update_obj["last_changed"] = timestamp
 | 
			
		||||
 | 
			
		||||
        # Extract title as title
 | 
			
		||||
        if is_html:
 | 
			
		||||
@@ -295,21 +239,4 @@ class perform_site_check():
 | 
			
		||||
                if not watch['title'] or not len(watch['title']):
 | 
			
		||||
                    update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
 | 
			
		||||
 | 
			
		||||
        if changed_detected:
 | 
			
		||||
            if watch.get('check_unique_lines', False):
 | 
			
		||||
                has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
 | 
			
		||||
                # One or more lines? unsure?
 | 
			
		||||
                if not has_unique_lines:
 | 
			
		||||
                    logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
 | 
			
		||||
                    changed_detected = False
 | 
			
		||||
                else:
 | 
			
		||||
                    logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
 | 
			
		||||
 | 
			
		||||
        # Always record the new checksum
 | 
			
		||||
        update_obj["previous_md5"] = fetched_md5
 | 
			
		||||
 | 
			
		||||
        # On the first run of a site, watch['previous_md5'] will be None, set it the current one.
 | 
			
		||||
        if not watch.get('previous_md5'):
 | 
			
		||||
            watch['previous_md5'] = fetched_md5
 | 
			
		||||
 | 
			
		||||
        return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data
 | 
			
		||||
 
 | 
			
		||||
@@ -223,7 +223,7 @@ class validateURL(object):
 | 
			
		||||
        except validators.ValidationFailure:
 | 
			
		||||
            message = field.gettext('\'%s\' is not a valid URL.' % (field.data.strip()))
 | 
			
		||||
            raise ValidationError(message)
 | 
			
		||||
 | 
			
		||||
        
 | 
			
		||||
class ValidateListRegex(object):
 | 
			
		||||
    """
 | 
			
		||||
    Validates that anything that looks like a regex passes as a regex
 | 
			
		||||
@@ -330,9 +330,6 @@ class watchForm(commonSettingsForm):
 | 
			
		||||
    css_filter = StringField('CSS/JSON/XPATH Filter', [ValidateCSSJSONXPATHInput()], default='')
 | 
			
		||||
 | 
			
		||||
    subtractive_selectors = StringListField('Remove elements', [ValidateCSSJSONXPATHInput(allow_xpath=False, allow_json=False)])
 | 
			
		||||
 | 
			
		||||
    extract_text = StringListField('Extract text', [ValidateListRegex()])
 | 
			
		||||
 | 
			
		||||
    title = StringField('Title', default='')
 | 
			
		||||
 | 
			
		||||
    ignore_text = StringListField('Ignore text', [ValidateListRegex()])
 | 
			
		||||
@@ -340,17 +337,10 @@ class watchForm(commonSettingsForm):
 | 
			
		||||
    body = TextAreaField('Request body', [validators.Optional()])
 | 
			
		||||
    method = SelectField('Request method', choices=valid_method, default=default_method)
 | 
			
		||||
    ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
 | 
			
		||||
    check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
 | 
			
		||||
    trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
 | 
			
		||||
    text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
 | 
			
		||||
 | 
			
		||||
    webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])
 | 
			
		||||
 | 
			
		||||
    save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
 | 
			
		||||
    save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
 | 
			
		||||
    proxy = RadioField('Proxy')
 | 
			
		||||
    filter_failure_notification_send = BooleanField(
 | 
			
		||||
        'Send a notification when the filter can no longer be found on the page', default=False)
 | 
			
		||||
 | 
			
		||||
    def validate(self, **kwargs):
 | 
			
		||||
        if not super().validate():
 | 
			
		||||
@@ -370,9 +360,7 @@ class watchForm(commonSettingsForm):
 | 
			
		||||
class globalSettingsRequestForm(Form):
 | 
			
		||||
    time_between_check = FormField(TimeBetweenCheckForm)
 | 
			
		||||
    proxy = RadioField('Proxy')
 | 
			
		||||
    jitter_seconds = IntegerField('Random jitter seconds ± check',
 | 
			
		||||
                                  render_kw={"style": "width: 5em;"},
 | 
			
		||||
                                  validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# datastore.data['settings']['application']..
 | 
			
		||||
class globalSettingsApplicationForm(commonSettingsForm):
 | 
			
		||||
@@ -389,11 +377,6 @@ class globalSettingsApplicationForm(commonSettingsForm):
 | 
			
		||||
    api_access_token_enabled = BooleanField('API access token security check enabled', default=True, validators=[validators.Optional()])
 | 
			
		||||
    password = SaltyPasswordField()
 | 
			
		||||
 | 
			
		||||
    filter_failure_notification_threshold_attempts = IntegerField('Number of times the filter can be missing before sending a notification',
 | 
			
		||||
                                                                  render_kw={"style": "width: 5em;"},
 | 
			
		||||
                                                                  validators=[validators.NumberRange(min=0,
 | 
			
		||||
                                                                                                     message="Should contain zero or more attempts")])
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class globalSettingsForm(Form):
 | 
			
		||||
    # Define these as FormFields/"sub forms", this way it matches the JSON storage
 | 
			
		||||
 
 | 
			
		||||
@@ -1,27 +1,23 @@
 | 
			
		||||
import json
 | 
			
		||||
import re
 | 
			
		||||
from typing import List
 | 
			
		||||
 | 
			
		||||
from bs4 import BeautifulSoup
 | 
			
		||||
from jsonpath_ng.ext import parse
 | 
			
		||||
import re
 | 
			
		||||
from inscriptis import get_text
 | 
			
		||||
from inscriptis.model.config import ParserConfig
 | 
			
		||||
 | 
			
		||||
class FilterNotFoundInResponse(ValueError):
 | 
			
		||||
    def __init__(self, msg):
 | 
			
		||||
        ValueError.__init__(self, msg)
 | 
			
		||||
 | 
			
		||||
class JSONNotFound(ValueError):
 | 
			
		||||
    def __init__(self, msg):
 | 
			
		||||
        ValueError.__init__(self, msg)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Given a CSS Rule, and a blob of HTML, return the blob of HTML that matches
 | 
			
		||||
def css_filter(css_filter, html_content):
 | 
			
		||||
    soup = BeautifulSoup(html_content, "html.parser")
 | 
			
		||||
    html_block = ""
 | 
			
		||||
    r = soup.select(css_filter, separator="")
 | 
			
		||||
    if len(html_content) > 0 and len(r) == 0:
 | 
			
		||||
        raise FilterNotFoundInResponse(css_filter)
 | 
			
		||||
    for item in r:
 | 
			
		||||
    for item in soup.select(css_filter, separator=""):
 | 
			
		||||
        html_block += str(item)
 | 
			
		||||
 | 
			
		||||
    return html_block + "\n"
 | 
			
		||||
@@ -46,12 +42,8 @@ def xpath_filter(xpath_filter, html_content):
 | 
			
		||||
    tree = html.fromstring(bytes(html_content, encoding='utf-8'))
 | 
			
		||||
    html_block = ""
 | 
			
		||||
 | 
			
		||||
    r = tree.xpath(xpath_filter.strip(), namespaces={'re': 'http://exslt.org/regular-expressions'})
 | 
			
		||||
    if len(html_content) > 0 and len(r) == 0:
 | 
			
		||||
        raise FilterNotFoundInResponse(xpath_filter)
 | 
			
		||||
 | 
			
		||||
    for item in r:
 | 
			
		||||
        html_block += etree.tostring(item, pretty_print=True).decode('utf-8') + "<br/>"
 | 
			
		||||
    for item in tree.xpath(xpath_filter.strip(), namespaces={'re':'http://exslt.org/regular-expressions'}):
 | 
			
		||||
        html_block+= etree.tostring(item, pretty_print=True).decode('utf-8')+"<br/>"
 | 
			
		||||
 | 
			
		||||
    return html_block
 | 
			
		||||
 | 
			
		||||
@@ -181,16 +173,9 @@ def strip_ignore_text(content, wordlist, mode="content"):
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
 | 
			
		||||
    import multiprocessing
 | 
			
		||||
 | 
			
		||||
    from inscriptis.model.config import ParserConfig
 | 
			
		||||
 | 
			
		||||
    """Converts html string to a string with just the text. If ignoring
 | 
			
		||||
    rendering anchor tag content is enable, anchor tag content are also
 | 
			
		||||
    included in the text
 | 
			
		||||
    
 | 
			
		||||
    @NOTE: HORRIBLE LXML INDUCED MEMORY LEAK WORKAROUND HERE 
 | 
			
		||||
           https://www.reddit.com/r/Python/comments/j0gl8t/psa_pythonlxml_memory_leaks_and_a_solution/ 
 | 
			
		||||
 | 
			
		||||
    :param html_content: string with html content
 | 
			
		||||
    :param render_anchor_tag_content: boolean flag indicating whether to extract
 | 
			
		||||
@@ -212,33 +197,8 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
 | 
			
		||||
    else:
 | 
			
		||||
        parser_config = None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def parse_function(html_content, parser_config, results_queue):
 | 
			
		||||
        from inscriptis import get_text
 | 
			
		||||
        # get text and annotations via inscriptis
 | 
			
		||||
        text_content = get_text(html_content, config=parser_config)
 | 
			
		||||
        results_queue.put(text_content)
 | 
			
		||||
 | 
			
		||||
    results_queue = multiprocessing.Queue()
 | 
			
		||||
    parse_process = multiprocessing.Process(target=parse_function, args=(html_content, parser_config, results_queue))
 | 
			
		||||
    parse_process.daemon = True
 | 
			
		||||
    parse_process.start()
 | 
			
		||||
    text_content = results_queue.get()  # blocks until results are available
 | 
			
		||||
    parse_process.terminate()
 | 
			
		||||
    # get text and annotations via inscriptis
 | 
			
		||||
    text_content = get_text(html_content, config=parser_config)
 | 
			
		||||
 | 
			
		||||
    return text_content
 | 
			
		||||
 | 
			
		||||
def workarounds_for_obfuscations(content):
 | 
			
		||||
    """
 | 
			
		||||
    Some sites are using sneaky tactics to make prices and other information un-renderable by Inscriptis
 | 
			
		||||
    This could go into its own Pip package in the future, for faster updates
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    # HomeDepot.com style <span>$<!-- -->90<!-- -->.<!-- -->74</span>
 | 
			
		||||
    # https://github.com/weblyzard/inscriptis/issues/45
 | 
			
		||||
    if not content:
 | 
			
		||||
        return content
 | 
			
		||||
 | 
			
		||||
    content = re.sub('<!--\s+-->', '', content)
 | 
			
		||||
 | 
			
		||||
    return content
 | 
			
		||||
 
 | 
			
		||||
@@ -1,28 +1,29 @@
 | 
			
		||||
from os import getenv
 | 
			
		||||
import collections
 | 
			
		||||
import os
 | 
			
		||||
 | 
			
		||||
import uuid as uuid_builder
 | 
			
		||||
 | 
			
		||||
from changedetectionio.notification import (
 | 
			
		||||
    default_notification_body,
 | 
			
		||||
    default_notification_format,
 | 
			
		||||
    default_notification_title,
 | 
			
		||||
)
 | 
			
		||||
 | 
			
		||||
_FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT = 6
 | 
			
		||||
 | 
			
		||||
class model(dict):
 | 
			
		||||
    base_config = {
 | 
			
		||||
            'note': "Hello! If you change this file manually, please be sure to restart your changedetection.io instance!",
 | 
			
		||||
            'watching': {},
 | 
			
		||||
            'settings': {
 | 
			
		||||
                'headers': {
 | 
			
		||||
                    'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
 | 
			
		||||
                    'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
 | 
			
		||||
                    'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
 | 
			
		||||
                    'Accept-Encoding': 'gzip, deflate',  # No support for brolti in python requests yet.
 | 
			
		||||
                    'Accept-Language': 'en-GB,en-US;q=0.9,en;'
 | 
			
		||||
                },
 | 
			
		||||
                'requests': {
 | 
			
		||||
                    'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")),  # Default 45 seconds
 | 
			
		||||
                    'timeout': 15,  # Default 15 seconds
 | 
			
		||||
                    'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
 | 
			
		||||
                    'jitter_seconds': 0,
 | 
			
		||||
                    'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")),  # Number of threads, lower is better for slow connections
 | 
			
		||||
                    'workers': 10,  # Number of threads, lower is better for slow connections
 | 
			
		||||
                    'proxy': None # Preferred proxy connection
 | 
			
		||||
                },
 | 
			
		||||
                'application': {
 | 
			
		||||
@@ -31,8 +32,7 @@ class model(dict):
 | 
			
		||||
                    'base_url' : None,
 | 
			
		||||
                    'extract_title_as_title': False,
 | 
			
		||||
                    'empty_pages_are_a_change': False,
 | 
			
		||||
                    'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
 | 
			
		||||
                    'filter_failure_notification_threshold_attempts': _FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT,
 | 
			
		||||
                    'fetch_backend': os.getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
 | 
			
		||||
                    'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
 | 
			
		||||
                    'global_subtractive_selectors': [],
 | 
			
		||||
                    'ignore_whitespace': True,
 | 
			
		||||
 
 | 
			
		||||
@@ -1,9 +1,7 @@
 | 
			
		||||
import os
 | 
			
		||||
import uuid as uuid_builder
 | 
			
		||||
from distutils.util import strtobool
 | 
			
		||||
 | 
			
		||||
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
 | 
			
		||||
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
 | 
			
		||||
 | 
			
		||||
from changedetectionio.notification import (
 | 
			
		||||
    default_notification_body,
 | 
			
		||||
@@ -15,6 +13,7 @@ from changedetectionio.notification import (
 | 
			
		||||
class model(dict):
 | 
			
		||||
    __newest_history_key = None
 | 
			
		||||
    __history_n=0
 | 
			
		||||
 | 
			
		||||
    __base_config = {
 | 
			
		||||
            'url': None,
 | 
			
		||||
            'tag': None,
 | 
			
		||||
@@ -36,25 +35,18 @@ class model(dict):
 | 
			
		||||
            'notification_title': default_notification_title,
 | 
			
		||||
            'notification_body': default_notification_body,
 | 
			
		||||
            'notification_format': default_notification_format,
 | 
			
		||||
            'css_filter': '',
 | 
			
		||||
            'extract_text': [],  # Extract text by regex after filters
 | 
			
		||||
            'css_filter': "",
 | 
			
		||||
            'subtractive_selectors': [],
 | 
			
		||||
            'trigger_text': [],  # List of text or regex to wait for until a change is detected
 | 
			
		||||
            'text_should_not_be_present': [], # Text that should not present
 | 
			
		||||
            'fetch_backend': None,
 | 
			
		||||
            'filter_failure_notification_send': strtobool(os.getenv('FILTER_FAILURE_NOTIFICATION_SEND_DEFAULT', 'True')),
 | 
			
		||||
            'consecutive_filter_failures': 0, # Every time the CSS/xPath filter cannot be located, reset when all is fine.
 | 
			
		||||
            'extract_title_as_title': False,
 | 
			
		||||
            'check_unique_lines': False, # On change-detected, compare against all history if its something new
 | 
			
		||||
            'proxy': None, # Preferred proxy connection
 | 
			
		||||
            # Re #110, so then if this is set to None, we know to use the default value instead
 | 
			
		||||
            # Requires setting to None on submit if it's the same as the default
 | 
			
		||||
            # Should be all None by default, so we use the system default in this case.
 | 
			
		||||
            'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
 | 
			
		||||
            'webdriver_delay': None,
 | 
			
		||||
            'webdriver_js_execute_code': None, # Run before change-detection
 | 
			
		||||
            'webdriver_delay': None
 | 
			
		||||
        }
 | 
			
		||||
    jitter_seconds = 0
 | 
			
		||||
 | 
			
		||||
    def __init__(self, *arg, **kw):
 | 
			
		||||
        import uuid
 | 
			
		||||
@@ -92,7 +84,7 @@ class model(dict):
 | 
			
		||||
        # Read the history file as a dict
 | 
			
		||||
        fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
 | 
			
		||||
        if os.path.isfile(fname):
 | 
			
		||||
            logging.debug("Reading history index " + str(time.time()))
 | 
			
		||||
            logging.debug("Disk IO accessed " + str(time.time()))
 | 
			
		||||
            with open(fname, "r") as f:
 | 
			
		||||
                tmp_history = dict(i.strip().split(',', 2) for i in f.readlines())
 | 
			
		||||
 | 
			
		||||
@@ -164,21 +156,9 @@ class model(dict):
 | 
			
		||||
 | 
			
		||||
    def threshold_seconds(self):
 | 
			
		||||
        seconds = 0
 | 
			
		||||
        mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
 | 
			
		||||
        for m, n in mtable.items():
 | 
			
		||||
            x = self.get('time_between_check', {}).get(m, None)
 | 
			
		||||
            if x:
 | 
			
		||||
                seconds += x * n
 | 
			
		||||
        return seconds
 | 
			
		||||
 | 
			
		||||
    # Iterate over all history texts and see if something new exists
 | 
			
		||||
    def lines_contain_something_unique_compared_to_history(self, lines=[]):
 | 
			
		||||
        local_lines = [l.decode('utf-8').strip().lower() for l in lines]
 | 
			
		||||
 | 
			
		||||
        # Compare each lines (set) against each history text file (set) looking for something new..
 | 
			
		||||
        for k, v in self.history.items():
 | 
			
		||||
            alist = [line.decode('utf-8').strip().lower() for line in open(v, 'rb')]
 | 
			
		||||
            res = set(alist) != set(local_lines)
 | 
			
		||||
            if res:
 | 
			
		||||
                return True
 | 
			
		||||
 | 
			
		||||
        return False
 | 
			
		||||
 
 | 
			
		||||
@@ -34,6 +34,7 @@ def process_notification(n_object, datastore):
 | 
			
		||||
        valid_notification_formats[default_notification_format],
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # Insert variables into the notification content
 | 
			
		||||
    notification_parameters = create_notification_parameters(n_object, datastore)
 | 
			
		||||
 | 
			
		||||
@@ -47,10 +48,9 @@ def process_notification(n_object, datastore):
 | 
			
		||||
    # Anything higher than or equal to WARNING (which covers things like Connection errors)
 | 
			
		||||
    # raise it as an exception
 | 
			
		||||
    apobjs=[]
 | 
			
		||||
    sent_objs=[]
 | 
			
		||||
    from .apprise_asset import asset
 | 
			
		||||
    for url in n_object['notification_urls']:
 | 
			
		||||
        apobj = apprise.Apprise(debug=True, asset=asset)
 | 
			
		||||
 | 
			
		||||
        apobj = apprise.Apprise(debug=True)
 | 
			
		||||
        url = url.strip()
 | 
			
		||||
        if len(url):
 | 
			
		||||
            print(">> Process Notification: AppRise notifying {}".format(url))
 | 
			
		||||
@@ -63,36 +63,23 @@ def process_notification(n_object, datastore):
 | 
			
		||||
 | 
			
		||||
                # So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
 | 
			
		||||
                k = '?' if not '?' in url else '&'
 | 
			
		||||
                if not 'avatar_url' in url and not url.startswith('mail'):
 | 
			
		||||
                if not 'avatar_url' in url:
 | 
			
		||||
                    url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
 | 
			
		||||
 | 
			
		||||
                if url.startswith('tgram://'):
 | 
			
		||||
                    # Telegram only supports a limit subset of HTML, remove the '<br/>' we place in.
 | 
			
		||||
                    # re https://github.com/dgtlmoon/changedetection.io/issues/555
 | 
			
		||||
                    # @todo re-use an existing library we have already imported to strip all non-allowed tags
 | 
			
		||||
                    n_body = n_body.replace('<br/>', '\n')
 | 
			
		||||
                    n_body = n_body.replace('</br>', '\n')
 | 
			
		||||
                    # real limit is 4096, but minus some for extra metadata
 | 
			
		||||
                    payload_max_size = 3600
 | 
			
		||||
                    body_limit = max(0, payload_max_size - len(n_title))
 | 
			
		||||
                    n_title = n_title[0:payload_max_size]
 | 
			
		||||
                    n_body = n_body[0:body_limit]
 | 
			
		||||
 | 
			
		||||
                elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks'):
 | 
			
		||||
                elif url.startswith('discord://'):
 | 
			
		||||
                    # real limit is 2000, but minus some for extra metadata
 | 
			
		||||
                    payload_max_size = 1700
 | 
			
		||||
                    body_limit = max(0, payload_max_size - len(n_title))
 | 
			
		||||
                    n_title = n_title[0:payload_max_size]
 | 
			
		||||
                    n_body = n_body[0:body_limit]
 | 
			
		||||
 | 
			
		||||
                elif url.startswith('mailto'):
 | 
			
		||||
                    # Apprise will default to HTML, so we need to override it
 | 
			
		||||
                    # So that whats' generated in n_body is in line with what is going to be sent.
 | 
			
		||||
                    # https://github.com/caronc/apprise/issues/633#issuecomment-1191449321
 | 
			
		||||
                    if not 'format=' in url and (n_format == 'text' or n_format == 'markdown'):
 | 
			
		||||
                        prefix = '?' if not '?' in url else '&'
 | 
			
		||||
                        url = "{}{}format={}".format(url, prefix, n_format)
 | 
			
		||||
 | 
			
		||||
                apobj.add(url)
 | 
			
		||||
 | 
			
		||||
                apobj.notify(
 | 
			
		||||
@@ -109,15 +96,6 @@ def process_notification(n_object, datastore):
 | 
			
		||||
                log_value = logs.getvalue()
 | 
			
		||||
                if log_value and 'WARNING' in log_value or 'ERROR' in log_value:
 | 
			
		||||
                    raise Exception(log_value)
 | 
			
		||||
                
 | 
			
		||||
                sent_objs.append({'title': n_title,
 | 
			
		||||
                                  'body': n_body,
 | 
			
		||||
                                  'url' : url,
 | 
			
		||||
                                  'body_format': n_format})
 | 
			
		||||
 | 
			
		||||
    # Return what was sent for better logging - after the for loop
 | 
			
		||||
    return sent_objs
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Notification title + body content parameters get created here.
 | 
			
		||||
def create_notification_parameters(n_object, datastore):
 | 
			
		||||
 
 | 
			
		||||
@@ -1,20 +0,0 @@
 | 
			
		||||
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
 | 
			
		||||
<svg
 | 
			
		||||
   width="18"
 | 
			
		||||
   height="19.92"
 | 
			
		||||
   viewBox="0 0 18 19.92"
 | 
			
		||||
   version="1.1"
 | 
			
		||||
   id="svg6"
 | 
			
		||||
   xmlns="http://www.w3.org/2000/svg"
 | 
			
		||||
   xmlns:svg="http://www.w3.org/2000/svg">
 | 
			
		||||
  <defs
 | 
			
		||||
     id="defs10" />
 | 
			
		||||
  <path
 | 
			
		||||
     d="M -3,-2 H 21 V 22 H -3 Z"
 | 
			
		||||
     fill="none"
 | 
			
		||||
     id="path2" />
 | 
			
		||||
  <path
 | 
			
		||||
     d="m 15,14.08 c -0.76,0 -1.44,0.3 -1.96,0.77 L 5.91,10.7 C 5.96,10.47 6,10.24 6,10 6,9.76 5.96,9.53 5.91,9.3 L 12.96,5.19 C 13.5,5.69 14.21,6 15,6 16.66,6 18,4.66 18,3 18,1.34 16.66,0 15,0 c -1.66,0 -3,1.34 -3,3 0,0.24 0.04,0.47 0.09,0.7 L 5.04,7.81 C 4.5,7.31 3.79,7 3,7 1.34,7 0,8.34 0,10 c 0,1.66 1.34,3 3,3 0.79,0 1.5,-0.31 2.04,-0.81 l 7.12,4.16 c -0.05,0.21 -0.08,0.43 -0.08,0.65 0,1.61 1.31,2.92 2.92,2.92 1.61,0 2.92,-1.31 2.92,-2.92 0,-1.61 -1.31,-2.92 -2.92,-2.92 z"
 | 
			
		||||
     id="path4"
 | 
			
		||||
     style="fill:#ffffff;fill-opacity:1" />
 | 
			
		||||
</svg>
 | 
			
		||||
| 
		 Before Width: | Height: | Size: 892 B  | 
@@ -1,17 +0,0 @@
 | 
			
		||||
$(document).ready(function () {
 | 
			
		||||
    // Load it when the #screenshot tab is in use, so we dont give a slow experience when waiting for the text diff to load
 | 
			
		||||
    window.addEventListener('hashchange', function (e) {
 | 
			
		||||
        toggle(location.hash);
 | 
			
		||||
    }, false);
 | 
			
		||||
 | 
			
		||||
    toggle(location.hash);
 | 
			
		||||
 | 
			
		||||
    function toggle(hash_name) {
 | 
			
		||||
        if (hash_name === '#screenshot') {
 | 
			
		||||
            $("img#screenshot-img").attr('src', screenshot_url);
 | 
			
		||||
            $("#settings").hide();
 | 
			
		||||
        } else {
 | 
			
		||||
            $("#settings").show();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
});
 | 
			
		||||
@@ -40,19 +40,13 @@ $(document).ready(function() {
 | 
			
		||||
    $.ajax({
 | 
			
		||||
      type: "POST",
 | 
			
		||||
      url: notification_base_url,
 | 
			
		||||
      data : data,
 | 
			
		||||
        statusCode: {
 | 
			
		||||
        400: function() {
 | 
			
		||||
            // More than likely the CSRF token was lost when the server restarted
 | 
			
		||||
          alert("There was a problem processing the request, please reload the page.");
 | 
			
		||||
        }
 | 
			
		||||
      }
 | 
			
		||||
      data : data
 | 
			
		||||
    }).done(function(data){
 | 
			
		||||
      console.log(data);
 | 
			
		||||
      alert('Sent');
 | 
			
		||||
    }).fail(function(data){
 | 
			
		||||
      console.log(data);
 | 
			
		||||
      alert('There was an error communicating with the server.');
 | 
			
		||||
      alert('Error: '+data.responseJSON.error);
 | 
			
		||||
    })
 | 
			
		||||
  });
 | 
			
		||||
});
 | 
			
		||||
 
 | 
			
		||||
@@ -49,8 +49,6 @@ $(document).ready(function() {
 | 
			
		||||
        }
 | 
			
		||||
        state_clicked=false;
 | 
			
		||||
        ctx.clearRect(0, 0, c.width, c.height);
 | 
			
		||||
        xctx.clearRect(0, 0, c.width, c.height);
 | 
			
		||||
        $("#css_filter").val('');
 | 
			
		||||
    });
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -1,30 +1,13 @@
 | 
			
		||||
$(document).ready(function() {
 | 
			
		||||
    function toggle() {
 | 
			
		||||
        if ($('input[name="fetch_backend"]:checked').val() == 'html_webdriver') {
 | 
			
		||||
            if(playwright_enabled) {
 | 
			
		||||
                // playwright supports headers, so hide everything else
 | 
			
		||||
                // See #664
 | 
			
		||||
                $('#requests-override-options #request-method').hide();
 | 
			
		||||
                $('#requests-override-options #request-body').hide();
 | 
			
		||||
 | 
			
		||||
                // @todo connect this one up
 | 
			
		||||
                $('#ignore-status-codes-option').hide();
 | 
			
		||||
            } else {
 | 
			
		||||
                // selenium/webdriver doesnt support anything afaik, hide it all
 | 
			
		||||
                $('#requests-override-options').hide();
 | 
			
		||||
            }
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        if ($('input[name="fetch_backend"]:checked').val() != 'html_requests') {
 | 
			
		||||
            $('#requests-override-options').hide();
 | 
			
		||||
            $('#webdriver-override-options').show();
 | 
			
		||||
 | 
			
		||||
        } else {
 | 
			
		||||
 | 
			
		||||
            $('#requests-override-options').show();
 | 
			
		||||
            $('#requests-override-options *:hidden').show();
 | 
			
		||||
            $('#webdriver-override-options').hide();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    $('input[name="fetch_backend"]').click(function (e) {
 | 
			
		||||
        toggle();
 | 
			
		||||
    });
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										2
									
								
								changedetectionio/static/styles/.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										2
									
								
								changedetectionio/static/styles/.gitignore
									
									
									
									
										vendored
									
									
								
							@@ -1,3 +1 @@
 | 
			
		||||
node_modules
 | 
			
		||||
package-lock.json
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										3719
									
								
								changedetectionio/static/styles/package-lock.json
									
									
									
										generated
									
									
									
										Normal file
									
								
							
							
						
						
									
										3719
									
								
								changedetectionio/static/styles/package-lock.json
									
									
									
										generated
									
									
									
										Normal file
									
								
							
										
											
												File diff suppressed because it is too large
												Load Diff
											
										
									
								
							@@ -353,8 +353,6 @@ and also iPads specifically.
 | 
			
		||||
    /* Hide table headers (but not display: none;, for accessibility) */ }
 | 
			
		||||
    .watch-table thead, .watch-table tbody, .watch-table th, .watch-table td, .watch-table tr {
 | 
			
		||||
      display: block; }
 | 
			
		||||
    .watch-table .last-checked > span {
 | 
			
		||||
      vertical-align: middle; }
 | 
			
		||||
    .watch-table .last-checked::before {
 | 
			
		||||
      color: #555;
 | 
			
		||||
      content: "Last Checked "; }
 | 
			
		||||
@@ -372,8 +370,7 @@ and also iPads specifically.
 | 
			
		||||
    .watch-table td {
 | 
			
		||||
      /* Behave  like a "row" */
 | 
			
		||||
      border: none;
 | 
			
		||||
      border-bottom: 1px solid #eee;
 | 
			
		||||
      vertical-align: middle; }
 | 
			
		||||
      border-bottom: 1px solid #eee; }
 | 
			
		||||
      .watch-table td:before {
 | 
			
		||||
        /* Top/left values mimic padding */
 | 
			
		||||
        top: 6px;
 | 
			
		||||
@@ -493,42 +490,3 @@ ul {
 | 
			
		||||
 | 
			
		||||
#api-key-copy {
 | 
			
		||||
  color: #0078e7; }
 | 
			
		||||
 | 
			
		||||
/* spinner */
 | 
			
		||||
.loader,
 | 
			
		||||
.loader:after {
 | 
			
		||||
  border-radius: 50%;
 | 
			
		||||
  width: 10px;
 | 
			
		||||
  height: 10px; }
 | 
			
		||||
 | 
			
		||||
.loader {
 | 
			
		||||
  margin: 0px auto;
 | 
			
		||||
  font-size: 3px;
 | 
			
		||||
  vertical-align: middle;
 | 
			
		||||
  display: inline-block;
 | 
			
		||||
  text-indent: -9999em;
 | 
			
		||||
  border-top: 1.1em solid rgba(38, 104, 237, 0.2);
 | 
			
		||||
  border-right: 1.1em solid rgba(38, 104, 237, 0.2);
 | 
			
		||||
  border-bottom: 1.1em solid rgba(38, 104, 237, 0.2);
 | 
			
		||||
  border-left: 1.1em solid #2668ed;
 | 
			
		||||
  -webkit-transform: translateZ(0);
 | 
			
		||||
  -ms-transform: translateZ(0);
 | 
			
		||||
  transform: translateZ(0);
 | 
			
		||||
  -webkit-animation: load8 1.1s infinite linear;
 | 
			
		||||
  animation: load8 1.1s infinite linear; }
 | 
			
		||||
 | 
			
		||||
@-webkit-keyframes load8 {
 | 
			
		||||
  0% {
 | 
			
		||||
    -webkit-transform: rotate(0deg);
 | 
			
		||||
    transform: rotate(0deg); }
 | 
			
		||||
  100% {
 | 
			
		||||
    -webkit-transform: rotate(360deg);
 | 
			
		||||
    transform: rotate(360deg); } }
 | 
			
		||||
 | 
			
		||||
@keyframes load8 {
 | 
			
		||||
  0% {
 | 
			
		||||
    -webkit-transform: rotate(0deg);
 | 
			
		||||
    transform: rotate(0deg); }
 | 
			
		||||
  100% {
 | 
			
		||||
    -webkit-transform: rotate(360deg);
 | 
			
		||||
    transform: rotate(360deg); } }
 | 
			
		||||
 
 | 
			
		||||
@@ -487,11 +487,6 @@ and also iPads specifically.
 | 
			
		||||
      display: block;
 | 
			
		||||
    }
 | 
			
		||||
 | 
			
		||||
    .last-checked {
 | 
			
		||||
      > span {
 | 
			
		||||
        vertical-align: middle;
 | 
			
		||||
      }
 | 
			
		||||
    }
 | 
			
		||||
    .last-checked::before {
 | 
			
		||||
      color: #555;
 | 
			
		||||
      content: "Last Checked ";
 | 
			
		||||
@@ -522,7 +517,7 @@ and also iPads specifically.
 | 
			
		||||
      /* Behave  like a "row" */
 | 
			
		||||
      border: none;
 | 
			
		||||
      border-bottom: 1px solid #eee;
 | 
			
		||||
      vertical-align: middle;
 | 
			
		||||
 | 
			
		||||
      &:before {
 | 
			
		||||
        /* Top/left values mimic padding */
 | 
			
		||||
        top: 6px;
 | 
			
		||||
@@ -706,48 +701,3 @@ ul {
 | 
			
		||||
#api-key-copy {
 | 
			
		||||
  color: #0078e7;
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
/* spinner */
 | 
			
		||||
.loader,
 | 
			
		||||
.loader:after {
 | 
			
		||||
  border-radius: 50%;
 | 
			
		||||
  width: 10px;
 | 
			
		||||
  height: 10px;
 | 
			
		||||
}
 | 
			
		||||
.loader {
 | 
			
		||||
  margin: 0px auto;
 | 
			
		||||
  font-size: 3px;
 | 
			
		||||
  vertical-align: middle;
 | 
			
		||||
  display: inline-block;
 | 
			
		||||
  text-indent: -9999em;
 | 
			
		||||
  border-top: 1.1em solid rgba(38,104,237, 0.2);
 | 
			
		||||
  border-right: 1.1em solid rgba(38,104,237, 0.2);
 | 
			
		||||
  border-bottom: 1.1em solid rgba(38,104,237, 0.2);
 | 
			
		||||
  border-left: 1.1em solid #2668ed;
 | 
			
		||||
  -webkit-transform: translateZ(0);
 | 
			
		||||
  -ms-transform: translateZ(0);
 | 
			
		||||
  transform: translateZ(0);
 | 
			
		||||
  -webkit-animation: load8 1.1s infinite linear;
 | 
			
		||||
  animation: load8 1.1s infinite linear;
 | 
			
		||||
}
 | 
			
		||||
@-webkit-keyframes load8 {
 | 
			
		||||
  0% {
 | 
			
		||||
    -webkit-transform: rotate(0deg);
 | 
			
		||||
    transform: rotate(0deg);
 | 
			
		||||
  }
 | 
			
		||||
  100% {
 | 
			
		||||
    -webkit-transform: rotate(360deg);
 | 
			
		||||
    transform: rotate(360deg);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
@keyframes load8 {
 | 
			
		||||
  0% {
 | 
			
		||||
    -webkit-transform: rotate(0deg);
 | 
			
		||||
    transform: rotate(0deg);
 | 
			
		||||
  }
 | 
			
		||||
  100% {
 | 
			
		||||
    -webkit-transform: rotate(360deg);
 | 
			
		||||
    transform: rotate(360deg);
 | 
			
		||||
  }
 | 
			
		||||
}
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -158,11 +158,13 @@ class ChangeDetectionStore:
 | 
			
		||||
    @property
 | 
			
		||||
    def threshold_seconds(self):
 | 
			
		||||
        seconds = 0
 | 
			
		||||
        for m, n in Watch.mtable.items():
 | 
			
		||||
        mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
 | 
			
		||||
        minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
 | 
			
		||||
        for m, n in mtable.items():
 | 
			
		||||
            x = self.__data['settings']['requests']['time_between_check'].get(m)
 | 
			
		||||
            if x:
 | 
			
		||||
                seconds += x * n
 | 
			
		||||
        return seconds
 | 
			
		||||
        return max(seconds, minimum_seconds_recheck_time)
 | 
			
		||||
 | 
			
		||||
    @property
 | 
			
		||||
    def has_unviewed(self):
 | 
			
		||||
@@ -249,26 +251,15 @@ class ChangeDetectionStore:
 | 
			
		||||
        return self.data['watching'][uuid].get(val)
 | 
			
		||||
 | 
			
		||||
    # Remove a watchs data but keep the entry (URL etc)
 | 
			
		||||
    def clear_watch_history(self, uuid):
 | 
			
		||||
    def scrub_watch(self, uuid):
 | 
			
		||||
        import pathlib
 | 
			
		||||
 | 
			
		||||
        self.__data['watching'][uuid].update(
 | 
			
		||||
            {'last_checked': 0,
 | 
			
		||||
             'last_changed': 0,
 | 
			
		||||
             'last_viewed': 0,
 | 
			
		||||
             'previous_md5': False,
 | 
			
		||||
             'last_notification_error': False,
 | 
			
		||||
             'last_error': False})
 | 
			
		||||
 | 
			
		||||
        # JSON Data, Screenshots, Textfiles (history index and snapshots), HTML in the future etc
 | 
			
		||||
        for item in pathlib.Path(os.path.join(self.datastore_path, uuid)).rglob("*.*"):
 | 
			
		||||
            unlink(item)
 | 
			
		||||
 | 
			
		||||
        # Force the attr to recalculate
 | 
			
		||||
        bump = self.__data['watching'][uuid].history
 | 
			
		||||
 | 
			
		||||
        self.__data['watching'][uuid].update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': False})
 | 
			
		||||
        self.needs_write_urgent = True
 | 
			
		||||
 | 
			
		||||
        for item in pathlib.Path(self.datastore_path).rglob(uuid+"/*.txt"):
 | 
			
		||||
            unlink(item)
 | 
			
		||||
 | 
			
		||||
    def add_watch(self, url, tag="", extras=None, write_to_disk_now=True):
 | 
			
		||||
 | 
			
		||||
        if extras is None:
 | 
			
		||||
@@ -289,16 +280,14 @@ class ChangeDetectionStore:
 | 
			
		||||
                                     headers={'App-Guid': self.__data['app_guid']})
 | 
			
		||||
                res = r.json()
 | 
			
		||||
 | 
			
		||||
                # List of permissible attributes we accept from the wild internet
 | 
			
		||||
                # List of permisable stuff we accept from the wild internet
 | 
			
		||||
                for k in ['url', 'tag',
 | 
			
		||||
                          'paused', 'title',
 | 
			
		||||
                          'previous_md5', 'headers',
 | 
			
		||||
                          'body', 'method',
 | 
			
		||||
                          'ignore_text', 'css_filter',
 | 
			
		||||
                          'subtractive_selectors', 'trigger_text',
 | 
			
		||||
                          'extract_title_as_title', 'extract_text',
 | 
			
		||||
                          'text_should_not_be_present',
 | 
			
		||||
                          'webdriver_js_execute_code']:
 | 
			
		||||
                                   'paused', 'title',
 | 
			
		||||
                                   'previous_md5', 'headers',
 | 
			
		||||
                                   'body', 'method',
 | 
			
		||||
                                   'ignore_text', 'css_filter',
 | 
			
		||||
                                   'subtractive_selectors', 'trigger_text',
 | 
			
		||||
                                   'extract_title_as_title']:
 | 
			
		||||
                    if res.get(k):
 | 
			
		||||
                        apply_extras[k] = res[k]
 | 
			
		||||
 | 
			
		||||
@@ -518,11 +507,3 @@ class ChangeDetectionStore:
 | 
			
		||||
                # But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
 | 
			
		||||
                # In the distant future we can remove this entirely
 | 
			
		||||
                self.data['watching'][uuid]['history'] = {}
 | 
			
		||||
 | 
			
		||||
    # We incorrectly stored last_changed when there was not a change, and then confused the output list table
 | 
			
		||||
    def update_3(self):
 | 
			
		||||
        for uuid, watch in self.data['watching'].items():
 | 
			
		||||
            # Be sure it's recalculated
 | 
			
		||||
            p = watch.history
 | 
			
		||||
            if watch.history_n < 2:
 | 
			
		||||
                watch['last_changed'] = 0
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,7 @@
 | 
			
		||||
                                <li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
 | 
			
		||||
                                <li><code>discord://</code> only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
 | 
			
		||||
                                <li><code>tgram://</code> bots cant send messages to other bots, so you should specify chat ID of non-bot user.</li>
 | 
			
		||||
                                <li><code>tgram://</code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
 | 
			
		||||
                                <li>Go here for <a href="{{url_for('notification_logs')}}">notification debug logs</a></li>
 | 
			
		||||
                              </ul>
 | 
			
		||||
                            </div>
 | 
			
		||||
                            <br/>
 | 
			
		||||
@@ -22,7 +22,6 @@
 | 
			
		||||
{% if emailprefix %}
 | 
			
		||||
                            <a id="add-email-helper" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Add email</a>
 | 
			
		||||
{% endif %}
 | 
			
		||||
                            <a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Notification debug logs</a>
 | 
			
		||||
                        </div>
 | 
			
		||||
                        <div id="notification-customisation" class="pure-control-group">
 | 
			
		||||
                            <div class="pure-control-group">
 | 
			
		||||
 
 | 
			
		||||
@@ -1,11 +1,6 @@
 | 
			
		||||
{% extends 'base.html' %}
 | 
			
		||||
 | 
			
		||||
{% block content %}
 | 
			
		||||
<script>
 | 
			
		||||
    const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
 | 
			
		||||
</script>
 | 
			
		||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
 | 
			
		||||
 | 
			
		||||
<div id="settings">
 | 
			
		||||
    <h1>Differences</h1>
 | 
			
		||||
    <form class="pure-form " action="" method="GET">
 | 
			
		||||
@@ -22,7 +17,7 @@
 | 
			
		||||
            {% if versions|length >= 1 %}
 | 
			
		||||
            <label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
 | 
			
		||||
            <select id="diff-version" name="previous_version">
 | 
			
		||||
                {% for version in versions|reverse %}
 | 
			
		||||
                {% for version in versions %}
 | 
			
		||||
                <option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}>
 | 
			
		||||
                    {{version}}
 | 
			
		||||
                </option>
 | 
			
		||||
@@ -44,7 +39,6 @@
 | 
			
		||||
<div class="tabs">
 | 
			
		||||
    <ul>
 | 
			
		||||
        <li class="tab" id="default-tab"><a href="#text">Text</a></li>
 | 
			
		||||
        <li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li>
 | 
			
		||||
    </ul>
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
@@ -66,21 +60,6 @@
 | 
			
		||||
         </table>
 | 
			
		||||
         Diff algorithm from the amazing <a href="https://github.com/kpdecker/jsdiff">github.com/kpdecker/jsdiff</a>
 | 
			
		||||
     </div>
 | 
			
		||||
     <div class="tab-pane-inner" id="screenshot">
 | 
			
		||||
         <div class="tip">
 | 
			
		||||
             For now, Differences are performed on text, not graphically, only the latest screenshot is available.
 | 
			
		||||
         </div>
 | 
			
		||||
         </br>
 | 
			
		||||
         {% if is_html_webdriver %}
 | 
			
		||||
           {% if screenshot %}
 | 
			
		||||
             <img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/>
 | 
			
		||||
           {% else %}
 | 
			
		||||
              No screenshot available just yet! Try rechecking the page.
 | 
			
		||||
           {% endif %}
 | 
			
		||||
         {% else %}
 | 
			
		||||
           <strong>Screenshot requires Playwright/WebDriver enabled</strong>
 | 
			
		||||
         {% endif %}
 | 
			
		||||
     </div>
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,6 @@
 | 
			
		||||
    const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
 | 
			
		||||
    const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
 | 
			
		||||
    const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
 | 
			
		||||
    const playwright_enabled={% if playwright_enabled %} true {% else %} false {% endif %};
 | 
			
		||||
 | 
			
		||||
{% if emailprefix %}
 | 
			
		||||
    const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
 | 
			
		||||
@@ -25,7 +24,7 @@
 | 
			
		||||
        <ul>
 | 
			
		||||
            <li class="tab" id="default-tab"><a href="#general">General</a></li>
 | 
			
		||||
            <li class="tab"><a href="#request">Request</a></li>
 | 
			
		||||
            <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
 | 
			
		||||
            <li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Selector</a></li>
 | 
			
		||||
            <li class="tab"><a href="#filters-and-triggers">Filters & Triggers</a></li>
 | 
			
		||||
            <li class="tab"><a href="#notifications">Notifications</a></li>
 | 
			
		||||
        </ul>
 | 
			
		||||
@@ -62,12 +61,6 @@
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.extract_title_as_title) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.filter_failure_notification_send) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">
 | 
			
		||||
                         Sends a notification when the filter can no longer be seen on the page, good for knowing when the page changed and your filter will not work anymore.
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                </fieldset>
 | 
			
		||||
            </div>
 | 
			
		||||
 | 
			
		||||
@@ -88,39 +81,33 @@
 | 
			
		||||
                    </div>
 | 
			
		||||
                {% endif %}
 | 
			
		||||
                <fieldset id="webdriver-override-options">
 | 
			
		||||
                    <div class="pure-form-message-inline">
 | 
			
		||||
                        <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
 | 
			
		||||
                        <br/>
 | 
			
		||||
                        This will wait <i>n</i> seconds before extracting the text.
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.webdriver_delay) }}
 | 
			
		||||
                        <div class="pure-form-message-inline">
 | 
			
		||||
                            <strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
 | 
			
		||||
                            <br/>
 | 
			
		||||
                            This will wait <i>n</i> seconds before extracting the text.
 | 
			
		||||
                            {% if using_global_webdriver_wait %}
 | 
			
		||||
                            <br/><strong>Using the current global default settings</strong>
 | 
			
		||||
                            {% endif %}
 | 
			
		||||
                        </div>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.webdriver_js_execute_code) }}
 | 
			
		||||
                        <div class="pure-form-message-inline">
 | 
			
		||||
                            Run this code before performing change detection, handy for filling in fields and other actions <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Run-JavaScript-before-change-detection">More help and examples here</a>
 | 
			
		||||
                        </div>
 | 
			
		||||
                    {% if using_global_webdriver_wait %}
 | 
			
		||||
                    <div class="pure-form-message-inline">
 | 
			
		||||
                        <strong>Using the current global default settings</strong>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
                </fieldset>
 | 
			
		||||
                <fieldset class="pure-group" id="requests-override-options">
 | 
			
		||||
                    {% if not playwright_enabled %}
 | 
			
		||||
                        <div class="pure-form-message-inline">
 | 
			
		||||
                            <strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
 | 
			
		||||
                        </div>
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
                    <div class="pure-control-group" id="request-method">
 | 
			
		||||
                    <div class="pure-form-message-inline">
 | 
			
		||||
                        <strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.method) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group" id="request-headers">
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
{{ render_field(form.headers, rows=5, placeholder="Example
 | 
			
		||||
Cookie: foobar
 | 
			
		||||
User-Agent: wonderbra 1.0") }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group" id="request-body">
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                                        {{ render_field(form.body, rows=5, placeholder="Example
 | 
			
		||||
{
 | 
			
		||||
   \"name\":\"John\",
 | 
			
		||||
@@ -128,7 +115,7 @@ User-Agent: wonderbra 1.0") }}
 | 
			
		||||
   \"car\":null
 | 
			
		||||
}") }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div id="ignore-status-codes-option">
 | 
			
		||||
                    <div>
 | 
			
		||||
                        {{ render_checkbox_field(form.ignore_status_codes) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
                </fieldset>
 | 
			
		||||
@@ -156,12 +143,6 @@ User-Agent: wonderbra 1.0") }}
 | 
			
		||||
                                </li>
 | 
			
		||||
                            </ul>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <fieldset>
 | 
			
		||||
                        <div class="pure-control-group">
 | 
			
		||||
                            {{ render_checkbox_field(form.check_unique_lines) }}
 | 
			
		||||
                            <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
 | 
			
		||||
                        </div>
 | 
			
		||||
                    </fieldset>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
 | 
			
		||||
                        class="m-d") }}
 | 
			
		||||
@@ -196,7 +177,7 @@ nav
 | 
			
		||||
                    <span class="pure-form-message-inline">
 | 
			
		||||
                        <ul>
 | 
			
		||||
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
 | 
			
		||||
                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
 | 
			
		||||
                            <li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
 | 
			
		||||
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
 | 
			
		||||
                            <li>Use the preview/show current tab to see ignores</li>
 | 
			
		||||
                        </ul>
 | 
			
		||||
@@ -218,45 +199,11 @@ nav
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                </fieldset>
 | 
			
		||||
                <fieldset>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock
 | 
			
		||||
Sold out
 | 
			
		||||
Not in stock
 | 
			
		||||
Unavailable") }}
 | 
			
		||||
                        <span class="pure-form-message-inline">
 | 
			
		||||
                            <ul>
 | 
			
		||||
                                <li>Block change-detection while this text is on the page, all text and regex are tested <i>case-insensitive</i>, good for waiting for when a product is available again</li>
 | 
			
		||||
                                <li>Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
 | 
			
		||||
                                <li>All lines here must not exist (think of each line as "OR")</li>
 | 
			
		||||
                                <li>Note: Wrap in forward slash / to use regex  example: <code>/foo\d/</code></li>
 | 
			
		||||
                            </ul>
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                </fieldset>
 | 
			
		||||
                <fieldset>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
 | 
			
		||||
                        <span class="pure-form-message-inline">
 | 
			
		||||
                    <ul>
 | 
			
		||||
                        <li>Extracts text in the final output (line by line) after other filters using regular expressions;
 | 
			
		||||
                            <ul>
 | 
			
		||||
                                <li>Regular expression ‐ example <code>/reports.+?2022/i</code></li>
 | 
			
		||||
                                <li>Use <code>//(?aiLmsux))</code> type flags (more <a href="https://docs.python.org/3/library/re.html#index-15">information here</a>)<br/></li>
 | 
			
		||||
                                <li>Keyword example ‐ example <code>Out of stock</code></li>
 | 
			
		||||
                                <li>Use groups to extract just that text ‐ example <code>/reports.+?(\d+)/i</code> returns a list of years only</li>
 | 
			
		||||
                            </ul>
 | 
			
		||||
                        </li>
 | 
			
		||||
                        <li>One line per regular-expression/ string match</li>
 | 
			
		||||
                    </ul>
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                </fieldset>
 | 
			
		||||
            </div>
 | 
			
		||||
 | 
			
		||||
            <div class="tab-pane-inner visual-selector-ui" id="visualselector">
 | 
			
		||||
                <img id="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}">
 | 
			
		||||
                <strong>Pro-tip:</strong> This tool is only for limiting which elements will be included on a change-detection, not for interacting with browser directly.
 | 
			
		||||
 | 
			
		||||
                <fieldset>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {% if visualselector_enabled %}
 | 
			
		||||
@@ -301,8 +248,6 @@ Unavailable") }}
 | 
			
		||||
 | 
			
		||||
                    <a href="{{url_for('form_delete', uuid=uuid)}}"
 | 
			
		||||
                       class="pure-button button-small button-error ">Delete</a>
 | 
			
		||||
                    <a href="{{url_for('clear_watch_history', uuid=uuid)}}"
 | 
			
		||||
                       class="pure-button button-small button-error ">Clear History</a>
 | 
			
		||||
                    <a href="{{url_for('form_clone', uuid=uuid)}}"
 | 
			
		||||
                       class="pure-button button-small ">Create Copy</a>
 | 
			
		||||
                </div>
 | 
			
		||||
 
 | 
			
		||||
@@ -4,7 +4,7 @@
 | 
			
		||||
<div class="edit-form">
 | 
			
		||||
     <div class="inner">
 | 
			
		||||
 | 
			
		||||
         <h4 style="margin-top: 0px;">Notification debug log</h4>
 | 
			
		||||
         <h4 style="margin-top: 0px;">The following issues were detected when sending notifications</h4>
 | 
			
		||||
                <div id="notification-error-log">
 | 
			
		||||
                <ul style="font-size: 80%; margin:0px; padding: 0 0 0 7px">
 | 
			
		||||
                {% for log in logs|reverse %}
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,6 @@
 | 
			
		||||
{% extends 'base.html' %}
 | 
			
		||||
 | 
			
		||||
{% block content %}
 | 
			
		||||
<script>
 | 
			
		||||
    const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
 | 
			
		||||
</script>
 | 
			
		||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='diff-overview.js')}}" defer></script>
 | 
			
		||||
 | 
			
		||||
<div id="settings">
 | 
			
		||||
    <h1>Current - {{watch.last_checked|format_timestamp_timeago}}</h1>
 | 
			
		||||
@@ -14,7 +10,6 @@
 | 
			
		||||
<div class="tabs">
 | 
			
		||||
    <ul>
 | 
			
		||||
        <li class="tab" id="default-tab"><a href="#text">Text</a></li>
 | 
			
		||||
        <li class="tab" id="screenshot-tab"><a href="#screenshot">Screenshot</a></li>
 | 
			
		||||
    </ul>
 | 
			
		||||
</div>
 | 
			
		||||
 | 
			
		||||
@@ -33,20 +28,5 @@
 | 
			
		||||
            </tbody>
 | 
			
		||||
        </table>
 | 
			
		||||
    </div>
 | 
			
		||||
     <div class="tab-pane-inner" id="screenshot">
 | 
			
		||||
         <div class="tip">
 | 
			
		||||
             For now, Differences are performed on text, not graphically, only the latest screenshot is available.
 | 
			
		||||
         </div>
 | 
			
		||||
         </br>
 | 
			
		||||
         {% if is_html_webdriver %}
 | 
			
		||||
           {% if screenshot %}
 | 
			
		||||
             <img style="max-width: 80%" id="screenshot-img" alt="Current screenshot from most recent request"/>
 | 
			
		||||
           {% else %}
 | 
			
		||||
              No screenshot available just yet! Try rechecking the page.
 | 
			
		||||
           {% endif %}
 | 
			
		||||
         {% else %}
 | 
			
		||||
           <strong>Screenshot requires Playwright/WebDriver enabled</strong>
 | 
			
		||||
         {% endif %}
 | 
			
		||||
     </div>
 | 
			
		||||
</div>
 | 
			
		||||
{% endblock %}
 | 
			
		||||
@@ -3,22 +3,22 @@
 | 
			
		||||
{% block content %}
 | 
			
		||||
<div class="edit-form">
 | 
			
		||||
    <div class="box-wrap inner">
 | 
			
		||||
    <form class="pure-form pure-form-stacked" action="{{url_for('clear_all_history')}}" method="POST">
 | 
			
		||||
    <form class="pure-form pure-form-stacked" action="{{url_for('scrub_page')}}" method="POST">
 | 
			
		||||
        <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
 | 
			
		||||
        <fieldset>
 | 
			
		||||
            <div class="pure-control-group">
 | 
			
		||||
                This will remove version history (snapshots) for ALL watches, but keep your list of URLs! <br/>
 | 
			
		||||
                This will remove ALL version snapshots/data, but keep your list of URLs. <br/>
 | 
			
		||||
                You may like to use the <strong>BACKUP</strong> link first.<br/>
 | 
			
		||||
            </div>
 | 
			
		||||
            <br/>
 | 
			
		||||
            <div class="pure-control-group">
 | 
			
		||||
                <label for="confirmtext">Confirmation text</label>
 | 
			
		||||
                <input type="text" id="confirmtext" required="" name="confirmtext" value="" size="10"/>
 | 
			
		||||
                <span class="pure-form-message-inline">Type in the word <strong>clear</strong> to confirm that you understand.</span>
 | 
			
		||||
                <span class="pure-form-message-inline">Type in the word <strong>scrub</strong> to confirm that you understand!</span>
 | 
			
		||||
            </div>
 | 
			
		||||
            <br/>
 | 
			
		||||
            <div class="pure-control-group">
 | 
			
		||||
                <button type="submit" class="pure-button pure-button-primary">Clear History!</button>
 | 
			
		||||
                <button type="submit" class="pure-button pure-button-primary">Scrub!</button>
 | 
			
		||||
            </div>
 | 
			
		||||
            <br/>
 | 
			
		||||
            <div class="pure-control-group">
 | 
			
		||||
@@ -32,17 +32,6 @@
 | 
			
		||||
                        {{ render_field(form.requests.form.time_between_check, class="time-check-widget") }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Default time for all watches, when the watch does not have a specific time setting.</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
 | 
			
		||||
                        <span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.application.form.filter_failure_notification_threshold_attempts, class="filter_failure_notification_threshold_attempts") }}
 | 
			
		||||
                        <span class="pure-form-message-inline">After this many consecutive times that the CSS/xPath filter is missing, send a notification
 | 
			
		||||
                            <br/>
 | 
			
		||||
                        Set to <strong>0</strong> to disable
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {% if not hide_remove_pass %}
 | 
			
		||||
                            {% if current_user.is_authenticated %}
 | 
			
		||||
@@ -154,7 +143,7 @@ nav
 | 
			
		||||
                        <ul>
 | 
			
		||||
                            <li>Note: This is applied globally in addition to the per-watch rules.</li>
 | 
			
		||||
                            <li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
 | 
			
		||||
                            <li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
 | 
			
		||||
                            <li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
 | 
			
		||||
                            <li>Changing this will affect the comparison checksum which may trigger an alert</li>
 | 
			
		||||
                            <li>Use the preview/show current tab to see ignores</li>
 | 
			
		||||
                        </ul>
 | 
			
		||||
@@ -179,7 +168,7 @@ nav
 | 
			
		||||
                <div class="pure-control-group">
 | 
			
		||||
                    {{ render_button(form.save_button) }}
 | 
			
		||||
                    <a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
 | 
			
		||||
                    <a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-cancel">Clear Snapshot History</a>
 | 
			
		||||
                    <a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete History Snapshot Data</a>
 | 
			
		||||
                </div>
 | 
			
		||||
 | 
			
		||||
            </div>
 | 
			
		||||
 
 | 
			
		||||
@@ -14,7 +14,7 @@
 | 
			
		||||
                {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
 | 
			
		||||
            <button type="submit" class="pure-button pure-button-primary">Watch</button>
 | 
			
		||||
        </fieldset>
 | 
			
		||||
        <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
 | 
			
		||||
        <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
 | 
			
		||||
    </form>
 | 
			
		||||
    <div>
 | 
			
		||||
        <a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
 | 
			
		||||
@@ -40,7 +40,7 @@
 | 
			
		||||
            <tbody>
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
            {% for watch in watches|sort(attribute='last_changed', reverse=True) %}
 | 
			
		||||
            {% for watch in watches %}
 | 
			
		||||
            <tr id="{{ watch.uuid }}"
 | 
			
		||||
                class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}
 | 
			
		||||
                {% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
 | 
			
		||||
@@ -67,8 +67,8 @@
 | 
			
		||||
                    <span class="watch-tag-list">{{ watch.tag}}</span>
 | 
			
		||||
                    {% endif %}
 | 
			
		||||
                </td>
 | 
			
		||||
                <td class="last-checked">{{watch|format_last_checked_time|safe}}</td>
 | 
			
		||||
                <td class="last-changed">{% if watch.history_n >=2 and watch.last_changed >0 %}
 | 
			
		||||
                <td class="last-checked">{{watch|format_last_checked_time}}</td>
 | 
			
		||||
                <td class="last-changed">{% if watch.history_n >=2 and watch.last_changed %}
 | 
			
		||||
                    {{watch.last_changed|format_timestamp_timeago}}
 | 
			
		||||
                    {% else %}
 | 
			
		||||
                    Not yet
 | 
			
		||||
 
 | 
			
		||||
@@ -32,8 +32,6 @@ def app(request):
 | 
			
		||||
    """Create application for the tests."""
 | 
			
		||||
    datastore_path = "./test-datastore"
 | 
			
		||||
 | 
			
		||||
    # So they don't delay in fetching
 | 
			
		||||
    os.environ["MINIMUM_SECONDS_RECHECK_TIME"] = "0"
 | 
			
		||||
    try:
 | 
			
		||||
        os.mkdir(datastore_path)
 | 
			
		||||
    except FileExistsError:
 | 
			
		||||
 
 | 
			
		||||
@@ -95,8 +95,6 @@ def test_api_simple(client, live_server):
 | 
			
		||||
    assert watch_uuid in json.loads(res.data).keys()
 | 
			
		||||
    before_recheck_info = json.loads(res.data)[watch_uuid]
 | 
			
		||||
    assert before_recheck_info['last_checked'] != 0
 | 
			
		||||
    #705 `last_changed` should be zero on the first check
 | 
			
		||||
    assert before_recheck_info['last_changed'] == 0
 | 
			
		||||
    assert before_recheck_info['title'] == 'My test URL'
 | 
			
		||||
 | 
			
		||||
    set_modified_response()
 | 
			
		||||
 
 | 
			
		||||
@@ -1,137 +0,0 @@
 | 
			
		||||
#!/usr/bin/python3
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from . util import live_server_setup
 | 
			
		||||
from changedetectionio import html_tools
 | 
			
		||||
 | 
			
		||||
def set_original_ignore_response():
    """Write the baseline HTML fixture to test-datastore/endpoint-content.txt.

    The page contains only the initial text; none of the phrases used as
    block text by the test in this module appear in it.
    """
    baseline_page = """<html>
       <body>
     Some initial text</br>
     <p>Which is across multiple lines</p>
     </br>
     So let's see what happens.  </br>
     </body>
     </html>

    """

    target_path = "test-datastore/endpoint-content.txt"
    with open(target_path, "w") as handle:
        handle.write(baseline_page)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_modified_original_ignore_response():
    """Write a changed HTML fixture that still contains 'out of stock'.

    Compared with the baseline page the first line of text is different and
    three extra paragraphs are appended, one of which is the 'out of stock'
    phrase.
    """
    changed_page = """<html>
       <body>
     Some NEW nice initial text</br>
     <p>Which is across multiple lines</p>
     </br>
     So let's see what happens.  </br>
     <p>new ignore stuff</p>
     <p>out of stock</p>
     <p>blah</p>
     </body>
     </html>

    """

    target_path = "test-datastore/endpoint-content.txt"
    with open(target_path, "w") as handle:
        handle.write(changed_page)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Same page as set_modified_original_ignore_response() but WITHOUT the
# 'out of stock' paragraph, and with an extra 'now on sale' paragraph.
def set_modified_response_minus_block_text():
    """Write the changed HTML fixture with the 'out of stock' text removed.

    The result is written to test-datastore/endpoint-content.txt. The page
    keeps the changed first line and the other appended paragraphs, drops
    the 'out of stock' paragraph and adds a 'now on sale $2' paragraph.
    """
    test_return_data = """<html>
       <body>
     Some NEW nice initial text</br>
     <p>Which is across multiple lines</p>
     <p>now on sale $2</p>
     </br>
     So let's see what happens.  </br>
     <p>new ignore stuff</p>
     <p>blah</p>
     </body>
     </html>

    """

    with open("test-datastore/endpoint-content.txt", "w") as f:
        f.write(test_return_data)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_check_block_changedetection_text_NOT_present(client, live_server):
    """Check that 'text_should_not_be_present' holds back change detection.

    While one of the configured phrases is on the page, a content change
    must not mark the watch as 'unviewed'. Once the phrase disappears from
    the page, the change must be reported.
    """
    fetch_wait = 3
    live_server_setup(live_server)
    # Mixed case ("stoCk") against the lower-case page text exercises
    # case-insensitive matching.
    blocked_phrases = "out of stoCk\r\nfoobar"

    def recheck_and_wait():
        # Trigger a check, then give the fetch thread time to pick it up.
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        time.sleep(fetch_wait)

    set_original_ignore_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Import the test endpoint URL as a new watch
    watch_url = url_for('test_endpoint', _external=True)
    import_result = client.post(
        url_for("import_page"),
        data={"urls": watch_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_result.data

    # Give the thread time to pick it up
    time.sleep(fetch_wait)

    # Save the block phrases on the watch via the edit page
    edit_result = client.post(
        url_for("edit_page", uuid="first"),
        data={
            "text_should_not_be_present": blocked_phrases,
            "url": watch_url,
            'fetch_backend': "html_requests",
        },
        follow_redirects=True
    )
    assert b"Updated watch." in edit_result.data

    # Give the thread time to pick it up
    time.sleep(fetch_wait)

    # The edit page must show the phrases we just saved
    edit_view = client.get(url_for("edit_page", uuid="first"))
    assert blocked_phrases.encode('utf-8') in edit_view.data

    recheck_and_wait()

    # Nothing changed yet, so there must be no 'unviewed' class
    overview = client.get(url_for("index"))
    assert b'unviewed' not in overview.data
    assert b'/test-endpoint' in overview.data

    # The page changes BUT now contains a block phrase: no change expected
    set_modified_original_ignore_response()
    recheck_and_wait()

    overview = client.get(url_for("index"))
    assert b'unviewed' not in overview.data
    assert b'/test-endpoint' in overview.data

    # The block phrase is gone from the page, so the change must now trigger
    set_modified_response_minus_block_text()
    recheck_and_wait()

    overview = client.get(url_for("index"))
    assert b'unviewed' in overview.data

    # Clean up all watches
    delete_result = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in delete_result.data
 | 
			
		||||
@@ -1,198 +0,0 @@
 | 
			
		||||
#!/usr/bin/python3
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from .util import live_server_setup
 | 
			
		||||
 | 
			
		||||
from ..html_tools import *
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_original_response():
    """Write the baseline HTML fixture to test-datastore/endpoint-content.txt.

    The page has some plain text, a div with id 'sametext' and a single div
    with class 'changetext'. Returns None.
    """
    baseline_page = """<html>
       <body>
     Some initial text</br>
     <p>Which is across multiple lines</p>
     </br>
     So let's see what happens.  </br>
     <div id="sametext">Some text thats the same</div>
     <div class="changetext">Some text that will change</div>     
     </body>
     </html>
    """

    target_path = "test-datastore/endpoint-content.txt"
    with open(target_path, "w") as handle:
        handle.write(baseline_page)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_modified_response():
    """Write the changed HTML fixture to test-datastore/endpoint-content.txt.

    Relative to the baseline page the paragraph text differs, the
    'changetext' div now carries several '<number> online' snippets, and a
    second 'changetext' div is added. Returns None.
    """
    changed_page = """<html>
       <body>
     Some initial text</br>
     <p>which has this one new line</p>
     </br>
     So let's see what happens.  </br>
     <div id="sametext">Some text thats the same</div>
     <div class="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/>  2000 online )</div>
     <div class="changetext">SomeCase insensitive 3456</div>
     </body>
     </html>
    """

    target_path = "test-datastore/endpoint-content.txt"
    with open(target_path, "w") as handle:
        handle.write(changed_page)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_multiline_response():
    """Write an HTML page whose ``<p>`` spans several lines to the endpoint file.

    A trailing ``<div>`` also ends with the word "lines", so a greedy regex
    would over-match into it. Always returns ``None``.
    """
    endpoint_path = "test-datastore/endpoint-content.txt"
    html_payload = """<html>
       <body>
     
     <p>Something <br/>
        across 6 billion multiple<br/>
        lines
     </p>
     
     <div>aaand something lines</div>
     </body>
     </html>
    """

    with open(endpoint_path, "w") as handle:
        handle.write(html_payload)

    return None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_setup(client, live_server):
    """Hand the ``live_server`` fixture to ``live_server_setup``; ``client`` is not used here."""
    live_server_setup(live_server)
 | 
			
		||||
 | 
			
		||||
def test_check_filter_multiline(client, live_server):
    """Check that a multi-line, non-greedy ``extract_text`` regex stops at the first match.

    The watch is imported, given the ``/something.+?6 billion.+?lines/si``
    extraction rule, and the preview page is then expected to show the three
    lines of the paragraph but not the trailing "aaand something lines" div.
    """
    set_multiline_response()

    # Register the test endpoint as a watch via the import page
    endpoint_url = url_for('test_endpoint', _external=True)
    import_res = client.post(
        url_for("import_page"),
        data={"urls": endpoint_url},
        follow_redirects=True,
    )
    assert b"1 Imported" in import_res.data

    time.sleep(3)

    # Edit the watch: no CSS filter, only the regex extraction rule
    edit_form = {
        "css_filter": '',
        'extract_text': '/something.+?6 billion.+?lines/si',
        "url": endpoint_url,
        "tag": "",
        "headers": "",
        'fetch_backend': "html_requests",
    }
    edit_res = client.post(
        url_for("edit_page", uuid="first"),
        data=edit_form,
        follow_redirects=True,
    )
    assert b"Updated watch." in edit_res.data
    time.sleep(3)

    preview_res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True,
    )
    page = preview_res.data

    assert b'<div class="">Something' in page
    assert b'<div class="">across 6 billion multiple' in page
    assert b'<div class="">lines' in page

    # The last div also says 'lines' but must not be captured (non-greedy match check)
    assert b'aaand something lines' not in page
 | 
			
		||||
 | 
			
		||||
def test_check_filter_and_regex_extract(client, live_server):
    """Check ``extract_text`` rules applied on top of a ``.changetext`` CSS filter.

    Four rules are configured, one per line: two plain regexes, one
    ``/.../i`` regex with a flag, and one ``/.../i`` regex with a single
    capture group. After the page changes, the preview must contain only the
    extracted fragments and none of the surrounding text.
    """
    sleep_time_for_fetch_thread = 3
    css_filter = ".changetext"

    # One extraction rule per line, joined with CRLF as a textarea would submit them.
    # Raw strings keep ``\d`` a literal backslash-d without relying on Python's
    # deprecated handling of unknown escape sequences.
    extract_rules = '\r\n'.join([
        r'\d+ online',
        r'\d+ guests',
        r'/somecase insensitive \d+/i',
        r'/somecase insensitive (345\d)/i',
    ])

    set_original_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data

    time.sleep(1)
    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    # Go to the edit page and set the CSS filter plus the extraction rules
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"css_filter": css_filter,
              'extract_text': extract_rules,
              "url": test_url,
              "tag": "",
              "headers": "",
              'fetch_backend': "html_requests"
              },
        follow_redirects=True
    )

    assert b"Updated watch." in res.data

    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    #  Make a change
    set_modified_response()

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    # Give the thread time to pick it up
    time.sleep(sleep_time_for_fetch_thread)

    # The filtered/extracted content changed, so the watch must be flagged 'unviewed'
    res = client.get(url_for("index"))
    assert b'unviewed' in res.data

    # Check the extracted text as rendered on the preview page
    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )

    # Class will be blank for now because the frontend didnt apply the diff
    assert b'<div class="">1000 online' in res.data

    # All regex matching should be here
    assert b'<div class="">2000 online' in res.data

    # Both regexs should be here
    assert b'<div class="">80 guests' in res.data

    # Regex with flag handling should be here
    assert b'<div class="">SomeCase insensitive 3456' in res.data

    # Singular group from /somecase insensitive (345\d)/i
    assert b'<div class="">3456' in res.data

    # Text that no rule matches should not be here
    assert b'Some text that did change' not in res.data
 | 
			
		||||
@@ -1,134 +0,0 @@
 | 
			
		||||
import os
 | 
			
		||||
import time
 | 
			
		||||
import re
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from .util import set_original_response, live_server_setup
 | 
			
		||||
from changedetectionio.model import App
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_response_with_filter():
    """Write an HTML page that contains the ``#nope-doesnt-exist`` element.

    The page is stored in ``test-datastore/endpoint-content.txt``.
    Always returns ``None``.
    """
    endpoint_path = "test-datastore/endpoint-content.txt"
    html_payload = """<html>
       <body>
     Some initial text</br>
     <p>Which is across multiple lines</p>
     </br>
     So let's see what happens.  </br>
     <div id="nope-doesnt-exist">Some text thats the same</div>     
     </body>
     </html>
    """

    with open(endpoint_path, "w") as handle:
        handle.write(html_payload)
    return None
 | 
			
		||||
 | 
			
		||||
def run_filter_test(client, content_filter):
    """Exercise the "filter not found" notification for one CSS/xPath filter.

    Steps:
      1. Add a watch for the test endpoint and configure it with
         ``content_filter`` and a JSON notification URL.
      2. Confirm no notification file exists before the threshold is reached.
      3. Re-check ``App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT`` times and
         expect both the frontend hint and a notification naming the filter.
      4. Serve a page that does contain the element, re-check again, and
         expect a notification that does not mention the missing filter.
      5. Delete all watches and remove the notification file.

    :param client: test client used to drive the web UI
    :param content_filter: CSS selector or xPath expression stored as ``css_filter``
    """
    notification_file = "test-datastore/notification.txt"
    failure_marker = 'CSS/xPath filter was not present in the page'

    # Give the endpoint time to spin up
    time.sleep(1)

    # Add the test endpoint as a new watch
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("form_watch_add"),
        data={"url": test_url, "tag": ''},
        follow_redirects=True
    )
    assert b"Watch added" in res.data

    # Give the thread time to pick up the first version
    time.sleep(3)

    # Build the notification URL by swapping only the leading scheme
    # (http -> json); count=1 keeps any later 'http' in the URL intact.
    url = url_for('test_notification_endpoint', _external=True)
    notification_url = url.replace('http', 'json', 1)

    print(">>>> Notification URL: " + notification_url)

    # Just a regular notification setting, this will be used by the special 'filter not found' notification
    notification_form_data = {"notification_urls": notification_url,
                              "notification_title": "New ChangeDetection.io Notification - {watch_url}",
                              "notification_body": "BASE URL: {base_url}\n"
                                                   "Watch URL: {watch_url}\n"
                                                   "Watch UUID: {watch_uuid}\n"
                                                   "Watch title: {watch_title}\n"
                                                   "Watch tag: {watch_tag}\n"
                                                   "Preview: {preview_url}\n"
                                                   "Diff URL: {diff_url}\n"
                                                   "Snapshot: {current_snapshot}\n"
                                                   "Diff: {diff}\n"
                                                   "Diff Full: {diff_full}\n"
                                                   ":-)",
                              "notification_format": "Text"}

    notification_form_data.update({
        "url": test_url,
        "tag": "my tag",
        "title": "my title",
        "headers": "",
        "css_filter": content_filter,
        "fetch_backend": "html_requests"})

    res = client.post(
        url_for("edit_page", uuid="first"),
        data=notification_form_data,
        follow_redirects=True
    )
    assert b"Updated watch." in res.data
    time.sleep(3)

    # Now the notification should not exist, because we didnt reach the threshold
    assert not os.path.isfile(notification_file)

    for _ in range(App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
        res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
        time.sleep(3)

    # We should see something in the frontend
    assert b'Did the page change its layout' in res.data

    # Now it should exist and contain our "filter not found" alert
    assert os.path.isfile(notification_file)
    with open(notification_file, 'r') as f:
        notification = f.read()
    assert failure_marker in notification
    # Double quotes in the filter are expected backslash-escaped in the notification file
    assert content_filter.replace('"', '\\"') in notification

    # Remove it and prove that it doesnt trigger when not expected
    os.unlink(notification_file)
    set_response_with_filter()

    for _ in range(App._FILTER_FAILURE_THRESHOLD_ATTEMPTS_DEFAULT):
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        time.sleep(3)

    # It should have sent a notification, but..
    assert os.path.isfile(notification_file)
    # but it should not contain the info about the failed filter
    with open(notification_file, 'r') as f:
        notification = f.read()
    assert failure_marker not in notification

    # cleanup for the next
    client.get(
        url_for("form_delete", uuid="all"),
        follow_redirects=True
    )
    os.unlink(notification_file)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_setup(live_server):
    """Hand the ``live_server`` fixture to ``live_server_setup``."""
    live_server_setup(live_server)
 | 
			
		||||
 | 
			
		||||
def test_check_css_filter_failure_notification(client, live_server):
    """Run the filter-failure scenario with the CSS selector ``#nope-doesnt-exist``."""
    set_original_response()
    time.sleep(1)
    run_filter_test(client, content_filter='#nope-doesnt-exist')
 | 
			
		||||
 | 
			
		||||
def test_check_xpath_filter_failure_notification(client, live_server):
    """Run the filter-failure scenario with the xPath ``//*[@id="nope-doesnt-exist"]``."""
    set_original_response()
    time.sleep(1)
    run_filter_test(client, content_filter='//*[@id="nope-doesnt-exist"]')
 | 
			
		||||
 | 
			
		||||
@@ -154,10 +154,6 @@ def test_check_notification(client, live_server):
 | 
			
		||||
    time.sleep(1)
 | 
			
		||||
    assert os.path.exists("test-datastore/notification.txt") == False
 | 
			
		||||
 | 
			
		||||
    res = client.get(url_for("notification_logs"))
 | 
			
		||||
    # be sure we see it in the output log
 | 
			
		||||
    assert b'New ChangeDetection.io Notification - ' + test_url.encode('utf-8') in res.data
 | 
			
		||||
 | 
			
		||||
    # cleanup for the next
 | 
			
		||||
    client.get(
 | 
			
		||||
        url_for("form_delete", uuid="all"),
 | 
			
		||||
 
 | 
			
		||||
@@ -1,43 +0,0 @@
 | 
			
		||||
#!/usr/bin/python3
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from .util import live_server_setup
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_original_ignore_response():
    """Write a page whose price is split up by empty HTML comments to the endpoint file.

    The price text is ``$<!-- -->90<!-- -->.<!-- -->74`` inside a ``<span>``.
    """
    endpoint_path = "test-datastore/endpoint-content.txt"
    html_payload = """<html>
       <body>
     <span>The price is</span><span>$<!-- -->90<!-- -->.<!-- -->74</span>
     </body>
     </html>

    """

    with open(endpoint_path, "w") as handle:
        handle.write(html_payload)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_obfuscations(client, live_server):
    """Check that HTML comments splitting a price do not survive into the preview text.

    The served markup is ``$<!-- -->90<!-- -->.<!-- -->74``; the preview page
    is expected to contain the contiguous bytes ``$90.74``.
    """
    set_original_ignore_response()
    live_server_setup(live_server)
    time.sleep(1)

    # Register the test endpoint as a watch via the import page
    endpoint_url = url_for('test_endpoint', _external=True)
    import_res = client.post(
        url_for("import_page"),
        data={"urls": endpoint_url},
        follow_redirects=True,
    )
    assert b"1 Imported" in import_res.data

    # Give the thread time to pick it up
    time.sleep(3)

    # Inspect the text shown on the preview page
    preview_res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True,
    )

    assert b'$90.74' in preview_res.data
 | 
			
		||||
@@ -1,104 +0,0 @@
 | 
			
		||||
#!/usr/bin/python3
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from .util import live_server_setup
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_original_ignore_response():
    """Write the baseline three-paragraph page to ``test-datastore/endpoint-content.txt``."""
    endpoint_path = "test-datastore/endpoint-content.txt"
    html_payload = """<html>
     <body>
     <p>Some initial text</p>
     <p>Which is across multiple lines</p>
     <p>So let's see what happens.</p>
     </body>
     </html>
    """

    with open(endpoint_path, "w") as handle:
        handle.write(html_payload)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# The same but just re-ordered the text
 | 
			
		||||
def set_modified_swapped_lines():
    """Write the baseline paragraphs in a different order, with extra whitespace.

    The last two paragraphs are swapped and padded with leading spaces; the
    text of each paragraph is otherwise the same as in the baseline page.
    """
    endpoint_path = "test-datastore/endpoint-content.txt"
    # Re-ordered and with some whitespacing, should get stripped() too.
    html_payload = """<html>
     <body>
     <p>Some initial text</p>
     <p>   So let's see what happens.</p>
     <p> Which is across multiple lines</p>     
     </body>
     </html>
    """

    with open(endpoint_path, "w") as handle:
        handle.write(html_payload)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_modified_with_trigger_text_response():
    """Write the re-ordered paragraphs plus one new paragraph, "and a new line!"."""
    endpoint_path = "test-datastore/endpoint-content.txt"
    html_payload = """<html>
     <body>
     <p>Some initial text</p>
     <p>So let's see what happens.</p>
     <p>and a new line!</p>
     <p>Which is across multiple lines</p>     
     </body>
     </html>
    """

    with open(endpoint_path, "w") as handle:
        handle.write(html_payload)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_unique_lines_functionality(client, live_server):
    """Check the ``check_unique_lines`` watch option.

    With the option enabled, merely re-ordering existing lines must not mark
    the watch 'unviewed', while adding a line that was never seen before must.
    """
    live_server_setup(live_server)

    fetch_wait = 3

    set_original_ignore_response()
    # Give the endpoint time to spin up
    time.sleep(1)

    # Register the test endpoint as a watch via the import page
    endpoint_url = url_for('test_endpoint', _external=True)
    import_res = client.post(
        url_for("import_page"),
        data={"urls": endpoint_url},
        follow_redirects=True,
    )
    assert b"1 Imported" in import_res.data
    time.sleep(fetch_wait)

    # Switch on unique-line checking for the watch
    edit_form = {
        "check_unique_lines": "y",
        "url": endpoint_url,
        "fetch_backend": "html_requests",
    }
    edit_res = client.post(
        url_for("edit_page", uuid="first"),
        data=edit_form,
        follow_redirects=True,
    )
    assert b"Updated watch." in edit_res.data
    assert b'unviewed' not in edit_res.data

    # Serve the same lines in a different order
    set_modified_swapped_lines()

    time.sleep(fetch_wait)
    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(fetch_wait)

    # It should report nothing found (no new 'unviewed' class)
    index_res = client.get(url_for("index"))
    assert b'unviewed' not in index_res.data

    # Now set the content which contains the new text and re-ordered existing text
    set_modified_with_trigger_text_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(fetch_wait)
    index_res = client.get(url_for("index"))
    assert b'unviewed' in index_res.data
 | 
			
		||||
 | 
			
		||||
@@ -3,215 +3,162 @@ import queue
 | 
			
		||||
import time
 | 
			
		||||
 | 
			
		||||
from changedetectionio import content_fetcher
 | 
			
		||||
from changedetectionio.html_tools import FilterNotFoundInResponse
 | 
			
		||||
 | 
			
		||||
# A single update worker
 | 
			
		||||
#
 | 
			
		||||
#
 | 
			
		||||
# Requests for checking on a single site(watch) from a queue of watches
 | 
			
		||||
# (another process inserts watches into the queue that are time-ready for checking)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class update_worker(threading.Thread):
 | 
			
		||||
    current_uuid = None
 | 
			
		||||
 | 
			
		||||
    def __init__(self, q, notification_q, app, datastore, uuid, *args, **kwargs):
 | 
			
		||||
    def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
 | 
			
		||||
        self.q = q
 | 
			
		||||
 | 
			
		||||
        self.app = app
 | 
			
		||||
        self.notification_q = notification_q
 | 
			
		||||
        self.datastore = datastore
 | 
			
		||||
        self.current_uuid = uuid
 | 
			
		||||
        super().__init__(*args, **kwargs)
 | 
			
		||||
        self.name = "update_worker"
 | 
			
		||||
 | 
			
		||||
    def send_filter_failure_notification(self, uuid):
        """Queue a "filter not found" alert for the watch identified by ``uuid``.

        The watch's own ``notification_urls`` are used when non-empty,
        otherwise the application-wide ones. If neither is set, nothing is
        queued. If the watch no longer exists in the datastore, the call is
        a no-op instead of failing on the missing entry.

        :param uuid: key of the watch in ``self.datastore.data['watching']``
        """
        app_settings = self.datastore.data['settings']['application']
        threshold = app_settings.get('filter_failure_notification_threshold_attempts')

        watch = self.datastore.data['watching'].get(uuid, False)
        if not watch:
            # The watch may have been deleted in the meantime; nothing to alert about.
            print("Skipping filter not found notification for {}, watch not found".format(uuid))
            return

        # Doubled braces survive .format() as {watch_url}/{base_url}/{watch_uuid}
        # placeholders left in the queued body.
        n_object = {'notification_title': 'Changedetection.io - Alert - CSS/xPath filter was not present in the page',
                    'notification_body': "Your configured CSS/xPath filter of '{}' for {{watch_url}} did not appear on the page after {} attempts, did the page change layout?\n\nLink: {{base_url}}/edit/{{watch_uuid}}\n\nThanks - Your omniscient changedetection.io installation :)\n".format(
                        watch['css_filter'],
                        threshold),
                    'notification_format': 'text'}

        if len(watch['notification_urls']):
            n_object['notification_urls'] = watch['notification_urls']

        elif len(app_settings['notification_urls']):
            n_object['notification_urls'] = app_settings['notification_urls']

        # Only prepare to notify if the rules above matched
        if 'notification_urls' in n_object:
            n_object.update({
                'watch_url': watch['url'],
                'uuid': uuid
            })
            self.notification_q.put(n_object)
            print("Sent filter not found notification for {}".format(uuid))
 | 
			
		||||
 | 
			
		||||
    # Pick one job off the list, process it in this thread, then exit
 | 
			
		||||
    def run(self):
        """Process the watch in ``self.current_uuid`` once, then let the thread end.

        ``self.q.task_done()`` is called even when ``perform_site_update()``
        raises, so the queue's unfinished-task count is always decremented.
        The exception itself still propagates.
        """
        try:
            # Go talk to the website
            self.perform_site_update()
        finally:
            self.current_uuid = None  # Done
            self.q.task_done()

        # Let the thread die after processing 1
        # We will launch nice juicy fresh threads every time to prevent memory leaks in complex runner code (playwright etc)
        print ("EXITING THREAD!")
        # Waits up to 1 second; returns at once if the exit event is already set
        self.app.config.exit.wait(1)
        return
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    def perform_site_update(self):
 | 
			
		||||
 | 
			
		||||
        from changedetectionio import fetch_site_status
 | 
			
		||||
 | 
			
		||||
        if not self.current_uuid in list(self.datastore.data['watching'].keys()):
 | 
			
		||||
            return
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        changed_detected = False
 | 
			
		||||
        contents = ""
 | 
			
		||||
        screenshot = False
 | 
			
		||||
        update_obj= {}
 | 
			
		||||
        xpath_data = False
 | 
			
		||||
        now = time.time()
 | 
			
		||||
 | 
			
		||||
        update_handler = fetch_site_status.perform_site_check(datastore=self.datastore)
 | 
			
		||||
        try:
 | 
			
		||||
            changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(self.current_uuid)
 | 
			
		||||
            # Re #342
 | 
			
		||||
            # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
 | 
			
		||||
            # We then convert/.decode('utf-8') for the notification etc
 | 
			
		||||
            if not isinstance(contents, (bytes, bytearray)):
 | 
			
		||||
                raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
 | 
			
		||||
        except PermissionError as e:
 | 
			
		||||
            self.app.logger.error("File permission error updating", self.current_uuid, str(e))
 | 
			
		||||
        except content_fetcher.ReplyWithContentButNoText as e:
 | 
			
		||||
            # Totally fine, it's by choice - just continue on, nothing more to care about
 | 
			
		||||
            # Page had elements/content but no renderable text
 | 
			
		||||
            self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': "Got HTML content but no text found."})
 | 
			
		||||
        except FilterNotFoundInResponse as e:
 | 
			
		||||
            err_text = "Filter '{}' not found - Did the page change its layout?".format(str(e))
 | 
			
		||||
            c = 0
 | 
			
		||||
            if self.datastore.data['watching'].get(self.current_uuid, False):
 | 
			
		||||
                c = self.datastore.data['watching'][self.current_uuid].get('consecutive_filter_failures', 5)
 | 
			
		||||
            c += 1
 | 
			
		||||
 | 
			
		||||
            # Send notification if we reached the threshold?
 | 
			
		||||
            threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts', 0)
 | 
			
		||||
            print("Filter for {} not found, consecutive_filter_failures: {}".format(self.current_uuid, c))
 | 
			
		||||
            if threshold >0 and c >= threshold:
 | 
			
		||||
                self.send_filter_failure_notification(self.current_uuid)
 | 
			
		||||
                c = 0
 | 
			
		||||
        while not self.app.config.exit.is_set():
 | 
			
		||||
 | 
			
		||||
            self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                               'consecutive_filter_failures': c})
 | 
			
		||||
        except content_fetcher.EmptyReply as e:
 | 
			
		||||
            # Some kind of custom to-str handler in the exception handler that does this?
 | 
			
		||||
            err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
 | 
			
		||||
            self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                               'last_check_status': e.status_code})
 | 
			
		||||
        except content_fetcher.ScreenshotUnavailable as e:
 | 
			
		||||
            err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
 | 
			
		||||
            self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                               'last_check_status': e.status_code})
 | 
			
		||||
        except content_fetcher.PageUnloadable as e:
 | 
			
		||||
            err_text = "Page request from server didnt respond correctly"
 | 
			
		||||
            self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                               'last_check_status': e.status_code})
 | 
			
		||||
        except Exception as e:
 | 
			
		||||
            self.app.logger.error("Exception reached processing watch UUID: %s - %s", self.current_uuid, str(e))
 | 
			
		||||
            self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': str(e)})
 | 
			
		||||
 | 
			
		||||
        else:
 | 
			
		||||
            try:
 | 
			
		||||
                watch = self.datastore.data['watching'][self.current_uuid]
 | 
			
		||||
                fname = "" # Saved history text filename
 | 
			
		||||
                uuid = self.q.get(block=False)
 | 
			
		||||
            except queue.Empty:
 | 
			
		||||
                pass
 | 
			
		||||
 | 
			
		||||
                # For the FIRST time we check a site, or a change detected, save the snapshot.
 | 
			
		||||
                if changed_detected or not watch['last_checked']:
 | 
			
		||||
                    # A change was detected
 | 
			
		||||
                    fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
 | 
			
		||||
            else:
 | 
			
		||||
                self.current_uuid = uuid
 | 
			
		||||
 | 
			
		||||
                # Generally update anything interesting returned
 | 
			
		||||
                update_obj['consecutive_filter_failures'] = 0
 | 
			
		||||
                self.datastore.update_watch(uuid=self.current_uuid, update_obj=update_obj)
 | 
			
		||||
                if uuid in list(self.datastore.data['watching'].keys()):
 | 
			
		||||
 | 
			
		||||
                # A change was detected
 | 
			
		||||
                if changed_detected:
 | 
			
		||||
                    n_object = {}
 | 
			
		||||
                    print (">> Change detected in UUID {} - {}".format(self.current_uuid, watch['url']))
 | 
			
		||||
                    changed_detected = False
 | 
			
		||||
                    contents = ""
 | 
			
		||||
                    screenshot = False
 | 
			
		||||
                    update_obj= {}
 | 
			
		||||
                    xpath_data = False
 | 
			
		||||
                    now = time.time()
 | 
			
		||||
 | 
			
		||||
                    # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
 | 
			
		||||
                    if watch.history_n >= 2:
 | 
			
		||||
                        # Atleast 2, means there really was a change
 | 
			
		||||
                        self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_changed': round(now)})
 | 
			
		||||
                    try:
 | 
			
		||||
                        changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid)
 | 
			
		||||
 | 
			
		||||
                        watch_history = watch.history
 | 
			
		||||
                        dates = list(watch_history.keys())
 | 
			
		||||
                        # Theoretically it's possible that this could be just 1 long,
 | 
			
		||||
                        # - In the case that the timestamp key was not unique
 | 
			
		||||
                        if len(dates) == 1:
 | 
			
		||||
                            raise ValueError(
 | 
			
		||||
                                "History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?"
 | 
			
		||||
                            )
 | 
			
		||||
                        prev_fname = watch_history[dates[-2]]
 | 
			
		||||
                        # Re #342
 | 
			
		||||
                        # In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
 | 
			
		||||
                        # We then convert/.decode('utf-8') for the notification etc
 | 
			
		||||
                        if not isinstance(contents, (bytes, bytearray)):
 | 
			
		||||
                            raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
 | 
			
		||||
                    except PermissionError as e:
 | 
			
		||||
                        self.app.logger.error("File permission error updating", uuid, str(e))
 | 
			
		||||
                    except content_fetcher.ReplyWithContentButNoText as e:
 | 
			
		||||
                        # Totally fine, it's by choice - just continue on, nothing more to care about
 | 
			
		||||
                        # Page had elements/content but no renderable text
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
 | 
			
		||||
                        pass
 | 
			
		||||
                    except content_fetcher.EmptyReply as e:
 | 
			
		||||
                        # Some kind of custom to-str handler in the exception handler that does this?
 | 
			
		||||
                        err_text = "EmptyReply: Status Code {}".format(e.status_code)
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                                           'last_check_status': e.status_code})
 | 
			
		||||
                    except content_fetcher.ScreenshotUnavailable as e:
 | 
			
		||||
                        err_text = "Screenshot unavailable, page did not render fully in the expected time"
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                                           'last_check_status': e.status_code})
 | 
			
		||||
                    except content_fetcher.PageUnloadable as e:
 | 
			
		||||
                        err_text = "Page request from server didnt respond correctly"
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
 | 
			
		||||
                                                                           'last_check_status': e.status_code})
 | 
			
		||||
 | 
			
		||||
                        # Did it have any notification alerts to hit?
 | 
			
		||||
                        if len(watch['notification_urls']):
 | 
			
		||||
                            print(">>> Notifications queued for UUID from watch {}".format(self.current_uuid))
 | 
			
		||||
                            n_object['notification_urls'] = watch['notification_urls']
 | 
			
		||||
                            n_object['notification_title'] = watch['notification_title']
 | 
			
		||||
                            n_object['notification_body'] = watch['notification_body']
 | 
			
		||||
                            n_object['notification_format'] = watch['notification_format']
 | 
			
		||||
                    except Exception as e:
 | 
			
		||||
                        self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
 | 
			
		||||
 | 
			
		||||
                        # No? maybe theres a global setting, queue them all
 | 
			
		||||
                        elif len(self.datastore.data['settings']['application']['notification_urls']):
 | 
			
		||||
                            print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(self.current_uuid))
 | 
			
		||||
                            n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
 | 
			
		||||
                            n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title']
 | 
			
		||||
                            n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body']
 | 
			
		||||
                            n_object['notification_format'] = self.datastore.data['settings']['application']['notification_format']
 | 
			
		||||
                        else:
 | 
			
		||||
                            print(">>> NO notifications queued, watch and global notification URLs were empty.")
 | 
			
		||||
                    else:
 | 
			
		||||
                        try:
 | 
			
		||||
                            watch = self.datastore.data['watching'][uuid]
 | 
			
		||||
                            fname = "" # Saved history text filename
 | 
			
		||||
 | 
			
		||||
                        # Only prepare to notify if the rules above matched
 | 
			
		||||
                        if 'notification_urls' in n_object:
 | 
			
		||||
                            # HTML needs linebreak, but MarkDown and Text can use a linefeed
 | 
			
		||||
                            if n_object['notification_format'] == 'HTML':
 | 
			
		||||
                                line_feed_sep = "</br>"
 | 
			
		||||
                            else:
 | 
			
		||||
                                line_feed_sep = "\n"
 | 
			
		||||
                            # For the FIRST time we check a site, or a change detected, save the snapshot.
 | 
			
		||||
                            if changed_detected or not watch['last_checked']:
 | 
			
		||||
                                # A change was detected
 | 
			
		||||
                                fname = watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
 | 
			
		||||
 | 
			
		||||
                            from changedetectionio import diff
 | 
			
		||||
                            n_object.update({
 | 
			
		||||
                                'watch_url': watch['url'],
 | 
			
		||||
                                'uuid': self.current_uuid,
 | 
			
		||||
                                'current_snapshot': contents.decode('utf-8'),
 | 
			
		||||
                                'diff': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep),
 | 
			
		||||
                                'diff_full': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep)
 | 
			
		||||
                            })
 | 
			
		||||
                            # Generally update anything interesting returned
 | 
			
		||||
                            self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
 | 
			
		||||
 | 
			
		||||
                            self.notification_q.put(n_object)
 | 
			
		||||
                            # A change was detected
 | 
			
		||||
                            if changed_detected:
 | 
			
		||||
                                n_object = {}
 | 
			
		||||
                                print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
 | 
			
		||||
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                # Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
 | 
			
		||||
                print("!!!! Exception in update_worker !!!\n", e)
 | 
			
		||||
                self.app.logger.error("Exception reached processing watch UUID: %s - %s", self.current_uuid, str(e))
 | 
			
		||||
                self.datastore.update_watch(uuid=self.current_uuid, update_obj={'last_error': str(e)})
 | 
			
		||||
                                # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
 | 
			
		||||
                                if watch.history_n >= 2:
 | 
			
		||||
 | 
			
		||||
        finally:
 | 
			
		||||
            # Always record that we atleast tried
 | 
			
		||||
            self.datastore.update_watch(uuid=self.current_uuid, update_obj={'fetch_time': round(time.time() - now, 3),
 | 
			
		||||
                                                               'last_checked': round(time.time())})
 | 
			
		||||
 | 
			
		||||
            # Always save the screenshot if it's available
 | 
			
		||||
            if screenshot:
 | 
			
		||||
                self.datastore.save_screenshot(watch_uuid=self.current_uuid, screenshot=screenshot)
 | 
			
		||||
            if xpath_data:
 | 
			
		||||
                self.datastore.save_xpath_data(watch_uuid=self.current_uuid, data=xpath_data)
 | 
			
		||||
                                    dates = list(watch.history.keys())
 | 
			
		||||
                                    prev_fname = watch.history[dates[-2]]
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
                                    # Did it have any notification alerts to hit?
 | 
			
		||||
                                    if len(watch['notification_urls']):
 | 
			
		||||
                                        print(">>> Notifications queued for UUID from watch {}".format(uuid))
 | 
			
		||||
                                        n_object['notification_urls'] = watch['notification_urls']
 | 
			
		||||
                                        n_object['notification_title'] = watch['notification_title']
 | 
			
		||||
                                        n_object['notification_body'] = watch['notification_body']
 | 
			
		||||
                                        n_object['notification_format'] = watch['notification_format']
 | 
			
		||||
 | 
			
		||||
                                    # No? maybe theres a global setting, queue them all
 | 
			
		||||
                                    elif len(self.datastore.data['settings']['application']['notification_urls']):
 | 
			
		||||
                                        print(">>> Watch notification URLs were empty, using GLOBAL notifications for UUID: {}".format(uuid))
 | 
			
		||||
                                        n_object['notification_urls'] = self.datastore.data['settings']['application']['notification_urls']
 | 
			
		||||
                                        n_object['notification_title'] = self.datastore.data['settings']['application']['notification_title']
 | 
			
		||||
                                        n_object['notification_body'] = self.datastore.data['settings']['application']['notification_body']
 | 
			
		||||
                                        n_object['notification_format'] = self.datastore.data['settings']['application']['notification_format']
 | 
			
		||||
                                    else:
 | 
			
		||||
                                        print(">>> NO notifications queued, watch and global notification URLs were empty.")
 | 
			
		||||
 | 
			
		||||
                                    # Only prepare to notify if the rules above matched
 | 
			
		||||
                                    if 'notification_urls' in n_object:
 | 
			
		||||
                                        # HTML needs linebreak, but MarkDown and Text can use a linefeed
 | 
			
		||||
                                        if n_object['notification_format'] == 'HTML':
 | 
			
		||||
                                            line_feed_sep = "</br>"
 | 
			
		||||
                                        else:
 | 
			
		||||
                                            line_feed_sep = "\n"
 | 
			
		||||
 | 
			
		||||
                                        from changedetectionio import diff
 | 
			
		||||
                                        n_object.update({
 | 
			
		||||
                                            'watch_url': watch['url'],
 | 
			
		||||
                                            'uuid': uuid,
 | 
			
		||||
                                            'current_snapshot': contents.decode('utf-8'),
 | 
			
		||||
                                            'diff': diff.render_diff(prev_fname, fname, line_feed_sep=line_feed_sep),
 | 
			
		||||
                                            'diff_full': diff.render_diff(prev_fname, fname, True, line_feed_sep=line_feed_sep)
 | 
			
		||||
                                        })
 | 
			
		||||
 | 
			
		||||
                                        self.notification_q.put(n_object)
 | 
			
		||||
 | 
			
		||||
                        except Exception as e:
 | 
			
		||||
                            # Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
 | 
			
		||||
                            print("!!!! Exception in update_worker !!!\n", e)
 | 
			
		||||
                            self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
 | 
			
		||||
                            self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
 | 
			
		||||
 | 
			
		||||
                    finally:
 | 
			
		||||
                        # Always record that we atleast tried
 | 
			
		||||
                        self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
 | 
			
		||||
                                                                           'last_checked': round(time.time())})
 | 
			
		||||
                        # Always save the screenshot if it's available
 | 
			
		||||
                        if screenshot:
 | 
			
		||||
                            self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
 | 
			
		||||
                        if xpath_data:
 | 
			
		||||
                            self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
                self.current_uuid = None  # Done
 | 
			
		||||
                self.q.task_done()
 | 
			
		||||
 | 
			
		||||
                # Give the CPU time to interrupt
 | 
			
		||||
                time.sleep(0.1)
 | 
			
		||||
 | 
			
		||||
            self.app.config.exit.wait(1)
 | 
			
		||||
 
 | 
			
		||||
@@ -24,7 +24,7 @@ services:
 | 
			
		||||
  #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
 | 
			
		||||
  #
 | 
			
		||||
  #       Alternative Playwright URL, do not use "'s or 's!
 | 
			
		||||
  #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true
 | 
			
		||||
  #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/
 | 
			
		||||
  #
 | 
			
		||||
  #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
 | 
			
		||||
  #
 | 
			
		||||
@@ -73,17 +73,6 @@ services:
 | 
			
		||||
#        hostname: playwright-chrome
 | 
			
		||||
#        image: browserless/chrome
 | 
			
		||||
#        restart: unless-stopped
 | 
			
		||||
#        environment:
 | 
			
		||||
#            - SCREEN_WIDTH=1920
 | 
			
		||||
#            - SCREEN_HEIGHT=1024
 | 
			
		||||
#            - SCREEN_DEPTH=16
 | 
			
		||||
#            - ENABLE_DEBUGGER=false
 | 
			
		||||
#            - PREBOOT_CHROME=true
 | 
			
		||||
#            - CONNECTION_TIMEOUT=300000
 | 
			
		||||
#            - MAX_CONCURRENT_SESSIONS=10
 | 
			
		||||
#            - CHROME_REFRESH_TIME=600000
 | 
			
		||||
#            - DEFAULT_BLOCK_ADS=true
 | 
			
		||||
#            - DEFAULT_STEALTH=true
 | 
			
		||||
 | 
			
		||||
volumes:
 | 
			
		||||
  changedetection-data:
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user