mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 08:34:57 +00:00 
			
		
		
		
	Compare commits
	
		
			1 Commits
		
	
	
		
			proxy-impr
			...
			bugfix-del
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					c97dd3ae0b | 
@@ -20,11 +20,6 @@ COPY requirements.txt /requirements.txt
 | 
			
		||||
 | 
			
		||||
RUN pip install --target=/dependencies -r /requirements.txt
 | 
			
		||||
 | 
			
		||||
# Playwright is an alternative to Selenium
 | 
			
		||||
# Excluded this package from requirements.txt to prevent arm/v6 and arm/v7 builds from failing
 | 
			
		||||
RUN pip install --target=/dependencies playwright~=1.20 \
 | 
			
		||||
    || echo "WARN: Failed to install Playwright. The application can still run, but the Playwright option will be disabled."
 | 
			
		||||
 | 
			
		||||
# Final image stage
 | 
			
		||||
FROM python:3.8-slim
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -434,21 +434,48 @@ def changedetection_app(config=None, datastore_o=None):
 | 
			
		||||
    @login_required
 | 
			
		||||
    def scrub_page():
 | 
			
		||||
 | 
			
		||||
        import re
 | 
			
		||||
 | 
			
		||||
        if request.method == 'POST':
 | 
			
		||||
            confirmtext = request.form.get('confirmtext')
 | 
			
		||||
            limit_date = request.form.get('limit_date')
 | 
			
		||||
            limit_timestamp = 0
 | 
			
		||||
 | 
			
		||||
            # Re #149 - allow empty/0 timestamp limit
 | 
			
		||||
            if len(limit_date):
 | 
			
		||||
                try:
 | 
			
		||||
                    limit_date = limit_date.replace('T', ' ')
 | 
			
		||||
                    # I noticed chrome will show '/' but actually submit '-'
 | 
			
		||||
                    limit_date = limit_date.replace('-', '/')
 | 
			
		||||
                    # In the case that :ss seconds are supplied
 | 
			
		||||
                    limit_date = re.sub(r'(\d\d:\d\d)(:\d\d)', '\\1', limit_date)
 | 
			
		||||
 | 
			
		||||
                    str_to_dt = datetime.datetime.strptime(limit_date, '%Y/%m/%d %H:%M')
 | 
			
		||||
                    limit_timestamp = int(str_to_dt.timestamp())
 | 
			
		||||
 | 
			
		||||
                    if limit_timestamp > time.time():
 | 
			
		||||
                        flash("Timestamp is in the future, cannot continue.", 'error')
 | 
			
		||||
                        return redirect(url_for('scrub_page'))
 | 
			
		||||
 | 
			
		||||
                except ValueError:
 | 
			
		||||
                    flash('Incorrect date format, cannot continue.', 'error')
 | 
			
		||||
                    return redirect(url_for('scrub_page'))
 | 
			
		||||
 | 
			
		||||
            if confirmtext == 'scrub':
 | 
			
		||||
                changes_removed = 0
 | 
			
		||||
                for uuid in datastore.data['watching'].keys():
 | 
			
		||||
                    datastore.scrub_watch(uuid)
 | 
			
		||||
                for uuid, watch in datastore.data['watching'].items():
 | 
			
		||||
                    if limit_timestamp:
 | 
			
		||||
                        changes_removed += datastore.scrub_watch(uuid, limit_timestamp=limit_timestamp)
 | 
			
		||||
                    else:
 | 
			
		||||
                        changes_removed += datastore.scrub_watch(uuid)
 | 
			
		||||
 | 
			
		||||
                flash("Cleared all snapshot history")
 | 
			
		||||
                flash("Cleared snapshot history ({} snapshots removed)".format(changes_removed))
 | 
			
		||||
            else:
 | 
			
		||||
                flash('Incorrect confirmation text.', 'error')
 | 
			
		||||
 | 
			
		||||
            return redirect(url_for('index'))
 | 
			
		||||
 | 
			
		||||
        output = render_template("scrub.html")
 | 
			
		||||
        output =  render_template("scrub.html")
 | 
			
		||||
        return output
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -8,7 +8,7 @@ import sys
 | 
			
		||||
 | 
			
		||||
import eventlet
 | 
			
		||||
import eventlet.wsgi
 | 
			
		||||
from . import store, changedetection_app, content_fetcher
 | 
			
		||||
from . import store, changedetection_app
 | 
			
		||||
from . import __version__
 | 
			
		||||
 | 
			
		||||
def main():
 | 
			
		||||
 
 | 
			
		||||
@@ -1,10 +1,13 @@
 | 
			
		||||
from abc import ABC, abstractmethod
 | 
			
		||||
import chardet
 | 
			
		||||
import os
 | 
			
		||||
from selenium import webdriver
 | 
			
		||||
from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 | 
			
		||||
from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
 | 
			
		||||
from selenium.common.exceptions import WebDriverException
 | 
			
		||||
import requests
 | 
			
		||||
import time
 | 
			
		||||
import urllib3.exceptions
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EmptyReply(Exception):
 | 
			
		||||
@@ -16,17 +19,13 @@ class EmptyReply(Exception):
 | 
			
		||||
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class Fetcher():
 | 
			
		||||
    error = None
 | 
			
		||||
    status_code = None
 | 
			
		||||
    content = None
 | 
			
		||||
    headers = None
 | 
			
		||||
    # Will be needed in the future by the VisualSelector, always get this where possible.
 | 
			
		||||
    screenshot = False
 | 
			
		||||
    fetcher_description = "No description"
 | 
			
		||||
    system_http_proxy = os.getenv('HTTP_PROXY')
 | 
			
		||||
    system_https_proxy = os.getenv('HTTPS_PROXY')
 | 
			
		||||
 | 
			
		||||
    fetcher_description ="No description"
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
 | 
			
		||||
    def get_error(self):
 | 
			
		||||
@@ -47,6 +46,10 @@ class Fetcher():
 | 
			
		||||
    def quit(self):
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
 | 
			
		||||
    def screenshot(self):
 | 
			
		||||
        return
 | 
			
		||||
 | 
			
		||||
    @abstractmethod
 | 
			
		||||
    def get_last_status_code(self):
 | 
			
		||||
        return self.status_code
 | 
			
		||||
@@ -56,105 +59,29 @@ class Fetcher():
 | 
			
		||||
    def is_ready(self):
 | 
			
		||||
        return True
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
#   Maybe for the future, each fetcher provides its own diff output, could be used for text, image
 | 
			
		||||
#   the current one would return javascript output (as we use JS to generate the diff)
 | 
			
		||||
#
 | 
			
		||||
#   Returns tuple(mime_type, stream)
 | 
			
		||||
#    @abstractmethod
 | 
			
		||||
#    def return_diff(self, stream_a, stream_b):
 | 
			
		||||
#        return
 | 
			
		||||
 | 
			
		||||
def available_fetchers():
 | 
			
		||||
    # See the if statement at the bottom of this file for how we switch between playwright and webdriver
 | 
			
		||||
    import inspect
 | 
			
		||||
    p = []
 | 
			
		||||
    for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
 | 
			
		||||
        if inspect.isclass(obj):
 | 
			
		||||
            # @todo html_ is maybe better as fetcher_ or something
 | 
			
		||||
            # In this case, make sure to edit the default one in store.py and fetch_site_status.py
 | 
			
		||||
            if name.startswith('html_'):
 | 
			
		||||
                t = tuple([name, obj.fetcher_description])
 | 
			
		||||
                p.append(t)
 | 
			
		||||
        import inspect
 | 
			
		||||
        from changedetectionio import content_fetcher
 | 
			
		||||
        p=[]
 | 
			
		||||
        for name, obj in inspect.getmembers(content_fetcher):
 | 
			
		||||
            if inspect.isclass(obj):
 | 
			
		||||
                # @todo html_ is maybe better as fetcher_ or something
 | 
			
		||||
                # In this case, make sure to edit the default one in store.py and fetch_site_status.py
 | 
			
		||||
                if "html_" in name:
 | 
			
		||||
                    t=tuple([name,obj.fetcher_description])
 | 
			
		||||
                    p.append(t)
 | 
			
		||||
 | 
			
		||||
    return p
 | 
			
		||||
        return p
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class base_html_playwright(Fetcher):
 | 
			
		||||
    fetcher_description = "Playwright {}/Javascript".format(
 | 
			
		||||
        os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').capitalize()
 | 
			
		||||
    )
 | 
			
		||||
    if os.getenv("PLAYWRIGHT_DRIVER_URL"):
 | 
			
		||||
        fetcher_description += " via '{}'".format(os.getenv("PLAYWRIGHT_DRIVER_URL"))
 | 
			
		||||
 | 
			
		||||
    browser_type = ''
 | 
			
		||||
    command_executor = ''
 | 
			
		||||
 | 
			
		||||
    # Configs for Proxy setup
 | 
			
		||||
    # In the ENV vars, is prefixed with "playwright_proxy_", so it is for example "playwright_proxy_server"
 | 
			
		||||
    playwright_proxy_settings_mappings = ['bypass', 'server', 'username', 'password']
 | 
			
		||||
 | 
			
		||||
    proxy = None
 | 
			
		||||
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value
 | 
			
		||||
        self.browser_type = os.getenv("PLAYWRIGHT_BROWSER_TYPE", 'chromium').strip('"')
 | 
			
		||||
        self.command_executor = os.getenv(
 | 
			
		||||
            "PLAYWRIGHT_DRIVER_URL",
 | 
			
		||||
            'ws://playwright-chrome:3000/playwright'
 | 
			
		||||
        ).strip('"')
 | 
			
		||||
 | 
			
		||||
        # If any proxy settings are enabled, then we should setup the proxy object
 | 
			
		||||
        proxy_args = {}
 | 
			
		||||
        for k in self.playwright_proxy_settings_mappings:
 | 
			
		||||
            v = os.getenv('playwright_proxy_' + k, False)
 | 
			
		||||
            if v:
 | 
			
		||||
                proxy_args[k] = v.strip('"')
 | 
			
		||||
 | 
			
		||||
        if proxy_args:
 | 
			
		||||
            self.proxy = proxy_args
 | 
			
		||||
 | 
			
		||||
    def run(self,
 | 
			
		||||
            url,
 | 
			
		||||
            timeout,
 | 
			
		||||
            request_headers,
 | 
			
		||||
            request_body,
 | 
			
		||||
            request_method,
 | 
			
		||||
            ignore_status_codes=False):
 | 
			
		||||
 | 
			
		||||
        from playwright.sync_api import sync_playwright
 | 
			
		||||
 | 
			
		||||
        with sync_playwright() as p:
 | 
			
		||||
            browser_type = getattr(p, self.browser_type)
 | 
			
		||||
 | 
			
		||||
            # Seemed to cause a connection Exception even though I can see it connect
 | 
			
		||||
            # self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
 | 
			
		||||
            browser = browser_type.connect_over_cdp(self.command_executor, timeout=timeout * 1000)
 | 
			
		||||
 | 
			
		||||
            # Set user agent to prevent Cloudflare from blocking the browser
 | 
			
		||||
            # Use the default one configured in the App.py model that's passed from fetch_site_status.py
 | 
			
		||||
            context = browser.new_context(
 | 
			
		||||
                user_agent=request_headers['User-Agent'] if request_headers.get('User-Agent') else 'Mozilla/5.0',
 | 
			
		||||
                proxy=self.proxy
 | 
			
		||||
            )
 | 
			
		||||
            page = context.new_page()
 | 
			
		||||
            page.set_viewport_size({"width": 1280, "height": 1024})
 | 
			
		||||
            response = page.goto(url, timeout=timeout * 1000)
 | 
			
		||||
 | 
			
		||||
            extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))
 | 
			
		||||
            page.wait_for_timeout(extra_wait * 1000)
 | 
			
		||||
 | 
			
		||||
            if response is None:
 | 
			
		||||
                raise EmptyReply(url=url, status_code=None)
 | 
			
		||||
 | 
			
		||||
            self.status_code = response.status
 | 
			
		||||
            self.content = page.content()
 | 
			
		||||
            self.headers = response.all_headers()
 | 
			
		||||
 | 
			
		||||
            # Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
 | 
			
		||||
            # JPEG is better here because the screenshots can be very very large
 | 
			
		||||
            page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
 | 
			
		||||
            self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=90)
 | 
			
		||||
            context.close()
 | 
			
		||||
            browser.close()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class base_html_webdriver(Fetcher):
 | 
			
		||||
class html_webdriver(Fetcher):
 | 
			
		||||
    if os.getenv("WEBDRIVER_URL"):
 | 
			
		||||
        fetcher_description = "WebDriver Chrome/Javascript via '{}'".format(os.getenv("WEBDRIVER_URL"))
 | 
			
		||||
    else:
 | 
			
		||||
@@ -167,11 +94,12 @@ class base_html_webdriver(Fetcher):
 | 
			
		||||
    selenium_proxy_settings_mappings = ['proxyType', 'ftpProxy', 'httpProxy', 'noProxy',
 | 
			
		||||
                                        'proxyAutoconfigUrl', 'sslProxy', 'autodetect',
 | 
			
		||||
                                        'socksProxy', 'socksVersion', 'socksUsername', 'socksPassword']
 | 
			
		||||
    proxy = None
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    proxy=None
 | 
			
		||||
 | 
			
		||||
    def __init__(self):
 | 
			
		||||
        from selenium.webdriver.common.proxy import Proxy as SeleniumProxy
 | 
			
		||||
 | 
			
		||||
        # .strip('"') is going to save someone a lot of time when they accidentally wrap the env value
 | 
			
		||||
        self.command_executor = os.getenv("WEBDRIVER_URL", 'http://browser-chrome:4444/wd/hub').strip('"')
 | 
			
		||||
 | 
			
		||||
@@ -182,12 +110,6 @@ class base_html_webdriver(Fetcher):
 | 
			
		||||
            if v:
 | 
			
		||||
                proxy_args[k] = v.strip('"')
 | 
			
		||||
 | 
			
		||||
        # Map back standard HTTP_ and HTTPS_PROXY to webDriver httpProxy/sslProxy
 | 
			
		||||
        if not proxy_args.get('webdriver_httpProxy') and self.system_http_proxy:
 | 
			
		||||
            proxy_args['httpProxy'] = self.system_http_proxy
 | 
			
		||||
        if not proxy_args.get('webdriver_sslProxy') and self.system_https_proxy:
 | 
			
		||||
            proxy_args['httpsProxy'] = self.system_https_proxy
 | 
			
		||||
 | 
			
		||||
        if proxy_args:
 | 
			
		||||
            self.proxy = SeleniumProxy(raw=proxy_args)
 | 
			
		||||
 | 
			
		||||
@@ -199,9 +121,6 @@ class base_html_webdriver(Fetcher):
 | 
			
		||||
            request_method,
 | 
			
		||||
            ignore_status_codes=False):
 | 
			
		||||
 | 
			
		||||
        from selenium import webdriver
 | 
			
		||||
        from selenium.webdriver.common.desired_capabilities import DesiredCapabilities
 | 
			
		||||
        from selenium.common.exceptions import WebDriverException
 | 
			
		||||
        # request_body, request_method unused for now, until some magic in the future happens.
 | 
			
		||||
 | 
			
		||||
        # check env for WEBDRIVER_URL
 | 
			
		||||
@@ -226,8 +145,9 @@ class base_html_webdriver(Fetcher):
 | 
			
		||||
        time.sleep(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
 | 
			
		||||
        self.content = self.driver.page_source
 | 
			
		||||
        self.headers = {}
 | 
			
		||||
        self.screenshot = self.driver.get_screenshot_as_png()
 | 
			
		||||
        self.quit()
 | 
			
		||||
 | 
			
		||||
    def screenshot(self):
 | 
			
		||||
        return self.driver.get_screenshot_as_png()
 | 
			
		||||
 | 
			
		||||
    # Does the connection to the webdriver work? run a test connection.
 | 
			
		||||
    def is_ready(self):
 | 
			
		||||
@@ -250,7 +170,6 @@ class base_html_webdriver(Fetcher):
 | 
			
		||||
            except Exception as e:
 | 
			
		||||
                print("Exception in chrome shutdown/quit" + str(e))
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# "html_requests" is listed as the default fetcher in store.py!
 | 
			
		||||
class html_requests(Fetcher):
 | 
			
		||||
    fetcher_description = "Basic fast Plaintext/HTTP Client"
 | 
			
		||||
@@ -263,20 +182,12 @@ class html_requests(Fetcher):
 | 
			
		||||
            request_method,
 | 
			
		||||
            ignore_status_codes=False):
 | 
			
		||||
 | 
			
		||||
        # Map back standard HTTP_ and HTTPS_PROXY to requests http/https proxy
 | 
			
		||||
        proxies={}
 | 
			
		||||
        if self.system_http_proxy:
 | 
			
		||||
            proxies['http'] = self.system_http_proxy
 | 
			
		||||
        if self.system_https_proxy:
 | 
			
		||||
            proxies['https'] = self.system_https_proxy
 | 
			
		||||
 | 
			
		||||
        r = requests.request(method=request_method,
 | 
			
		||||
                             data=request_body,
 | 
			
		||||
                             url=url,
 | 
			
		||||
                             headers=request_headers,
 | 
			
		||||
                             timeout=timeout,
 | 
			
		||||
                             proxies=proxies,
 | 
			
		||||
                             verify=False)
 | 
			
		||||
                         data=request_body,
 | 
			
		||||
                         url=url,
 | 
			
		||||
                         headers=request_headers,
 | 
			
		||||
                         timeout=timeout,
 | 
			
		||||
                         verify=False)
 | 
			
		||||
 | 
			
		||||
        # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks.
 | 
			
		||||
        # For example - some sites don't tell us it's utf-8, but return utf-8 content
 | 
			
		||||
@@ -296,11 +207,3 @@ class html_requests(Fetcher):
 | 
			
		||||
        self.content = r.text
 | 
			
		||||
        self.headers = r.headers
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
# Decide which is the 'real' HTML webdriver, this is more a system wide config
 | 
			
		||||
# rather than site-specific.
 | 
			
		||||
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
 | 
			
		||||
if use_playwright_as_chrome_fetcher:
 | 
			
		||||
    html_webdriver = base_html_playwright
 | 
			
		||||
else:
 | 
			
		||||
    html_webdriver = base_html_webdriver
 | 
			
		||||
 
 | 
			
		||||
@@ -68,7 +68,6 @@ class perform_site_check():
 | 
			
		||||
 | 
			
		||||
        fetcher = klass()
 | 
			
		||||
        fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code)
 | 
			
		||||
 | 
			
		||||
        # Fetching complete, now filters
 | 
			
		||||
        # @todo move to class / maybe inside of fetcher abstract base?
 | 
			
		||||
 | 
			
		||||
@@ -193,4 +192,9 @@ class perform_site_check():
 | 
			
		||||
                if not watch['title'] or not len(watch['title']):
 | 
			
		||||
                    update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
 | 
			
		||||
 | 
			
		||||
        return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot
 | 
			
		||||
        if self.datastore.data['settings']['application'].get('real_browser_save_screenshot', True):
 | 
			
		||||
            screenshot = fetcher.screenshot()
 | 
			
		||||
 | 
			
		||||
        fetcher.quit()
 | 
			
		||||
 | 
			
		||||
        return changed_detected, update_obj, text_content_before_ignored_filter, screenshot
 | 
			
		||||
							
								
								
									
										13
									
								
								changedetectionio/static/js/settings.js
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										13
									
								
								changedetectionio/static/js/settings.js
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,13 @@
 | 
			
		||||
window.addEventListener("load", (event) => {
 | 
			
		||||
  // just an example for now
 | 
			
		||||
  function toggleVisible(elem) {
 | 
			
		||||
    // there are better ways to do this
 | 
			
		||||
    var x = document.getElementById(elem);
 | 
			
		||||
    if (x.style.display === "block") {
 | 
			
		||||
      x.style.display = "none";
 | 
			
		||||
    } else {
 | 
			
		||||
      x.style.display = "block";
 | 
			
		||||
    }
 | 
			
		||||
  }
 | 
			
		||||
});
 | 
			
		||||
 | 
			
		||||
@@ -1,14 +0,0 @@
 | 
			
		||||
$(document).ready(function() {
 | 
			
		||||
    function toggle() {
 | 
			
		||||
        if ($('input[name="fetch_backend"]:checked').val() != 'html_requests') {
 | 
			
		||||
            $('#requests-override-options').hide();
 | 
			
		||||
        } else {
 | 
			
		||||
            $('#requests-override-options').show();
 | 
			
		||||
        }
 | 
			
		||||
    }
 | 
			
		||||
    $('input[name="fetch_backend"]').click(function (e) {
 | 
			
		||||
        toggle();
 | 
			
		||||
    });
 | 
			
		||||
    toggle();
 | 
			
		||||
 | 
			
		||||
});
 | 
			
		||||
@@ -260,14 +260,46 @@ class ChangeDetectionStore:
 | 
			
		||||
        return self.data['watching'][uuid].get(val)
 | 
			
		||||
 | 
			
		||||
    # Remove a watch's data but keep the entry (URL etc.)
 | 
			
		||||
    def scrub_watch(self, uuid):
 | 
			
		||||
        import pathlib
 | 
			
		||||
    def scrub_watch(self, uuid, limit_timestamp = False):
 | 
			
		||||
 | 
			
		||||
        self.__data['watching'][uuid].update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'newest_history_key': 0, 'previous_md5': False})
 | 
			
		||||
        self.needs_write_urgent = True
 | 
			
		||||
        import hashlib
 | 
			
		||||
        del_timestamps = []
 | 
			
		||||
 | 
			
		||||
        for item in pathlib.Path(self.datastore_path).rglob(uuid+"/*.txt"):
 | 
			
		||||
            unlink(item)
 | 
			
		||||
        changes_removed = 0
 | 
			
		||||
 | 
			
		||||
        for timestamp, path in self.data['watching'][uuid]['history'].items():
 | 
			
		||||
            if not limit_timestamp or (limit_timestamp is not False and int(timestamp) > limit_timestamp):
 | 
			
		||||
                self.unlink_history_file(path)
 | 
			
		||||
                del_timestamps.append(timestamp)
 | 
			
		||||
                changes_removed += 1
 | 
			
		||||
 | 
			
		||||
        if not limit_timestamp:
 | 
			
		||||
            self.data['watching'][uuid]['last_checked'] = 0
 | 
			
		||||
            self.data['watching'][uuid]['last_changed'] = 0
 | 
			
		||||
            self.data['watching'][uuid]['previous_md5'] = ""
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
        for timestamp in del_timestamps:
 | 
			
		||||
            del self.data['watching'][uuid]['history'][str(timestamp)]
 | 
			
		||||
 | 
			
		||||
            # If there was a limit timestamp, we need to reset some metadata about the entry
 | 
			
		||||
            # This has to happen after we remove the others from the list
 | 
			
		||||
            if limit_timestamp:
 | 
			
		||||
                newest_key = self.get_newest_history_key(uuid)
 | 
			
		||||
                if newest_key:
 | 
			
		||||
                    self.data['watching'][uuid]['last_checked'] = int(newest_key)
 | 
			
		||||
                    # @todo should be the original value if it was less than newest key
 | 
			
		||||
                    self.data['watching'][uuid]['last_changed'] = int(newest_key)
 | 
			
		||||
                    try:
 | 
			
		||||
                        with open(self.data['watching'][uuid]['history'][str(newest_key)], "rb") as fp:
 | 
			
		||||
                            content = fp.read()
 | 
			
		||||
                        self.data['watching'][uuid]['previous_md5'] = hashlib.md5(content).hexdigest()
 | 
			
		||||
                    except (FileNotFoundError, IOError):
 | 
			
		||||
                        self.data['watching'][uuid]['previous_md5'] = ""
 | 
			
		||||
                        pass
 | 
			
		||||
 | 
			
		||||
        self.needs_write = True
 | 
			
		||||
        return changes_removed
 | 
			
		||||
 | 
			
		||||
    def add_watch(self, url, tag="", extras=None, write_to_disk_now=True):
 | 
			
		||||
        if extras is None:
 | 
			
		||||
@@ -421,11 +453,10 @@ class ChangeDetectionStore:
 | 
			
		||||
        import pathlib
 | 
			
		||||
 | 
			
		||||
        # Only in the sub-directories
 | 
			
		||||
        for uuid in self.data['watching']:
 | 
			
		||||
            for item in pathlib.Path(self.datastore_path).rglob(uuid+"/*.txt"):
 | 
			
		||||
                if not str(item) in index:
 | 
			
		||||
                    print ("Removing",item)
 | 
			
		||||
                    unlink(item)
 | 
			
		||||
        for item in pathlib.Path(self.datastore_path).rglob("*/*txt"):
 | 
			
		||||
            if not str(item) in index:
 | 
			
		||||
                print ("Removing",item)
 | 
			
		||||
                unlink(item)
 | 
			
		||||
 | 
			
		||||
    # Run all updates
 | 
			
		||||
    # IMPORTANT - Each update could be run even when they have a new install and the schema is correct
 | 
			
		||||
 
 | 
			
		||||
@@ -9,7 +9,6 @@
 | 
			
		||||
    const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
 | 
			
		||||
{% endif %}
 | 
			
		||||
</script>
 | 
			
		||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='watch-settings.js')}}" defer></script>
 | 
			
		||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
 | 
			
		||||
 | 
			
		||||
<div class="edit-form monospaced-textarea">
 | 
			
		||||
@@ -66,10 +65,12 @@
 | 
			
		||||
                        </span>
 | 
			
		||||
                    </div>
 | 
			
		||||
 | 
			
		||||
                <fieldset class="pure-group" id="requests-override-options">
 | 
			
		||||
                    <div class="pure-form-message-inline">
 | 
			
		||||
                <hr/>
 | 
			
		||||
                <fieldset class="pure-group">
 | 
			
		||||
 | 
			
		||||
                    <span class="pure-form-message-inline">
 | 
			
		||||
                        <strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
 | 
			
		||||
                    </div>
 | 
			
		||||
                    </span>
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_field(form.method) }}
 | 
			
		||||
                    </div>
 | 
			
		||||
 
 | 
			
		||||
@@ -7,7 +7,7 @@
 | 
			
		||||
        <input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
 | 
			
		||||
        <fieldset>
 | 
			
		||||
            <div class="pure-control-group">
 | 
			
		||||
                This will remove ALL version snapshots/data, but keep your list of URLs. <br/>
 | 
			
		||||
                This will remove all version snapshots/data, but keep your list of URLs. <br/>
 | 
			
		||||
                You may like to use the <strong>BACKUP</strong> link first.<br/>
 | 
			
		||||
            </div>
 | 
			
		||||
            <br/>
 | 
			
		||||
@@ -17,6 +17,12 @@
 | 
			
		||||
                <span class="pure-form-message-inline">Type in the word <strong>scrub</strong> to confirm that you understand!</span>
 | 
			
		||||
            </div>
 | 
			
		||||
            <br/>
 | 
			
		||||
            <div class="pure-control-group">
 | 
			
		||||
                <label for="confirmtext">Optional: Limit deletion of snapshots to snapshots <i>newer</i> than date/time</label>
 | 
			
		||||
                <input type="datetime-local" id="limit_date" name="limit_date"  />
 | 
			
		||||
                <span class="pure-form-message-inline">dd/mm/yyyy hh:mm (24 hour format)</span>
 | 
			
		||||
            </div>
 | 
			
		||||
            <br/>
 | 
			
		||||
            <div class="pure-control-group">
 | 
			
		||||
                <button type="submit" class="pure-button pure-button-primary">Scrub!</button>
 | 
			
		||||
            </div>
 | 
			
		||||
 
 | 
			
		||||
@@ -9,6 +9,7 @@
 | 
			
		||||
    const email_notification_prefix=JSON.parse('{{emailprefix|tojson}}');
 | 
			
		||||
{% endif %}
 | 
			
		||||
</script>
 | 
			
		||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='settings.js')}}" defer></script>
 | 
			
		||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script>
 | 
			
		||||
<script type="text/javascript" src="{{url_for('static_content', group='js', filename='notifications.js')}}" defer></script>
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -17,19 +17,12 @@ services:
 | 
			
		||||
  #       Alternative WebDriver/selenium URL, do not use "'s or 's!
 | 
			
		||||
  #      - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub
 | 
			
		||||
  #
 | 
			
		||||
  #       WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy,
 | 
			
		||||
  #                                webdriver_proxyAutoconfigUrl, webdriver_autodetect,
 | 
			
		||||
  #       WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_httpProxy, webdriver_noProxy,
 | 
			
		||||
  #                                webdriver_proxyAutoconfigUrl, webdriver_sslProxy, webdriver_autodetect,
 | 
			
		||||
  #                                webdriver_socksProxy, webdriver_socksUsername, webdriver_socksVersion, webdriver_socksPassword
 | 
			
		||||
  #
 | 
			
		||||
  #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
 | 
			
		||||
  #
 | 
			
		||||
  #       Alternative Playwright URL, do not use "'s or 's!
 | 
			
		||||
  #      - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/playwright
 | 
			
		||||
  #
 | 
			
		||||
  #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
 | 
			
		||||
  #
 | 
			
		||||
  #             https://playwright.dev/python/docs/api/class-browsertype#browser-type-launch-option-proxy
 | 
			
		||||
  #
 | 
			
		||||
  #        Plain requests - proxy support example.
 | 
			
		||||
  #      - HTTP_PROXY=socks5h://10.10.1.10:1080
 | 
			
		||||
  #      - HTTPS_PROXY=socks5h://10.10.1.10:1080
 | 
			
		||||
@@ -65,13 +58,6 @@ services:
 | 
			
		||||
#            # Workaround to avoid the browser crashing inside a docker container
 | 
			
		||||
#            # See https://github.com/SeleniumHQ/docker-selenium#quick-start
 | 
			
		||||
#            - /dev/shm:/dev/shm
 | 
			
		||||
#        restart: unless-stopped
 | 
			
		||||
 | 
			
		||||
     # Used for fetching pages via Playwright+Chrome where you need Javascript support.
 | 
			
		||||
 | 
			
		||||
#    playwright-chrome:
 | 
			
		||||
#        hostname: playwright-chrome
 | 
			
		||||
#        image: browserless/chrome
 | 
			
		||||
#        restart: unless-stopped
 | 
			
		||||
 | 
			
		||||
volumes:
 | 
			
		||||
 
 | 
			
		||||
@@ -40,4 +40,3 @@ selenium ~= 4.1.0
 | 
			
		||||
# need to revisit flask login versions
 | 
			
		||||
werkzeug ~= 2.0.0
 | 
			
		||||
 | 
			
		||||
# playwright is installed at Dockerfile build time because it's not available on all platforms
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user