Compare commits

..

2 Commits

Author SHA1 Message Date
dgtlmoon
df2c58d1ff Not needed 2022-06-11 22:55:00 +02:00
dgtlmoon
d29e0eea47 Also log normal notification activity, make logs easier to find 2022-06-11 22:52:52 +02:00
35 changed files with 3886 additions and 816 deletions

View File

@@ -85,8 +85,8 @@ jobs:
version: latest
driver-opts: image=moby/buildkit:master
# master branch -> :dev container tag
- name: Build and push :dev
# master always builds :latest
- name: Build and push :latest
id: docker_build
if: ${{ github.ref }} == "refs/heads/master"
uses: docker/build-push-action@v2
@@ -95,12 +95,12 @@ jobs:
file: ./Dockerfile
push: true
tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest,ghcr.io/${{ github.repository }}:latest
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
# A new tagged release is required, which builds :tag and :latest
# A new tagged release is required, which builds :tag
- name: Build and push :tag
id: docker_build_tag_release
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
@@ -110,10 +110,7 @@ jobs:
file: ./Dockerfile
push: true
tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }}
ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest
ghcr.io/dgtlmoon/changedetection.io:latest
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }},ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
@@ -128,3 +125,5 @@ jobs:
key: ${{ runner.os }}-buildx-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-

View File

@@ -3,9 +3,9 @@
![changedetection.io](https://github.com/dgtlmoon/changedetection.io/actions/workflows/test-only.yml/badge.svg?branch=master)
## Web Site Change Detection, Monitoring and Notification - Self-Hosted or SaaS.
## Self-Hosted, Open Source, Change Monitoring of Web Pages
_Know when web pages change! Stay on top of new information! Get notifications when important website content changes_
_Know when web pages change! Stay on top of new information!_
Live your data-life *pro-actively* instead of *re-actively*.
@@ -33,7 +33,6 @@ Free, Open-source web page monitoring, notification and change detection. Don't
- New software releases, security advisories when you're not on their mailing list.
- Festivals with changes
- Real estate listing changes
- Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
- COVID related news from government websites
- University/organisation news from their website
- Detect and monitor changes in JSON API responses
@@ -57,9 +56,9 @@ Easily see what changed, examine by word, line, or individual character.
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
### Filter by elements using the Visual Selector tool.
### Target elements with the Visual Selector tool.
Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (included as part of our subscription service)
Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (available also as part of our subscription service)
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/visualselector-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />
@@ -68,18 +67,14 @@ Available when connected to a <a href="https://github.com/dgtlmoon/changedetecti
### Docker
With Docker Compose, just clone this repository and run:
```bash
$ docker-compose up -d
```
Docker standalone
```bash
$ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io
```
`:latest` tag is our latest stable release, `:dev` tag is our bleeding edge `master` branch.
### Windows
See the install instructions at the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
@@ -119,7 +114,7 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
## Filters
XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
(We support LXML `re:test`, `re:match` and `re:replace`.)
(We support LXML re:test, re:match and re:replace.)
## Notifications

View File

@@ -44,7 +44,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.39.16'
__version__ = '0.39.14'
datastore = None
@@ -108,7 +108,7 @@ def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
# Worker thread tells us which UUID it is currently processing.
for t in running_update_threads:
if t.current_uuid == watch_obj['uuid']:
return '<span class="loader"></span><span> Checking now</span>'
return "Checking now.."
if watch_obj['last_checked'] == 0:
return 'Not yet'
@@ -298,7 +298,7 @@ def changedetection_app(config=None, datastore_o=None):
# Sort by last_changed and add the uuid which is usually the key..
sorted_watches = []
# @todo needs a .itemsWithTag() or something - then we can use that in Jinja2 and throw this away
# @todo needs a .itemsWithTag() or something
for uuid, watch in datastore.data['watching'].items():
if limit_tag != None:
@@ -361,7 +361,7 @@ def changedetection_app(config=None, datastore_o=None):
fe.pubDate(dt)
response = make_response(fg.rss_str())
response.headers.set('Content-Type', 'application/rss+xml;charset=utf-8')
response.headers.set('Content-Type', 'application/rss+xml')
return response
@app.route("/", methods=['GET'])
@@ -403,6 +403,8 @@ def changedetection_app(config=None, datastore_o=None):
watch['uuid'] = uuid
sorted_watches.append(watch)
sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)
existing_tags = datastore.get_all_tags()
form = forms.quickWatchForm(request.form)
@@ -431,9 +433,7 @@ def changedetection_app(config=None, datastore_o=None):
def ajax_callback_send_notification_test():
import apprise
from .apprise_asset import asset
apobj = apprise.Apprise(asset=asset)
apobj = apprise.Apprise()
# validate URLS
if not len(request.form['notification_urls'].strip()):
@@ -459,38 +459,37 @@ def changedetection_app(config=None, datastore_o=None):
return 'OK'
@app.route("/clear_history/<string:uuid>", methods=['GET'])
@app.route("/scrub/<string:uuid>", methods=['GET'])
@login_required
def clear_watch_history(uuid):
def scrub_watch(uuid):
try:
datastore.clear_watch_history(uuid)
datastore.scrub_watch(uuid)
except KeyError:
flash('Watch not found', 'error')
else:
flash("Cleared snapshot history for watch {}".format(uuid))
flash("Scrubbed watch {}".format(uuid))
return redirect(url_for('index'))
@app.route("/clear_history", methods=['GET', 'POST'])
@app.route("/scrub", methods=['GET', 'POST'])
@login_required
def clear_all_history():
def scrub_page():
if request.method == 'POST':
confirmtext = request.form.get('confirmtext')
if confirmtext == 'clear':
if confirmtext == 'scrub':
changes_removed = 0
for uuid in datastore.data['watching'].keys():
datastore.clear_watch_history(uuid)
#TODO: KeyError not checked, as it is above
datastore.scrub_watch(uuid)
flash("Cleared snapshot history for all watches")
flash("Cleared all snapshot history")
else:
flash('Incorrect confirmation text.', 'error')
return redirect(url_for('index'))
output = render_template("clear_all_history.html")
output = render_template("scrub.html")
return output
@@ -657,8 +656,7 @@ def changedetection_app(config=None, datastore_o=None):
current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
visualselector_data_is_ready=visualselector_data_is_ready,
visualselector_enabled=visualselector_enabled,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
visualselector_enabled=visualselector_enabled
)
return output
@@ -834,7 +832,7 @@ def changedetection_app(config=None, datastore_o=None):
newest=newest_version_file_contents,
previous=previous_version_file_contents,
extra_stylesheets=extra_stylesheets,
versions=dates[:-1], # All except current/last
versions=dates[1:],
uuid=uuid,
newest_version_timestamp=dates[-1],
current_previous_version=str(previous_version),
@@ -857,12 +855,6 @@ def changedetection_app(config=None, datastore_o=None):
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
# Normally you would never reach this, because the 'preview' button is not available when there's no history
# However they may try to clear snapshots and reload the page
if datastore.data['watching'][uuid].history_n == 0:
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
return redirect(url_for('index'))
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
try:
@@ -1265,12 +1257,11 @@ def notification_runner():
else:
now = datetime.now()
sent_obj = None
try:
from changedetectionio import notification
sent_obj = notification.process_notification(n_object, datastore)
notification.process_notification(n_object, datastore)
except Exception as e:
logging.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
@@ -1284,17 +1275,14 @@ def notification_runner():
notification_debug_log += log_lines
# Process notifications
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
notification_debug_log+= ["{} - SENDING {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(n_object))]
# Trim the log length
notification_debug_log = notification_debug_log[-100:]
# Thread runner to check every minute, look for new watches to feed into the Queue.
def ticker_thread_check_time_launch_checks():
import random
from changedetectionio import update_worker
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
import logging
# Spin up Workers that do the fetching
# Can be overridden by ENV or use the default settings
@@ -1327,12 +1315,14 @@ def ticker_thread_check_time_launch_checks():
while update_q.qsize() >= 2000:
time.sleep(1)
recheck_time_system_seconds = int(datastore.threshold_seconds)
# Check for watches outside of the time threshold to put in the thread queue.
now = time.time()
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
recheck_time_system_seconds = datastore.threshold_seconds
for uuid in watch_uuid_list:
now = time.time()
watch = datastore.data['watching'].get(uuid)
if not watch:
logging.error("Watch: {} no longer present.".format(uuid))
@@ -1343,33 +1333,20 @@ def ticker_thread_check_time_launch_checks():
continue
# If they supplied an individual entry minutes to threshold.
threshold = now
watch_threshold_seconds = watch.threshold_seconds()
threshold = watch_threshold_seconds if watch_threshold_seconds > 0 else recheck_time_system_seconds
if watch_threshold_seconds:
threshold -= watch_threshold_seconds
else:
threshold -= recheck_time_system_seconds
# #580 - Jitter plus/minus amount of time to make the check seem more random to the server
jitter = datastore.data['settings']['requests'].get('jitter_seconds', 0)
if jitter > 0:
if watch.jitter_seconds == 0:
watch.jitter_seconds = random.uniform(-abs(jitter), jitter)
seconds_since_last_recheck = now - watch['last_checked']
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
# Yeah, put it in the queue, it's more than time
if watch['last_checked'] <= max(threshold, recheck_time_minimum_seconds):
if not uuid in running_uuids and uuid not in update_q.queue:
print("Queued watch UUID {} last checked at {} queued at {:0.2f} jitter {:0.2f}s, {:0.2f}s since last checked".format(uuid,
watch['last_checked'],
now,
watch.jitter_seconds,
now - watch['last_checked']))
# Into the queue with you
update_q.put(uuid)
# Reset for next time
watch.jitter_seconds = 0
# Wait before checking the list again - saves CPU
time.sleep(1)
# Wait a few seconds before checking the list again
time.sleep(3)
# Should be low so we can break this out in testing
app.config.exit.wait(1)
app.config.exit.wait(1)

View File

@@ -1,11 +0,0 @@
import apprise
# Build one module-level AppriseAsset carrying this application's own branding values
# (logo image URL, application id, description and homepage URL).
# Reference for the asset object and the attributes set below:
# https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object
asset = apprise.AppriseAsset(
image_url_logo='https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
)
# Override the asset's application identity fields with changedetection.io's name, tagline and site.
asset.app_id = "changedetection.io"
asset.app_desc = "ChangeDetection.io best and simplest website monitoring and change detection"
asset.app_url = "https://changedetection.io"

View File

@@ -35,7 +35,7 @@ def main():
create_datastore_dir = False
for opt, arg in opts:
# if opt == '--clear-all-history':
# if opt == '--purge':
# Remove history, the actual files you need to delete manually.
# for uuid, watch in datastore.data['watching'].items():
# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})

View File

@@ -46,7 +46,6 @@ class Fetcher():
headers = None
fetcher_description = "No description"
webdriver_js_execute_code = None
xpath_element_js = """
// Include the getXpath script directly, easier than fetching
!function(e,n){"object"==typeof exports&&"undefined"!=typeof module?module.exports=n():"function"==typeof define&&define.amd?define(n):(e=e||self).getXPath=n()}(this,function(){return function(e){var n=e;if(n&&n.id)return'//*[@id="'+n.id+'"]';for(var o=[];n&&Node.ELEMENT_NODE===n.nodeType;){for(var i=0,r=!1,d=n.previousSibling;d;)d.nodeType!==Node.DOCUMENT_TYPE_NODE&&d.nodeName===n.nodeName&&i++,d=d.previousSibling;for(d=n.nextSibling;d;){if(d.nodeName===n.nodeName){r=!0;break}d=d.nextSibling}o.push((n.prefix?n.prefix+":":"")+n.localName+(i||r?"["+(i+1)+"]":"")),n=n.parentNode}return o.length?"/"+o.reverse().join("/"):""}});
@@ -176,6 +175,7 @@ class Fetcher():
# Will be needed in the future by the VisualSelector, always get this where possible.
screenshot = False
fetcher_description = "No description"
system_http_proxy = os.getenv('HTTP_PROXY')
system_https_proxy = os.getenv('HTTPS_PROXY')
@@ -281,14 +281,13 @@ class base_html_playwright(Fetcher):
from playwright.sync_api import sync_playwright
import playwright._impl._api_types
from playwright._impl._api_types import Error, TimeoutError
response = None
with sync_playwright() as p:
browser_type = getattr(p, self.browser_type)
# Seemed to cause a connection Exception even tho I can see it connect
# self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
# 60,000 connection timeout only
browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000)
browser = browser_type.connect_over_cdp(self.command_executor, timeout=timeout * 1000)
# Set user agent to prevent Cloudflare from blocking the browser
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
@@ -301,35 +300,21 @@ class base_html_playwright(Fetcher):
accept_downloads=False
)
if len(request_headers):
context.set_extra_http_headers(request_headers)
page = context.new_page()
try:
page.set_default_navigation_timeout(90000)
page.set_default_timeout(90000)
# Listen for all console events and handle errors
page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
# Bug - never set viewport size BEFORE page.goto
# Waits for the next navigation. Using Python context manager
# prevents a race condition between clicking and waiting for a navigation.
with page.expect_navigation():
response = page.goto(url, wait_until='load')
if self.webdriver_js_execute_code is not None:
page.evaluate(self.webdriver_js_execute_code)
# Bug - never set viewport size BEFORE page.goto
response = page.goto(url, timeout=timeout * 1000, wait_until='commit')
# Wait_until = commit
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
# This seemed to solve nearly all 'TimeoutErrors'
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
page.wait_for_timeout(extra_wait * 1000)
except playwright._impl._api_types.TimeoutError as e:
context.close()
browser.close()
# This can be ok, we will try to grab what we could retrieve
pass
raise EmptyReply(url=url, status_code=None)
except Exception as e:
print ("other exception when page.goto")
print (str(e))
context.close()
browser.close()
raise PageUnloadable(url=url, status_code=None)
@@ -337,22 +322,18 @@ class base_html_playwright(Fetcher):
if response is None:
context.close()
browser.close()
print ("response object was none")
raise EmptyReply(url=url, status_code=None)
if len(page.content().strip()) == 0:
context.close()
browser.close()
raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page
page.set_viewport_size({"width": 1280, "height": 1024})
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
time.sleep(extra_wait)
self.content = page.content()
self.status_code = response.status
page.set_viewport_size({"width": 1280, "height": 1024})
if len(self.content.strip()) == 0:
context.close()
browser.close()
print ("Content was empty")
raise EmptyReply(url=url, status_code=None)
self.status_code = response.status
self.content = page.content()
self.headers = response.all_headers()
if current_css_filter is not None:
@@ -365,15 +346,9 @@ class base_html_playwright(Fetcher):
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
# JPEG is better here because the screenshots can be very very large
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
try:
# Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
# The actual screenshot
self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=92)
except Exception as e:
context.close()
browser.close()
@@ -453,12 +428,6 @@ class base_html_webdriver(Fetcher):
self.driver.set_window_size(1280, 1024)
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
if self.webdriver_js_execute_code is not None:
self.driver.execute_script(self.webdriver_js_execute_code)
# Selenium doesn't automatically wait for actions as good as Playwright, so wait again
self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)))
self.screenshot = self.driver.get_screenshot_as_png()
# @todo - how to check this? is it possible?

View File

@@ -1,5 +1,4 @@
import hashlib
import logging
import os
import re
import time
@@ -106,9 +105,6 @@ class perform_site_check():
elif system_webdriver_delay is not None:
fetcher.render_extract_delay = system_webdriver_delay
if watch['webdriver_js_execute_code'] is not None and watch['webdriver_js_execute_code'].strip():
fetcher.webdriver_js_execute_code = watch['webdriver_js_execute_code']
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_code, watch['css_filter'])
fetcher.quit()
@@ -150,9 +146,7 @@ class perform_site_check():
is_html = False
if is_html or is_source:
# CSS Filter, extract the HTML that matches and feed that into the existing inscriptis::get_text
fetcher.content = html_tools.workarounds_for_obfuscations(fetcher.content)
html_content = fetcher.content
# If not JSON, and if it's not text/plain..
@@ -218,7 +212,7 @@ class perform_site_check():
result = re.findall(s_re.encode('utf8'), stripped_text_from_html,
flags=re.MULTILINE | re.DOTALL | re.LOCALE)
if result:
regex_matched_output = regex_matched_output + result
regex_matched_output.append(result[0])
if regex_matched_output:
stripped_text_from_html = b'\n'.join(regex_matched_output)
@@ -230,37 +224,29 @@ class perform_site_check():
else:
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
############ Blocking rules, after checksum #################
blocked = False
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
update_obj["previous_md5"] = fetched_md5
blocked_by_not_found_trigger_text = False
if len(watch['trigger_text']):
# Assume blocked
blocked = True
# Yeah, lets block first until something matches
blocked_by_not_found_trigger_text = True
# Filter and trigger works the same, so reuse it
# It should return the line numbers that match
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
wordlist=watch['trigger_text'],
mode="line numbers")
# Unblock if the trigger was found
# If it returned any lines that matched..
if result:
blocked = False
blocked_by_not_found_trigger_text = False
if len(watch['text_should_not_be_present']):
# If anything matched, then we should block a change from happening
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
wordlist=watch['text_should_not_be_present'],
mode="line numbers")
if result:
blocked = True
# The main thing that all this at the moment comes down to :)
if watch['previous_md5'] != fetched_md5:
if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
changed_detected = True
# Looks like something changed, but did it match all the rules?
if blocked:
changed_detected = False
update_obj["previous_md5"] = fetched_md5
update_obj["last_changed"] = timestamp
# Extract title as title
if is_html:
@@ -268,21 +254,4 @@ class perform_site_check():
if not watch['title'] or not len(watch['title']):
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
if changed_detected:
if watch.get('check_unique_lines', False):
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
# One or more lines? unsure?
if not has_unique_lines:
logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
changed_detected = False
else:
logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data

View File

@@ -340,12 +340,7 @@ class watchForm(commonSettingsForm):
body = TextAreaField('Request body', [validators.Optional()])
method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
webdriver_js_execute_code = TextAreaField('Execute JavaScript before change detection', render_kw={"rows": "5"}, validators=[validators.Optional()])
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
proxy = RadioField('Proxy')
@@ -368,9 +363,7 @@ class watchForm(commonSettingsForm):
class globalSettingsRequestForm(Form):
time_between_check = FormField(TimeBetweenCheckForm)
proxy = RadioField('Proxy')
jitter_seconds = IntegerField('Random jitter seconds ± check',
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0, message="Should contain zero or more seconds")])
# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):

View File

@@ -202,17 +202,3 @@ def html_to_text(html_content: str, render_anchor_tag_content=False) -> str:
return text_content
def workarounds_for_obfuscations(content):
    """
    Remove comment-based obfuscation from HTML before it is converted to text.

    Some sites are using sneaky tactics to make prices and other information
    un-renderable by Inscriptis, e.g. HomeDepot.com style
    ``<span>$<!-- -->90<!-- -->.<!-- -->74</span>``.
    See https://github.com/weblyzard/inscriptis/issues/45

    This could go into its own Pip package in the future, for faster updates.

    :param content: HTML source as a string; may be empty or ``None``.
    :return: ``content`` with every HTML comment that contains only whitespace
        removed. Falsy input (``None``, ``''``) is returned unchanged.
    """
    if not content:
        return content

    # Raw string on purpose: '\s' inside a normal string literal is an invalid
    # escape sequence and triggers a warning on modern Python versions.
    return re.sub(r'<!--\s+-->', '', content)

View File

@@ -1,4 +1,8 @@
from os import getenv
import collections
import os
import uuid as uuid_builder
from changedetectionio.notification import (
default_notification_body,
default_notification_format,
@@ -11,16 +15,15 @@ class model(dict):
'watching': {},
'settings': {
'headers': {
'User-Agent': getenv("DEFAULT_SETTINGS_HEADERS_USERAGENT", 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36'),
'User-Agent': 'Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.66 Safari/537.36',
'Accept': 'text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.9',
'Accept-Encoding': 'gzip, deflate', # No support for brolti in python requests yet.
'Accept-Language': 'en-GB,en-US;q=0.9,en;'
},
'requests': {
'timeout': int(getenv("DEFAULT_SETTINGS_REQUESTS_TIMEOUT", "45")), # Default 45 seconds
'timeout': 15, # Default 15 seconds
'time_between_check': {'weeks': None, 'days': None, 'hours': 3, 'minutes': None, 'seconds': None},
'jitter_seconds': 0,
'workers': int(getenv("DEFAULT_SETTINGS_REQUESTS_WORKERS", "10")), # Number of threads, lower is better for slow connections
'workers': 10, # Number of threads, lower is better for slow connections
'proxy': None # Preferred proxy connection
},
'application': {
@@ -29,7 +32,7 @@ class model(dict):
'base_url' : None,
'extract_title_as_title': False,
'empty_pages_are_a_change': False,
'fetch_backend': getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
'fetch_backend': os.getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
'global_subtractive_selectors': [],
'ignore_whitespace': True,

View File

@@ -2,7 +2,6 @@ import os
import uuid as uuid_builder
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
from changedetectionio.notification import (
default_notification_body,
@@ -14,6 +13,7 @@ from changedetectionio.notification import (
class model(dict):
__newest_history_key = None
__history_n=0
__base_config = {
'url': None,
'tag': None,
@@ -39,19 +39,15 @@ class model(dict):
'extract_text': [], # Extract text by regex after filters
'subtractive_selectors': [],
'trigger_text': [], # List of text or regex to wait for until a change is detected
'text_should_not_be_present': [], # Text that should not present
'fetch_backend': None,
'extract_title_as_title': False,
'check_unique_lines': False, # On change-detected, compare against all history if its something new
'proxy': None, # Preferred proxy connection
# Re #110, so then if this is set to None, we know to use the default value instead
# Requires setting to None on submit if it's the same as the default
# Should be all None by default, so we use the system default in this case.
'time_between_check': {'weeks': None, 'days': None, 'hours': None, 'minutes': None, 'seconds': None},
'webdriver_delay': None,
'webdriver_js_execute_code': None, # Run before change-detection
'webdriver_delay': None
}
jitter_seconds = 0
def __init__(self, *arg, **kw):
import uuid
@@ -89,7 +85,7 @@ class model(dict):
# Read the history file as a dict
fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
if os.path.isfile(fname):
logging.debug("Reading history index " + str(time.time()))
logging.debug("Disk IO accessed " + str(time.time()))
with open(fname, "r") as f:
tmp_history = dict(i.strip().split(',', 2) for i in f.readlines())
@@ -161,21 +157,9 @@ class model(dict):
def threshold_seconds(self):
    """
    Convert this watch's own 'time_between_check' setting into a total number of seconds.

    Units that are missing, ``None`` or zero contribute nothing, so the result
    is 0 when no per-watch interval has been configured.
    """
    unit_seconds = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
    configured = self.get('time_between_check', {})

    total = 0
    for unit in unit_seconds:
        amount = configured.get(unit, None)
        if not amount:
            # Unset (or zero) unit - nothing to add
            continue
        total += amount * unit_seconds[unit]
    return total
# Iterate over all history texts and see if something new exists
def lines_contain_something_unique_compared_to_history(self, lines=None):
    """
    Tell whether ``lines`` holds at least one line that no history snapshot contains.

    Every line (from ``lines`` and from each snapshot file) is normalised by
    UTF-8 decoding, stripping surrounding whitespace and lower-casing before
    it is compared.

    :param lines: iterable of ``bytes`` lines from the newest fetch; ``None``
        (the default) is treated as "no lines".
    :return: ``True`` when at least one normalised line is absent from every
        snapshot file referenced by ``self.history``; ``False`` when every line
        has been seen before or when ``lines`` is empty.
    """
    def _normalise(raw):
        return raw.decode('utf-8').strip().lower()

    # Lines of the current fetch that have not (yet) been found in any snapshot
    unseen = {_normalise(raw) for raw in (lines or [])}
    if not unseen:
        return False

    # self.history maps a key to the path of a snapshot file on disk
    for snapshot_path in self.history.values():
        # Context manager so the snapshot file handle is always closed
        with open(snapshot_path, 'rb') as snapshot:
            unseen.difference_update(_normalise(raw) for raw in snapshot)
        if not unseen:
            # Everything was already present somewhere in history - stop reading files
            return False

    return True

View File

@@ -48,10 +48,9 @@ def process_notification(n_object, datastore):
# Anything higher than or equal to WARNING (which covers things like Connection errors)
# raise it as an exception
apobjs=[]
sent_objs=[]
from .apprise_asset import asset
for url in n_object['notification_urls']:
apobj = apprise.Apprise(debug=True, asset=asset)
apobj = apprise.Apprise(debug=True)
url = url.strip()
if len(url):
print(">> Process Notification: AppRise notifying {}".format(url))
@@ -64,35 +63,23 @@ def process_notification(n_object, datastore):
# So if no avatar_url is specified, add one so it can be correctly calculated into the total payload
k = '?' if not '?' in url else '&'
if not 'avatar_url' in url and not url.startswith('mail'):
if not 'avatar_url' in url:
url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
if url.startswith('tgram://'):
# Telegram only supports a limited subset of HTML, remove the '<br/>' we place in.
# re https://github.com/dgtlmoon/changedetection.io/issues/555
# @todo re-use an existing library we have already imported to strip all non-allowed tags
n_body = n_body.replace('<br/>', '\n')
n_body = n_body.replace('</br>', '\n')
# real limit is 4096, but minus some for extra metadata
payload_max_size = 3600
body_limit = max(0, payload_max_size - len(n_title))
n_title = n_title[0:payload_max_size]
n_body = n_body[0:body_limit]
elif url.startswith('discord://') or url.startswith('https://discordapp.com/api/webhooks'):
elif url.startswith('discord://'):
# real limit is 2000, but minus some for extra metadata
payload_max_size = 1700
body_limit = max(0, payload_max_size - len(n_title))
n_title = n_title[0:payload_max_size]
n_body = n_body[0:body_limit]
elif url.startswith('mailto'):
# Apprise will default to HTML, so we need to override it
# So that what's generated in n_body is in line with what is going to be sent.
# https://github.com/caronc/apprise/issues/633#issuecomment-1191449321
if not 'format=' in url and (n_format == 'text' or n_format == 'markdown'):
url = "{}?format={}".format(url, n_format)
apobj.add(url)
apobj.notify(
@@ -109,14 +96,7 @@ def process_notification(n_object, datastore):
log_value = logs.getvalue()
if log_value and 'WARNING' in log_value or 'ERROR' in log_value:
raise Exception(log_value)
sent_objs.append({'title': n_title,
'body': n_body,
'url' : url,
'body_format': n_format})
# Return what was sent for better logging - after the for loop
return sent_objs
# Notification title + body content parameters get created here.

View File

@@ -1,20 +0,0 @@
<?xml version="1.0" encoding="UTF-8" standalone="no"?>
<svg
width="18"
height="19.92"
viewBox="0 0 18 19.92"
version="1.1"
id="svg6"
xmlns="http://www.w3.org/2000/svg"
xmlns:svg="http://www.w3.org/2000/svg">
<defs
id="defs10" />
<path
d="M -3,-2 H 21 V 22 H -3 Z"
fill="none"
id="path2" />
<path
d="m 15,14.08 c -0.76,0 -1.44,0.3 -1.96,0.77 L 5.91,10.7 C 5.96,10.47 6,10.24 6,10 6,9.76 5.96,9.53 5.91,9.3 L 12.96,5.19 C 13.5,5.69 14.21,6 15,6 16.66,6 18,4.66 18,3 18,1.34 16.66,0 15,0 c -1.66,0 -3,1.34 -3,3 0,0.24 0.04,0.47 0.09,0.7 L 5.04,7.81 C 4.5,7.31 3.79,7 3,7 1.34,7 0,8.34 0,10 c 0,1.66 1.34,3 3,3 0.79,0 1.5,-0.31 2.04,-0.81 l 7.12,4.16 c -0.05,0.21 -0.08,0.43 -0.08,0.65 0,1.61 1.31,2.92 2.92,2.92 1.61,0 2.92,-1.31 2.92,-2.92 0,-1.61 -1.31,-2.92 -2.92,-2.92 z"
id="path4"
style="fill:#ffffff;fill-opacity:1" />
</svg>

Before

Width:  |  Height:  |  Size: 892 B

View File

@@ -40,19 +40,13 @@ $(document).ready(function() {
$.ajax({
type: "POST",
url: notification_base_url,
data : data,
statusCode: {
400: function() {
// More than likely the CSRF token was lost when the server restarted
alert("There was a problem processing the request, please reload the page.");
}
}
data : data
}).done(function(data){
console.log(data);
alert('Sent');
}).fail(function(data){
console.log(data);
alert('There was an error communicating with the server.');
alert('Error: '+data.responseJSON.error);
})
});
});

View File

@@ -1,30 +1,13 @@
$(document).ready(function() {
function toggle() {
if ($('input[name="fetch_backend"]:checked').val() == 'html_webdriver') {
if(playwright_enabled) {
// playwright supports headers, so hide everything else
// See #664
$('#requests-override-options #request-method').hide();
$('#requests-override-options #request-body').hide();
// @todo connect this one up
$('#ignore-status-codes-option').hide();
} else {
// selenium/webdriver doesn't support anything AFAIK, hide it all
$('#requests-override-options').hide();
}
if ($('input[name="fetch_backend"]:checked').val() != 'html_requests') {
$('#requests-override-options').hide();
$('#webdriver-override-options').show();
} else {
$('#requests-override-options').show();
$('#requests-override-options *:hidden').show();
$('#webdriver-override-options').hide();
}
}
$('input[name="fetch_backend"]').click(function (e) {
toggle();
});

View File

@@ -1,3 +1 @@
node_modules
package-lock.json

File diff suppressed because it is too large Load Diff

View File

@@ -353,8 +353,6 @@ and also iPads specifically.
/* Hide table headers (but not display: none;, for accessibility) */ }
.watch-table thead, .watch-table tbody, .watch-table th, .watch-table td, .watch-table tr {
display: block; }
.watch-table .last-checked > span {
vertical-align: middle; }
.watch-table .last-checked::before {
color: #555;
content: "Last Checked "; }
@@ -372,8 +370,7 @@ and also iPads specifically.
.watch-table td {
/* Behave like a "row" */
border: none;
border-bottom: 1px solid #eee;
vertical-align: middle; }
border-bottom: 1px solid #eee; }
.watch-table td:before {
/* Top/left values mimic padding */
top: 6px;
@@ -493,42 +490,3 @@ ul {
#api-key-copy {
color: #0078e7; }
/* spinner */
.loader,
.loader:after {
border-radius: 50%;
width: 10px;
height: 10px; }
.loader {
margin: 0px auto;
font-size: 3px;
vertical-align: middle;
display: inline-block;
text-indent: -9999em;
border-top: 1.1em solid rgba(38, 104, 237, 0.2);
border-right: 1.1em solid rgba(38, 104, 237, 0.2);
border-bottom: 1.1em solid rgba(38, 104, 237, 0.2);
border-left: 1.1em solid #2668ed;
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear; }
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }

View File

@@ -487,11 +487,6 @@ and also iPads specifically.
display: block;
}
.last-checked {
> span {
vertical-align: middle;
}
}
.last-checked::before {
color: #555;
content: "Last Checked ";
@@ -522,7 +517,7 @@ and also iPads specifically.
/* Behave like a "row" */
border: none;
border-bottom: 1px solid #eee;
vertical-align: middle;
&:before {
/* Top/left values mimic padding */
top: 6px;
@@ -706,48 +701,3 @@ ul {
#api-key-copy {
color: #0078e7;
}
/* spinner */
.loader,
.loader:after {
border-radius: 50%;
width: 10px;
height: 10px;
}
.loader {
margin: 0px auto;
font-size: 3px;
vertical-align: middle;
display: inline-block;
text-indent: -9999em;
border-top: 1.1em solid rgba(38,104,237, 0.2);
border-right: 1.1em solid rgba(38,104,237, 0.2);
border-bottom: 1.1em solid rgba(38,104,237, 0.2);
border-left: 1.1em solid #2668ed;
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear;
}
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
}
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
}

View File

@@ -158,11 +158,13 @@ class ChangeDetectionStore:
@property
def threshold_seconds(self):
seconds = 0
for m, n in Watch.mtable.items():
mtable = {'seconds': 1, 'minutes': 60, 'hours': 3600, 'days': 86400, 'weeks': 86400 * 7}
minimum_seconds_recheck_time = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 60))
for m, n in mtable.items():
x = self.__data['settings']['requests']['time_between_check'].get(m)
if x:
seconds += x * n
return seconds
return max(seconds, minimum_seconds_recheck_time)
@property
def has_unviewed(self):
@@ -249,7 +251,7 @@ class ChangeDetectionStore:
return self.data['watching'][uuid].get(val)
# Remove a watchs data but keep the entry (URL etc)
def clear_watch_history(self, uuid):
def scrub_watch(self, uuid):
import pathlib
self.__data['watching'][uuid].update(
@@ -289,16 +291,14 @@ class ChangeDetectionStore:
headers={'App-Guid': self.__data['app_guid']})
res = r.json()
# List of permissible attributes we accept from the wild internet
# List of permissible stuff we accept from the wild internet
for k in ['url', 'tag',
'paused', 'title',
'previous_md5', 'headers',
'body', 'method',
'ignore_text', 'css_filter',
'subtractive_selectors', 'trigger_text',
'extract_title_as_title', 'extract_text',
'text_should_not_be_present',
'webdriver_js_execute_code']:
'paused', 'title',
'previous_md5', 'headers',
'body', 'method',
'ignore_text', 'css_filter',
'subtractive_selectors', 'trigger_text',
'extract_title_as_title', 'extract_text']:
if res.get(k):
apply_extras[k] = res[k]
@@ -518,11 +518,3 @@ class ChangeDetectionStore:
# But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
# In the distant future we can remove this entirely
self.data['watching'][uuid]['history'] = {}
# We incorrectly stored last_changed when there was not a change, and then confused the output list table
def update_3(self):
for uuid, watch in self.data['watching'].items():
# Be sure it's recalculated
p = watch.history
if watch.history_n < 2:
watch['last_changed'] = 0

View File

@@ -14,7 +14,7 @@
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
<li><code>discord://</code> only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code>tgram://</code> bots cant send messages to other bots, so you should specify chat ID of non-bot user.</li>
<li><code>tgram://</code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
</ul>
</div>
<br/>

View File

@@ -22,7 +22,7 @@
{% if versions|length >= 1 %}
<label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
<select id="diff-version" name="previous_version">
{% for version in versions|reverse %}
{% for version in versions %}
<option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}>
{{version}}
</option>

View File

@@ -7,7 +7,6 @@
const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
const playwright_enabled={% if playwright_enabled %} true {% else %} false {% endif %};
{% if emailprefix %}
const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
@@ -25,7 +24,7 @@
<ul>
<li class="tab" id="default-tab"><a href="#general">General</a></li>
<li class="tab"><a href="#request">Request</a></li>
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Selector</a></li>
<li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
<li class="tab"><a href="#notifications">Notifications</a></li>
</ul>
@@ -82,39 +81,33 @@
</div>
{% endif %}
<fieldset id="webdriver-override-options">
<div class="pure-form-message-inline">
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
<br/>
This will wait <i>n</i> seconds before extracting the text.
</div>
<div class="pure-control-group">
{{ render_field(form.webdriver_delay) }}
<div class="pure-form-message-inline">
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
<br/>
This will wait <i>n</i> seconds before extracting the text.
{% if using_global_webdriver_wait %}
<br/><strong>Using the current global default settings</strong>
{% endif %}
</div>
</div>
<div class="pure-control-group">
{{ render_field(form.webdriver_js_execute_code) }}
<div class="pure-form-message-inline">
Run this code before performing change detection, handy for filling in fields and other actions <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Run-JavaScript-before-change-detection">More help and examples here</a>
</div>
{% if using_global_webdriver_wait %}
<div class="pure-form-message-inline">
<strong>Using the current global default settings</strong>
</div>
{% endif %}
</fieldset>
<fieldset class="pure-group" id="requests-override-options">
{% if not playwright_enabled %}
<div class="pure-form-message-inline">
<strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
</div>
{% endif %}
<div class="pure-control-group" id="request-method">
<div class="pure-form-message-inline">
<strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
</div>
<div class="pure-control-group">
{{ render_field(form.method) }}
</div>
<div class="pure-control-group" id="request-headers">
<div class="pure-control-group">
{{ render_field(form.headers, rows=5, placeholder="Example
Cookie: foobar
User-Agent: wonderbra 1.0") }}
</div>
<div class="pure-control-group" id="request-body">
<div class="pure-control-group">
{{ render_field(form.body, rows=5, placeholder="Example
{
\"name\":\"John\",
@@ -122,7 +115,7 @@ User-Agent: wonderbra 1.0") }}
\"car\":null
}") }}
</div>
<div id="ignore-status-codes-option">
<div>
{{ render_checkbox_field(form.ignore_status_codes) }}
</div>
</fieldset>
@@ -150,12 +143,6 @@ User-Agent: wonderbra 1.0") }}
</li>
</ul>
</div>
<fieldset>
<div class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
</div>
</fieldset>
<div class="pure-control-group">
{{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
class="m-d") }}
@@ -190,7 +177,7 @@ nav
<span class="pure-form-message-inline">
<ul>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
<li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
<li>Use the preview/show current tab to see ignores</li>
</ul>
@@ -212,22 +199,6 @@ nav
</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock
Sold out
Not in stock
Unavailable") }}
<span class="pure-form-message-inline">
<ul>
<li>Block change-detection while this text is on the page, all text and regex are tested <i>case-insensitive</i>, good for waiting for when a product is available again</li>
<li>Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>All lines here must not exist (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
@@ -243,7 +214,7 @@ Unavailable") }}
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
<img id="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}">
<strong>Pro-tip:</strong> This tool is only for limiting which elements will be included on a change-detection, not for interacting with browser directly.
<fieldset>
<div class="pure-control-group">
{% if visualselector_enabled %}
@@ -288,8 +259,8 @@ Unavailable") }}
<a href="{{url_for('form_delete', uuid=uuid)}}"
class="pure-button button-small button-error ">Delete</a>
<a href="{{url_for('clear_watch_history', uuid=uuid)}}"
class="pure-button button-small button-error ">Clear History</a>
<a href="{{url_for('scrub_watch', uuid=uuid)}}"
class="pure-button button-small button-error ">Scrub</a>
<a href="{{url_for('form_clone', uuid=uuid)}}"
class="pure-button button-small ">Create Copy</a>
</div>

View File

@@ -3,22 +3,22 @@
{% block content %}
<div class="edit-form">
<div class="box-wrap inner">
<form class="pure-form pure-form-stacked" action="{{url_for('clear_all_history')}}" method="POST">
<form class="pure-form pure-form-stacked" action="{{url_for('scrub_page')}}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<fieldset>
<div class="pure-control-group">
This will remove version history (snapshots) for ALL watches, but keep your list of URLs! <br/>
This will remove ALL version snapshots/data, but keep your list of URLs. <br/>
You may like to use the <strong>BACKUP</strong> link first.<br/>
</div>
<br/>
<div class="pure-control-group">
<label for="confirmtext">Confirmation text</label>
<input type="text" id="confirmtext" required="" name="confirmtext" value="" size="10"/>
<span class="pure-form-message-inline">Type in the word <strong>clear</strong> to confirm that you understand.</span>
<span class="pure-form-message-inline">Type in the word <strong>scrub</strong> to confirm that you understand!</span>
</div>
<br/>
<div class="pure-control-group">
<button type="submit" class="pure-button pure-button-primary">Clear History!</button>
<button type="submit" class="pure-button pure-button-primary">Scrub!</button>
</div>
<br/>
<div class="pure-control-group">

View File

@@ -32,11 +32,6 @@
{{ render_field(form.requests.form.time_between_check, class="time-check-widget") }}
<span class="pure-form-message-inline">Default time for all watches, when the watch does not have a specific time setting.</span>
</div>
<div class="pure-control-group">
{{ render_field(form.requests.form.jitter_seconds, class="jitter_seconds") }}
<span class="pure-form-message-inline">Example - 3 seconds random jitter could trigger up to 3 seconds earlier or up to 3 seconds later</span>
</div>
<div class="pure-control-group">
{% if not hide_remove_pass %}
{% if current_user.is_authenticated %}
@@ -148,7 +143,7 @@ nav
<ul>
<li>Note: This is applied globally in addition to the per-watch rules.</li>
<li>Each line processed separately, any line matching will be ignored (removed before creating the checksum)</li>
<li>Regular Expression support, wrap the entire line in forward slash <code>/regex/</code></li>
<li>Regular Expression support, wrap the line in forward slash <code>/regex/</code></li>
<li>Changing this will affect the comparison checksum which may trigger an alert</li>
<li>Use the preview/show current tab to see ignores</li>
</ul>
@@ -173,7 +168,7 @@ nav
<div class="pure-control-group">
{{ render_button(form.save_button) }}
<a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
<a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-cancel">Clear Snapshot History</a>
<a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete History Snapshot Data</a>
</div>
</div>

View File

@@ -14,7 +14,7 @@
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
<button type="submit" class="pure-button pure-button-primary">Watch</button>
</fieldset>
<span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread-white.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
<span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
</form>
<div>
<a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
@@ -40,7 +40,7 @@
<tbody>
{% for watch in watches|sort(attribute='last_changed', reverse=True) %}
{% for watch in watches %}
<tr id="{{ watch.uuid }}"
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
@@ -67,8 +67,8 @@
<span class="watch-tag-list">{{ watch.tag}}</span>
{% endif %}
</td>
<td class="last-checked">{{watch|format_last_checked_time|safe}}</td>
<td class="last-changed">{% if watch.history_n >=2 and watch.last_changed >0 %}
<td class="last-checked">{{watch|format_last_checked_time}}</td>
<td class="last-changed">{% if watch.history_n >=2 and watch.last_changed %}
{{watch.last_changed|format_timestamp_timeago}}
{% else %}
Not yet

View File

@@ -32,8 +32,6 @@ def app(request):
"""Create application for the tests."""
datastore_path = "./test-datastore"
# So they don't delay in fetching
os.environ["MINIMUM_SECONDS_RECHECK_TIME"] = "0"
try:
os.mkdir(datastore_path)
except FileExistsError:

View File

@@ -95,8 +95,6 @@ def test_api_simple(client, live_server):
assert watch_uuid in json.loads(res.data).keys()
before_recheck_info = json.loads(res.data)[watch_uuid]
assert before_recheck_info['last_checked'] != 0
#705 `last_changed` should be zero on the first check
assert before_recheck_info['last_changed'] == 0
assert before_recheck_info['title'] == 'My test URL'
set_modified_response()

View File

@@ -1,137 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
from changedetectionio import html_tools
def set_original_ignore_response():
    """Write the baseline HTML fixture to test-datastore/endpoint-content.txt."""
    baseline_page = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
</body>
</html>
"""
    # Overwrite whatever fixture content a previous step left behind.
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(baseline_page)
def set_modified_original_ignore_response():
    """Write a changed HTML fixture that contains the 'out of stock' line.

    The first text line differs from the baseline and three extra paragraphs
    are appended, one of which is 'out of stock'.
    """
    changed_page = """<html>
<body>
Some NEW nice initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<p>new ignore stuff</p>
<p>out of stock</p>
<p>blah</p>
</body>
</html>
"""
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(changed_page)
# Same as the modified response, but the 'out of stock' paragraph is gone
# and a 'now on sale' paragraph has been added.
def set_modified_response_minus_block_text():
    """Write a changed HTML fixture that no longer contains 'out of stock'."""
    # NOTE(review): the '$2/p>' closing tag below is malformed in the original
    # fixture; it is kept as-is here - confirm whether that is intentional.
    unblocked_page = """<html>
<body>
Some NEW nice initial text</br>
<p>Which is across multiple lines</p>
<p>now on sale $2/p>
</br>
So let's see what happens. </br>
<p>new ignore stuff</p>
<p>blah</p>
</body>
</html>
"""
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(unblocked_page)
def test_check_block_changedetection_text_NOT_present(client, live_server):
    """Check that 'text_should_not_be_present' holds back change detection.

    Flow: import the watch, save the blocking text on the edit page, then
    recheck three times - unchanged page, changed page that still contains
    the blocking text, and changed page without it. Only the last recheck
    is expected to mark the watch as 'unviewed'.
    """
    fetch_wait = 3
    live_server_setup(live_server)

    def recheck_and_wait():
        # Queue a recheck of all watches and give the fetch thread time to run.
        client.get(url_for("form_watch_checknow"), follow_redirects=True)
        time.sleep(fetch_wait)

    # Mixed case on purpose, to prove the match is case-insensitive.
    blocking_text = "out of stoCk\r\nfoobar"
    set_original_ignore_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Add our URL via the import page
    test_url = url_for('test_endpoint', _external=True)
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    # Give the thread time to pick it up
    time.sleep(fetch_wait)

    # Go to the edit page and save the blocking text for this watch
    edit_response = client.post(
        url_for("edit_page", uuid="first"),
        data={"text_should_not_be_present": blocking_text, "url": test_url, 'fetch_backend': "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in edit_response.data

    # Give the thread time to pick it up
    time.sleep(fetch_wait)

    # Check it saved
    saved_form = client.get(
        url_for("edit_page", uuid="first"),
    )
    assert bytes(blocking_text.encode('utf-8')) in saved_form.data

    # Trigger a check against the unchanged page
    recheck_and_wait()

    # It should report nothing found (no new 'unviewed' class)
    listing = client.get(url_for("index"))
    assert b'unviewed' not in listing.data
    assert b'/test-endpoint' in listing.data

    # The page changed, BUT the blocking text is still there, so we should not see a change
    set_modified_original_ignore_response()
    recheck_and_wait()

    # It should report nothing found (no new 'unviewed' class)
    listing = client.get(url_for("index"))
    assert b'unviewed' not in listing.data
    assert b'/test-endpoint' in listing.data

    # Now the blocking text is gone from the page, so the change should trigger
    set_modified_response_minus_block_text()
    recheck_and_wait()
    listing = client.get(url_for("index"))
    assert b'unviewed' in listing.data

    # Cleanup for the next test
    delete_response = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in delete_response.data

View File

@@ -33,7 +33,7 @@ def set_modified_response():
</br>
So let's see what happens. </br>
<div id="sametext">Some text thats the same</div>
<div id="changetext">Some text that did change ( 1000 online <br/> 80 guests<br/> 2000 online )</div>
<div id="changetext">Some text that did change ( 1000 online <br/> 80 guests)</div>
</body>
</html>
"""
@@ -119,12 +119,9 @@ def test_check_filter_and_regex_extract(client, live_server):
# Class will be blank for now because the frontend didnt apply the diff
assert b'<div class="">1000 online' in res.data
# All regex matching should be here
assert b'<div class="">2000 online' in res.data
# Both regexs should be here
assert b'<div class="">80 guests' in res.data
# Should not be here
assert b'Some text that did change' not in res.data
assert b'Some text that did change' not in res.data

View File

@@ -154,10 +154,6 @@ def test_check_notification(client, live_server):
time.sleep(1)
assert os.path.exists("test-datastore/notification.txt") == False
res = client.get(url_for("notification_logs"))
# be sure we see it in the output log
assert b'New ChangeDetection.io Notification - ' + test_url.encode('utf-8') in res.data
# cleanup for the next
client.get(
url_for("form_delete", uuid="all"),

View File

@@ -1,43 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup
def set_original_ignore_response():
    """Write an HTML fixture whose price is split up by empty HTML comments."""
    obfuscated_page = """<html>
<body>
<span>The price is</span><span>$<!-- -->90<!-- -->.<!-- -->74</span>
</body>
</html>
"""
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(obfuscated_page)
def test_obfuscations(client, live_server):
    """Check that a price broken up by HTML comments previews as '$90.74'."""
    set_original_ignore_response()
    live_server_setup(live_server)
    time.sleep(1)

    # Add our URL via the import page
    test_url = url_for('test_endpoint', _external=True)
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data

    # Give the thread time to pick it up
    time.sleep(3)

    # Check the HTML conversion was detected and worked
    preview = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
    assert b'$90.74' in preview.data

View File

@@ -1,104 +0,0 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup
def set_original_ignore_response():
    """Write the three-paragraph baseline HTML fixture for the unique-lines test."""
    baseline_page = """<html>
<body>
<p>Some initial text</p>
<p>Which is across multiple lines</p>
<p>So let's see what happens.</p>
</body>
</html>
"""
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(baseline_page)
# Same paragraphs as the baseline, only in a different order
def set_modified_swapped_lines():
    """Write a fixture with the baseline paragraphs re-ordered and padded."""
    # The last two paragraphs are swapped and carry extra leading whitespace
    # (a space and an &nbsp;), which should get stripped() too.
    swapped_page = """<html>
<body>
<p>Some initial text</p>
<p> So let's see what happens.</p>
<p>&nbsp;Which is across multiple lines</p>
</body>
</html>
"""
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(swapped_page)
def set_modified_with_trigger_text_response():
    """Write a fixture with re-ordered baseline paragraphs plus one new line."""
    page_with_new_line = """<html>
<body>
<p>Some initial text</p>
<p>So let's see what happens.</p>
<p>and a new line!</p>
<p>Which is across multiple lines</p>
</body>
</html>
"""
    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(page_with_new_line)
def test_unique_lines_functionality(client, live_server):
    """Check that 'check_unique_lines' ignores re-ordering but flags new lines.

    Flow: import the watch, enable check_unique_lines on the edit page, then
    recheck against a re-ordered page (no 'unviewed' expected) and against a
    page that adds a new line (expected to become 'unviewed').
    """
    live_server_setup(live_server)
    fetch_wait = 3
    set_original_ignore_response()

    # Give the endpoint time to spin up
    time.sleep(1)

    # Add our URL via the import page
    test_url = url_for('test_endpoint', _external=True)
    import_response = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in import_response.data
    time.sleep(fetch_wait)

    # Enable the unique-lines check on the edit page
    watch_settings = {
        "check_unique_lines": "y",
        "url": test_url,
        "fetch_backend": "html_requests",
    }
    edit_response = client.post(
        url_for("edit_page", uuid="first"),
        data=watch_settings,
        follow_redirects=True
    )
    assert b"Updated watch." in edit_response.data
    assert b'unviewed' not in edit_response.data

    # Make a change that only re-orders the existing lines
    set_modified_swapped_lines()
    time.sleep(fetch_wait)

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Give the thread time to pick it up
    time.sleep(fetch_wait)

    # It should report nothing found (no new 'unviewed' class)
    listing = client.get(url_for("index"))
    assert b'unviewed' not in listing.data

    # Now set the content which contains the new text and re-ordered existing text
    set_modified_with_trigger_text_response()
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
    time.sleep(fetch_wait)
    listing = client.get(url_for("index"))
    assert b'unviewed' in listing.data

View File

@@ -45,6 +45,7 @@ class update_worker(threading.Thread):
try:
changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid)
# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then convert/.decode('utf-8') for the notification etc
@@ -55,18 +56,18 @@ class update_worker(threading.Thread):
except content_fetcher.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text
if self.datastore.data['watching'].get(uuid, False) and self.datastore.data['watching'][uuid].get('css_filter'):
if self.datastore.data['watching'][uuid].get('css_filter'):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (CSS / xPath Filter not found in page?)"})
else:
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
pass
except content_fetcher.EmptyReply as e:
# Some kind of custom to-str handler in the exception handler that does this?
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
err_text = "EmptyReply: Status Code {}".format(e.status_code)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
except content_fetcher.ScreenshotUnavailable as e:
err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
err_text = "Screenshot unavailable, page did not render fully in the expected time"
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
except content_fetcher.PageUnloadable as e:
@@ -98,18 +99,10 @@ class update_worker(threading.Thread):
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
if watch.history_n >= 2:
# At least 2 means there really was a change
self.datastore.update_watch(uuid=uuid, update_obj={'last_changed': round(now)})
watch_history = watch.history
dates = list(watch_history.keys())
# Theoretically it's possible that this could be just 1 long,
# - In the case that the timestamp key was not unique
if len(dates) == 1:
raise ValueError(
"History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?"
)
prev_fname = watch_history[dates[-2]]
dates = list(watch.history.keys())
prev_fname = watch.history[dates[-2]]
# Did it have any notification alerts to hit?
if len(watch['notification_urls']):
@@ -158,7 +151,6 @@ class update_worker(threading.Thread):
# Always record that we at least tried
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
'last_checked': round(time.time())})
# Always save the screenshot if it's available
if screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)

View File

@@ -24,7 +24,7 @@ services:
# https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy
#
# Alternative Playwright URL, do not use "'s or 's!
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/?stealth=1&--disable-web-security=true
# - PLAYWRIGHT_DRIVER_URL=ws://playwright-chrome:3000/
#
# Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password
#
@@ -73,17 +73,6 @@ services:
# hostname: playwright-chrome
# image: browserless/chrome
# restart: unless-stopped
# environment:
# - SCREEN_WIDTH=1920
# - SCREEN_HEIGHT=1024
# - SCREEN_DEPTH=16
# - ENABLE_DEBUGGER=false
# - PREBOOT_CHROME=true
# - CONNECTION_TIMEOUT=300000
# - MAX_CONCURRENT_SESSIONS=10
# - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true
volumes:
changedetection-data: