Compare commits

..

28 Commits

Author SHA1 Message Date
dgtlmoon
a57e5bba6e Re #731 - use our own apprise asset format 2022-06-30 22:55:43 +02:00
dgtlmoon
34b8784f50 Update README.md 2022-06-30 16:16:58 +02:00
dgtlmoon
2b054ced8c [new filter] Filter option - Trigger only when NEW content (lines) are detected ( compared to earlier text snapshots ) (#685) 2022-06-28 18:34:32 +02:00
dgtlmoon
6553980cd5 Playwright - Use HTTP Request Headers override (Cookie, etc) 2022-06-25 23:42:48 +02:00
jtagcat
7c12c47204 lang: prefer 'clear (snap) history' to 'scrub' (#721) 2022-06-25 13:43:57 +02:00
dgtlmoon
dbd9b470d7 Minor diff page improvements - list should be sorted 'newest first' and no need to include the current version to compare against (#716) 2022-06-23 10:16:05 +02:00
dgtlmoon
83555a9991 bug fix: last_changed was being set on the first fetch, should only be set on the change after the first fetch #705 2022-06-23 09:41:55 +02:00
dgtlmoon
5bfdb28bd2 Update README.md 2022-06-16 11:02:22 +02:00
dgtlmoon
31a6a6717b Improve docker-compose.yml browserless docker container example, add env var for STEALTH and BLOCK_ADS (#701) 2022-06-15 23:50:48 +02:00
dgtlmoon
7da32f9ac3 New filter - Block change-detection if text matches - for example, block change-detection while the text "out of stock" is on the page, know when the text is no longer on the page (#698) 2022-06-15 22:59:37 +02:00
dgtlmoon
bb732d3d2e Docker containers - :latest is now stable release, :dev is now master/nightly 2022-06-15 22:59:21 +02:00
dgtlmoon
485e55f9ed Merge branch 'master' of github.com:dgtlmoon/changedetection.io 2022-06-15 19:12:34 +02:00
dgtlmoon
601a20ea49 Trigger filters improvement- it's possible some changes weren't getting detected because the previous checksum only recorded when an event occurred (#697) 2022-06-15 19:11:20 +02:00
dgtlmoon
76996b9eb8 Some changes werent getting triggered because the previous checksum only recorded when an event occured 2022-06-15 17:18:46 +02:00
dgtlmoon
fba2b1a39d Notifications regression bug in 0.39.15 - only sent the first notification URL 2022-06-15 17:05:03 +02:00
dgtlmoon
4a91505af5 Playwright screenshots - no need for high-res "bug workaround" screenshot, use lower quality/faster configurable image quality env var 2022-06-15 10:52:24 +02:00
dgtlmoon
4841c79b4c Adding extra check when updating DB on ReplyWithContentButNoText 2022-06-14 19:54:35 +02:00
dgtlmoon
2ba00d2e1d Notifications log - log what was sent after applying all cleanups 2022-06-14 17:01:13 +02:00
dgtlmoon
19c96f4bdd Re #555 - tgram:// notifications - strip added HTML tag which is not supported by Telegram 2022-06-14 12:00:21 +02:00
dgtlmoon
82b900fbf4 Give more helpful error message when a page doesnt load 2022-06-14 08:16:22 +02:00
dgtlmoon
358a365303 Tweaks to playwright fetch code - better timeout handling 2022-06-13 23:39:43 +02:00
dgtlmoon
a07ca4b136 Re #580 - New functionality - Random "jitter" delay to requests (#681) 2022-06-13 12:41:53 +02:00
dgtlmoon
ba8cf2c8cf 0.39.15 2022-06-12 14:05:34 +02:00
dgtlmoon
3106b6688e Watch overview list - adding spinner to make it easier to see whats currently being 'Checked' 2022-06-12 12:52:17 +02:00
dgtlmoon
2c83845dac Preview section - add helpful check 2022-06-12 11:10:06 +02:00
dgtlmoon
111266d6fa Send test notification - improved handling of errors 2022-06-12 10:47:00 +02:00
dgtlmoon
ead610151f Notification log - also log normal requests and make the log easier to find 2022-06-11 23:07:09 +02:00
dgtlmoon
7e1e763989 Update bug_report.md 2022-06-11 00:43:28 +02:00
31 changed files with 652 additions and 121 deletions

View File

@@ -21,7 +21,7 @@ Steps to reproduce the behavior:
3. Scroll down to '....'
4. See error
! ALWAYS INCLUDE AN EXAMPLE URL WHERE IT IS POSSIBLE TO RE-CREATE THE ISSUE !
! ALWAYS INCLUDE AN EXAMPLE URL WHERE IT IS POSSIBLE TO RE-CREATE THE ISSUE - USE THE 'SHARE WATCH' FEATURE AND PASTE IN THE SHARE-LINK!
**Expected behavior**
A clear and concise description of what you expected to happen.

View File

@@ -85,8 +85,8 @@ jobs:
version: latest
driver-opts: image=moby/buildkit:master
# master always builds :latest
- name: Build and push :latest
# master branch -> :dev container tag
- name: Build and push :dev
id: docker_build
if: ${{ github.ref }} == "refs/heads/master"
uses: docker/build-push-action@v2
@@ -95,12 +95,12 @@ jobs:
file: ./Dockerfile
push: true
tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest,ghcr.io/${{ github.repository }}:latest
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:dev,ghcr.io/${{ github.repository }}:dev
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
# A new tagged release is required, which builds :tag
# A new tagged release is required, which builds :tag and :latest
- name: Build and push :tag
id: docker_build_tag_release
if: github.event_name == 'release' && startsWith(github.event.release.tag_name, '0.')
@@ -110,7 +110,10 @@ jobs:
file: ./Dockerfile
push: true
tags: |
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }},ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:${{ github.event.release.tag_name }}
ghcr.io/dgtlmoon/changedetection.io:${{ github.event.release.tag_name }}
${{ secrets.DOCKER_HUB_USERNAME }}/changedetection.io:latest
ghcr.io/dgtlmoon/changedetection.io:latest
platforms: linux/amd64,linux/arm64,linux/arm/v6,linux/arm/v7
cache-from: type=local,src=/tmp/.buildx-cache
cache-to: type=local,dest=/tmp/.buildx-cache
@@ -125,5 +128,3 @@ jobs:
key: ${{ runner.os }}-buildx-${{ github.sha }}
restore-keys: |
${{ runner.os }}-buildx-

View File

@@ -33,6 +33,7 @@ Free, Open-source web page monitoring, notification and change detection. Don't
- New software releases, security advisories when you're not on their mailing list.
- Festivals with changes
- Realestate listing changes
- Know when your favourite whiskey is on sale, or other special deals are announced before anyone else
- COVID related news from government websites
- University/organisation news from their website
- Detect and monitor changes in JSON API responses
@@ -56,9 +57,9 @@ Easily see what changed, examine by word, line, or individual character.
Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/
### Target elements with the Visual Selector tool.
### Filter by elements using the Visual Selector tool.
Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (available also as part of our subscription service)
Available when connected to a <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Playwright-content-fetcher">playwright content fetcher</a> (included as part of our subscription service)
<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/visualselector-anim.gif" style="max-width:100%;" alt="Self-hosted web page change monitoring context difference " title="Self-hosted web page change monitoring context difference " />
@@ -67,14 +68,18 @@ Available when connected to a <a href="https://github.com/dgtlmoon/changedetecti
### Docker
With Docker composer, just clone this repository and..
```bash
$ docker-compose up -d
```
Docker standalone
```bash
$ docker run -d --restart always -p "127.0.0.1:5000:5000" -v datastore-volume:/datastore --name changedetection.io dgtlmoon/changedetection.io
```
`:latest` tag is our latest stable release, `:dev` tag is our bleeding edge `master` branch.
### Windows
See the install instructions at the wiki https://github.com/dgtlmoon/changedetection.io/wiki/Microsoft-Windows
@@ -114,7 +119,7 @@ See the wiki for more information https://github.com/dgtlmoon/changedetection.io
## Filters
XPath, JSONPath and CSS support comes baked in! You can be as specific as you need, use XPath exported from various XPath element query creation tools.
(We support LXML re:test, re:math and re:replace.)
(We support LXML `re:test`, `re:math` and `re:replace`.)
## Notifications

View File

@@ -44,7 +44,7 @@ from flask_wtf import CSRFProtect
from changedetectionio import html_tools
from changedetectionio.api import api_v1
__version__ = '0.39.14'
__version__ = '0.39.15'
datastore = None
@@ -108,7 +108,7 @@ def _jinja2_filter_datetime(watch_obj, format="%Y-%m-%d %H:%M:%S"):
# Worker thread tells us which UUID it is currently processing.
for t in running_update_threads:
if t.current_uuid == watch_obj['uuid']:
return "Checking now.."
return '<span class="loader"></span><span> Checking now</span>'
if watch_obj['last_checked'] == 0:
return 'Not yet'
@@ -298,7 +298,7 @@ def changedetection_app(config=None, datastore_o=None):
# Sort by last_changed and add the uuid which is usually the key..
sorted_watches = []
# @todo needs a .itemsWithTag() or something
# @todo needs a .itemsWithTag() or something - then we can use that in Jinaj2 and throw this away
for uuid, watch in datastore.data['watching'].items():
if limit_tag != None:
@@ -403,8 +403,6 @@ def changedetection_app(config=None, datastore_o=None):
watch['uuid'] = uuid
sorted_watches.append(watch)
sorted_watches.sort(key=lambda x: x['last_changed'], reverse=True)
existing_tags = datastore.get_all_tags()
form = forms.quickWatchForm(request.form)
@@ -433,7 +431,9 @@ def changedetection_app(config=None, datastore_o=None):
def ajax_callback_send_notification_test():
import apprise
apobj = apprise.Apprise()
from .apprise_asset import asset
apobj = apprise.Apprise(asset=asset)
# validate URLS
if not len(request.form['notification_urls'].strip()):
@@ -459,37 +459,38 @@ def changedetection_app(config=None, datastore_o=None):
return 'OK'
@app.route("/scrub/<string:uuid>", methods=['GET'])
@app.route("/clear_history/<string:uuid>", methods=['GET'])
@login_required
def scrub_watch(uuid):
def clear_watch_history(uuid):
try:
datastore.scrub_watch(uuid)
datastore.clear_watch_history(uuid)
except KeyError:
flash('Watch not found', 'error')
else:
flash("Scrubbed watch {}".format(uuid))
flash("Cleared snapshot history for watch {}".format(uuid))
return redirect(url_for('index'))
@app.route("/scrub", methods=['GET', 'POST'])
@app.route("/clear_history", methods=['GET', 'POST'])
@login_required
def scrub_page():
def clear_all_history():
if request.method == 'POST':
confirmtext = request.form.get('confirmtext')
if confirmtext == 'scrub':
if confirmtext == 'clear':
changes_removed = 0
for uuid in datastore.data['watching'].keys():
datastore.scrub_watch(uuid)
datastore.clear_watch_history(uuid)
#TODO: KeyError not checked, as it is above
flash("Cleared all snapshot history")
flash("Cleared snapshot history for all watches")
else:
flash('Incorrect confirmation text.', 'error')
return redirect(url_for('index'))
output = render_template("scrub.html")
output = render_template("clear_all_history.html")
return output
@@ -656,7 +657,8 @@ def changedetection_app(config=None, datastore_o=None):
current_base_url=datastore.data['settings']['application']['base_url'],
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
visualselector_data_is_ready=visualselector_data_is_ready,
visualselector_enabled=visualselector_enabled
visualselector_enabled=visualselector_enabled,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False)
)
return output
@@ -832,7 +834,7 @@ def changedetection_app(config=None, datastore_o=None):
newest=newest_version_file_contents,
previous=previous_version_file_contents,
extra_stylesheets=extra_stylesheets,
versions=dates[1:],
versions=dates[:-1], # All except current/last
uuid=uuid,
newest_version_timestamp=dates[-1],
current_previous_version=str(previous_version),
@@ -855,6 +857,12 @@ def changedetection_app(config=None, datastore_o=None):
if uuid == 'first':
uuid = list(datastore.data['watching'].keys()).pop()
# Normally you would never reach this, because the 'preview' button is not available when there's no history
# However they may try to clear snapshots and reload the page
if datastore.data['watching'][uuid].history_n == 0:
flash("Preview unavailable - No fetch/check completed or triggers not reached", "error")
return redirect(url_for('index'))
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
try:
@@ -924,7 +932,7 @@ def changedetection_app(config=None, datastore_o=None):
def notification_logs():
global notification_debug_log
output = render_template("notification-log.html",
logs=notification_debug_log if len(notification_debug_log) else ["No errors or warnings detected"])
logs=notification_debug_log if len(notification_debug_log) else ["Notification logs are empty - no notifications sent yet."])
return output
@@ -1244,6 +1252,9 @@ def check_for_new_version():
def notification_runner():
global notification_debug_log
from datetime import datetime
import json
while not app.config.exit.is_set():
try:
# At the moment only one thread runs (single runner)
@@ -1252,10 +1263,14 @@ def notification_runner():
time.sleep(1)
else:
# Process notifications
now = datetime.now()
sent_obj = None
try:
from changedetectionio import notification
notification.process_notification(n_object, datastore)
sent_obj = notification.process_notification(n_object, datastore)
except Exception as e:
logging.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
@@ -1268,9 +1283,10 @@ def notification_runner():
log_lines = str(e).splitlines()
notification_debug_log += log_lines
# Trim the log length
notification_debug_log = notification_debug_log[-100:]
# Process notifications
notification_debug_log+= ["{} - SENDING - {}".format(now.strftime("%Y/%m/%d %H:%M:%S,000"), json.dumps(sent_obj))]
# Trim the log length
notification_debug_log = notification_debug_log[-100:]
# Thread runner to check every minute, look for new watches to feed into the Queue.
def ticker_thread_check_time_launch_checks():
@@ -1356,4 +1372,4 @@ def ticker_thread_check_time_launch_checks():
time.sleep(1)
# Should be low so we can break this out in testing
app.config.exit.wait(1)
app.config.exit.wait(1)

View File

@@ -0,0 +1,11 @@
import apprise
# Create our AppriseAsset and populate it with some of our new values:
# https://github.com/caronc/apprise/wiki/Development_API#the-apprise-asset-object
asset = apprise.AppriseAsset(
image_url_logo='https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
)
asset.app_id = "changedetection.io"
asset.app_desc = "ChangeDetection.io best and simplest website monitoring and change detection"
asset.app_url = "https://changedetection.io"

View File

@@ -35,7 +35,7 @@ def main():
create_datastore_dir = False
for opt, arg in opts:
# if opt == '--purge':
# if opt == '--clear-all-history':
# Remove history, the actual files you need to delete manually.
# for uuid, watch in datastore.data['watching'].items():
# watch.update({'history': {}, 'last_checked': 0, 'last_changed': 0, 'previous_md5': None})

View File

@@ -281,13 +281,14 @@ class base_html_playwright(Fetcher):
from playwright.sync_api import sync_playwright
import playwright._impl._api_types
from playwright._impl._api_types import Error, TimeoutError
response = None
with sync_playwright() as p:
browser_type = getattr(p, self.browser_type)
# Seemed to cause a connection Exception even tho I can see it connect
# self.browser = browser_type.connect(self.command_executor, timeout=timeout*1000)
browser = browser_type.connect_over_cdp(self.command_executor, timeout=timeout * 1000)
# 60,000 connection timeout only
browser = browser_type.connect_over_cdp(self.command_executor, timeout=60000)
# Set user agent to prevent Cloudflare from blocking the browser
# Use the default one configured in the App.py model that's passed from fetch_site_status.py
@@ -300,21 +301,29 @@ class base_html_playwright(Fetcher):
accept_downloads=False
)
if len(request_headers):
context.set_extra_http_headers(request_headers)
page = context.new_page()
try:
page.set_default_navigation_timeout(90000)
page.set_default_timeout(90000)
# Bug - never set viewport size BEFORE page.goto
response = page.goto(url, timeout=timeout * 1000, wait_until='commit')
# Wait_until = commit
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
# This seemed to solve nearly all 'TimeoutErrors'
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
page.wait_for_timeout(extra_wait * 1000)
# Waits for the next navigation. Using Python context manager
# prevents a race condition between clicking and waiting for a navigation.
with page.expect_navigation():
response = page.goto(url, wait_until='load')
except playwright._impl._api_types.TimeoutError as e:
context.close()
browser.close()
raise EmptyReply(url=url, status_code=None)
# This can be ok, we will try to grab what we could retrieve
pass
except Exception as e:
print ("other exception when page.goto")
print (str(e))
context.close()
browser.close()
raise PageUnloadable(url=url, status_code=None)
@@ -322,18 +331,22 @@ class base_html_playwright(Fetcher):
if response is None:
context.close()
browser.close()
raise EmptyReply(url=url, status_code=None)
if len(page.content().strip()) == 0:
context.close()
browser.close()
print ("response object was none")
raise EmptyReply(url=url, status_code=None)
# Bug 2(?) Set the viewport size AFTER loading the page
page.set_viewport_size({"width": 1280, "height": 1024})
self.status_code = response.status
page.set_viewport_size({"width": 1280, "height": 1024})
extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay
time.sleep(extra_wait)
self.content = page.content()
self.status_code = response.status
if len(self.content.strip()) == 0:
context.close()
browser.close()
print ("Content was empty")
raise EmptyReply(url=url, status_code=None)
self.headers = response.all_headers()
if current_css_filter is not None:
@@ -346,9 +359,15 @@ class base_html_playwright(Fetcher):
# Bug 3 in Playwright screenshot handling
# Some bug where it gives the wrong screenshot size, but making a request with the clip set first seems to solve it
# JPEG is better here because the screenshots can be very very large
# Screenshots also travel via the ws:// (websocket) meaning that the binary data is base64 encoded
# which will significantly increase the IO size between the server and client, it's recommended to use the lowest
# acceptable screenshot quality here
try:
page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024})
self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=92)
# Quality set to 1 because it's not used, just used as a work-around for a bug, no need to change this.
page.screenshot(type='jpeg', clip={'x': 1.0, 'y': 1.0, 'width': 1280, 'height': 1024}, quality=1)
# The actual screenshot
self.screenshot = page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("PLAYWRIGHT_SCREENSHOT_QUALITY", 72)))
except Exception as e:
context.close()
browser.close()

View File

@@ -1,4 +1,5 @@
import hashlib
import logging
import os
import re
import time
@@ -224,29 +225,37 @@ class perform_site_check():
else:
fetched_md5 = hashlib.md5(stripped_text_from_html).hexdigest()
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
update_obj["previous_md5"] = fetched_md5
blocked_by_not_found_trigger_text = False
############ Blocking rules, after checksum #################
blocked = False
if len(watch['trigger_text']):
# Yeah, lets block first until something matches
blocked_by_not_found_trigger_text = True
# Assume blocked
blocked = True
# Filter and trigger works the same, so reuse it
# It should return the line numbers that match
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
wordlist=watch['trigger_text'],
mode="line numbers")
# If it returned any lines that matched..
# Unblock if the trigger was found
if result:
blocked_by_not_found_trigger_text = False
blocked = False
if not blocked_by_not_found_trigger_text and watch['previous_md5'] != fetched_md5:
if len(watch['text_should_not_be_present']):
# If anything matched, then we should block a change from happening
result = html_tools.strip_ignore_text(content=str(stripped_text_from_html),
wordlist=watch['text_should_not_be_present'],
mode="line numbers")
if result:
blocked = True
# The main thing that all this at the moment comes down to :)
if watch['previous_md5'] != fetched_md5:
changed_detected = True
update_obj["previous_md5"] = fetched_md5
update_obj["last_changed"] = timestamp
# Looks like something changed, but did it match all the rules?
if blocked:
changed_detected = False
# Extract title as title
if is_html:
@@ -254,4 +263,21 @@ class perform_site_check():
if not watch['title'] or not len(watch['title']):
update_obj['title'] = html_tools.extract_element(find='title', html_content=fetcher.content)
if changed_detected:
if watch.get('check_unique_lines', False):
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
# One or more lines? unsure?
if not has_unique_lines:
logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
changed_detected = False
else:
logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
# On the first run of a site, watch['previous_md5'] will be None, set it the current one.
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data

View File

@@ -340,7 +340,10 @@ class watchForm(commonSettingsForm):
body = TextAreaField('Request body', [validators.Optional()])
method = SelectField('Request method', choices=valid_method, default=default_method)
ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
check_unique_lines = BooleanField('Only trigger when new lines appear', default=False)
trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
text_should_not_be_present = StringListField('Block change-detection if text matches', [validators.Optional(), ValidateListRegex()])
save_button = SubmitField('Save', render_kw={"class": "pure-button pure-button-primary"})
save_and_preview_button = SubmitField('Save & Preview', render_kw={"class": "pure-button pure-button-primary"})
proxy = RadioField('Proxy')

View File

@@ -38,8 +38,10 @@ class model(dict):
'extract_text': [], # Extract text by regex after filters
'subtractive_selectors': [],
'trigger_text': [], # List of text or regex to wait for until a change is detected
'text_should_not_be_present': [], # Text that should not present
'fetch_backend': None,
'extract_title_as_title': False,
'check_unique_lines': False, # On change-detected, compare against all history if its something new
'proxy': None, # Preferred proxy connection
# Re #110, so then if this is set to None, we know to use the default value instead
# Requires setting to None on submit if it's the same as the default
@@ -85,7 +87,7 @@ class model(dict):
# Read the history file as a dict
fname = os.path.join(self.__datastore_path, self.get('uuid'), "history.txt")
if os.path.isfile(fname):
logging.debug("Disk IO accessed " + str(time.time()))
logging.debug("Reading history index " + str(time.time()))
with open(fname, "r") as f:
tmp_history = dict(i.strip().split(',', 2) for i in f.readlines())
@@ -162,3 +164,16 @@ class model(dict):
if x:
seconds += x * n
return seconds
# Iterate over all history texts and see if something new exists
def lines_contain_something_unique_compared_to_history(self, lines=[]):
local_lines = [l.decode('utf-8').strip().lower() for l in lines]
# Compare each lines (set) against each history text file (set) looking for something new..
for k, v in self.history.items():
alist = [line.decode('utf-8').strip().lower() for line in open(v, 'rb')]
res = set(alist) != set(local_lines)
if res:
return True
return False

View File

@@ -48,9 +48,10 @@ def process_notification(n_object, datastore):
# Anything higher than or equal to WARNING (which covers things like Connection errors)
# raise it as an exception
apobjs=[]
sent_objs=[]
from .apprise_asset import asset
for url in n_object['notification_urls']:
apobj = apprise.Apprise(debug=True)
apobj = apprise.Apprise(debug=True, asset=asset)
url = url.strip()
if len(url):
print(">> Process Notification: AppRise notifying {}".format(url))
@@ -67,6 +68,11 @@ def process_notification(n_object, datastore):
url += k + 'avatar_url=https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/changedetectionio/static/images/avatar-256x256.png'
if url.startswith('tgram://'):
# Telegram only supports a limit subset of HTML, remove the '<br/>' we place in.
# re https://github.com/dgtlmoon/changedetection.io/issues/555
# @todo re-use an existing library we have already imported to strip all non-allowed tags
n_body = n_body.replace('<br/>', '\n')
n_body = n_body.replace('</br>', '\n')
# real limit is 4096, but minus some for extra metadata
payload_max_size = 3600
body_limit = max(0, payload_max_size - len(n_title))
@@ -96,6 +102,15 @@ def process_notification(n_object, datastore):
log_value = logs.getvalue()
if log_value and 'WARNING' in log_value or 'ERROR' in log_value:
raise Exception(log_value)
sent_objs.append({'title': n_title,
'body': n_body,
'url' : url,
'body_format': n_format})
# Return what was sent for better logging - after the for loop
return sent_objs
# Notification title + body content parameters get created here.
def create_notification_parameters(n_object, datastore):

View File

@@ -9,8 +9,6 @@
# exit when any command fails
set -e
export MINIMUM_SECONDS_RECHECK_TIME=0
find tests/test_*py -type f|while read test_name
do
echo "TEST RUNNING $test_name"

View File

@@ -40,13 +40,19 @@ $(document).ready(function() {
$.ajax({
type: "POST",
url: notification_base_url,
data : data
data : data,
statusCode: {
400: function() {
// More than likely the CSRF token was lost when the server restarted
alert("There was a problem processing the request, please reload the page.");
}
}
}).done(function(data){
console.log(data);
alert('Sent');
}).fail(function(data){
console.log(data);
alert('Error: '+data.responseJSON.error);
alert('There was an error communicating with the server.');
})
});
});

View File

@@ -1,13 +1,30 @@
$(document).ready(function() {
function toggle() {
if ($('input[name="fetch_backend"]:checked').val() != 'html_requests') {
$('#requests-override-options').hide();
if ($('input[name="fetch_backend"]:checked').val() == 'html_webdriver') {
if(playwright_enabled) {
// playwright supports headers, so hide everything else
// See #664
$('#requests-override-options #request-method').hide();
$('#requests-override-options #request-body').hide();
// @todo connect this one up
$('#ignore-status-codes-option').hide();
} else {
// selenium/webdriver doesnt support anything afaik, hide it all
$('#requests-override-options').hide();
}
$('#webdriver-override-options').show();
} else {
$('#requests-override-options').show();
$('#requests-override-options *:hidden').show();
$('#webdriver-override-options').hide();
}
}
$('input[name="fetch_backend"]').click(function (e) {
toggle();
});

View File

@@ -353,6 +353,8 @@ and also iPads specifically.
/* Hide table headers (but not display: none;, for accessibility) */ }
.watch-table thead, .watch-table tbody, .watch-table th, .watch-table td, .watch-table tr {
display: block; }
.watch-table .last-checked > span {
vertical-align: middle; }
.watch-table .last-checked::before {
color: #555;
content: "Last Checked "; }
@@ -370,7 +372,8 @@ and also iPads specifically.
.watch-table td {
/* Behave like a "row" */
border: none;
border-bottom: 1px solid #eee; }
border-bottom: 1px solid #eee;
vertical-align: middle; }
.watch-table td:before {
/* Top/left values mimic padding */
top: 6px;
@@ -490,3 +493,42 @@ ul {
#api-key-copy {
color: #0078e7; }
/* spinner */
.loader,
.loader:after {
border-radius: 50%;
width: 10px;
height: 10px; }
.loader {
margin: 0px auto;
font-size: 3px;
vertical-align: middle;
display: inline-block;
text-indent: -9999em;
border-top: 1.1em solid rgba(38, 104, 237, 0.2);
border-right: 1.1em solid rgba(38, 104, 237, 0.2);
border-bottom: 1.1em solid rgba(38, 104, 237, 0.2);
border-left: 1.1em solid #2668ed;
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear; }
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg); }
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg); } }

View File

@@ -487,6 +487,11 @@ and also iPads specifically.
display: block;
}
.last-checked {
> span {
vertical-align: middle;
}
}
.last-checked::before {
color: #555;
content: "Last Checked ";
@@ -517,7 +522,7 @@ and also iPads specifically.
/* Behave like a "row" */
border: none;
border-bottom: 1px solid #eee;
vertical-align: middle;
&:before {
/* Top/left values mimic padding */
top: 6px;
@@ -701,3 +706,48 @@ ul {
#api-key-copy {
color: #0078e7;
}
/* spinner */
.loader,
.loader:after {
border-radius: 50%;
width: 10px;
height: 10px;
}
.loader {
margin: 0px auto;
font-size: 3px;
vertical-align: middle;
display: inline-block;
text-indent: -9999em;
border-top: 1.1em solid rgba(38,104,237, 0.2);
border-right: 1.1em solid rgba(38,104,237, 0.2);
border-bottom: 1.1em solid rgba(38,104,237, 0.2);
border-left: 1.1em solid #2668ed;
-webkit-transform: translateZ(0);
-ms-transform: translateZ(0);
transform: translateZ(0);
-webkit-animation: load8 1.1s infinite linear;
animation: load8 1.1s infinite linear;
}
@-webkit-keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
}
@keyframes load8 {
0% {
-webkit-transform: rotate(0deg);
transform: rotate(0deg);
}
100% {
-webkit-transform: rotate(360deg);
transform: rotate(360deg);
}
}

View File

@@ -250,7 +250,7 @@ class ChangeDetectionStore:
return self.data['watching'][uuid].get(val)
# Remove a watchs data but keep the entry (URL etc)
def scrub_watch(self, uuid):
def clear_watch_history(self, uuid):
import pathlib
self.__data['watching'][uuid].update(
@@ -290,14 +290,15 @@ class ChangeDetectionStore:
headers={'App-Guid': self.__data['app_guid']})
res = r.json()
# List of permisable stuff we accept from the wild internet
# List of permissible attributes we accept from the wild internet
for k in ['url', 'tag',
'paused', 'title',
'previous_md5', 'headers',
'body', 'method',
'ignore_text', 'css_filter',
'subtractive_selectors', 'trigger_text',
'extract_title_as_title', 'extract_text']:
'paused', 'title',
'previous_md5', 'headers',
'body', 'method',
'ignore_text', 'css_filter',
'subtractive_selectors', 'trigger_text',
'extract_title_as_title', 'extract_text',
'text_should_not_be_present']:
if res.get(k):
apply_extras[k] = res[k]
@@ -517,3 +518,11 @@ class ChangeDetectionStore:
# But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
# In the distant future we can remove this entirely
self.data['watching'][uuid]['history'] = {}
# We incorrectly stored last_changed when there was not a change, and then confused the output list table
def update_3(self):
for uuid, watch in self.data['watching'].items():
# Be sure it's recalculated
p = watch.history
if watch.history_n < 2:
watch['last_changed'] = 0

View File

@@ -14,7 +14,7 @@
<li>Use <a target=_new href="https://github.com/caronc/apprise">AppRise URLs</a> for notification to just about any service! <i><a target=_new href="https://github.com/dgtlmoon/changedetection.io/wiki/Notification-configuration-notes">Please read the notification services wiki here for important configuration notes</a></i>.</li>
<li><code>discord://</code> only supports a maximum <strong>2,000 characters</strong> of notification text, including the title.</li>
<li><code>tgram://</code> bots cant send messages to other bots, so you should specify chat ID of non-bot user.</li>
<li>Go here for <a href="{{url_for('notification_logs')}}">notification debug logs</a></li>
<li><code>tgram://</code> only supports very limited HTML and can fail when extra tags are sent, <a href="https://core.telegram.org/bots/api#html-style">read more here</a> (or use plaintext/markdown format)</li>
</ul>
</div>
<br/>
@@ -22,6 +22,7 @@
{% if emailprefix %}
<a id="add-email-helper" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Add email</a>
{% endif %}
<a href="{{url_for('notification_logs')}}" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Notification debug logs</a>
</div>
<div id="notification-customisation" class="pure-control-group">
<div class="pure-control-group">

View File

@@ -3,22 +3,22 @@
{% block content %}
<div class="edit-form">
<div class="box-wrap inner">
<form class="pure-form pure-form-stacked" action="{{url_for('scrub_page')}}" method="POST">
<form class="pure-form pure-form-stacked" action="{{url_for('clear_all_history')}}" method="POST">
<input type="hidden" name="csrf_token" value="{{ csrf_token() }}"/>
<fieldset>
<div class="pure-control-group">
This will remove ALL version snapshots/data, but keep your list of URLs. <br/>
This will remove version history (snapshots) for ALL watches, but keep your list of URLs! <br/>
You may like to use the <strong>BACKUP</strong> link first.<br/>
</div>
<br/>
<div class="pure-control-group">
<label for="confirmtext">Confirmation text</label>
<input type="text" id="confirmtext" required="" name="confirmtext" value="" size="10"/>
<span class="pure-form-message-inline">Type in the word <strong>scrub</strong> to confirm that you understand!</span>
<span class="pure-form-message-inline">Type in the word <strong>clear</strong> to confirm that you understand.</span>
</div>
<br/>
<div class="pure-control-group">
<button type="submit" class="pure-button pure-button-primary">Scrub!</button>
<button type="submit" class="pure-button pure-button-primary">Clear History!</button>
</div>
<br/>
<div class="pure-control-group">

View File

@@ -22,7 +22,7 @@
{% if versions|length >= 1 %}
<label for="diff-version">Compare newest (<span id="current-v-date"></span>) with</label>
<select id="diff-version" name="previous_version">
{% for version in versions %}
{% for version in versions|reverse %}
<option value="{{version}}" {% if version== current_previous_version %} selected="" {% endif %}>
{{version}}
</option>

View File

@@ -7,6 +7,7 @@
const notification_base_url="{{url_for('ajax_callback_send_notification_test')}}";
const watch_visual_selector_data_url="{{url_for('static_content', group='visual_selector_data', filename=uuid)}}";
const screenshot_url="{{url_for('static_content', group='screenshot', filename=uuid)}}";
const playwright_enabled={% if playwright_enabled %} true {% else %} false {% endif %};
{% if emailprefix %}
const email_notification_prefix=JSON.parse('{{ emailprefix|tojson }}');
@@ -81,13 +82,14 @@
</div>
{% endif %}
<fieldset id="webdriver-override-options">
<div class="pure-form-message-inline">
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
<br/>
This will wait <i>n</i> seconds before extracting the text.
</div>
<div class="pure-control-group">
{{ render_field(form.webdriver_delay) }}
<div class="pure-form-message-inline">
<strong>If you're having trouble waiting for the page to be fully rendered (text missing etc), try increasing the 'wait' time here.</strong>
<br/>
This will wait <i>n</i> seconds before extracting the text.
</div>
</div>
{% if using_global_webdriver_wait %}
<div class="pure-form-message-inline">
@@ -96,18 +98,20 @@
{% endif %}
</fieldset>
<fieldset class="pure-group" id="requests-override-options">
<div class="pure-form-message-inline">
<strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
</div>
<div class="pure-control-group">
{% if not playwright_enabled %}
<div class="pure-form-message-inline">
<strong>Request override is currently only used by the <i>Basic fast Plaintext/HTTP Client</i> method.</strong>
</div>
{% endif %}
<div class="pure-control-group" id="request-method">
{{ render_field(form.method) }}
</div>
<div class="pure-control-group">
<div class="pure-control-group" id="request-headers">
{{ render_field(form.headers, rows=5, placeholder="Example
Cookie: foobar
User-Agent: wonderbra 1.0") }}
</div>
<div class="pure-control-group">
<div class="pure-control-group" id="request-body">
{{ render_field(form.body, rows=5, placeholder="Example
{
\"name\":\"John\",
@@ -115,7 +119,7 @@ User-Agent: wonderbra 1.0") }}
\"car\":null
}") }}
</div>
<div>
<div id="ignore-status-codes-option">
{{ render_checkbox_field(form.ignore_status_codes) }}
</div>
</fieldset>
@@ -143,6 +147,12 @@ User-Agent: wonderbra 1.0") }}
</li>
</ul>
</div>
<fieldset>
<div class="pure-control-group">
{{ render_checkbox_field(form.check_unique_lines) }}
<span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
</div>
</fieldset>
<div class="pure-control-group">
{{ render_field(form.css_filter, placeholder=".class-name or #some-id, or other CSS selector rule.",
class="m-d") }}
@@ -199,6 +209,22 @@ nav
</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.text_should_not_be_present, rows=5, placeholder="For example: Out of stock
Sold out
Not in stock
Unavailable") }}
<span class="pure-form-message-inline">
<ul>
<li>Block change-detection while this text is on the page, all text and regex are tested <i>case-insensitive</i>, good for waiting for when a product is available again</li>
<li>Block text is processed from the result-text that comes out of any CSS/JSON Filters for this watch</li>
<li>All lines here must not exist (think of each line as "OR")</li>
<li>Note: Wrap in forward slash / to use regex example: <code>/foo\d/</code></li>
</ul>
</span>
</div>
</fieldset>
<fieldset>
<div class="pure-control-group">
{{ render_field(form.extract_text, rows=5, placeholder="\d+ online") }}
@@ -259,8 +285,8 @@ nav
<a href="{{url_for('form_delete', uuid=uuid)}}"
class="pure-button button-small button-error ">Delete</a>
<a href="{{url_for('scrub_watch', uuid=uuid)}}"
class="pure-button button-small button-error ">Scrub</a>
<a href="{{url_for('clear_watch_history', uuid=uuid)}}"
class="pure-button button-small button-error ">Clear History</a>
<a href="{{url_for('form_clone', uuid=uuid)}}"
class="pure-button button-small ">Create Copy</a>
</div>

View File

@@ -4,7 +4,7 @@
<div class="edit-form">
<div class="inner">
<h4 style="margin-top: 0px;">The following issues were detected when sending notifications</h4>
<h4 style="margin-top: 0px;">Notification debug log</h4>
<div id="notification-error-log">
<ul style="font-size: 80%; margin:0px; padding: 0 0 0 7px">
{% for log in logs|reverse %}

View File

@@ -173,7 +173,7 @@ nav
<div class="pure-control-group">
{{ render_button(form.save_button) }}
<a href="{{url_for('index')}}" class="pure-button button-small button-cancel">Back</a>
<a href="{{url_for('scrub_page')}}" class="pure-button button-small button-cancel">Delete History Snapshot Data</a>
<a href="{{url_for('clear_all_history')}}" class="pure-button button-small button-cancel">Clear Snapshot History</a>
</div>
</div>

View File

@@ -40,7 +40,7 @@
<tbody>
{% for watch in watches %}
{% for watch in watches|sort(attribute='last_changed', reverse=True) %}
<tr id="{{ watch.uuid }}"
class="{{ loop.cycle('pure-table-odd', 'pure-table-even') }}
{% if watch.last_error is defined and watch.last_error != False %}error{% endif %}
@@ -67,8 +67,8 @@
<span class="watch-tag-list">{{ watch.tag}}</span>
{% endif %}
</td>
<td class="last-checked">{{watch|format_last_checked_time}}</td>
<td class="last-changed">{% if watch.history_n >=2 and watch.last_changed %}
<td class="last-checked">{{watch|format_last_checked_time|safe}}</td>
<td class="last-changed">{% if watch.history_n >=2 and watch.last_changed >0 %}
{{watch.last_changed|format_timestamp_timeago}}
{% else %}
Not yet

View File

@@ -32,6 +32,8 @@ def app(request):
"""Create application for the tests."""
datastore_path = "./test-datastore"
# So they don't delay in fetching
os.environ["MINIMUM_SECONDS_RECHECK_TIME"] = "0"
try:
os.mkdir(datastore_path)
except FileExistsError:

View File

@@ -95,6 +95,8 @@ def test_api_simple(client, live_server):
assert watch_uuid in json.loads(res.data).keys()
before_recheck_info = json.loads(res.data)[watch_uuid]
assert before_recheck_info['last_checked'] != 0
#705 `last_changed` should be zero on the first check
assert before_recheck_info['last_changed'] == 0
assert before_recheck_info['title'] == 'My test URL'
set_modified_response()

View File

@@ -0,0 +1,137 @@
#!/usr/bin/python3
import time
from flask import url_for
from . util import live_server_setup
from changedetectionio import html_tools
def set_original_ignore_response():
test_return_data = """<html>
<body>
Some initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_modified_original_ignore_response():
test_return_data = """<html>
<body>
Some NEW nice initial text</br>
<p>Which is across multiple lines</p>
</br>
So let's see what happens. </br>
<p>new ignore stuff</p>
<p>out of stock</p>
<p>blah</p>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
# Is the same but includes ZZZZZ, 'ZZZZZ' is the last line in ignore_text
def set_modified_response_minus_block_text():
test_return_data = """<html>
<body>
Some NEW nice initial text</br>
<p>Which is across multiple lines</p>
<p>now on sale $2/p>
</br>
So let's see what happens. </br>
<p>new ignore stuff</p>
<p>blah</p>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_check_block_changedetection_text_NOT_present(client, live_server):
sleep_time_for_fetch_thread = 3
live_server_setup(live_server)
# Use a mix of case in ZzZ to prove it works case-insensitive.
ignore_text = "out of stoCk\r\nfoobar"
set_original_ignore_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Goto the edit page, add our ignore text
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"text_should_not_be_present": ignore_text, "url": test_url, 'fetch_backend': "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# Check it saved
res = client.get(
url_for("edit_page", uuid="first"),
)
assert bytes(ignore_text.encode('utf-8')) in res.data
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
# The page changed, BUT the text is still there, just the rest of it changes, we should not see a change
set_modified_original_ignore_response()
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
assert b'/test-endpoint' in res.data
# Now we set a change where the text is gone, it should now trigger
set_modified_response_minus_block_text()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("index"))
assert b'unviewed' in res.data
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

View File

@@ -154,6 +154,10 @@ def test_check_notification(client, live_server):
time.sleep(1)
assert os.path.exists("test-datastore/notification.txt") == False
res = client.get(url_for("notification_logs"))
# be sure we see it in the output log
assert b'New ChangeDetection.io Notification - ' + test_url.encode('utf-8') in res.data
# cleanup for the next
client.get(
url_for("form_delete", uuid="all"),

View File

@@ -0,0 +1,104 @@
#!/usr/bin/python3
import time
from flask import url_for
from .util import live_server_setup
def set_original_ignore_response():
test_return_data = """<html>
<body>
<p>Some initial text</p>
<p>Which is across multiple lines</p>
<p>So let's see what happens.</p>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
# The same but just re-ordered the text
def set_modified_swapped_lines():
# Re-ordered and with some whitespacing, should get stripped() too.
test_return_data = """<html>
<body>
<p>Some initial text</p>
<p> So let's see what happens.</p>
<p>&nbsp;Which is across multiple lines</p>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def set_modified_with_trigger_text_response():
test_return_data = """<html>
<body>
<p>Some initial text</p>
<p>So let's see what happens.</p>
<p>and a new line!</p>
<p>Which is across multiple lines</p>
</body>
</html>
"""
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write(test_return_data)
def test_unique_lines_functionality(client, live_server):
live_server_setup(live_server)
sleep_time_for_fetch_thread = 3
set_original_ignore_response()
# Give the endpoint time to spin up
time.sleep(1)
# Add our URL to the import page
test_url = url_for('test_endpoint', _external=True)
res = client.post(
url_for("import_page"),
data={"urls": test_url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
# Add our URL to the import page
res = client.post(
url_for("edit_page", uuid="first"),
data={"check_unique_lines": "y",
"url": test_url,
"fetch_backend": "html_requests"},
follow_redirects=True
)
assert b"Updated watch." in res.data
assert b'unviewed' not in res.data
# Make a change
set_modified_swapped_lines()
time.sleep(sleep_time_for_fetch_thread)
# Trigger a check
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# Now set the content which contains the new text and re-ordered existing text
set_modified_with_trigger_text_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
time.sleep(sleep_time_for_fetch_thread)
res = client.get(url_for("index"))
assert b'unviewed' in res.data

View File

@@ -45,7 +45,6 @@ class update_worker(threading.Thread):
try:
changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid)
# Re #342
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
# We then convert/.decode('utf-8') for the notification etc
@@ -56,18 +55,18 @@ class update_worker(threading.Thread):
except content_fetcher.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text
if self.datastore.data['watching'][uuid].get('css_filter'):
if self.datastore.data['watching'].get(uuid, False) and self.datastore.data['watching'][uuid].get('css_filter'):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found (CSS / xPath Filter not found in page?)"})
else:
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': "Got HTML content but no text found."})
pass
except content_fetcher.EmptyReply as e:
# Some kind of custom to-str handler in the exception handler that does this?
err_text = "EmptyReply: Status Code {}".format(e.status_code)
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
except content_fetcher.ScreenshotUnavailable as e:
err_text = "Screenshot unavailable, page did not render fully in the expected time"
err_text = "Screenshot unavailable, page did not render fully in the expected time - try increasing 'Wait seconds before extracting text'"
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
except content_fetcher.PageUnloadable as e:
@@ -99,10 +98,18 @@ class update_worker(threading.Thread):
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
if watch.history_n >= 2:
# Atleast 2, means there really was a change
self.datastore.update_watch(uuid=uuid, update_obj={'last_changed': round(now)})
dates = list(watch.history.keys())
prev_fname = watch.history[dates[-2]]
watch_history = watch.history
dates = list(watch_history.keys())
# Theoretically it's possible that this could be just 1 long,
# - In the case that the timestamp key was not unique
if len(dates) == 1:
raise ValueError(
"History index had 2 or more, but only 1 date loaded, timestamps were not unique? maybe two of the same timestamps got written, needs more delay?"
)
prev_fname = watch_history[dates[-2]]
# Did it have any notification alerts to hit?
if len(watch['notification_urls']):
@@ -151,6 +158,7 @@ class update_worker(threading.Thread):
# Always record that we atleast tried
self.datastore.update_watch(uuid=uuid, update_obj={'fetch_time': round(time.time() - now, 3),
'last_checked': round(time.time())})
# Always save the screenshot if it's available
if screenshot:
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)

View File

@@ -73,6 +73,20 @@ services:
# hostname: playwright-chrome
# image: browserless/chrome
# restart: unless-stopped
# environment:
# - SCREEN_WIDTH=1920
# - SCREEN_HEIGHT=1024
# - SCREEN_DEPTH=16
# - ENABLE_DEBUGGER=false
# - SCREEN_WIDTH=1280
# - SCREEN_HEIGHT=1024
# - SCREEN_DEPTH=16
# - PREBOOT_CHROME=true
# - CONNECTION_TIMEOUT=300000
# - MAX_CONCURRENT_SESSIONS=10
# - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true
volumes:
changedetection-data: