mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-05 17:16:12 +00:00
Compare commits
9 Commits
remove-unu
...
watch-queu
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
58b79170e4 | ||
|
|
1193a7f22c | ||
|
|
0b976827bb | ||
|
|
280e916033 | ||
|
|
5494e61a05 | ||
|
|
e461c0b819 | ||
|
|
d67c654f37 | ||
|
|
06ab34b6af | ||
|
|
ba8676c4ba |
22
README.md
22
README.md
@@ -1,21 +1,15 @@
|
||||
# changedetection.io
|
||||
## Web Site Change Detection, Monitoring and Notification.
|
||||
|
||||
[**Try our $6.99/month subscription - Unlimited checks and watches!**](https://lemonade.changedetection.io/start)
|
||||
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
|
||||
|
||||
[![Release Version][release-shield]][release-link] [![Docker Pulls][docker-pulls]][docker-link] [![License][license-shield]](LICENSE.md)
|
||||
|
||||

|
||||
|
||||
## Web Site Change Detection, Monitoring and Notification - Self-Hosted or SaaS.
|
||||
|
||||
_Know when web pages change! Stay ontop of new information! get notifications when important website content changes_
|
||||
|
||||
Live your data-life *pro-actively* instead of *re-actively*.
|
||||
|
||||
Free, Open-source web page monitoring, notification and change detection. Don't have time? [**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start)
|
||||
|
||||
|
||||
[<img src="https://raw.githubusercontent.com/dgtlmoon/changedetection.io/master/docs/screenshot.png" style="max-width:100%;" alt="Self-hosted web page change monitoring" title="Self-hosted web page change monitoring" />](https://lemonade.changedetection.io/start)
|
||||
|
||||
|
||||
**Get your own private instance now! Let us host it for you!**
|
||||
Know when important content changes, we support notifications via Discord, Telegram, Home-Assistant, Slack, Email and 70+ more
|
||||
|
||||
[**Try our $6.99/month subscription - unlimited checks and watches!**](https://lemonade.changedetection.io/start) , _half the price of other website change monitoring services and comes with unlimited watches & checks!_
|
||||
|
||||
|
||||
@@ -1385,13 +1385,18 @@ def ticker_thread_check_time_launch_checks():
|
||||
seconds_since_last_recheck = now - watch['last_checked']
|
||||
if seconds_since_last_recheck >= (threshold + watch.jitter_seconds) and seconds_since_last_recheck >= recheck_time_minimum_seconds:
|
||||
if not uuid in running_uuids and uuid not in [q_uuid for p,q_uuid in update_q.queue]:
|
||||
print("> Queued watch UUID {} last checked at {} queued at {:0.2f} jitter {:0.2f}s, {:0.2f}s since last checked".format(uuid,
|
||||
watch['last_checked'],
|
||||
now,
|
||||
watch.jitter_seconds,
|
||||
now - watch['last_checked']))
|
||||
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
|
||||
priority = int(time.time())
|
||||
print(
|
||||
"> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
|
||||
uuid,
|
||||
watch['last_checked'],
|
||||
now,
|
||||
priority,
|
||||
watch.jitter_seconds,
|
||||
now - watch['last_checked']))
|
||||
# Into the queue with you
|
||||
update_q.put((5, uuid))
|
||||
update_q.put((priority, uuid))
|
||||
|
||||
# Reset for next time
|
||||
watch.jitter_seconds = 0
|
||||
|
||||
@@ -31,11 +31,12 @@ class JSActionExceptions(Exception):
|
||||
return
|
||||
|
||||
class PageUnloadable(Exception):
|
||||
def __init__(self, status_code, url, screenshot=False):
|
||||
def __init__(self, status_code, url, screenshot=False, message=False):
|
||||
# Set this so we can use it in other parts of the app
|
||||
self.status_code = status_code
|
||||
self.url = url
|
||||
self.screenshot = screenshot
|
||||
self.message = message
|
||||
return
|
||||
|
||||
class EmptyReply(Exception):
|
||||
@@ -292,7 +293,15 @@ class base_html_playwright(Fetcher):
|
||||
|
||||
# allow per-watch proxy selection override
|
||||
if proxy_override:
|
||||
self.proxy = {'server': proxy_override}
|
||||
# https://playwright.dev/docs/network#http-proxy
|
||||
from urllib.parse import urlparse
|
||||
parsed = urlparse(proxy_override)
|
||||
proxy_url = "{}://{}:{}".format(parsed.scheme, parsed.hostname, parsed.port)
|
||||
self.proxy = {'server': proxy_url}
|
||||
if parsed.username:
|
||||
self.proxy['username'] = parsed.username
|
||||
if parsed.password:
|
||||
self.proxy['password'] = parsed.password
|
||||
|
||||
def run(self,
|
||||
url,
|
||||
@@ -356,7 +365,7 @@ class base_html_playwright(Fetcher):
|
||||
print(str(e))
|
||||
context.close()
|
||||
browser.close()
|
||||
raise PageUnloadable(url=url, status_code=None)
|
||||
raise PageUnloadable(url=url, status_code=None, message=e.message)
|
||||
|
||||
if response is None:
|
||||
context.close()
|
||||
|
||||
@@ -13,6 +13,8 @@ urllib3.disable_warnings(urllib3.exceptions.InsecureRequestWarning)
|
||||
# Some common stuff here that can be moved to a base class
|
||||
# (set_proxy_from_list)
|
||||
class perform_site_check():
|
||||
screenshot = None
|
||||
xpath_data = None
|
||||
|
||||
def __init__(self, *args, datastore, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@@ -127,6 +129,9 @@ class perform_site_check():
|
||||
fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, watch['css_filter'])
|
||||
fetcher.quit()
|
||||
|
||||
self.screenshot = fetcher.screenshot
|
||||
self.xpath_data = fetcher.xpath_data
|
||||
|
||||
# Fetching complete, now filters
|
||||
# @todo move to class / maybe inside of fetcher abstract base?
|
||||
|
||||
@@ -312,4 +317,4 @@ class perform_site_check():
|
||||
if not watch.get('previous_md5'):
|
||||
watch['previous_md5'] = fetched_md5
|
||||
|
||||
return changed_detected, update_obj, text_content_before_ignored_filter, fetcher.screenshot, fetcher.xpath_data
|
||||
return changed_detected, update_obj, text_content_before_ignored_filter
|
||||
|
||||
@@ -38,13 +38,14 @@ docker kill $$-test_selenium
|
||||
|
||||
echo "TESTING WEBDRIVER FETCH > PLAYWRIGHT/BROWSERLESS..."
|
||||
# Not all platforms support playwright (not ARM/rPI), so it's not packaged in requirements.txt
|
||||
pip3 install playwright~=1.22
|
||||
pip3 install playwright~=1.24
|
||||
docker run -d --name $$-test_browserless -e "DEFAULT_LAUNCH_ARGS=[\"--window-size=1920,1080\"]" --rm -p 3000:3000 --shm-size="2g" browserless/chrome:1.53-chrome-stable
|
||||
# takes a while to spin up
|
||||
sleep 5
|
||||
export PLAYWRIGHT_DRIVER_URL=ws://127.0.0.1:3000
|
||||
pytest tests/fetchers/test_content.py
|
||||
pytest tests/test_errorhandling.py
|
||||
pytest tests/visualselector/test_fetch_data.py
|
||||
|
||||
unset PLAYWRIGHT_DRIVER_URL
|
||||
docker kill $$-test_browserless
|
||||
@@ -363,9 +363,9 @@ class ChangeDetectionStore:
|
||||
def save_xpath_data(self, watch_uuid, data, as_error=False):
|
||||
|
||||
if as_error:
|
||||
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
|
||||
else:
|
||||
target_path = os.path.join(self.datastore_path, watch_uuid, "elements-error.json")
|
||||
else:
|
||||
target_path = os.path.join(self.datastore_path, watch_uuid, "elements.json")
|
||||
|
||||
with open(target_path, 'w') as f:
|
||||
f.write(json.dumps(data))
|
||||
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from ..util import live_server_setup
|
||||
from ..util import live_server_setup, wait_for_all_checks
|
||||
import logging
|
||||
|
||||
|
||||
@@ -29,14 +29,8 @@ def test_fetch_webdriver_content(client, live_server):
|
||||
|
||||
assert b"1 Imported" in res.data
|
||||
time.sleep(3)
|
||||
attempt = 0
|
||||
while attempt < 20:
|
||||
res = client.get(url_for("index"))
|
||||
if not b'Checking now' in res.data:
|
||||
break
|
||||
logging.getLogger().info("Waiting for check to not say 'Checking now'..")
|
||||
time.sleep(3)
|
||||
attempt += 1
|
||||
|
||||
wait_for_all_checks(client)
|
||||
|
||||
|
||||
res = client.get(
|
||||
|
||||
@@ -2,6 +2,8 @@
|
||||
|
||||
from flask import make_response, request
|
||||
from flask import url_for
|
||||
import logging
|
||||
import time
|
||||
|
||||
def set_original_response():
|
||||
test_return_data = """<html>
|
||||
@@ -68,6 +70,31 @@ def extract_api_key_from_UI(client):
|
||||
api_key = m.group(1)
|
||||
return api_key.strip()
|
||||
|
||||
|
||||
# kinda funky, but works for now
|
||||
def extract_UUID_from_client(client):
|
||||
import re
|
||||
res = client.get(
|
||||
url_for("index"),
|
||||
)
|
||||
# <span id="api-key">{{api_key}}</span>
|
||||
|
||||
m = re.search('edit/(.+?)"', str(res.data))
|
||||
uuid = m.group(1)
|
||||
return uuid.strip()
|
||||
|
||||
def wait_for_all_checks(client):
|
||||
# Loop waiting until done..
|
||||
attempt=0
|
||||
while attempt < 60:
|
||||
time.sleep(1)
|
||||
res = client.get(url_for("index"))
|
||||
if not b'Checking now' in res.data:
|
||||
break
|
||||
logging.getLogger().info("Waiting for watch-list to not say 'Checking now'.. {}".format(attempt))
|
||||
|
||||
attempt += 1
|
||||
|
||||
def live_server_setup(live_server):
|
||||
|
||||
@live_server.app.route('/test-endpoint')
|
||||
@@ -133,3 +160,4 @@ def live_server_setup(live_server):
|
||||
return ret
|
||||
|
||||
live_server.start()
|
||||
|
||||
|
||||
2
changedetectionio/tests/visualselector/__init__.py
Normal file
2
changedetectionio/tests/visualselector/__init__.py
Normal file
@@ -0,0 +1,2 @@
|
||||
"""Tests for the app."""
|
||||
|
||||
3
changedetectionio/tests/visualselector/conftest.py
Normal file
3
changedetectionio/tests/visualselector/conftest.py
Normal file
@@ -0,0 +1,3 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
from .. import conftest
|
||||
35
changedetectionio/tests/visualselector/test_fetch_data.py
Normal file
35
changedetectionio/tests/visualselector/test_fetch_data.py
Normal file
@@ -0,0 +1,35 @@
|
||||
#!/usr/bin/python3
|
||||
|
||||
import time
|
||||
from flask import url_for
|
||||
from ..util import live_server_setup, wait_for_all_checks, extract_UUID_from_client
|
||||
|
||||
# Add a site in paused mode, add an invalid filter, we should still have visual selector data ready
|
||||
def test_visual_selector_content_ready(client, live_server):
|
||||
import os
|
||||
|
||||
assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test"
|
||||
live_server_setup(live_server)
|
||||
time.sleep(1)
|
||||
|
||||
# Add our URL to the import page, maybe better to use something we control?
|
||||
# We use an external URL because the docker container is too difficult to setup to connect back to the pytest socket
|
||||
test_url = 'https://news.ycombinator.com'
|
||||
res = client.post(
|
||||
url_for("form_quick_watch_add"),
|
||||
data={"url": test_url, "tag": '', 'edit_and_watch_submit_button': 'Edit > Watch'},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"Watch added in Paused state, saving will unpause" in res.data
|
||||
|
||||
res = client.post(
|
||||
url_for("edit_page", uuid="first", unpause_on_save=1),
|
||||
data={"css_filter": ".does-not-exist", "url": test_url, "tag": "", "headers": "", 'fetch_backend': "html_webdriver"},
|
||||
follow_redirects=True
|
||||
)
|
||||
assert b"unpaused" in res.data
|
||||
time.sleep(1)
|
||||
wait_for_all_checks(client)
|
||||
uuid = extract_UUID_from_client(client)
|
||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'last-screenshot.png')), "last-screenshot.png should exist"
|
||||
assert os.path.isfile(os.path.join('test-datastore', uuid, 'elements.json')), "xpath elements.json data should exist"
|
||||
@@ -142,7 +142,7 @@ class update_worker(threading.Thread):
|
||||
now = time.time()
|
||||
|
||||
try:
|
||||
changed_detected, update_obj, contents, screenshot, xpath_data = update_handler.run(uuid)
|
||||
changed_detected, update_obj, contents = update_handler.run(uuid)
|
||||
# Re #342
|
||||
# In Python 3, all strings are sequences of Unicode characters. There is a bytes type that holds raw bytes.
|
||||
# We then convert/.decode('utf-8') for the notification etc
|
||||
@@ -223,6 +223,9 @@ class update_worker(threading.Thread):
|
||||
'last_check_status': e.status_code})
|
||||
except content_fetcher.PageUnloadable as e:
|
||||
err_text = "Page request from server didnt respond correctly"
|
||||
if e.message:
|
||||
err_text = "{} - {}".format(err_text, e.message)
|
||||
|
||||
if e.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=e.screenshot, as_error=True)
|
||||
|
||||
@@ -234,6 +237,9 @@ class update_worker(threading.Thread):
|
||||
# Other serious error
|
||||
process_changedetection_results = False
|
||||
else:
|
||||
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
|
||||
if not self.datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
# Mark that we never had any failures
|
||||
if not self.datastore.data['watching'][uuid].get('ignore_status_codes'):
|
||||
@@ -241,10 +247,6 @@ class update_worker(threading.Thread):
|
||||
|
||||
self.cleanup_error_artifacts(uuid)
|
||||
|
||||
# Crash protection, the watch entry could have been removed by this point (during a slow chrome fetch etc)
|
||||
if not self.datastore.data['watching'].get(uuid):
|
||||
continue
|
||||
|
||||
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
|
||||
if process_changedetection_results:
|
||||
try:
|
||||
@@ -280,10 +282,10 @@ class update_worker(threading.Thread):
|
||||
'last_checked': round(time.time())})
|
||||
|
||||
# Always save the screenshot if it's available
|
||||
if screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=screenshot)
|
||||
if xpath_data:
|
||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=xpath_data)
|
||||
if update_handler.screenshot:
|
||||
self.datastore.save_screenshot(watch_uuid=uuid, screenshot=update_handler.screenshot)
|
||||
if update_handler.xpath_data:
|
||||
self.datastore.save_xpath_data(watch_uuid=uuid, data=update_handler.xpath_data)
|
||||
|
||||
|
||||
self.current_uuid = None # Done
|
||||
|
||||
Reference in New Issue
Block a user