Compare commits

..

8 Commits

15 changed files with 87 additions and 204 deletions

View File

@@ -12,7 +12,7 @@ from functools import wraps
from threading import Event
import datetime
import flask_login
import logging
from loguru import logger
import os
import pytz
import queue
@@ -505,41 +505,6 @@ def changedetection_app(config=None, datastore_o=None):
output = render_template("clear_all_history.html")
return output
# If they edited an existing watch, we need to know to reset the current/previous md5 to include
# the excluded text.
def get_current_checksum_include_ignore_text(uuid):
import hashlib
from changedetectionio import fetch_site_status
# Get the most recent one
newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
# 0 means that theres only one, so that there should be no 'unviewed' history available
if newest_history_key == 0:
newest_history_key = list(datastore.data['watching'][uuid].history.keys())[0]
if newest_history_key:
with open(datastore.data['watching'][uuid].history[newest_history_key],
encoding='utf-8') as file:
raw_content = file.read()
handler = fetch_site_status.perform_site_check(datastore=datastore)
stripped_content = html_tools.strip_ignore_text(raw_content,
datastore.data['watching'][uuid]['ignore_text'])
if datastore.data['settings']['application'].get('ignore_whitespace', False):
checksum = hashlib.md5(stripped_content.translate(None, b'\r\n\t ')).hexdigest()
else:
checksum = hashlib.md5(stripped_content).hexdigest()
return checksum
return datastore.data['watching'][uuid]['previous_md5']
@app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
@login_optionally_required
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
@@ -943,8 +908,9 @@ def changedetection_app(config=None, datastore_o=None):
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
is_html_webdriver = False
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
# Never requested successfully, but we detected a fetch error
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
@@ -1035,7 +1001,8 @@ def changedetection_app(config=None, datastore_o=None):
os.unlink(previous_backup_filename)
# create a ZipFile object
backupname = "changedetection-backup-{}.zip".format(int(time.time()))
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
backupname = "changedetection-backup-{}.zip".format(timestamp)
backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
with zipfile.ZipFile(backup_filepath, "w",
@@ -1350,7 +1317,7 @@ def changedetection_app(config=None, datastore_o=None):
except Exception as e:
logging.error("Error sharing -{}".format(str(e)))
logger.error("Error sharing -{}".format(str(e)))
flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error')
# https://changedetection.io/share/VrMv05wpXyQa
@@ -1425,7 +1392,7 @@ def notification_runner():
sent_obj = notification.process_notification(n_object, datastore)
except Exception as e:
logging.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
logger.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
# UUID wont be present when we submit a 'test' from the global settings
if 'uuid' in n_object:
@@ -1448,7 +1415,7 @@ def ticker_thread_check_time_launch_checks():
proxy_last_called_time = {}
recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
logger.info("System env MINIMUM_SECONDS_RECHECK_TIME - {}", recheck_time_minimum_seconds)
# Spin up Workers that do the fetching
# Can be overriden by ENV or use the default settings
@@ -1493,7 +1460,7 @@ def ticker_thread_check_time_launch_checks():
now = time.time()
watch = datastore.data['watching'].get(uuid)
if not watch:
logging.error("Watch: {} no longer present.".format(uuid))
logger.error("Watch: {} no longer present.".format(uuid))
continue
# No need todo further processing if it's paused
@@ -1526,7 +1493,7 @@ def ticker_thread_check_time_launch_checks():
time_since_proxy_used = int(time.time() - proxy_last_used_time)
if time_since_proxy_used < proxy_list_reuse_time_minimum:
# Not enough time difference reached, skip this watch
print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
logger.info("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
watch_proxy,
time_since_proxy_used,
proxy_list_reuse_time_minimum))
@@ -1537,7 +1504,7 @@ def ticker_thread_check_time_launch_checks():
# Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
priority = int(time.time())
print(
logger.info(
"> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
uuid,
watch['last_checked'],

View File

@@ -24,7 +24,7 @@
from distutils.util import strtobool
from flask import Blueprint, request, make_response
import os
import logging
from loguru import logger
from changedetectionio.store import ChangeDetectionStore
from changedetectionio import login_optionally_required
browsersteps_live_ui_o = {}
@@ -49,7 +49,7 @@ def cleanup_playwright_session():
browsersteps_playwright_browser_interface_end_time = None
browsersteps_playwright_browser_interface_start_time = None
print("Cleaning up old playwright session because time was up, calling .goodbye()")
logger.info("Cleaning up old playwright session because time was up, calling .goodbye()")
try:
browsersteps_playwright_browser_interface_context.goodbye()
except Exception as e:
@@ -114,7 +114,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
this_session = browsersteps_live_ui_o.get(browsersteps_session_id)
if not this_session:
print("Browser exited")
logger.info("Browser exited")
return make_response('Browser session ran out of time :( Please reload this page.', 401)
this_session.call_action(action_name=step_operation,
@@ -122,7 +122,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
optional_value=step_optional_value)
except Exception as e:
print("Exception when calling step operation", step_operation, str(e))
logger.info("Exception when calling step operation", step_operation, str(e))
# Try to find something of value to give back to the user
return make_response(str(e).splitlines()[0], 401)
@@ -139,7 +139,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
if request.method == 'GET':
if not browsersteps_playwright_browser_interface:
print("Starting connection with playwright")
logger.info("Starting connection with playwright")
logging.debug("browser_steps.py connecting")
global browsersteps_playwright_browser_interface_context
@@ -162,7 +162,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
return make_response('Unable to start the Playwright session properly, is it running?', 401)
browsersteps_playwright_browser_interface_end_time = time.time() + (seconds_keepalive-3)
print("Starting connection with playwright - done")
logger.info("Starting connection with playwright - done")
if not browsersteps_live_ui_o.get(browsersteps_session_id):
# Boot up a new session
@@ -172,7 +172,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
proxy_url = datastore.proxy_list.get(proxy_id).get('url')
if proxy_url:
proxy = {'server': proxy_url}
print("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
logger.info("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
# Begin the new "Playwright Context" that re-uses the playwright interface
# Each session is a "Playwright Context" as a list, that uses the playwright interface

View File

@@ -51,7 +51,7 @@ class steppable_browser_interface():
if call_action_name == 'choose_one':
return
print("> action calling", call_action_name)
logger.info("> action calling", call_action_name)
# https://playwright.dev/python/docs/selectors#xpath-selectors
if selector.startswith('/') and not selector.startswith('//'):
selector = "xpath=" + selector
@@ -70,7 +70,7 @@ class steppable_browser_interface():
action_handler(selector, optional_value)
self.page.wait_for_timeout(3 * 1000)
print("Call action done in", time.time() - now)
logger.info("Call action done in", time.time() - now)
def action_goto_url(self, url, optional_value):
# self.page.set_viewport_size({"width": 1280, "height": 5000})
@@ -81,7 +81,7 @@ class steppable_browser_interface():
# - `'commit'` - consider operation to be finished when network response is received and the document started loading.
# Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
# This seemed to solve nearly all 'TimeoutErrors'
print("Time to goto URL ", time.time() - now)
logger.info("Time to goto URL ", time.time() - now)
def action_click_element_containing_text(self, selector=None, value=''):
if not len(value.strip()):
@@ -100,14 +100,14 @@ class steppable_browser_interface():
self.page.evaluate(value)
def action_click_element(self, selector, value):
print("Clicking element")
logger.info("Clicking element")
if not len(selector.strip()):
return
self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
def action_click_element_if_exists(self, selector, value):
import playwright._impl._api_types as _api_types
print("Clicking element if exists")
logger.info("Clicking element if exists")
if not len(selector.strip()):
return
try:
@@ -207,13 +207,13 @@ class browsersteps_live_ui(steppable_browser_interface):
self.mark_as_closed,
)
# Listen for all console events and handle errors
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
self.page.on("console", lambda msg: logger.info(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
print("Time to browser setup", time.time() - now)
logger.info("Time to browser setup", time.time() - now)
self.page.wait_for_timeout(1 * 1000)
def mark_as_closed(self):
print("Page closed, cleaning up..")
logger.info("Page closed, cleaning up..")
@property
def has_expired(self):
@@ -239,7 +239,7 @@ class browsersteps_live_ui(steppable_browser_interface):
xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
# So the JS will find the smallest one first
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
print("Time to complete get_current_state of browser", time.time() - now)
logger.info("Time to complete get_current_state of browser", time.time() - now)
# except
# playwright._impl._api_types.Error: Browser closed.
# @todo show some countdown timer?

View File

@@ -8,6 +8,7 @@ from json.decoder import JSONDecodeError
import eventlet
import eventlet.wsgi
import getopt
from loguru import logger
import os
import signal
import socket
@@ -24,7 +25,7 @@ def sigterm_handler(_signo, _stack_frame):
global app
global datastore
# app.config.exit.set()
print('Shutdown: Got SIGTERM, DB saved to disk')
logger.warning('Shutdown: Got SIGTERM, DB saved to disk')
datastore.sync_to_json()
# raise SystemExit

View File

@@ -2,7 +2,7 @@ import hashlib
from abc import abstractmethod
import chardet
import json
import logging
from loguru import logger
import os
import requests
import sys
@@ -155,7 +155,7 @@ class Fetcher():
for step in valid_steps:
step_n += 1
print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
logger.info(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
self.screenshot_step("before-"+str(step_n))
self.save_step_html("before-"+str(step_n))
try:
@@ -510,7 +510,7 @@ class base_html_webdriver(Fetcher):
try:
self.driver.quit()
except Exception as e:
print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
logger.error("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
# "html_requests" is listed as the default fetcher in store.py!
@@ -568,6 +568,7 @@ class html_requests(Fetcher):
if not r.content or not len(r.content):
raise EmptyReply(url=url, status_code=r.status_code)
# @todo test this
# @todo maybe you really want to test zero-byte return pages?
if r.status_code != 200 and not ignore_status_codes:
# maybe check with content works?

View File

@@ -1,6 +1,6 @@
import hashlib
import json
import logging
from loguru import logger
import os
import re
import urllib3
@@ -105,7 +105,7 @@ class perform_site_check():
proxy_url = None
if proxy_id:
proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
print("UUID {} Using proxy {}".format(uuid, proxy_url))
logger.info("UUID {} Using proxy {}".format(uuid, proxy_url))
fetcher = klass(proxy_override=proxy_url)
@@ -135,7 +135,7 @@ class perform_site_check():
# Track the content type
update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
logger.info("UUID: {} - Fetch complete {:,} bytes".format(watch.get('uuid'), len(fetcher.content)))
# Watches added automatically in the queue manager will skip if its the same checksum as the previous run
# Saves a lot of CPU
update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
@@ -349,6 +349,7 @@ class perform_site_check():
# The main thing that all this at the moment comes down to :)
if watch.get('previous_md5') != fetched_md5:
logger.debug("UUID: {} - Change detected - Prev MD5: {} - Fetched MD5: {}, applying filters...".format(uuid, watch.get('previous_md5'), fetched_md5))
changed_detected = True
# Looks like something changed, but did it match all the rules?
@@ -366,10 +367,10 @@ class perform_site_check():
has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
# One or more lines? unsure?
if not has_unique_lines:
logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
logger.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
changed_detected = False
else:
logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
logger.debug("check_unique_lines: UUID {} had unique content".format(uuid))
# Always record the new checksum
update_obj["previous_md5"] = fetched_md5
@@ -378,4 +379,9 @@ class perform_site_check():
if not watch.get('previous_md5'):
watch['previous_md5'] = fetched_md5
if changed_detected:
logger.success("UUID: {} Change detected after all filters applied.", uuid)
else:
logger.info("UUID: {} NO Change detected after all filters applied.", uuid)
return changed_detected, update_obj, text_content_before_ignored_filter

View File

@@ -1,5 +1,5 @@
from distutils.util import strtobool
import logging
from loguru import logger
import os
import re
import time
@@ -109,7 +109,7 @@ class model(dict):
def ensure_data_dir_exists(self):
if not os.path.isdir(self.watch_data_dir):
print ("> Creating data dir {}".format(self.watch_data_dir))
logger.debug("> Creating data dir {}".format(self.watch_data_dir))
os.mkdir(self.watch_data_dir)
@property
@@ -153,7 +153,9 @@ class model(dict):
@property
def is_pdf(self):
# content_type field is set in the future
return '.pdf' in self.get('url', '').lower() or 'pdf' in self.get('content_type', '').lower()
# https://github.com/dgtlmoon/changedetection.io/issues/1392
# Not sure the best logic here
return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()
@property
def label(self):
@@ -192,7 +194,7 @@ class model(dict):
# Read the history file as a dict
fname = os.path.join(self.watch_data_dir, "history.txt")
if os.path.isfile(fname):
logging.debug("Reading history index " + str(time.time()))
logger.debug("UUID: {} Reading history index".format(self.get('uuid')))
with open(fname, "r") as f:
for i in f.readlines():
if ',' in i:

View File

@@ -101,7 +101,7 @@ def process_notification(n_object, datastore):
apobj = apprise.Apprise(debug=True, asset=asset)
url = url.strip()
if len(url):
print(">> Process Notification: AppRise notifying {}".format(url))
logger.info(">> Process Notification: AppRise notifying {}".format(url))
with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
# Re 323 - Limit discord length to their 2000 char limit total or it wont send.
# Because different notifications may require different pre-processing, run each sequentially :(

View File

@@ -7,7 +7,7 @@ from copy import deepcopy
from os import path, unlink
from threading import Lock
import json
import logging
from loguru import logger
import os
import re
import requests
@@ -75,12 +75,12 @@ class ChangeDetectionStore:
for uuid, watch in self.__data['watching'].items():
watch['uuid']=uuid
self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch)
print("Watching:", uuid, self.__data['watching'][uuid]['url'])
logger.info("Watching: {} - {}", uuid, self.__data['watching'][uuid]['url'])
# First time ran, Create the datastore.
except (FileNotFoundError):
if include_default_watches:
print("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
logger.info("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
self.add_watch(url='https://news.ycombinator.com/',
tag='Tech news',
extras={'fetch_backend': 'html_requests'})
@@ -192,27 +192,24 @@ class ChangeDetectionStore:
tags.sort()
return tags
def unlink_history_file(self, path):
try:
unlink(path)
except (FileNotFoundError, IOError):
pass
# Delete a single watch by UUID
def delete(self, uuid):
import pathlib
import shutil
with self.lock:
if uuid == 'all':
self.__data['watching'] = {}
# GitHub #30 also delete history records
for uuid in self.data['watching']:
for path in self.data['watching'][uuid].history.values():
self.unlink_history_file(path)
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
shutil.rmtree(path)
self.needs_write_urgent = True
else:
for path in self.data['watching'][uuid].history.values():
self.unlink_history_file(path)
path = pathlib.Path(os.path.join(self.datastore_path, uuid))
shutil.rmtree(path)
del self.data['watching'][uuid]
self.needs_write_urgent = True
@@ -306,7 +303,7 @@ class ChangeDetectionStore:
apply_extras['include_filters'] = [res['css_filter']]
except Exception as e:
logging.error("Error fetching metadata for shared watch link", url, str(e))
logger.error("Error fetching metadata for shared watch link", url, str(e))
flash("Error fetching metadata for {}".format(url), 'error')
return False
from .model.Watch import is_safe_url
@@ -391,14 +388,13 @@ class ChangeDetectionStore:
def sync_to_json(self):
logging.info("Saving JSON..")
print("Saving JSON..")
logger.debug("Saving JSON DB")
try:
data = deepcopy(self.__data)
except RuntimeError as e:
# Try again in 15 seconds
time.sleep(15)
logging.error ("! Data changed when writing to JSON, trying again.. %s", str(e))
logger.error ("! Data changed when writing to JSON, trying again.. %s", str(e))
self.sync_to_json()
return
else:
@@ -411,7 +407,7 @@ class ChangeDetectionStore:
json.dump(data, json_file, indent=4)
os.replace(self.json_store_path+".tmp", self.json_store_path)
except Exception as e:
logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
logger.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
self.needs_write = False
self.needs_write_urgent = False
@@ -422,7 +418,7 @@ class ChangeDetectionStore:
while True:
if self.stop_thread:
print("Shutting down datastore thread")
logger.info("Shutting down datastore thread")
return
if self.needs_write or self.needs_write_urgent:
@@ -536,8 +532,8 @@ class ChangeDetectionStore:
try:
update_method = getattr(self, "update_{}".format(update_n))()
except Exception as e:
print("Error while trying update_{}".format((update_n)))
print(e)
logger.error("Error while trying update_{}".format((update_n)))
logger.error(str(e))
# Don't run any more updates
return
else:
@@ -575,7 +571,7 @@ class ChangeDetectionStore:
with open(os.path.join(target_path, "history.txt"), "w") as f:
f.writelines(history)
else:
logging.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
logger.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
# No longer needed, dynamically pulled from the disk when needed.
# But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.

View File

@@ -15,78 +15,10 @@ def test_inscriptus():
stripped_text_from_html = get_text(html_content)
assert stripped_text_from_html == 'test!\nok man'
def test_setup(client, live_server):
live_server_setup(live_server)
# Assert that non-200's dont give notifications or register as a change
def test_non_200_doesnt_trigger_change(client, live_server):
# live_server_setup(live_server)
set_original_response()
url = url_for('test_changing_status_code_endpoint', _external=True)
# Add our URL to the import page
res = client.post(
url_for("import_page"),
data={"urls": url},
follow_redirects=True
)
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
res = client.post(
url_for("edit_page", uuid="first"),
data={
"include_filters": ".foobar-detection",
"fetch_backend": "html_requests",
"headers": "",
"tag": "",
"url": url
},
follow_redirects=True
)
# A recheck will happen here automatically
time.sleep(sleep_time_for_fetch_thread)
# hit the mark all viewed link
res = client.get(url_for("mark_all_viewed"), follow_redirects=True)
# Now be sure the filter is missing and then recheck it
set_modified_response()
# https://github.com/dgtlmoon/changedetection.io/issues/962#issuecomment-1416807742
for ecode in ['429', '400', '204', '429', '403', '404', '500']:
with open("test-endpoint-status-code.txt", 'w') as f:
f.write(ecode)
res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
assert b'1 watches queued for rechecking.' in res.data
time.sleep(sleep_time_for_fetch_thread)
# No change should be seen/no trigger of change
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# load preview page so we can see what was returned
res = client.get(url_for("preview_page", uuid="first"))
# with open('/tmp/debug-'+ecode+'.html', 'wb') as f:
# f.write(res.data)
# Should still say the original 200, because "ignore_status_codes" should be off by default
# commented out - this will fail because we also show what the error was
# assert b'code: '+ecode.encode('utf-8') not in res.data
assert b'code: 200' in res.data
# Cleanup everything
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data
def test_check_basic_change_detection_functionality(client, live_server):
set_original_response()
live_server_setup(live_server)
# Add our URL to the import page
res = client.post(

View File

@@ -69,8 +69,6 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
url = url_for('test_notification_endpoint', _external=True)
notification_url = url.replace('http', 'json')
print(">>>> Notification URL: " + notification_url)
# Just a regular notification setting, this will be used by the special 'filter not found' notification
notification_form_data = {"notification_urls": notification_url,
"notification_title": "New ChangeDetection.io Notification - {{watch_url}}",

View File

@@ -1,5 +1,4 @@
#!/usr/bin/python3
import os.path
from flask import make_response, request
from flask import url_for
@@ -113,25 +112,6 @@ def live_server_setup(live_server):
import secrets
return "Random content - {}\n".format(secrets.token_hex(64))
@live_server.app.route('/test-changing-status-code-endpoint')
def test_changing_status_code_endpoint():
# status_code can also be overriden in a file, used for doing things that it wouldnt normally expect
# (test_non_200_doesnt_trigger_change)
status_code = '200'
if os.path.isfile("test-endpoint-status-code.txt"):
with open("test-endpoint-status-code.txt", 'r') as f:
status_code = f.read().strip()
os.unlink("test-endpoint-status-code.txt")
# Contents includes the status code, which will change and should not trigger a change
# (Non-200 should get ignored)
with open("test-datastore/endpoint-content.txt", "r") as f:
contents ="{} code: {} ".format(f.read(), status_code)
if status_code == '204':
contents=''
resp = make_response(contents, status_code)
resp.headers['Content-Type'] = 'text/html'
return resp, status_code
@live_server.app.route('/test-endpoint')
def test_endpoint():

View File

@@ -2,6 +2,7 @@ import os
import threading
import queue
import time
from loguru import logger
from changedetectionio import content_fetcher
from changedetectionio import queuedWatchMetaData
@@ -12,14 +13,12 @@ from changedetectionio.fetch_site_status import FilterNotFoundInResponse
# Requests for checking on a single site(watch) from a queue of watches
# (another process inserts watches into the queue that are time-ready for checking)
import logging
import sys
class update_worker(threading.Thread):
current_uuid = None
def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
self.q = q
self.app = app
self.notification_q = notification_q
@@ -80,10 +79,10 @@ class update_worker(threading.Thread):
'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
})
logging.info (">> SENDING NOTIFICATION")
logger.info ("UUID: {} - Sending notification".format(watch_uuid))
self.notification_q.put(n_object)
else:
logging.info (">> NO Notification sent, notification_url was empty in both watch and system")
logger.info("UUID: {} - NO Notification sent, notification_url was empty in both watch and system".format(watch_uuid))
def send_filter_failure_notification(self, watch_uuid):
@@ -112,7 +111,7 @@ class update_worker(threading.Thread):
'screenshot': None
})
self.notification_q.put(n_object)
print("Sent filter not found notification for {}".format(watch_uuid))
logger.info("Sent filter not found notification for {}".format(watch_uuid))
def send_step_failure_notification(self, watch_uuid, step_n):
watch = self.datastore.data['watching'].get(watch_uuid, False)
@@ -139,7 +138,7 @@ class update_worker(threading.Thread):
'uuid': watch_uuid
})
self.notification_q.put(n_object)
print("Sent step not found notification for {}".format(watch_uuid))
logger.error("Sent step not found notification for {}".format(watch_uuid))
def cleanup_error_artifacts(self, uuid):
@@ -173,7 +172,7 @@ class update_worker(threading.Thread):
update_obj= {}
xpath_data = False
process_changedetection_results = True
print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
logger.info("UUID: {} Start processing, Priority {} URL {}",uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url'])
now = time.time()
try:
@@ -184,7 +183,7 @@ class update_worker(threading.Thread):
if not isinstance(contents, (bytes, bytearray)):
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
except PermissionError as e:
self.app.logger.error("File permission error updating", uuid, str(e))
logger.error("UUID: {} File permission error updating - {}", uuid, str(e))
process_changedetection_results = False
except content_fetcher.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about
@@ -233,7 +232,7 @@ class update_worker(threading.Thread):
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0)
print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
logger.error("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
if threshold > 0 and c >= threshold:
if not self.datastore.data['watching'][uuid].get('notification_muted'):
self.send_filter_failure_notification(uuid)
@@ -264,7 +263,7 @@ class update_worker(threading.Thread):
# Send notification if we reached the threshold?
threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
0)
print("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
logger.error("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
if threshold > 0 and c >= threshold:
if not self.datastore.data['watching'][uuid].get('notification_muted'):
self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
@@ -330,8 +329,6 @@ class update_worker(threading.Thread):
# A change was detected
if changed_detected:
print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
# Notifications should only trigger on the second time (first time, we gather the initial snapshot)
if watch.history_n >= 2:
if not self.datastore.data['watching'][uuid].get('notification_muted'):
@@ -340,7 +337,7 @@ class update_worker(threading.Thread):
except Exception as e:
# Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
print("!!!! Exception in update_worker !!!\n", e)
logger.error("!!!! Exception in update_worker !!!\n", e)
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})

View File

@@ -41,7 +41,6 @@ services:
#
# Base URL of your changedetection.io install (Added to the notification alert)
# - BASE_URL=https://mysite.com
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
# - USE_X_SETTINGS=1
@@ -95,7 +94,10 @@ services:
# - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true
#
# Ignore HTTPS errors, like for self-signed certs
# - DEFAULT_IGNORE_HTTPS_ERRORS=true
#
volumes:
changedetection-data:

View File

@@ -10,6 +10,7 @@ inscriptis~=2.2
pytz
timeago~=1.0
validators
loguru
# Set these versions together to avoid a RequestsDependencyWarning
# >= 2.26 also adds Brotli support if brotli is installed