Mirror of https://github.com/dgtlmoon/changedetection.io.git (synced 2025-11-13 21:16:11 +00:00)

Compare commits: bug-non-20...improve-lo (8 commits)
| Author | SHA1 | Date |
|---|---|---|
| | 7b664c43ea | |
| | d4eb9f2b64 | |
| | 7b8b50138b | |
| | 01af21f856 | |
| | f7f4ab314b | |
| | ce0355c0ad | |
| | 0f43213d9d | |
| | 93c57d9fad | |
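
Every commit in this range touches logging: ad-hoc `print()` calls and stdlib `logging` usage are migrated to [loguru](https://github.com/Delgan/loguru). A minimal sketch of the pattern the diffs below adopt (the UUID value is made up for illustration):

```python
from loguru import logger  # pip install loguru; logs to stderr with no handler setup

uuid = "0f43213d-example"  # hypothetical watch UUID

# loguru interpolates str.format()-style {} placeholders from positional args,
# unlike stdlib logging's printf-style "%s" placeholders.
logger.info("UUID: {} Start processing", uuid)

# Extra severity levels: SUCCESS (25) sits between INFO (20) and WARNING (30).
logger.success("UUID: {} Change detected after all filters applied.", uuid)

# Caution: args without a matching {} placeholder are silently dropped;
# logger.info("done in", 1.23) logs just "done in".
```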
```diff
@@ -12,7 +12,7 @@ from functools import wraps
 from threading import Event
 import datetime
 import flask_login
-import logging
+from loguru import logger
 import os
 import pytz
 import queue
```
```diff
@@ -505,41 +505,6 @@ def changedetection_app(config=None, datastore_o=None):
         output = render_template("clear_all_history.html")
         return output
 
-
-    # If they edited an existing watch, we need to know to reset the current/previous md5 to include
-    # the excluded text.
-    def get_current_checksum_include_ignore_text(uuid):
-
-        import hashlib
-
-        from changedetectionio import fetch_site_status
-
-        # Get the most recent one
-        newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
-
-        # 0 means that theres only one, so that there should be no 'unviewed' history available
-        if newest_history_key == 0:
-            newest_history_key = list(datastore.data['watching'][uuid].history.keys())[0]
-
-        if newest_history_key:
-            with open(datastore.data['watching'][uuid].history[newest_history_key],
-                      encoding='utf-8') as file:
-                raw_content = file.read()
-
-                handler = fetch_site_status.perform_site_check(datastore=datastore)
-                stripped_content = html_tools.strip_ignore_text(raw_content,
-                                                                datastore.data['watching'][uuid]['ignore_text'])
-
-                if datastore.data['settings']['application'].get('ignore_whitespace', False):
-                    checksum = hashlib.md5(stripped_content.translate(None, b'\r\n\t ')).hexdigest()
-                else:
-                    checksum = hashlib.md5(stripped_content).hexdigest()
-
-                return checksum
-
-        return datastore.data['watching'][uuid]['previous_md5']
-
-
     @app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
     @login_optionally_required
     # https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
```
```diff
@@ -943,8 +908,9 @@ def changedetection_app(config=None, datastore_o=None):
         extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
 
-        is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or (
-                watch.get('fetch_backend', None) is None and system_uses_webdriver) else False
+        is_html_webdriver = False
+        if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
+            is_html_webdriver = True
 
         # Never requested successfully, but we detected a fetch error
         if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
```
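
The rewrite above replaces a hard-to-read one-line ternary with an explicit check, and also lets a watch whose `fetch_backend` is `'system'` inherit the instance-wide webdriver default. A standalone sketch of the same decision (the function wrapper is ours, not part of the diff):

```python
def uses_webdriver(fetch_backend, system_uses_webdriver):
    # 'html_webdriver' opts in explicitly; 'system' defers to the global default.
    return (fetch_backend == 'system' and system_uses_webdriver) \
        or fetch_backend == 'html_webdriver'
```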
```diff
@@ -1035,7 +1001,8 @@ def changedetection_app(config=None, datastore_o=None):
                 os.unlink(previous_backup_filename)
 
         # create a ZipFile object
-        backupname = "changedetection-backup-{}.zip".format(int(time.time()))
+        timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
+        backupname = "changedetection-backup-{}.zip".format(timestamp)
         backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
 
         with zipfile.ZipFile(backup_filepath, "w",
```
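
The backup filename switches from epoch seconds to a sortable, human-readable timestamp. A quick illustration (output values are examples):

```python
import time
import datetime

# Old: changedetection-backup-1677000000.zip (epoch seconds)
old_name = "changedetection-backup-{}.zip".format(int(time.time()))

# New: changedetection-backup-20230221153000.zip (YYYYMMDDHHMMSS)
timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
new_name = "changedetection-backup-{}.zip".format(timestamp)
```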
```diff
@@ -1350,7 +1317,7 @@ def changedetection_app(config=None, datastore_o=None):
 
         except Exception as e:
-            logging.error("Error sharing -{}".format(str(e)))
+            logger.error("Error sharing -{}".format(str(e)))
             flash("Could not share, something went wrong while communicating with the share server - {}".format(str(e)), 'error')
 
         # https://changedetection.io/share/VrMv05wpXyQa
```
```diff
@@ -1425,7 +1392,7 @@ def notification_runner():
                     sent_obj = notification.process_notification(n_object, datastore)
 
                 except Exception as e:
-                    logging.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
+                    logger.error("Watch URL: {} Error {}".format(n_object['watch_url'], str(e)))
 
                     # UUID wont be present when we submit a 'test' from the global settings
                     if 'uuid' in n_object:
```
```diff
@@ -1448,7 +1415,7 @@ def ticker_thread_check_time_launch_checks():
     proxy_last_called_time = {}
 
     recheck_time_minimum_seconds = int(os.getenv('MINIMUM_SECONDS_RECHECK_TIME', 20))
-    print("System env MINIMUM_SECONDS_RECHECK_TIME", recheck_time_minimum_seconds)
+    logger.info("System env MINIMUM_SECONDS_RECHECK_TIME - {}", recheck_time_minimum_seconds)
 
     # Spin up Workers that do the fetching
     # Can be overriden by ENV or use the default settings
```
```diff
@@ -1493,7 +1460,7 @@ def ticker_thread_check_time_launch_checks():
             now = time.time()
             watch = datastore.data['watching'].get(uuid)
             if not watch:
-                logging.error("Watch: {} no longer present.".format(uuid))
+                logger.error("Watch: {} no longer present.".format(uuid))
                 continue
 
             # No need todo further processing if it's paused
```
```diff
@@ -1526,7 +1493,7 @@ def ticker_thread_check_time_launch_checks():
                     time_since_proxy_used = int(time.time() - proxy_last_used_time)
                     if time_since_proxy_used < proxy_list_reuse_time_minimum:
                         # Not enough time difference reached, skip this watch
-                        print("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
+                        logger.info("> Skipped UUID {} using proxy '{}', not enough time between proxy requests {}s/{}s".format(uuid,
                                                                                                                          watch_proxy,
                                                                                                                          time_since_proxy_used,
                                                                                                                          proxy_list_reuse_time_minimum))
```
```diff
@@ -1537,7 +1504,7 @@ def ticker_thread_check_time_launch_checks():
 
                 # Use Epoch time as priority, so we get a "sorted" PriorityQueue, but we can still push a priority 1 into it.
                 priority = int(time.time())
-                print(
+                logger.info(
                     "> Queued watch UUID {} last checked at {} queued at {:0.2f} priority {} jitter {:0.2f}s, {:0.2f}s since last checked".format(
                         uuid,
                         watch['last_checked'],
```
```diff
@@ -24,7 +24,7 @@
 from distutils.util import strtobool
 from flask import Blueprint, request, make_response
 import os
-import logging
+from loguru import logger
 from changedetectionio.store import ChangeDetectionStore
 from changedetectionio import login_optionally_required
 browsersteps_live_ui_o = {}
```
```diff
@@ -49,7 +49,7 @@ def cleanup_playwright_session():
     browsersteps_playwright_browser_interface_end_time = None
     browsersteps_playwright_browser_interface_start_time = None
 
-    print("Cleaning up old playwright session because time was up, calling .goodbye()")
+    logger.info("Cleaning up old playwright session because time was up, calling .goodbye()")
     try:
         browsersteps_playwright_browser_interface_context.goodbye()
     except Exception as e:
```
```diff
@@ -114,7 +114,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
 
         this_session = browsersteps_live_ui_o.get(browsersteps_session_id)
         if not this_session:
-            print("Browser exited")
+            logger.info("Browser exited")
             return make_response('Browser session ran out of time :( Please reload this page.', 401)
 
         this_session.call_action(action_name=step_operation,
```
```diff
@@ -122,7 +122,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                                      optional_value=step_optional_value)
 
         except Exception as e:
-            print("Exception when calling step operation", step_operation, str(e))
+            logger.info("Exception when calling step operation {} {}", step_operation, str(e))
             # Try to find something of value to give back to the user
             return make_response(str(e).splitlines()[0], 401)
```
```diff
@@ -139,7 +139,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
         if request.method == 'GET':
 
             if not browsersteps_playwright_browser_interface:
-                print("Starting connection with playwright")
-                logging.debug("browser_steps.py connecting")
+                logger.info("Starting connection with playwright")
+                logger.debug("browser_steps.py connecting")
 
                 global browsersteps_playwright_browser_interface_context
```
```diff
@@ -162,7 +162,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                 return make_response('Unable to start the Playwright session properly, is it running?', 401)
 
             browsersteps_playwright_browser_interface_end_time = time.time() + (seconds_keepalive-3)
-            print("Starting connection with playwright - done")
+            logger.info("Starting connection with playwright - done")
 
         if not browsersteps_live_ui_o.get(browsersteps_session_id):
             # Boot up a new session
```
```diff
@@ -172,7 +172,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
                 proxy_url = datastore.proxy_list.get(proxy_id).get('url')
                 if proxy_url:
                     proxy = {'server': proxy_url}
-                    print("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
+                    logger.info("Browser Steps: UUID {} Using proxy {}".format(uuid, proxy_url))
 
             # Begin the new "Playwright Context" that re-uses the playwright interface
             # Each session is a "Playwright Context" as a list, that uses the playwright interface
```
```diff
@@ -51,7 +51,7 @@ class steppable_browser_interface():
         if call_action_name == 'choose_one':
             return
 
-        print("> action calling", call_action_name)
+        logger.info("> action calling {}", call_action_name)
         # https://playwright.dev/python/docs/selectors#xpath-selectors
         if selector.startswith('/') and not selector.startswith('//'):
             selector = "xpath=" + selector
```
```diff
@@ -70,7 +70,7 @@ class steppable_browser_interface():
 
         action_handler(selector, optional_value)
         self.page.wait_for_timeout(3 * 1000)
-        print("Call action done in", time.time() - now)
+        logger.info("Call action done in {}", time.time() - now)
 
     def action_goto_url(self, url, optional_value):
         # self.page.set_viewport_size({"width": 1280, "height": 5000})
```
```diff
@@ -81,7 +81,7 @@ class steppable_browser_interface():
         # - `'commit'` - consider operation to be finished when network response is received and the document started loading.
         # Better to not use any smarts from Playwright and just wait an arbitrary number of seconds
         # This seemed to solve nearly all 'TimeoutErrors'
-        print("Time to goto URL ", time.time() - now)
+        logger.info("Time to goto URL {}", time.time() - now)
 
     def action_click_element_containing_text(self, selector=None, value=''):
         if not len(value.strip()):
```
```diff
@@ -100,14 +100,14 @@ class steppable_browser_interface():
         self.page.evaluate(value)
 
     def action_click_element(self, selector, value):
-        print("Clicking element")
+        logger.info("Clicking element")
         if not len(selector.strip()):
             return
         self.page.click(selector, timeout=10 * 1000, delay=randint(200, 500))
 
     def action_click_element_if_exists(self, selector, value):
         import playwright._impl._api_types as _api_types
-        print("Clicking element if exists")
+        logger.info("Clicking element if exists")
         if not len(selector.strip()):
             return
         try:
```
```diff
@@ -207,13 +207,13 @@ class browsersteps_live_ui(steppable_browser_interface):
             self.mark_as_closed,
         )
         # Listen for all console events and handle errors
-        self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
+        self.page.on("console", lambda msg: logger.info(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
 
-        print("Time to browser setup", time.time() - now)
+        logger.info("Time to browser setup {}", time.time() - now)
         self.page.wait_for_timeout(1 * 1000)
 
     def mark_as_closed(self):
-        print("Page closed, cleaning up..")
+        logger.info("Page closed, cleaning up..")
 
     @property
     def has_expired(self):
```
```diff
@@ -239,7 +239,7 @@ class browsersteps_live_ui(steppable_browser_interface):
             xpath_data = self.page.evaluate("async () => {" + xpath_element_js + "}")
             # So the JS will find the smallest one first
             xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
-            print("Time to complete get_current_state of browser", time.time() - now)
+            logger.info("Time to complete get_current_state of browser {}", time.time() - now)
         # except
         # playwright._impl._api_types.Error: Browser closed.
         # @todo show some countdown timer?
```
```diff
@@ -8,6 +8,7 @@ from json.decoder import JSONDecodeError
 import eventlet
 import eventlet.wsgi
 import getopt
+from loguru import logger
 import os
 import signal
 import socket
```
```diff
@@ -24,7 +25,7 @@ def sigterm_handler(_signo, _stack_frame):
     global app
     global datastore
     # app.config.exit.set()
-    print('Shutdown: Got SIGTERM, DB saved to disk')
+    logger.warning('Shutdown: Got SIGTERM, DB saved to disk')
     datastore.sync_to_json()
     # raise SystemExit
```
```diff
@@ -2,7 +2,7 @@ import hashlib
 from abc import abstractmethod
 import chardet
 import json
-import logging
+from loguru import logger
 import os
 import requests
 import sys
```
```diff
@@ -155,7 +155,7 @@ class Fetcher():
 
         for step in valid_steps:
             step_n += 1
-            print(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
+            logger.info(">> Iterating check - browser Step n {} - {}...".format(step_n, step['operation']))
             self.screenshot_step("before-"+str(step_n))
             self.save_step_html("before-"+str(step_n))
             try:
```
```diff
@@ -510,7 +510,7 @@ class base_html_webdriver(Fetcher):
         try:
             self.driver.quit()
         except Exception as e:
-            print("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
+            logger.error("Content Fetcher > Exception in chrome shutdown/quit" + str(e))
 
 
 # "html_requests" is listed as the default fetcher in store.py!
```
```diff
@@ -568,6 +568,7 @@ class html_requests(Fetcher):
         if not r.content or not len(r.content):
             raise EmptyReply(url=url, status_code=r.status_code)
 
         # @todo test this
+        # @todo maybe you really want to test zero-byte return pages?
         if r.status_code != 200 and not ignore_status_codes:
             # maybe check with content works?
```
```diff
@@ -1,6 +1,6 @@
 import hashlib
 import json
-import logging
+from loguru import logger
 import os
 import re
 import urllib3
```
```diff
@@ -105,7 +105,7 @@ class perform_site_check():
         proxy_url = None
         if proxy_id:
             proxy_url = self.datastore.proxy_list.get(proxy_id).get('url')
-            print("UUID {} Using proxy {}".format(uuid, proxy_url))
+            logger.info("UUID {} Using proxy {}".format(uuid, proxy_url))
 
         fetcher = klass(proxy_override=proxy_url)
```
```diff
@@ -135,7 +135,7 @@ class perform_site_check():
 
         # Track the content type
         update_obj['content_type'] = fetcher.headers.get('Content-Type', '')
 
+        logger.info("UUID: {} - Fetch complete {:,} bytes".format(watch.get('uuid'), len(fetcher.content)))
         # Watches added automatically in the queue manager will skip if its the same checksum as the previous run
         # Saves a lot of CPU
         update_obj['previous_md5_before_filters'] = hashlib.md5(fetcher.content.encode('utf-8')).hexdigest()
```
```diff
@@ -349,6 +349,7 @@ class perform_site_check():
 
         # The main thing that all this at the moment comes down to :)
         if watch.get('previous_md5') != fetched_md5:
+            logger.debug("UUID: {} - Change detected - Prev MD5: {} - Fetched MD5: {}, applying filters...".format(uuid, watch.get('previous_md5'), fetched_md5))
             changed_detected = True
 
             # Looks like something changed, but did it match all the rules?
```
```diff
@@ -366,10 +367,10 @@ class perform_site_check():
             has_unique_lines = watch.lines_contain_something_unique_compared_to_history(lines=stripped_text_from_html.splitlines())
             # One or more lines? unsure?
             if not has_unique_lines:
-                logging.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
+                logger.debug("check_unique_lines: UUID {} didnt have anything new setting change_detected=False".format(uuid))
                 changed_detected = False
             else:
-                logging.debug("check_unique_lines: UUID {} had unique content".format(uuid))
+                logger.debug("check_unique_lines: UUID {} had unique content".format(uuid))
 
         # Always record the new checksum
         update_obj["previous_md5"] = fetched_md5
```
```diff
@@ -378,4 +379,9 @@ class perform_site_check():
         if not watch.get('previous_md5'):
             watch['previous_md5'] = fetched_md5
 
+        if changed_detected:
+            logger.success("UUID: {} Change detected after all filters applied.", uuid)
+        else:
+            logger.info("UUID: {} NO Change detected after all filters applied.", uuid)
+
         return changed_detected, update_obj, text_content_before_ignored_filter
```
```diff
@@ -1,5 +1,5 @@
 from distutils.util import strtobool
-import logging
+from loguru import logger
 import os
 import re
 import time
```
```diff
@@ -109,7 +109,7 @@ class model(dict):
 
     def ensure_data_dir_exists(self):
         if not os.path.isdir(self.watch_data_dir):
-            print ("> Creating data dir {}".format(self.watch_data_dir))
+            logger.debug("> Creating data dir {}".format(self.watch_data_dir))
             os.mkdir(self.watch_data_dir)
 
     @property
```
```diff
@@ -153,7 +153,9 @@ class model(dict):
     @property
     def is_pdf(self):
         # content_type field is set in the future
-        return '.pdf' in self.get('url', '').lower() or 'pdf' in self.get('content_type', '').lower()
+        # https://github.com/dgtlmoon/changedetection.io/issues/1392
+        # Not sure the best logic here
+        return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()
 
     @property
     def label(self):
```
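
The old substring test fires on any URL that merely contains `.pdf`; `endswith()` restricts the match to the trailing extension, with the `content_type` check still catching PDFs served from extension-less URLs. An illustrative comparison (the URL is invented):

```python
url = "https://example.com/view?fallback=.pdf-viewer"  # hypothetical, not a PDF

'.pdf' in url.lower()         # True  - old check, false positive
url.lower().endswith('.pdf')  # False - new check
```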
```diff
@@ -192,7 +194,7 @@ class model(dict):
         # Read the history file as a dict
         fname = os.path.join(self.watch_data_dir, "history.txt")
         if os.path.isfile(fname):
-            logging.debug("Reading history index " + str(time.time()))
+            logger.debug("UUID: {} Reading history index".format(self.get('uuid')))
             with open(fname, "r") as f:
                 for i in f.readlines():
                     if ',' in i:
```
```diff
@@ -101,7 +101,7 @@ def process_notification(n_object, datastore):
         apobj = apprise.Apprise(debug=True, asset=asset)
         url = url.strip()
         if len(url):
-            print(">> Process Notification: AppRise notifying {}".format(url))
+            logger.info(">> Process Notification: AppRise notifying {}".format(url))
             with apprise.LogCapture(level=apprise.logging.DEBUG) as logs:
                 # Re 323 - Limit discord length to their 2000 char limit total or it wont send.
                 # Because different notifications may require different pre-processing, run each sequentially :(
```
```diff
@@ -7,7 +7,7 @@ from copy import deepcopy
 from os import path, unlink
 from threading import Lock
 import json
-import logging
+from loguru import logger
 import os
 import re
 import requests
```
```diff
@@ -75,12 +75,12 @@ class ChangeDetectionStore:
                 for uuid, watch in self.__data['watching'].items():
                     watch['uuid']=uuid
                     self.__data['watching'][uuid] = Watch.model(datastore_path=self.datastore_path, default=watch)
-                    print("Watching:", uuid, self.__data['watching'][uuid]['url'])
+                    logger.info("Watching: {} - {}", uuid, self.__data['watching'][uuid]['url'])
 
         # First time ran, Create the datastore.
         except (FileNotFoundError):
             if include_default_watches:
-                print("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
+                logger.info("No JSON DB found at {}, creating JSON store at {}".format(self.json_store_path, self.datastore_path))
                 self.add_watch(url='https://news.ycombinator.com/',
                                tag='Tech news',
                                extras={'fetch_backend': 'html_requests'})
```
```diff
@@ -192,27 +192,24 @@ class ChangeDetectionStore:
         tags.sort()
         return tags
 
-    def unlink_history_file(self, path):
-        try:
-            unlink(path)
-        except (FileNotFoundError, IOError):
-            pass
-
     # Delete a single watch by UUID
     def delete(self, uuid):
+        import pathlib
+        import shutil
+
         with self.lock:
             if uuid == 'all':
                 self.__data['watching'] = {}
 
                 # GitHub #30 also delete history records
                 for uuid in self.data['watching']:
-                    for path in self.data['watching'][uuid].history.values():
-                        self.unlink_history_file(path)
+                    path = pathlib.Path(os.path.join(self.datastore_path, uuid))
+                    shutil.rmtree(path)
+                    self.needs_write_urgent = True
 
             else:
-                for path in self.data['watching'][uuid].history.values():
-                    self.unlink_history_file(path)
-
+                path = pathlib.Path(os.path.join(self.datastore_path, uuid))
+                shutil.rmtree(path)
                 del self.data['watching'][uuid]
 
         self.needs_write_urgent = True
```
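
Because `shutil.rmtree()` deletes the watch's whole data directory, including every history snapshot inside it, the per-file `unlink_history_file()` helper removed above is redundant. A minimal sketch of the new cleanup path (the paths are illustrative):

```python
import os
import pathlib
import shutil

datastore_path = "/datastore"     # illustrative datastore location
uuid = "0f43213d-example"         # hypothetical watch UUID

# One call removes the history files and the directory itself, replacing
# the loop that unlink()ed each snapshot individually.
shutil.rmtree(pathlib.Path(os.path.join(datastore_path, uuid)))
```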
```diff
@@ -306,7 +303,7 @@ class ChangeDetectionStore:
             apply_extras['include_filters'] = [res['css_filter']]
 
         except Exception as e:
-            logging.error("Error fetching metadata for shared watch link", url, str(e))
+            logger.error("Error fetching metadata for shared watch link {} {}", url, str(e))
             flash("Error fetching metadata for {}".format(url), 'error')
             return False
         from .model.Watch import is_safe_url
```
```diff
@@ -391,14 +388,13 @@ class ChangeDetectionStore:
 
 
     def sync_to_json(self):
-        logging.info("Saving JSON..")
-        print("Saving JSON..")
+        logger.debug("Saving JSON DB")
         try:
             data = deepcopy(self.__data)
         except RuntimeError as e:
             # Try again in 15 seconds
             time.sleep(15)
-            logging.error ("! Data changed when writing to JSON, trying again.. %s", str(e))
+            logger.error("! Data changed when writing to JSON, trying again.. {}", str(e))
             self.sync_to_json()
             return
         else:
```
```diff
@@ -411,7 +407,7 @@ class ChangeDetectionStore:
                 json.dump(data, json_file, indent=4)
                 os.replace(self.json_store_path+".tmp", self.json_store_path)
             except Exception as e:
-                logging.error("Error writing JSON!! (Main JSON file save was skipped) : %s", str(e))
+                logger.error("Error writing JSON!! (Main JSON file save was skipped) : {}", str(e))
 
             self.needs_write = False
             self.needs_write_urgent = False
```
```diff
@@ -422,7 +418,7 @@ class ChangeDetectionStore:
 
         while True:
             if self.stop_thread:
-                print("Shutting down datastore thread")
+                logger.info("Shutting down datastore thread")
                 return
 
             if self.needs_write or self.needs_write_urgent:
```
```diff
@@ -536,8 +532,8 @@ class ChangeDetectionStore:
             try:
                 update_method = getattr(self, "update_{}".format(update_n))()
             except Exception as e:
-                print("Error while trying update_{}".format((update_n)))
-                print(e)
+                logger.error("Error while trying update_{}".format((update_n)))
+                logger.error(str(e))
                 # Don't run any more updates
                 return
             else:
```
```diff
@@ -575,7 +571,7 @@ class ChangeDetectionStore:
             with open(os.path.join(target_path, "history.txt"), "w") as f:
                 f.writelines(history)
         else:
-            logging.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
+            logger.warning("Datastore history directory {} does not exist, skipping history import.".format(target_path))
 
         # No longer needed, dynamically pulled from the disk when needed.
         # But we should set it back to a empty dict so we don't break if this schema runs on an earlier version.
```
```diff
@@ -15,78 +15,10 @@ def test_inscriptus():
     stripped_text_from_html = get_text(html_content)
     assert stripped_text_from_html == 'test!\nok man'
 
+def test_setup(client, live_server):
+    live_server_setup(live_server)
+
-# Assert that non-200's dont give notifications or register as a change
-def test_non_200_doesnt_trigger_change(client, live_server):
-    # live_server_setup(live_server)
-
-    set_original_response()
-    url = url_for('test_changing_status_code_endpoint', _external=True)
-
-    # Add our URL to the import page
-    res = client.post(
-        url_for("import_page"),
-        data={"urls": url},
-        follow_redirects=True
-    )
-
-    assert b"1 Imported" in res.data
-
-    time.sleep(sleep_time_for_fetch_thread)
-
-    res = client.post(
-        url_for("edit_page", uuid="first"),
-        data={
-            "include_filters": ".foobar-detection",
-            "fetch_backend": "html_requests",
-            "headers": "",
-            "tag": "",
-            "url": url
-        },
-        follow_redirects=True
-    )
-
-    # A recheck will happen here automatically
-    time.sleep(sleep_time_for_fetch_thread)
-
-    # hit the mark all viewed link
-    res = client.get(url_for("mark_all_viewed"), follow_redirects=True)
-
-    # Now be sure the filter is missing and then recheck it
-    set_modified_response()
-
-    # https://github.com/dgtlmoon/changedetection.io/issues/962#issuecomment-1416807742
-    for ecode in ['429', '400', '204', '429', '403', '404', '500']:
-        with open("test-endpoint-status-code.txt", 'w') as f:
-            f.write(ecode)
-
-        res = client.get(url_for("form_watch_checknow"), follow_redirects=True)
-        assert b'1 watches queued for rechecking.' in res.data
-        time.sleep(sleep_time_for_fetch_thread)
-
-        # No change should be seen/no trigger of change
-        res = client.get(url_for("index"))
-        assert b'unviewed' not in res.data
-
-        # load preview page so we can see what was returned
-        res = client.get(url_for("preview_page", uuid="first"))
-        # with open('/tmp/debug-'+ecode+'.html', 'wb') as f:
-        #     f.write(res.data)
-
-        # Should still say the original 200, because "ignore_status_codes" should be off by default
-        # commented out - this will fail because we also show what the error was
-        # assert b'code: '+ecode.encode('utf-8') not in res.data
-
-        assert b'code: 200' in res.data
-
-    # Cleanup everything
-    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
-    assert b'Deleted' in res.data
-
 def test_check_basic_change_detection_functionality(client, live_server):
     set_original_response()
     live_server_setup(live_server)
 
     # Add our URL to the import page
     res = client.post(
```
```diff
@@ -69,8 +69,6 @@ def test_filter_doesnt_exist_then_exists_should_get_notification(client, live_se
     url = url_for('test_notification_endpoint', _external=True)
     notification_url = url.replace('http', 'json')
 
-    print(">>>> Notification URL: " + notification_url)
-
     # Just a regular notification setting, this will be used by the special 'filter not found' notification
     notification_form_data = {"notification_urls": notification_url,
                               "notification_title": "New ChangeDetection.io Notification - {{watch_url}}",
```
```diff
@@ -1,5 +1,4 @@
 #!/usr/bin/python3
-import os.path
 
 from flask import make_response, request
 from flask import url_for
```
```diff
@@ -113,25 +112,6 @@ def live_server_setup(live_server):
         import secrets
         return "Random content - {}\n".format(secrets.token_hex(64))
 
-    @live_server.app.route('/test-changing-status-code-endpoint')
-    def test_changing_status_code_endpoint():
-        # status_code can also be overriden in a file, used for doing things that it wouldnt normally expect
-        # (test_non_200_doesnt_trigger_change)
-        status_code = '200'
-        if os.path.isfile("test-endpoint-status-code.txt"):
-            with open("test-endpoint-status-code.txt", 'r') as f:
-                status_code = f.read().strip()
-                os.unlink("test-endpoint-status-code.txt")
-
-        # Contents includes the status code, which will change and should not trigger a change
-        # (Non-200 should get ignored)
-        with open("test-datastore/endpoint-content.txt", "r") as f:
-            contents ="{} code: {} ".format(f.read(), status_code)
-        if status_code == '204':
-            contents=''
-        resp = make_response(contents, status_code)
-        resp.headers['Content-Type'] = 'text/html'
-        return resp, status_code
 
     @live_server.app.route('/test-endpoint')
     def test_endpoint():
```
```diff
@@ -2,6 +2,7 @@ import os
 import threading
 import queue
 import time
+from loguru import logger
 
 from changedetectionio import content_fetcher
 from changedetectionio import queuedWatchMetaData
```
```diff
@@ -12,14 +13,12 @@ from changedetectionio.fetch_site_status import FilterNotFoundInResponse
 # Requests for checking on a single site(watch) from a queue of watches
 # (another process inserts watches into the queue that are time-ready for checking)
 
-import logging
-import sys
-
 class update_worker(threading.Thread):
     current_uuid = None
 
     def __init__(self, q, notification_q, app, datastore, *args, **kwargs):
-        logging.basicConfig(stream=sys.stderr, level=logging.DEBUG)
         self.q = q
         self.app = app
         self.notification_q = notification_q
```
```diff
@@ -80,10 +79,10 @@ class update_worker(threading.Thread):
                     'diff': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], line_feed_sep=line_feed_sep),
                     'diff_full': diff.render_diff(watch_history[dates[-2]], watch_history[dates[-1]], True, line_feed_sep=line_feed_sep)
                 })
-                logging.info (">> SENDING NOTIFICATION")
+                logger.info("UUID: {} - Sending notification".format(watch_uuid))
                 self.notification_q.put(n_object)
             else:
-                logging.info (">> NO Notification sent, notification_url was empty in both watch and system")
+                logger.info("UUID: {} - NO Notification sent, notification_url was empty in both watch and system".format(watch_uuid))
 
     def send_filter_failure_notification(self, watch_uuid):
```
```diff
@@ -112,7 +111,7 @@ class update_worker(threading.Thread):
             'screenshot': None
         })
         self.notification_q.put(n_object)
-        print("Sent filter not found notification for {}".format(watch_uuid))
+        logger.info("Sent filter not found notification for {}".format(watch_uuid))
 
     def send_step_failure_notification(self, watch_uuid, step_n):
         watch = self.datastore.data['watching'].get(watch_uuid, False)
```
```diff
@@ -139,7 +138,7 @@ class update_worker(threading.Thread):
             'uuid': watch_uuid
         })
         self.notification_q.put(n_object)
-        print("Sent step not found notification for {}".format(watch_uuid))
+        logger.error("Sent step not found notification for {}".format(watch_uuid))
 
 
     def cleanup_error_artifacts(self, uuid):
```
```diff
@@ -173,7 +172,7 @@ class update_worker(threading.Thread):
             update_obj= {}
             xpath_data = False
             process_changedetection_results = True
-            print("> Processing UUID {} Priority {} URL {}".format(uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url']))
+            logger.info("UUID: {} Start processing, Priority {} URL {}", uuid, queued_item_data.priority, self.datastore.data['watching'][uuid]['url'])
             now = time.time()
 
             try:
```
```diff
@@ -184,7 +183,7 @@ class update_worker(threading.Thread):
                 if not isinstance(contents, (bytes, bytearray)):
                     raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
             except PermissionError as e:
-                self.app.logger.error("File permission error updating", uuid, str(e))
+                logger.error("UUID: {} File permission error updating - {}", uuid, str(e))
                 process_changedetection_results = False
             except content_fetcher.ReplyWithContentButNoText as e:
                 # Totally fine, it's by choice - just continue on, nothing more to care about
```
```diff
@@ -233,7 +232,7 @@ class update_worker(threading.Thread):
                 # Send notification if we reached the threshold?
                 threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
                                                                                0)
-                print("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
+                logger.error("Filter for {} not found, consecutive_filter_failures: {}".format(uuid, c))
                 if threshold > 0 and c >= threshold:
                     if not self.datastore.data['watching'][uuid].get('notification_muted'):
                         self.send_filter_failure_notification(uuid)
```
```diff
@@ -264,7 +263,7 @@ class update_worker(threading.Thread):
                 # Send notification if we reached the threshold?
                 threshold = self.datastore.data['settings']['application'].get('filter_failure_notification_threshold_attempts',
                                                                                0)
-                print("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
+                logger.error("Step for {} not found, consecutive_filter_failures: {}".format(uuid, c))
                 if threshold > 0 and c >= threshold:
                     if not self.datastore.data['watching'][uuid].get('notification_muted'):
                         self.send_step_failure_notification(watch_uuid=uuid, step_n=e.step_n)
```
```diff
@@ -330,8 +329,6 @@ class update_worker(threading.Thread):
 
                     # A change was detected
                     if changed_detected:
-                        print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))
-
                         # Notifications should only trigger on the second time (first time, we gather the initial snapshot)
                         if watch.history_n >= 2:
                             if not self.datastore.data['watching'][uuid].get('notification_muted'):
```
```diff
@@ -340,7 +337,7 @@ class update_worker(threading.Thread):
 
             except Exception as e:
                 # Catch everything possible here, so that if a worker crashes, we don't lose it until restart!
-                print("!!!! Exception in update_worker !!!\n", e)
+                logger.error("!!!! Exception in update_worker !!!\n{}", e)
                 self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
                 self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
```
```diff
@@ -41,7 +41,6 @@ services:
       #
      # Base URL of your changedetection.io install (Added to the notification alert)
      # - BASE_URL=https://mysite.com
-
      # Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
      # More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
      # - USE_X_SETTINGS=1
```
```diff
@@ -95,7 +94,10 @@ services:
      # - CHROME_REFRESH_TIME=600000
      # - DEFAULT_BLOCK_ADS=true
      # - DEFAULT_STEALTH=true
 
+     #
+     # Ignore HTTPS errors, like for self-signed certs
+     # - DEFAULT_IGNORE_HTTPS_ERRORS=true
      #
 
 volumes:
   changedetection-data:
```
```diff
@@ -10,6 +10,7 @@ inscriptis~=2.2
 pytz
 timeago~=1.0
 validators
+loguru
 
 # Set these versions together to avoid a RequestsDependencyWarning
 # >= 2.26 also adds Brotli support if brotli is installed
```
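
With `loguru` added to requirements.txt, no handler wiring is required; it logs to stderr out of the box. If a deployment wants a different minimum level, a sketch (this configuration is an assumption, not something the diff sets up):

```python
import sys
from loguru import logger

logger.remove()                        # drop the default stderr handler
logger.add(sys.stderr, level="INFO")   # re-add with an explicit minimum level
```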