Compare commits

...

10 Commits

6 changed files with 46 additions and 81 deletions

View File

@@ -505,41 +505,6 @@ def changedetection_app(config=None, datastore_o=None):
output = render_template("clear_all_history.html") output = render_template("clear_all_history.html")
return output return output
# If they edited an existing watch, we need to know to reset the current/previous md5 to include
# the excluded text.
def get_current_checksum_include_ignore_text(uuid):
import hashlib
from changedetectionio import fetch_site_status
# Get the most recent one
newest_history_key = datastore.data['watching'][uuid].get('newest_history_key')
# 0 means that theres only one, so that there should be no 'unviewed' history available
if newest_history_key == 0:
newest_history_key = list(datastore.data['watching'][uuid].history.keys())[0]
if newest_history_key:
with open(datastore.data['watching'][uuid].history[newest_history_key],
encoding='utf-8') as file:
raw_content = file.read()
handler = fetch_site_status.perform_site_check(datastore=datastore)
stripped_content = html_tools.strip_ignore_text(raw_content,
datastore.data['watching'][uuid]['ignore_text'])
if datastore.data['settings']['application'].get('ignore_whitespace', False):
checksum = hashlib.md5(stripped_content.translate(None, b'\r\n\t ')).hexdigest()
else:
checksum = hashlib.md5(stripped_content).hexdigest()
return checksum
return datastore.data['watching'][uuid]['previous_md5']
@app.route("/edit/<string:uuid>", methods=['GET', 'POST']) @app.route("/edit/<string:uuid>", methods=['GET', 'POST'])
@login_optionally_required @login_optionally_required
# https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists # https://stackoverflow.com/questions/42984453/wtforms-populate-form-with-data-if-data-exists
@@ -943,8 +908,9 @@ def changedetection_app(config=None, datastore_o=None):
extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')] extra_stylesheets = [url_for('static_content', group='styles', filename='diff.css')]
is_html_webdriver = True if watch.get('fetch_backend') == 'html_webdriver' or ( is_html_webdriver = False
watch.get('fetch_backend', None) is None and system_uses_webdriver) else False if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver':
is_html_webdriver = True
# Never requested successfully, but we detected a fetch error # Never requested successfully, but we detected a fetch error
if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()): if datastore.data['watching'][uuid].history_n == 0 and (watch.get_error_text() or watch.get_error_snapshot()):
@@ -1035,7 +1001,8 @@ def changedetection_app(config=None, datastore_o=None):
os.unlink(previous_backup_filename) os.unlink(previous_backup_filename)
# create a ZipFile object # create a ZipFile object
backupname = "changedetection-backup-{}.zip".format(int(time.time())) timestamp = datetime.datetime.now().strftime("%Y%m%d%H%M%S")
backupname = "changedetection-backup-{}.zip".format(timestamp)
backup_filepath = os.path.join(datastore_o.datastore_path, backupname) backup_filepath = os.path.join(datastore_o.datastore_path, backupname)
with zipfile.ZipFile(backup_filepath, "w", with zipfile.ZipFile(backup_filepath, "w",

View File

@@ -153,7 +153,9 @@ class model(dict):
@property @property
def is_pdf(self): def is_pdf(self):
# content_type field is set in the future # content_type field is set in the future
return '.pdf' in self.get('url', '').lower() or 'pdf' in self.get('content_type', '').lower() # https://github.com/dgtlmoon/changedetection.io/issues/1392
# Not sure the best logic here
return self.get('url', '').lower().endswith('.pdf') or 'pdf' in self.get('content_type', '').lower()
@property @property
def label(self): def label(self):
@@ -239,7 +241,7 @@ class model(dict):
# Save some text file to the appropriate path and bump the history # Save some text file to the appropriate path and bump the history
# result_obj from fetch_site_status.run() # result_obj from fetch_site_status.run()
def save_history_text(self, contents, timestamp): def save_history_text(self, contents, timestamp, snapshot_id):
self.ensure_data_dir_exists() self.ensure_data_dir_exists()
@@ -248,11 +250,14 @@ class model(dict):
if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key): if self.__newest_history_key and int(timestamp) == int(self.__newest_history_key):
time.sleep(timestamp - self.__newest_history_key) time.sleep(timestamp - self.__newest_history_key)
snapshot_fname = "{}.txt".format(str(uuid.uuid4())) snapshot_fname = f"{snapshot_id}.txt"
# Only write if it does not exist, this is so that we dont bother re-saving the same data by checksum under different filenames.
dest = os.path.join(self.watch_data_dir, snapshot_fname)
if not os.path.exists(dest):
# in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading # in /diff/ and /preview/ we are going to assume for now that it's UTF-8 when reading
# most sites are utf-8 and some are even broken utf-8 # most sites are utf-8 and some are even broken utf-8
with open(os.path.join(self.watch_data_dir, snapshot_fname), 'wb') as f: with open(dest, 'wb') as f:
f.write(contents) f.write(contents)
f.close() f.close()

View File

@@ -192,27 +192,24 @@ class ChangeDetectionStore:
tags.sort() tags.sort()
return tags return tags
def unlink_history_file(self, path):
try:
unlink(path)
except (FileNotFoundError, IOError):
pass
# Delete a single watch by UUID # Delete a single watch by UUID
def delete(self, uuid): def delete(self, uuid):
import pathlib
import shutil
with self.lock: with self.lock:
if uuid == 'all': if uuid == 'all':
self.__data['watching'] = {} self.__data['watching'] = {}
# GitHub #30 also delete history records # GitHub #30 also delete history records
for uuid in self.data['watching']: for uuid in self.data['watching']:
for path in self.data['watching'][uuid].history.values(): path = pathlib.Path(os.path.join(self.datastore_path, uuid))
self.unlink_history_file(path) shutil.rmtree(path)
self.needs_write_urgent = True
else: else:
for path in self.data['watching'][uuid].history.values(): path = pathlib.Path(os.path.join(self.datastore_path, uuid))
self.unlink_history_file(path) shutil.rmtree(path)
del self.data['watching'][uuid] del self.data['watching'][uuid]
self.needs_write_urgent = True self.needs_write_urgent = True

View File

@@ -57,9 +57,9 @@
<th></th> <th></th>
{% set link_order = "desc" if sort_order else "asc" %} {% set link_order = "desc" if sort_order else "asc" %}
{% set arrow_span = "" %} {% set arrow_span = "" %}
<th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order)}}">Website <span class='arrow {{link_order}}'></span></a></th> <th><a class="{{ 'active '+link_order if sort_attribute == 'label' else 'inactive' }}" href="{{url_for('index', sort='label', order=link_order, tag=active_tag)}}">Website <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th> <th><a class="{{ 'active '+link_order if sort_attribute == 'last_checked' else 'inactive' }}" href="{{url_for('index', sort='last_checked', order=link_order, tag=active_tag)}}">Last Checked <span class='arrow {{link_order}}'></span></a></th>
<th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th> <th><a class="{{ 'active '+link_order if sort_attribute == 'last_changed' else 'inactive' }}" href="{{url_for('index', sort='last_changed', order=link_order, tag=active_tag)}}">Last Changed <span class='arrow {{link_order}}'></span></a></th>
<th></th> <th></th>
</tr> </tr>
</thead> </thead>

View File

@@ -212,9 +212,7 @@ class update_worker(threading.Thread):
if e.page_text: if e.page_text:
self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text) self.datastore.save_error_text(watch_uuid=uuid, contents=e.page_text)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
# So that we get a trigger when the content is added again
'previous_md5': ''})
process_changedetection_results = False process_changedetection_results = False
except FilterNotFoundInResponse as e: except FilterNotFoundInResponse as e:
@@ -222,9 +220,7 @@ class update_worker(threading.Thread):
continue continue
err_text = "Warning, no filters were found, no change detection ran." err_text = "Warning, no filters were found, no change detection ran."
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
# So that we get a trigger when the content is added again
'previous_md5': ''})
# Only when enabled, send the notification # Only when enabled, send the notification
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False): if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
@@ -244,8 +240,9 @@ class update_worker(threading.Thread):
process_changedetection_results = True process_changedetection_results = True
except content_fetcher.checksumFromPreviousCheckWasTheSame as e: except content_fetcher.checksumFromPreviousCheckWasTheSame as e:
# Yes fine, so nothing todo # Yes fine, so nothing todo, don't continue to process.
pass process_changedetection_results = False
changed_detected = False
except content_fetcher.BrowserStepsStepTimout as e: except content_fetcher.BrowserStepsStepTimout as e:
@@ -253,9 +250,7 @@ class update_worker(threading.Thread):
continue continue
err_text = "Warning, browser step at position {} could not run, target not found, check the watch, add a delay if necessary.".format(e.step_n+1) err_text = "Warning, browser step at position {} could not run, target not found, check the watch, add a delay if necessary.".format(e.step_n+1)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text, self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
# So that we get a trigger when the content is added again
'previous_md5': ''})
if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False): if self.datastore.data['watching'][uuid].get('filter_failure_notification_send', False):
@@ -318,16 +313,15 @@ class update_worker(threading.Thread):
# Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc # Different exceptions mean that we may or may not want to bump the snapshot, trigger notifications etc
if process_changedetection_results: if process_changedetection_results:
try: try:
watch = self.datastore.data['watching'][uuid] watch = self.datastore.data['watching'].get(uuid)
fname = "" # Saved history text filename
# For the FIRST time we check a site, or a change detected, save the snapshot.
if changed_detected or not watch['last_checked']:
# A change was detected
watch.save_history_text(contents=contents, timestamp=str(round(time.time())))
self.datastore.update_watch(uuid=uuid, update_obj=update_obj) self.datastore.update_watch(uuid=uuid, update_obj=update_obj)
# Also save the snapshot on the first time checked
if changed_detected or not watch['last_checked']:
watch.save_history_text(contents=contents,
timestamp=str(round(time.time())),
snapshot_id=update_obj.get('previous_md5', 'none'))
# A change was detected # A change was detected
if changed_detected: if changed_detected:
print (">> Change detected in UUID {} - {}".format(uuid, watch['url'])) print (">> Change detected in UUID {} - {}".format(uuid, watch['url']))

View File

@@ -41,7 +41,6 @@ services:
# #
# Base URL of your changedetection.io install (Added to the notification alert) # Base URL of your changedetection.io install (Added to the notification alert)
# - BASE_URL=https://mysite.com # - BASE_URL=https://mysite.com
# Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;` # Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;`
# More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory # More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory
# - USE_X_SETTINGS=1 # - USE_X_SETTINGS=1
@@ -95,7 +94,10 @@ services:
# - CHROME_REFRESH_TIME=600000 # - CHROME_REFRESH_TIME=600000
# - DEFAULT_BLOCK_ADS=true # - DEFAULT_BLOCK_ADS=true
# - DEFAULT_STEALTH=true # - DEFAULT_STEALTH=true
#
# Ignore HTTPS errors, like for self-signed certs
# - DEFAULT_IGNORE_HTTPS_ERRORS=true
#
volumes: volumes:
changedetection-data: changedetection-data: