When new ignore text is specified, reprocess the checksum

This commit is contained in:
Leigh Morresi
2021-02-27 20:30:06 +01:00
parent 8bc7b5be40
commit f1da8f96b6
4 changed files with 74 additions and 20 deletions

View File

@@ -12,13 +12,20 @@ class perform_site_check():
self.datastore = datastore
def strip_ignore_text(self, content, list_ignore_text):
ignore = []
for k in list_ignore_text:
ignore.append(k.encode('utf8'))
output=[]
output = []
for line in content.splitlines():
if not any(skip_text in line for skip_text in list_ignore_text):
output.append(line)
line = line.encode('utf8')
return "\n".join(output)
# Always ignore blank lines in this mode. (when this function gets called)
if len(line.strip()):
if not any(skip_text in line for skip_text in ignore):
output.append(line)
return "\n".encode('utf8').join(output)
def run(self, uuid):
timestamp = int(time.time()) # used for storage etc too
@@ -85,16 +92,32 @@ class perform_site_check():
if not len(r.text):
update_obj["last_error"] = "Empty reply"
content = stripped_text_from_html.encode('utf-8')
# If there's text to skip
# @todo we could abstract out the get_text() to handle this cleaner
if len(self.datastore.data['watching'][uuid]['ignore_text']):
content = self.strip_ignore_text(content, self.datastore.data['watching'][uuid]['ignore_text'])
content = self.strip_ignore_text(stripped_text_from_html,
self.datastore.data['watching'][uuid]['ignore_text'])
else:
content = stripped_text_from_html
fetched_md5 = hashlib.md5(content).hexdigest()
# If they edited an existing watch, we need to know to reset the current/previous md5 to include
# the excluded text.
if self.datastore.data['watching'][uuid]['previous_md5'] == "reprocess previous":
# Get the most recent one
newest_history_key = self.datastore.get_newest_history_key(uuid)
if newest_history_key:
with open(self.datastore.data['watching'][uuid]['history'][newest_history_key],
encoding='utf-8') as file:
raw_content = file.read()
stripped_content = self.strip_ignore_text(raw_content,
self.datastore.data['watching'][uuid]['ignore_text'])
checksum = hashlib.md5(stripped_content).hexdigest()
self.datastore.data['watching'][uuid]['previous_md5'] = checksum
# could be None or False depending on JSON type
if self.datastore.data['watching'][uuid]['previous_md5'] != fetched_md5: