mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-12-17 05:26:30 +00:00
Refactor of extract regex
This commit is contained in:
@@ -215,15 +215,25 @@ class perform_site_check():
|
||||
if len(extract_text) > 0:
|
||||
regex_matched_output = []
|
||||
for s_re in extract_text:
|
||||
result = re.findall(s_re.encode('utf8'), stripped_text_from_html,
|
||||
flags=re.MULTILINE | re.DOTALL | re.LOCALE)
|
||||
result = re.findall(s_re.encode('utf8'), stripped_text_from_html, flags=re.DOTALL)
|
||||
if result:
|
||||
regex_matched_output = regex_matched_output + result
|
||||
for l in result:
|
||||
if type(l) is tuple:
|
||||
#@todo - some formatter option default (between groups)
|
||||
regex_matched_output += list(l) + [b'\n']
|
||||
else:
|
||||
# @todo - some formatter option default (between each ungrouped result)
|
||||
regex_matched_output += [l] + [b'\n']
|
||||
|
||||
# Now we will only show what the regex matched
|
||||
stripped_text_from_html = b''
|
||||
text_content_before_ignored_filter = b''
|
||||
if regex_matched_output:
|
||||
stripped_text_from_html = b'\n'.join(regex_matched_output)
|
||||
# @todo some formatter for presentation?
|
||||
stripped_text_from_html = b''.join(regex_matched_output)
|
||||
text_content_before_ignored_filter = stripped_text_from_html
|
||||
|
||||
|
||||
# Re #133 - if we should strip whitespaces from triggering the change detected comparison
|
||||
if self.datastore.data['settings']['application'].get('ignore_whitespace', False):
|
||||
fetched_md5 = hashlib.md5(stripped_text_from_html.translate(None, b'\r\n\t ')).hexdigest()
|
||||
|
||||
@@ -64,6 +64,7 @@ def test_check_filter_and_regex_extract(client, live_server):
|
||||
)
|
||||
assert b"1 Imported" in res.data
|
||||
|
||||
time.sleep(1)
|
||||
# Trigger a check
|
||||
client.get(url_for("form_watch_checknow"), follow_redirects=True)
|
||||
|
||||
@@ -86,6 +87,8 @@ def test_check_filter_and_regex_extract(client, live_server):
|
||||
|
||||
assert b"Updated watch." in res.data
|
||||
|
||||
time.sleep(2)
|
||||
|
||||
# Check it saved
|
||||
res = client.get(
|
||||
url_for("edit_page", uuid="first"),
|
||||
|
||||
@@ -113,7 +113,6 @@ class update_worker(threading.Thread):
|
||||
err_text = "Page request from server didnt respond correctly"
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
|
||||
'last_check_status': e.status_code})
|
||||
|
||||
except Exception as e:
|
||||
self.app.logger.error("Exception reached processing watch UUID: %s - %s", uuid, str(e))
|
||||
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': str(e)})
|
||||
|
||||
Reference in New Issue
Block a user