mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-11-06 17:46:06 +00:00
Compare commits
4 Commits
html-fixes
...
skip-chang
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
ac1b5ee6f7 | ||
|
|
aca6c753bf | ||
|
|
efae86c134 | ||
|
|
0f72471343 |
@@ -3,17 +3,22 @@ import chardet
|
|||||||
import os
|
import os
|
||||||
import requests
|
import requests
|
||||||
import time
|
import time
|
||||||
import urllib3.exceptions
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
|
|
||||||
class EmptyReply(Exception):
|
class EmptyReply(Exception):
|
||||||
def __init__(self, status_code, url):
|
def __init__(self, status_code, url):
|
||||||
# Set this so we can use it in other parts of the app
|
# Set this so we can use it in other parts of the app
|
||||||
self.status_code = status_code
|
self.status_code = status_code
|
||||||
self.url = url
|
self.url = url
|
||||||
return
|
return
|
||||||
|
pass
|
||||||
|
|
||||||
|
class ReplyWithContentButNoText(Exception):
|
||||||
|
def __init__(self, status_code, url):
|
||||||
|
# Set this so we can use it in other parts of the app
|
||||||
|
self.status_code = status_code
|
||||||
|
self.url = url
|
||||||
|
return
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -184,6 +184,11 @@ class perform_site_check():
|
|||||||
# Re #340 - return the content before the 'ignore text' was applied
|
# Re #340 - return the content before the 'ignore text' was applied
|
||||||
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
|
||||||
|
|
||||||
|
# Treat pages with no renderable text content as a change? No by default
|
||||||
|
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
|
||||||
|
if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
|
||||||
|
raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=200)
|
||||||
|
|
||||||
# We rely on the actual text in the html output.. many sites have random script vars etc,
|
# We rely on the actual text in the html output.. many sites have random script vars etc,
|
||||||
# in the future we'll implement other mechanisms.
|
# in the future we'll implement other mechanisms.
|
||||||
|
|
||||||
|
|||||||
@@ -371,6 +371,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
|
|||||||
ignore_whitespace = BooleanField('Ignore whitespace')
|
ignore_whitespace = BooleanField('Ignore whitespace')
|
||||||
real_browser_save_screenshot = BooleanField('Save last screenshot when using Chrome?')
|
real_browser_save_screenshot = BooleanField('Save last screenshot when using Chrome?')
|
||||||
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
|
||||||
|
empty_pages_are_a_change = BooleanField('Treat empty pages as a change?', default=False)
|
||||||
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
|
||||||
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
|
||||||
password = SaltyPasswordField()
|
password = SaltyPasswordField()
|
||||||
|
|||||||
@@ -30,6 +30,7 @@ class model(dict):
|
|||||||
'password': False,
|
'password': False,
|
||||||
'base_url' : None,
|
'base_url' : None,
|
||||||
'extract_title_as_title': False,
|
'extract_title_as_title': False,
|
||||||
|
'empty_pages_are_a_change': False,
|
||||||
'fetch_backend': os.getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
'fetch_backend': os.getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
|
||||||
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
|
||||||
'global_subtractive_selectors': [],
|
'global_subtractive_selectors': [],
|
||||||
|
|||||||
@@ -61,6 +61,11 @@
|
|||||||
{{ render_checkbox_field(form.application.form.real_browser_save_screenshot) }}
|
{{ render_checkbox_field(form.application.form.real_browser_save_screenshot) }}
|
||||||
<span class="pure-form-message-inline">When using a Chrome browser, a screenshot from the last check will be available on the Diff page</span>
|
<span class="pure-form-message-inline">When using a Chrome browser, a screenshot from the last check will be available on the Diff page</span>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
|
<div class="pure-control-group">
|
||||||
|
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
|
||||||
|
<span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>
|
||||||
|
</div>
|
||||||
{% if form.requests.proxy %}
|
{% if form.requests.proxy %}
|
||||||
<div class="pure-control-group inline-radio">
|
<div class="pure-control-group inline-radio">
|
||||||
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
{{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
|
||||||
|
|||||||
@@ -13,7 +13,7 @@
|
|||||||
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
|
{{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
|
||||||
<button type="submit" class="pure-button pure-button-primary">Watch</button>
|
<button type="submit" class="pure-button pure-button-primary">Watch</button>
|
||||||
</fieldset>
|
</fieldset>
|
||||||
<span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /> Tip: You can also add 'shared' watches. <a href="#">More info</a></a></span>
|
<span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
|
||||||
</form>
|
</form>
|
||||||
<div>
|
<div>
|
||||||
<a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
|
<a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
|
||||||
|
|||||||
102
changedetectionio/tests/test_nonrenderable_pages.py
Normal file
102
changedetectionio/tests/test_nonrenderable_pages.py
Normal file
@@ -0,0 +1,102 @@
|
|||||||
|
#!/usr/bin/python3
|
||||||
|
|
||||||
|
import time
|
||||||
|
from flask import url_for
|
||||||
|
from urllib.request import urlopen
|
||||||
|
from .util import set_original_response, set_modified_response, live_server_setup
|
||||||
|
|
||||||
|
sleep_time_for_fetch_thread = 3
|
||||||
|
|
||||||
|
|
||||||
|
def set_nonrenderable_response():
|
||||||
|
test_return_data = """<html>
|
||||||
|
<head><title>modified head title</title></head>
|
||||||
|
<!-- like when some angular app was broken and doesnt render or whatever -->
|
||||||
|
<body>
|
||||||
|
</body>
|
||||||
|
</html>
|
||||||
|
"""
|
||||||
|
|
||||||
|
with open("test-datastore/endpoint-content.txt", "w") as f:
|
||||||
|
f.write(test_return_data)
|
||||||
|
|
||||||
|
return None
|
||||||
|
|
||||||
|
def test_check_basic_change_detection_functionality(client, live_server):
|
||||||
|
set_original_response()
|
||||||
|
live_server_setup(live_server)
|
||||||
|
|
||||||
|
# Add our URL to the import page
|
||||||
|
res = client.post(
|
||||||
|
url_for("import_page"),
|
||||||
|
data={"urls": url_for('test_endpoint', _external=True)},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
|
||||||
|
assert b"1 Imported" in res.data
|
||||||
|
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# Do this a few times.. ensures we dont accidently set the status
|
||||||
|
for n in range(3):
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# It should report nothing found (no new 'unviewed' class)
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert b'unviewed' not in res.data
|
||||||
|
|
||||||
|
|
||||||
|
#####################
|
||||||
|
client.post(
|
||||||
|
url_for("settings_page"),
|
||||||
|
data={"application-empty_pages_are_a_change": "",
|
||||||
|
"requests-time_between_check-minutes": 180,
|
||||||
|
'application-fetch_backend': "html_requests"},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
|
||||||
|
# this should not trigger a change, because no good text could be converted from the HTML
|
||||||
|
set_nonrenderable_response()
|
||||||
|
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# It should report nothing found (no new 'unviewed' class)
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert b'unviewed' not in res.data
|
||||||
|
|
||||||
|
|
||||||
|
# ok now do the opposite
|
||||||
|
|
||||||
|
client.post(
|
||||||
|
url_for("settings_page"),
|
||||||
|
data={"application-empty_pages_are_a_change": "y",
|
||||||
|
"requests-time_between_check-minutes": 180,
|
||||||
|
'application-fetch_backend': "html_requests"},
|
||||||
|
follow_redirects=True
|
||||||
|
)
|
||||||
|
set_modified_response()
|
||||||
|
|
||||||
|
|
||||||
|
client.get(url_for("api_watch_checknow"), follow_redirects=True)
|
||||||
|
|
||||||
|
# Give the thread time to pick it up
|
||||||
|
time.sleep(sleep_time_for_fetch_thread)
|
||||||
|
|
||||||
|
# It should report nothing found (no new 'unviewed' class)
|
||||||
|
res = client.get(url_for("index"))
|
||||||
|
assert b'unviewed' in res.data
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#
|
||||||
|
# Cleanup everything
|
||||||
|
res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
|
||||||
|
assert b'Deleted' in res.data
|
||||||
|
|
||||||
@@ -52,6 +52,10 @@ class update_worker(threading.Thread):
|
|||||||
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
|
||||||
except PermissionError as e:
|
except PermissionError as e:
|
||||||
self.app.logger.error("File permission error updating", uuid, str(e))
|
self.app.logger.error("File permission error updating", uuid, str(e))
|
||||||
|
except content_fetcher.ReplyWithContentButNoText as e:
|
||||||
|
# Totally fine, it's by choice - just continue on, nothing more to care about
|
||||||
|
# Page had elements/content but no renderable text
|
||||||
|
pass
|
||||||
except content_fetcher.EmptyReply as e:
|
except content_fetcher.EmptyReply as e:
|
||||||
# Some kind of custom to-str handler in the exception handler that does this?
|
# Some kind of custom to-str handler in the exception handler that does this?
|
||||||
err_text = "EmptyReply: Status Code {}".format(e.status_code)
|
err_text = "EmptyReply: Status Code {}".format(e.status_code)
|
||||||
|
|||||||
Reference in New Issue
Block a user