mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 14:47:21 +00:00 
			
		
		
		
	Compare commits
	
		
			4 Commits
		
	
	
		
			browserste
			...
			skip-chang
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | ac1b5ee6f7 | ||
|   | aca6c753bf | ||
|   | efae86c134 | ||
|   | 0f72471343 | 
| @@ -3,17 +3,22 @@ import chardet | ||||
| import os | ||||
| import requests | ||||
| import time | ||||
| import urllib3.exceptions | ||||
| import sys | ||||
|  | ||||
|  | ||||
| class EmptyReply(Exception): | ||||
|     def __init__(self, status_code, url): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         return | ||||
|     pass | ||||
|  | ||||
| class ReplyWithContentButNoText(Exception): | ||||
|     def __init__(self, status_code, url): | ||||
|         # Set this so we can use it in other parts of the app | ||||
|         self.status_code = status_code | ||||
|         self.url = url | ||||
|         return | ||||
|     pass | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -184,6 +184,11 @@ class perform_site_check(): | ||||
|         # Re #340 - return the content before the 'ignore text' was applied | ||||
|         text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8') | ||||
|  | ||||
|         # Treat pages with no renderable text content as a change? No by default | ||||
|         empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False) | ||||
|         if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0: | ||||
|             raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=200) | ||||
|  | ||||
|         # We rely on the actual text in the html output.. many sites have random script vars etc, | ||||
|         # in the future we'll implement other mechanisms. | ||||
|  | ||||
|   | ||||
| @@ -371,6 +371,7 @@ class globalSettingsApplicationForm(commonSettingsForm): | ||||
|     ignore_whitespace = BooleanField('Ignore whitespace') | ||||
|     real_browser_save_screenshot = BooleanField('Save last screenshot when using Chrome?') | ||||
|     removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"}) | ||||
|     empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False) | ||||
|     render_anchor_tag_content = BooleanField('Render anchor tag content', default=False) | ||||
|     fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()]) | ||||
|     password = SaltyPasswordField() | ||||
|   | ||||
| @@ -30,6 +30,7 @@ class model(dict): | ||||
|                     'password': False, | ||||
|                     'base_url' : None, | ||||
|                     'extract_title_as_title': False, | ||||
|                     'empty_pages_are_a_change': False, | ||||
|                     'fetch_backend': os.getenv("DEFAULT_FETCH_BACKEND", "html_requests"), | ||||
|                     'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum | ||||
|                     'global_subtractive_selectors': [], | ||||
|   | ||||
| @@ -61,6 +61,11 @@ | ||||
|                         {{ render_checkbox_field(form.application.form.real_browser_save_screenshot) }} | ||||
|                         <span class="pure-form-message-inline">When using a Chrome browser, a screenshot from the last check will be available on the Diff page</span> | ||||
|                     </div> | ||||
|  | ||||
|                     <div class="pure-control-group"> | ||||
|                         {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }} | ||||
|                         <span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span> | ||||
|                     </div> | ||||
|                 {% if form.requests.proxy %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                         {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }} | ||||
|   | ||||
| @@ -13,7 +13,7 @@ | ||||
|                 {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }} | ||||
|             <button type="submit" class="pure-button pure-button-primary">Watch</button> | ||||
|         </fieldset> | ||||
|         <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /> Tip: You can also add 'shared' watches. <a href="#">More info</a></a></span> | ||||
|         <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span> | ||||
|     </form> | ||||
|     <div> | ||||
|         <a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a> | ||||
|   | ||||
							
								
								
									
										102
									
								
								changedetectionio/tests/test_nonrenderable_pages.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								changedetectionio/tests/test_nonrenderable_pages.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,102 @@ | ||||
| #!/usr/bin/python3 | ||||
|  | ||||
| import time | ||||
| from flask import url_for | ||||
| from urllib.request import urlopen | ||||
| from .util import set_original_response, set_modified_response, live_server_setup | ||||
|  | ||||
| sleep_time_for_fetch_thread = 3 | ||||
|  | ||||
|  | ||||
| def set_nonrenderable_response(): | ||||
|     test_return_data = """<html> | ||||
|     <head><title>modified head title</title></head> | ||||
|     <!-- like when some angular app was broken and doesnt render or whatever --> | ||||
|     <body> | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|     return None | ||||
|  | ||||
| def test_check_basic_change_detection_functionality(client, live_server): | ||||
|     set_original_response() | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     res = client.post( | ||||
|         url_for("import_page"), | ||||
|         data={"urls": url_for('test_endpoint', _external=True)}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # Do this a few times.. ensures we dont accidently set the status | ||||
|     for n in range(3): | ||||
|         client.get(url_for("api_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|         # Give the thread time to pick it up | ||||
|         time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|         # It should report nothing found (no new 'unviewed' class) | ||||
|         res = client.get(url_for("index")) | ||||
|         assert b'unviewed' not in res.data | ||||
|  | ||||
|  | ||||
|     ##################### | ||||
|     client.post( | ||||
|         url_for("settings_page"), | ||||
|         data={"application-empty_pages_are_a_change": "", | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     # this should not trigger a change, because no good text could be converted from the HTML | ||||
|     set_nonrenderable_response() | ||||
|  | ||||
|     client.get(url_for("api_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|  | ||||
|  | ||||
|     # ok now do the opposite | ||||
|  | ||||
|     client.post( | ||||
|         url_for("settings_page"), | ||||
|         data={"application-empty_pages_are_a_change": "y", | ||||
|               "requests-time_between_check-minutes": 180, | ||||
|               'application-fetch_backend': "html_requests"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     set_modified_response() | ||||
|  | ||||
|  | ||||
|     client.get(url_for("api_watch_checknow"), follow_redirects=True) | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     time.sleep(sleep_time_for_fetch_thread) | ||||
|  | ||||
|     # It should report nothing found (no new 'unviewed' class) | ||||
|     res = client.get(url_for("index")) | ||||
|     assert b'unviewed' in res.data | ||||
|  | ||||
|  | ||||
|  | ||||
|  | ||||
|     # | ||||
|     # Cleanup everything | ||||
|     res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| @@ -52,6 +52,10 @@ class update_worker(threading.Thread): | ||||
|                             raise Exception("Error - returned data from the fetch handler SHOULD be bytes") | ||||
|                     except PermissionError as e: | ||||
|                         self.app.logger.error("File permission error updating", uuid, str(e)) | ||||
|                     except content_fetcher.ReplyWithContentButNoText as e: | ||||
|                         # Totally fine, it's by choice - just continue on, nothing more to care about | ||||
|                         # Page had elements/content but no renderable text | ||||
|                         pass | ||||
|                     except content_fetcher.EmptyReply as e: | ||||
|                         # Some kind of custom to-str handler in the exception handler that does this? | ||||
|                         err_text = "EmptyReply: Status Code {}".format(e.status_code) | ||||
|   | ||||
		Reference in New Issue
	
	Block a user