mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 08:34:57 +00:00 
			
		
		
		
	Compare commits
	
		
			4 Commits
		
	
	
		
			0.45.6
			...
			skip-chang
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					ac1b5ee6f7 | ||
| 
						 | 
					aca6c753bf | ||
| 
						 | 
					efae86c134 | ||
| 
						 | 
					0f72471343 | 
@@ -3,17 +3,22 @@ import chardet
 | 
			
		||||
import os
 | 
			
		||||
import requests
 | 
			
		||||
import time
 | 
			
		||||
import urllib3.exceptions
 | 
			
		||||
import sys
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
class EmptyReply(Exception):
 | 
			
		||||
    def __init__(self, status_code, url):
 | 
			
		||||
        # Set this so we can use it in other parts of the app
 | 
			
		||||
        self.status_code = status_code
 | 
			
		||||
        self.url = url
 | 
			
		||||
        return
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
class ReplyWithContentButNoText(Exception):
 | 
			
		||||
    def __init__(self, status_code, url):
 | 
			
		||||
        # Set this so we can use it in other parts of the app
 | 
			
		||||
        self.status_code = status_code
 | 
			
		||||
        self.url = url
 | 
			
		||||
        return
 | 
			
		||||
    pass
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -184,6 +184,11 @@ class perform_site_check():
 | 
			
		||||
        # Re #340 - return the content before the 'ignore text' was applied
 | 
			
		||||
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
 | 
			
		||||
 | 
			
		||||
        # Treat pages with no renderable text content as a change? No by default
 | 
			
		||||
        empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
 | 
			
		||||
        if not is_json and not empty_pages_are_a_change and len(stripped_text_from_html.strip()) == 0:
 | 
			
		||||
            raise content_fetcher.ReplyWithContentButNoText(url=url, status_code=200)
 | 
			
		||||
 | 
			
		||||
        # We rely on the actual text in the html output.. many sites have random script vars etc,
 | 
			
		||||
        # in the future we'll implement other mechanisms.
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -371,6 +371,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
 | 
			
		||||
    ignore_whitespace = BooleanField('Ignore whitespace')
 | 
			
		||||
    real_browser_save_screenshot = BooleanField('Save last screenshot when using Chrome?')
 | 
			
		||||
    removepassword_button = SubmitField('Remove password', render_kw={"class": "pure-button pure-button-primary"})
 | 
			
		||||
    empty_pages_are_a_change =  BooleanField('Treat empty pages as a change?', default=False)
 | 
			
		||||
    render_anchor_tag_content = BooleanField('Render anchor tag content', default=False)
 | 
			
		||||
    fetch_backend = RadioField('Fetch Method', default="html_requests", choices=content_fetcher.available_fetchers(), validators=[ValidateContentFetcherIsReady()])
 | 
			
		||||
    password = SaltyPasswordField()
 | 
			
		||||
 
 | 
			
		||||
@@ -30,6 +30,7 @@ class model(dict):
 | 
			
		||||
                    'password': False,
 | 
			
		||||
                    'base_url' : None,
 | 
			
		||||
                    'extract_title_as_title': False,
 | 
			
		||||
                    'empty_pages_are_a_change': False,
 | 
			
		||||
                    'fetch_backend': os.getenv("DEFAULT_FETCH_BACKEND", "html_requests"),
 | 
			
		||||
                    'global_ignore_text': [], # List of text to ignore when calculating the comparison checksum
 | 
			
		||||
                    'global_subtractive_selectors': [],
 | 
			
		||||
 
 | 
			
		||||
@@ -61,6 +61,11 @@
 | 
			
		||||
                        {{ render_checkbox_field(form.application.form.real_browser_save_screenshot) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">When using a Chrome browser, a screenshot from the last check will be available on the Diff page</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
 | 
			
		||||
                    <div class="pure-control-group">
 | 
			
		||||
                        {{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
 | 
			
		||||
                        <span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>
 | 
			
		||||
                    </div>
 | 
			
		||||
                {% if form.requests.proxy %}
 | 
			
		||||
                    <div class="pure-control-group inline-radio">
 | 
			
		||||
                        {{ render_field(form.requests.form.proxy, class="fetch-backend-proxy") }}
 | 
			
		||||
 
 | 
			
		||||
@@ -13,7 +13,7 @@
 | 
			
		||||
                {{ render_simple_field(form.tag, value=active_tag if active_tag else '', placeholder="watch group") }}
 | 
			
		||||
            <button type="submit" class="pure-button pure-button-primary">Watch</button>
 | 
			
		||||
        </fieldset>
 | 
			
		||||
        <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /> Tip: You can also add 'shared' watches. <a href="#">More info</a></a></span>
 | 
			
		||||
        <span style="color:#eee; font-size: 80%;"><img style="height: 1em;display:inline-block;" src="{{url_for('static_content', group='images', filename='spread.svg')}}" /> Tip: You can also add 'shared' watches. <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Sharing-a-Watch">More info</a></a></span>
 | 
			
		||||
    </form>
 | 
			
		||||
    <div>
 | 
			
		||||
        <a href="{{url_for('index')}}" class="pure-button button-tag {{'active' if not active_tag }}">All</a>
 | 
			
		||||
 
 | 
			
		||||
							
								
								
									
										102
									
								
								changedetectionio/tests/test_nonrenderable_pages.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										102
									
								
								changedetectionio/tests/test_nonrenderable_pages.py
									
									
									
									
									
										Normal file
									
								
							@@ -0,0 +1,102 @@
 | 
			
		||||
#!/usr/bin/python3
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from urllib.request import urlopen
 | 
			
		||||
from .util import set_original_response, set_modified_response, live_server_setup
 | 
			
		||||
 | 
			
		||||
sleep_time_for_fetch_thread = 3
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_nonrenderable_response():
 | 
			
		||||
    test_return_data = """<html>
 | 
			
		||||
    <head><title>modified head title</title></head>
 | 
			
		||||
    <!-- like when some angular app was broken and doesnt render or whatever -->
 | 
			
		||||
    <body>
 | 
			
		||||
     </body>
 | 
			
		||||
     </html>
 | 
			
		||||
    """
 | 
			
		||||
 | 
			
		||||
    with open("test-datastore/endpoint-content.txt", "w") as f:
 | 
			
		||||
        f.write(test_return_data)
 | 
			
		||||
 | 
			
		||||
    return None
 | 
			
		||||
 | 
			
		||||
def test_check_basic_change_detection_functionality(client, live_server):
 | 
			
		||||
    set_original_response()
 | 
			
		||||
    live_server_setup(live_server)
 | 
			
		||||
 | 
			
		||||
    # Add our URL to the import page
 | 
			
		||||
    res = client.post(
 | 
			
		||||
        url_for("import_page"),
 | 
			
		||||
        data={"urls": url_for('test_endpoint', _external=True)},
 | 
			
		||||
        follow_redirects=True
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    assert b"1 Imported" in res.data
 | 
			
		||||
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
 | 
			
		||||
    # Do this a few times.. ensures we dont accidently set the status
 | 
			
		||||
    for n in range(3):
 | 
			
		||||
        client.get(url_for("api_watch_checknow"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
        # Give the thread time to pick it up
 | 
			
		||||
        time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
 | 
			
		||||
        # It should report nothing found (no new 'unviewed' class)
 | 
			
		||||
        res = client.get(url_for("index"))
 | 
			
		||||
        assert b'unviewed' not in res.data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    #####################
 | 
			
		||||
    client.post(
 | 
			
		||||
        url_for("settings_page"),
 | 
			
		||||
        data={"application-empty_pages_are_a_change": "",
 | 
			
		||||
              "requests-time_between_check-minutes": 180,
 | 
			
		||||
              'application-fetch_backend': "html_requests"},
 | 
			
		||||
        follow_redirects=True
 | 
			
		||||
    )
 | 
			
		||||
 | 
			
		||||
    # this should not trigger a change, because no good text could be converted from the HTML
 | 
			
		||||
    set_nonrenderable_response()
 | 
			
		||||
 | 
			
		||||
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
    # Give the thread time to pick it up
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
 | 
			
		||||
    # It should report nothing found (no new 'unviewed' class)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
    assert b'unviewed' not in res.data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    # ok now do the opposite
 | 
			
		||||
 | 
			
		||||
    client.post(
 | 
			
		||||
        url_for("settings_page"),
 | 
			
		||||
        data={"application-empty_pages_are_a_change": "y",
 | 
			
		||||
              "requests-time_between_check-minutes": 180,
 | 
			
		||||
              'application-fetch_backend': "html_requests"},
 | 
			
		||||
        follow_redirects=True
 | 
			
		||||
    )
 | 
			
		||||
    set_modified_response()
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    client.get(url_for("api_watch_checknow"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
    # Give the thread time to pick it up
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
 | 
			
		||||
    # It should report nothing found (no new 'unviewed' class)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
    assert b'unviewed' in res.data
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    #
 | 
			
		||||
    # Cleanup everything
 | 
			
		||||
    res = client.get(url_for("api_delete", uuid="all"), follow_redirects=True)
 | 
			
		||||
    assert b'Deleted' in res.data
 | 
			
		||||
 | 
			
		||||
@@ -52,6 +52,10 @@ class update_worker(threading.Thread):
 | 
			
		||||
                            raise Exception("Error - returned data from the fetch handler SHOULD be bytes")
 | 
			
		||||
                    except PermissionError as e:
 | 
			
		||||
                        self.app.logger.error("File permission error updating", uuid, str(e))
 | 
			
		||||
                    except content_fetcher.ReplyWithContentButNoText as e:
 | 
			
		||||
                        # Totally fine, it's by choice - just continue on, nothing more to care about
 | 
			
		||||
                        # Page had elements/content but no renderable text
 | 
			
		||||
                        pass
 | 
			
		||||
                    except content_fetcher.EmptyReply as e:
 | 
			
		||||
                        # Some kind of custom to-str handler in the exception handler that does this?
 | 
			
		||||
                        err_text = "EmptyReply: Status Code {}".format(e.status_code)
 | 
			
		||||
 
 | 
			
		||||
		Reference in New Issue
	
	Block a user