mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-11-04 08:34:57 +00:00 
			
		
		
		
	Compare commits
	
		
			2 Commits
		
	
	
		
			filter-fai
			...
			sort-text-
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
| 
						 | 
					fa4eeb24cc | ||
| 
						 | 
					64a764f541 | 
@@ -465,6 +465,7 @@ class watchForm(commonSettingsForm):
 | 
			
		||||
    method = SelectField('Request method', choices=valid_method, default=default_method)
 | 
			
		||||
    ignore_status_codes = BooleanField('Ignore status codes (process non-2xx status codes as normal)', default=False)
 | 
			
		||||
    check_unique_lines = BooleanField('Only trigger when unique lines appear', default=False)
 | 
			
		||||
    sort_text_alphabetically =  BooleanField('Sort text alphabetically', default=False)
 | 
			
		||||
 | 
			
		||||
    filter_text_added = BooleanField('Added lines', default=True)
 | 
			
		||||
    filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
 | 
			
		||||
 
 | 
			
		||||
@@ -58,6 +58,7 @@ base_config = {
 | 
			
		||||
    'previous_md5_before_filters': False,  # Used for skipping changedetection entirely
 | 
			
		||||
    'proxy': None,  # Preferred proxy connection
 | 
			
		||||
    'remote_server_reply': None, # From 'server' reply header
 | 
			
		||||
    'sort_text_alphabetically': False,
 | 
			
		||||
    'subtractive_selectors': [],
 | 
			
		||||
    'tag': '', # Old system of text name for a tag, to be removed
 | 
			
		||||
    'tags': [], # list of UUIDs to App.Tags
 | 
			
		||||
 
 | 
			
		||||
@@ -204,6 +204,12 @@ class perform_site_check(difference_detection_processor):
 | 
			
		||||
                            is_rss=is_rss # #1874 activate the <title workaround hack
 | 
			
		||||
                        )
 | 
			
		||||
 | 
			
		||||
        if watch.get('sort_text_alphabetically') and stripped_text_from_html:
 | 
			
		||||
            # Note: Because a <p>something</p> will add an extra line feed to signify the paragraph gap
 | 
			
		||||
            # we end up with 'Some text\n\n', sorting will add all those extra \n at the start, so we remove them here.
 | 
			
		||||
            stripped_text_from_html = stripped_text_from_html.replace('\n\n', '\n')
 | 
			
		||||
            stripped_text_from_html = '\n'.join( sorted(stripped_text_from_html.splitlines(), key=lambda x: x.lower() ))
 | 
			
		||||
 | 
			
		||||
        # Re #340 - return the content before the 'ignore text' was applied
 | 
			
		||||
        text_content_before_ignored_filter = stripped_text_from_html.encode('utf-8')
 | 
			
		||||
 | 
			
		||||
 
 | 
			
		||||
@@ -339,6 +339,10 @@ nav
 | 
			
		||||
                    <span class="pure-form-message-inline">When content is merely moved in a list, it will also trigger an <strong>addition</strong>, consider enabling <code><strong>Only trigger when unique lines appear</strong></code></span>
 | 
			
		||||
                </fieldset>
 | 
			
		||||
 | 
			
		||||
                <fieldset class="pure-control-group">
 | 
			
		||||
                    {{ render_checkbox_field(form.sort_text_alphabetically) }}
 | 
			
		||||
                    <span class="pure-form-message-inline">Helps reduce changes detected caused by sites shuffling lines around, combine with <i>check unique lines</i> below.</span>
 | 
			
		||||
                </fieldset>
 | 
			
		||||
                <fieldset class="pure-control-group">
 | 
			
		||||
                    {{ render_checkbox_field(form.check_unique_lines) }}
 | 
			
		||||
                    <span class="pure-form-message-inline">Good for websites that just move the content around, and you want to know when NEW content is added, compares new lines against all history for this watch.</span>
 | 
			
		||||
 
 | 
			
		||||
@@ -2,7 +2,7 @@
 | 
			
		||||
 | 
			
		||||
import time
 | 
			
		||||
from flask import url_for
 | 
			
		||||
from .util import live_server_setup
 | 
			
		||||
from .util import live_server_setup, wait_for_all_checks
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_original_ignore_response():
 | 
			
		||||
@@ -34,6 +34,23 @@ def set_modified_swapped_lines():
 | 
			
		||||
    with open("test-datastore/endpoint-content.txt", "w") as f:
 | 
			
		||||
        f.write(test_return_data)
 | 
			
		||||
 | 
			
		||||
def set_modified_swapped_lines_with_extra_text_for_sorting():
    """Write the out-of-order HTML fixture used by the sorting test.

    The page is saved to ``test-datastore/endpoint-content.txt``. Its
    paragraphs are deliberately unsorted and include a line starting with a
    digit as well as lines starting with an upper- and a lower-case "a".
    """
    # One tuple entry per line of the document; joining with "\n" keeps the
    # trailing spaces that some lines carry explicit instead of hiding them
    # at the end of a line inside a triple-quoted literal.
    html_lines = (
        "<html>",
        "     <body>",
        "     <p> Which is across multiple lines</p>     ",
        "     <p>Some initial text</p>",
        "     <p>   So let's see what happens.</p>",
        "     <p>Z last</p>",
        "     <p>0 numerical</p>",
        "     <p>A uppercase</p>",
        "     <p>a lowercase</p>     ",
        "     </body>",
        "     </html>",
        "    ",
    )
    page_source = "\n".join(html_lines)

    with open("test-datastore/endpoint-content.txt", "w") as endpoint_file:
        endpoint_file.write(page_source)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def set_modified_with_trigger_text_response():
 | 
			
		||||
    test_return_data = """<html>
 | 
			
		||||
@@ -49,15 +66,14 @@ def set_modified_with_trigger_text_response():
 | 
			
		||||
    with open("test-datastore/endpoint-content.txt", "w") as f:
 | 
			
		||||
        f.write(test_return_data)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
def test_unique_lines_functionality(client, live_server):
 | 
			
		||||
def test_setup(client, live_server):
 | 
			
		||||
    live_server_setup(live_server)
 | 
			
		||||
 | 
			
		||||
    sleep_time_for_fetch_thread = 3
 | 
			
		||||
def test_unique_lines_functionality(client, live_server):
 | 
			
		||||
    #live_server_setup(live_server)
 | 
			
		||||
 | 
			
		||||
 | 
			
		||||
    set_original_ignore_response()
 | 
			
		||||
    # Give the endpoint time to spin up
 | 
			
		||||
    time.sleep(1)
 | 
			
		||||
 | 
			
		||||
    # Add our URL to the import page
 | 
			
		||||
    test_url = url_for('test_endpoint', _external=True)
 | 
			
		||||
@@ -67,7 +83,7 @@ def test_unique_lines_functionality(client, live_server):
 | 
			
		||||
        follow_redirects=True
 | 
			
		||||
    )
 | 
			
		||||
    assert b"1 Imported" in res.data
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
 | 
			
		||||
    # Add our URL to the import page
 | 
			
		||||
    res = client.post(
 | 
			
		||||
@@ -83,12 +99,11 @@ def test_unique_lines_functionality(client, live_server):
 | 
			
		||||
    #  Make a change
 | 
			
		||||
    set_modified_swapped_lines()
 | 
			
		||||
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
    # Trigger a check
 | 
			
		||||
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
 | 
			
		||||
    # Give the thread time to pick it up
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
 | 
			
		||||
    # It should report nothing found (no new 'unviewed' class)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
@@ -97,7 +112,57 @@ def test_unique_lines_functionality(client, live_server):
 | 
			
		||||
    # Now set the content which contains the new text and re-ordered existing text
 | 
			
		||||
    set_modified_with_trigger_text_response()
 | 
			
		||||
    client.get(url_for("form_watch_checknow"), follow_redirects=True)
 | 
			
		||||
    time.sleep(sleep_time_for_fetch_thread)
 | 
			
		||||
    wait_for_all_checks(client)
 | 
			
		||||
    res = client.get(url_for("index"))
 | 
			
		||||
    assert b'unviewed' in res.data
 | 
			
		||||
    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
 | 
			
		||||
    assert b'Deleted' in res.data
 | 
			
		||||
 | 
			
		||||
def test_sort_lines_functionality(client, live_server):
    """Check that a watch with 'sort_text_alphabetically' set shows sorted text.

    Flow: write the unsorted fixture page, import its URL as a watch, submit
    the edit form with ``sort_text_alphabetically`` set, re-check the watch,
    then verify on the preview page that the lines appear in the expected
    relative order.

    Args:
        client: test client used to issue requests against the application.
        live_server: live server fixture; not used directly in this test.
    """
    # live_server_setup(live_server) is intentionally not called here;
    # test_setup() in this module performs it.

    set_modified_swapped_lines_with_extra_text_for_sorting()

    # Add our URL to the import page
    test_url = url_for('test_endpoint', _external=True)
    res = client.post(
        url_for("import_page"),
        data={"urls": test_url},
        follow_redirects=True
    )
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Submit the watch edit form with the sort option set.
    # NOTE(review): "n" is presumably treated as a ticked checkbox by the
    # form's BooleanField (any non-empty, non-"false" value) - confirm.
    res = client.post(
        url_for("edit_page", uuid="first"),
        data={"sort_text_alphabetically": "n",
              "url": test_url,
              "fetch_backend": "html_requests"},
        follow_redirects=True
    )
    assert b"Updated watch." in res.data

    # Trigger a check
    client.get(url_for("form_watch_checknow"), follow_redirects=True)

    # Block until the queued check has been processed
    wait_for_all_checks(client)

    res = client.get(url_for("index"))
    # Should be a change registered
    assert b'unviewed' in res.data

    res = client.get(
        url_for("preview_page", uuid="first"),
        follow_redirects=True
    )
    preview = res.data

    # bytes.find() returns -1 for a missing needle, which would make the
    # ordering comparisons below pass (or fail) for the wrong reason, so
    # first make sure every line we compare is actually on the page.
    for expected_line in (
        b'0 numerical',
        b'A uppercase',
        b'Z last',
        b'Some initial text',
        b'Which is across multiple lines',
    ):
        assert expected_line in preview, expected_line

    # Relative order expected from a case-insensitive alphabetical sort
    assert preview.find(b'0 numerical') < preview.find(b'Z last')
    assert preview.find(b'A uppercase') < preview.find(b'Z last')
    assert preview.find(b'Some initial text') < preview.find(b'Which is across multiple lines')

    res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)
    assert b'Deleted' in res.data
 | 
			
		||||
		Reference in New Issue
	
	Block a user