#!/usr/bin/env python3
import time
import os
from flask import url_for
from ..html_tools import *
from .util import live_server_setup, wait_for_all_checks, delete_all_watches
def set_response_with_multiple_index(datastore_path):
    """Write the multi-row fixture body served by the test endpoint.

    The content is written to ``endpoint-content.txt`` inside
    *datastore_path*, replacing any previous content of that file.
    """
    payload = """
Person 1
Person 2
Person 3
Emil
Tobias
Linus
16
14
10
"""
    target = os.path.join(datastore_path, "endpoint-content.txt")
    with open(target, "w") as handle:
        handle.write(payload)
def set_original_response(datastore_path):
    """Write the baseline (unmodified) fixture body for the test endpoint.

    The content is written to ``endpoint-content.txt`` inside
    *datastore_path*, replacing any previous content of that file.
    """
    baseline = """
Header
Some initial text
Which is across multiple lines
So let's see what happens.
Some text that will change
"""
    target = os.path.join(datastore_path, "endpoint-content.txt")
    with open(target, "w") as handle:
        handle.write(baseline)
def set_modified_response(datastore_path):
    """Write the changed fixture body for the test endpoint.

    Compared with the baseline written by ``set_original_response``, the
    first and last text lines differ. The content is written to
    ``endpoint-content.txt`` inside *datastore_path*, replacing any previous
    content of that file.
    """
    changed = """
Header changed
Some initial text
Which is across multiple lines
So let's see what happens.
Some text that changes
"""
    target = os.path.join(datastore_path, "endpoint-content.txt")
    with open(target, "w") as handle:
        handle.write(changed)
def test_element_removal_output():
    """Check the rendered text after ``element_removal`` applies mixed selectors.

    The selector list mixes plain CSS selectors with XPath expressions given
    without a prefix, with the ``xpath:`` prefix and with the ``xpath1:``
    prefix. Only the two untouched text lines are expected to remain once the
    result is rendered through ``inscriptis.get_text``.
    """
    from inscriptis import get_text

    # Check text with sub-parts renders correctly
    content = """
Header
Some initial text
across multiple lines
Some text that changes
Some text should be matched by xPath // selector
Some text should be matched by xPath selector
Some text should be matched by xPath1 selector
"""
    selectors = [
        "header",
        "footer",
        "nav",
        "#changetext",
        "//*[contains(text(), 'xPath // selector')]",
        "xpath://*[contains(text(), 'xPath selector')]",
        "xpath1://*[contains(text(), 'xPath1 selector')]",
    ]
    expected = """Some initial text
across multiple lines
"""

    html_blob = element_removal(selectors, html_content=content)
    rendered = get_text(html_blob)

    assert rendered == expected
def test_element_removal_full(client, live_server, measure_memory_usage, datastore_path):
    """End-to-end check that subtractive selectors hide changes in removed elements.

    Imports a watch for the test endpoint, saves subtractive selectors through
    the edit form, confirms they were persisted, then serves the modified
    content and asserts the watch list does not report an unviewed change.
    """
    set_original_response(datastore_path=datastore_path)

    # Add our URL to the import page
    test_url = url_for("test_endpoint", _external=True)
    import_url = url_for("imports.import_page")
    res = client.post(import_url, data={"urls": test_url}, follow_redirects=True)
    assert b"1 Imported" in res.data
    wait_for_all_checks(client)

    # Goto the edit page, add the filter data
    # Not sure why \r needs to be added - absent of the #changetext this is not necessary
    subtractive_selectors_data = "header\r\nfooter\r\nnav\r\n#changetext"
    form_fields = {
        "subtractive_selectors": subtractive_selectors_data,
        "url": test_url,
        "tags": "",
        "headers": "",
        "fetch_backend": "html_requests",
        "time_between_check_use_default": "y",
    }
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
        data=form_fields,
        follow_redirects=True,
    )
    assert b"Updated watch." in res.data
    wait_for_all_checks(client)

    # Check it saved
    res = client.get(url_for("ui.ui_edit.edit_page", uuid="first"))
    expected_saved = subtractive_selectors_data.encode("utf-8")
    assert expected_saved in res.data

    # Trigger a check
    res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    assert b'Queued 1 watch for rechecking.' in res.data
    wait_for_all_checks(client)

    # so that we set the state to 'has-unread-changes' after all the edits
    client.get(url_for("ui.ui_views.diff_history_page", uuid="first"))

    # Make a change to header/footer/nav
    set_modified_response(datastore_path=datastore_path)

    # Trigger a check
    res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
    assert b'Queued 1 watch for rechecking.' in res.data

    # Give the thread time to pick it up
    wait_for_all_checks(client)

    # There should not be an unviewed change, as changes should be removed
    res = client.get(url_for("watchlist.index"))
    assert b"unviewed" not in res.data
# Re #2752
def test_element_removal_nth_offset_no_shift(client, live_server, measure_memory_usage, datastore_path):
    """Check positional subtractive selectors remove only the cells they name.

    The same six removals are expressed twice, once as CSS ``:nth-child``
    selectors and once as indexed XPath expressions. For each variant a fresh
    watch is created and rechecked, and the preview page must no longer
    contain the second- and third-column values while the first-column value
    ("Emil") is still present.
    """
    set_response_with_multiple_index(datastore_path=datastore_path)

    # CSS style
    css_selectors = """body > table > tr:nth-child(1) > th:nth-child(2)
body > table > tr:nth-child(2) > td:nth-child(2)
body > table > tr:nth-child(3) > td:nth-child(2)
body > table > tr:nth-child(1) > th:nth-child(3)
body > table > tr:nth-child(2) > td:nth-child(3)
body > table > tr:nth-child(3) > td:nth-child(3)"""

    # Second type, XPath
    xpath_selectors = """//body/table/tr[1]/th[2]
//body/table/tr[2]/td[2]
//body/table/tr[3]/td[2]
//body/table/tr[1]/th[3]
//body/table/tr[2]/td[3]
//body/table/tr[3]/td[3]"""

    subtractive_selectors_data = [css_selectors, xpath_selectors]
    test_url = url_for("test_endpoint", _external=True)

    for selector_list in subtractive_selectors_data:
        # Start every variant from an empty watch list
        delete_all_watches(client)

        datastore = client.application.config.get('DATASTORE')
        datastore.add_watch(
            url=test_url,
            extras={"subtractive_selectors": selector_list.splitlines()},
        )
        client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
        wait_for_all_checks(client)

        preview_url = url_for("ui.ui_views.preview_page", uuid="first")
        res = client.get(preview_url, follow_redirects=True)
        body = res.data

        # the filters above should have removed this but they never say to remove the "emil" column
        for removed in (b"Tobias", b"Linus", b"Person 2", b"Person 3"):
            assert removed not in body

        # First column should exist
        assert b"Emil" in body