Mirror of https://github.com/dgtlmoon/changedetection.io.git

Compare commits: field-rena...update-oth (9 commits)
Commits in this comparison:

- 3d798bfa2e
- 9f326783e5
- 4e6e680d79
- 1378b5b2ff
- 456c6e3f58
- 61be7f68db
- 0e38a3c881
- 2c630e9853
- 786e0d1fab
@@ -105,6 +105,12 @@ We [recommend and use Bright Data](https://brightdata.grsm.io/n0r16zf7eivq) glob

Please :star: star :star: this project and help it grow! https://github.com/dgtlmoon/changedetection.io/

### Conditional web page changes

Easily [configure conditional actions](https://changedetection.io/tutorial/conditional-actions-web-page-changes), for example, only trigger when a price is above or below a preset amount, or [when a web page includes (or does not include) a keyword](https://changedetection.io/tutorial/how-monitor-keywords-any-website)

<img src="./docs/web-page-change-conditions.png" style="max-width:80%;" alt="Conditional web page changes" title="Conditional web page changes" />

### Schedule web page watches in any timezone, limit by day of week and time.

Easily set a re-check schedule, for example you could limit the web page change detection to only operate during business hours.

@@ -4,7 +4,7 @@ import re
from random import randint
from loguru import logger

from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
from changedetectionio.content_fetchers.helpers import capture_full_page
from changedetectionio.content_fetchers.base import manage_user_agent
from changedetectionio.safe_jinja import render as jinja_render

@@ -298,14 +298,7 @@ class browsersteps_live_ui(steppable_browser_interface):
now = time.time()
self.page.wait_for_timeout(1 * 1000)

full_height = self.page.evaluate("document.documentElement.scrollHeight")

if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
logger.warning(f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
screenshot = capture_stitched_together_full_page(self.page)
else:
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=40)
screenshot = capture_full_page(self.page)

logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")

@@ -22,6 +22,7 @@
<li class="tab"><a href="#notifications">Notifications</a></li>
<li class="tab"><a href="#fetching">Fetching</a></li>
<li class="tab"><a href="#filters">Global Filters</a></li>
<li class="tab"><a href="#ui-options">UI Options</a></li>
<li class="tab"><a href="#api">API</a></li>
<li class="tab"><a href="#timedate">Time & Date</a></li>
<li class="tab"><a href="#proxies">CAPTCHA & Proxies</a></li>

@@ -240,6 +241,12 @@ nav
</p>
</div>
</div>
<div class="tab-pane-inner" id="ui-options">
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.ui.form.open_diff_in_new_tab, class="open_diff_in_new_tab") }}
<span class="pure-form-message-inline">Enable this setting to open the diff page in a new tab. If disabled, the diff page will open in the current tab.</span>
</div>
</div>
<div class="tab-pane-inner" id="proxies">
<div id="recommended-proxy">
<div>

@@ -125,7 +125,10 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat

else:
# Recheck all, including muted
for watch_uuid, watch in datastore.data['watching'].items():
# Get most overdue first
for k in sorted(datastore.data['watching'].items(), key=lambda item: item[1].get('last_checked', 0)):
watch_uuid = k[0]
watch = k[1]
if not watch['paused']:
if watch_uuid not in running_uuids:
if with_errors and not watch.get('last_error'):

@@ -140,7 +143,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, running_updat
if i == 1:
flash("Queued 1 watch for rechecking.")
if i > 1:
flash("Queued {} watches for rechecking.".format(i))
flash(f"Queued {i} watches for rechecking.")
if i == 0:
flash("No watches available to recheck.")

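The "recheck all" branch above now queues watches oldest-first instead of in dictionary order. A minimal sketch of that ordering, using a hypothetical in-memory `watching` dict as a stand-in for the real datastore:

```python
# Minimal sketch of the "most overdue first" ordering used above.
# `watching` is a hypothetical stand-in for datastore.data['watching'].
watching = {
    "uuid-a": {"last_checked": 300, "paused": False},
    "uuid-b": {"last_checked": 100, "paused": False},
    "uuid-c": {"last_checked": 0, "paused": True},   # paused watches are skipped below
}

for watch_uuid, watch in sorted(watching.items(), key=lambda item: item[1].get('last_checked', 0)):
    if not watch['paused']:
        print(watch_uuid)  # prints uuid-b (oldest check) before uuid-a
```
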
@@ -209,15 +209,18 @@
<a href="{{ url_for('ui.ui_edit.edit_page', uuid=watch.uuid, tag=active_tag_uuid)}}#general" class="pure-button pure-button-primary">Edit</a>
{% if watch.history_n >= 2 %}

{% set open_diff_in_new_tab = datastore.data['settings']['application']['ui'].get('open_diff_in_new_tab') %}
{% set target_attr = ' target="' ~ watch.uuid ~ '"' if open_diff_in_new_tab else '' %}

{% if is_unviewed %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid, from_version=watch.get_from_version_based_on_last_viewed) }}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
{% else %}
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary diff-link">History</a>
<a href="{{ url_for('ui.ui_views.diff_history_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary diff-link">History</a>
{% endif %}

{% else %}
{% if watch.history_n == 1 or (watch.history_n ==0 and watch.error_text_ctime )%}
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" target="{{watch.uuid}}" class="pure-button pure-button-primary">Preview</a>
<a href="{{ url_for('ui.ui_views.preview_page', uuid=watch.uuid)}}" {{target_attr}} class="pure-button pure-button-primary">Preview</a>
{% endif %}
{% endif %}
</td>

@@ -87,7 +87,7 @@ class Fetcher():
pass

@abstractmethod
def quit(self):
def quit(self, watch=None):
return

@abstractmethod

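The abstract `quit()` now takes an optional `watch` argument. A minimal sketch (with simplified stand-in classes, not the real fetchers) of why existing no-argument call sites keep working while the processor can pass the watch through:

```python
# Simplified stand-ins for illustration only; the real classes live in
# changedetectionio.content_fetchers and carry much more state.
class Fetcher:
    def quit(self, watch=None):
        return

class ExampleFetcher(Fetcher):
    def quit(self, watch=None):
        if watch is not None:
            print(f"per-watch cleanup for {watch}")

fetcher = ExampleFetcher()
fetcher.quit()                    # old-style call sites still work
fetcher.quit(watch="some-watch")  # new-style call, as the processor now does
```
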
@@ -1,79 +1,107 @@

# Pages with a vertical height longer than this will use the 'stitch together' method.

# - Many GPUs have a max texture size of 16384x16384px (or lower on older devices).
# - If a page is taller than ~8000–10000px, it risks exceeding GPU memory limits.
# - This is especially important on headless Chromium, where Playwright may fail to allocate a massive full-page buffer.

# The size at which we will switch to stitching method
SCREENSHOT_SIZE_STITCH_THRESHOLD=8000

from loguru import logger

def capture_stitched_together_full_page(page):
def capture_full_page(page):
import io
import os
import time
from PIL import Image, ImageDraw, ImageFont

MAX_TOTAL_HEIGHT = SCREENSHOT_SIZE_STITCH_THRESHOLD*4 # Maximum total height for the final image (When in stitch mode)
MAX_CHUNK_HEIGHT = 4000 # Height per screenshot chunk
# Maximum total height for the final image (When in stitch mode).
# We limit this to 16000px due to the huge amount of RAM that was being used
# Example: 16000 × 1400 × 3 = 67,200,000 bytes ≈ 64.1 MB (not including buffers in PIL etc)
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", 16000))

# The size at which we will switch to stitching method, when below this (and
# MAX_TOTAL_HEIGHT which can be set by a user) we will use the default
# screenshot method.
SCREENSHOT_SIZE_STITCH_THRESHOLD = 8000

WARNING_TEXT_HEIGHT = 20 # Height of the warning text overlay

# Save the original viewport size
original_viewport = page.viewport_size
now = time.time()
start = time.time()

stitched_image = None

try:
viewport = page.viewport_size
viewport_width = original_viewport["width"]
viewport_height = original_viewport["height"]

page_height = page.evaluate("document.documentElement.scrollHeight")

# Optimization to avoid unnecessary stitching if we can avoid it
# Use the default screenshot method for smaller pages to take advantage
# of GPU and native playwright screenshot optimizations
if (
page_height < SCREENSHOT_SIZE_STITCH_THRESHOLD
and page_height < MAX_TOTAL_HEIGHT
):
logger.debug("Using default screenshot method")
screenshot = page.screenshot(
type="jpeg",
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
full_page=True,
)
logger.debug(f"Screenshot captured in {time.time() - start:.2f}s")
return screenshot

logger.debug(
"Using stitching method for large screenshot because page height exceeds threshold"
)

# Limit the total capture height
capture_height = min(page_height, MAX_TOTAL_HEIGHT)

images = []
total_captured_height = 0
# Calculate number of chunks needed using ORIGINAL viewport height
num_chunks = (capture_height + viewport_height - 1) // viewport_height

for offset in range(0, capture_height, MAX_CHUNK_HEIGHT):
# Ensure we do not exceed the total height limit
chunk_height = min(MAX_CHUNK_HEIGHT, MAX_TOTAL_HEIGHT - total_captured_height)
# Create the final image upfront to avoid holding all chunks in memory
stitched_image = Image.new("RGB", (viewport_width, capture_height))

# Adjust viewport size for this chunk
page.set_viewport_size({"width": viewport["width"], "height": chunk_height})

# Scroll to the correct position
page.evaluate(f"window.scrollTo(0, {offset})")

# Capture screenshot chunk
screenshot_bytes = page.screenshot(type='jpeg', quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
images.append(Image.open(io.BytesIO(screenshot_bytes)))

total_captured_height += chunk_height

# Stop if we reached the maximum total height
if total_captured_height >= MAX_TOTAL_HEIGHT:
break

# Create the final stitched image
stitched_image = Image.new('RGB', (viewport["width"], total_captured_height))
# Track cumulative paste position
y_offset = 0

# Stitch the screenshot chunks together
for img in images:
stitched_image.paste(img, (0, y_offset))
y_offset += img.height
for _ in range(num_chunks):
# Scroll to position (no viewport resizing)
page.evaluate(f"window.scrollTo(0, {y_offset})")

logger.debug(f"Screenshot stitched together in {time.time()-now:.2f}s")
# Capture only the visible area using clip
with io.BytesIO(
page.screenshot(
type="jpeg",
clip={
"x": 0,
"y": 0,
"width": viewport_width,
"height": min(viewport_height, capture_height - y_offset),
},
quality=int(os.getenv("SCREENSHOT_QUALITY", 30)),
)
) as buf:
with Image.open(buf) as img:
img.load()
stitched_image.paste(img, (0, y_offset))
y_offset += img.height

logger.debug(f"Screenshot stitched together in {time.time() - start:.2f}s")

# Overlay warning text if the screenshot was trimmed
if page_height > MAX_TOTAL_HEIGHT:
if capture_height < page_height:
draw = ImageDraw.Draw(stitched_image)
warning_text = f"WARNING: Screenshot was {page_height}px but trimmed to {MAX_TOTAL_HEIGHT}px because it was too long"

# Load font (default system font if Arial is unavailable)
try:
font = ImageFont.truetype("arial.ttf", WARNING_TEXT_HEIGHT) # Arial (Windows/Mac)
font = ImageFont.truetype(
"arial.ttf", WARNING_TEXT_HEIGHT
) # Arial (Windows/Mac)
except IOError:
font = ImageFont.load_default() # Default font if Arial not found

@@ -83,22 +111,28 @@ def capture_stitched_together_full_page(page):
text_height = text_bbox[3] - text_bbox[1] # Calculate text height

# Define background rectangle (top of the image)
draw.rectangle([(0, 0), (viewport["width"], WARNING_TEXT_HEIGHT)], fill="white")
draw.rectangle(
[(0, 0), (viewport_width, WARNING_TEXT_HEIGHT)], fill="white"
)

# Center text horizontally within the warning area
text_x = (viewport["width"] - text_width) // 2
text_x = (viewport_width - text_width) // 2
text_y = (WARNING_TEXT_HEIGHT - text_height) // 2

# Draw the warning text in red
draw.text((text_x, text_y), warning_text, fill="red", font=font)

# Save or return the final image
output = io.BytesIO()
stitched_image.save(output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
screenshot = output.getvalue()
# Save final image
with io.BytesIO() as output:
stitched_image.save(
output, format="JPEG", quality=int(os.getenv("SCREENSHOT_QUALITY", 30))
)
screenshot = output.getvalue()

finally:
# Restore the original viewport size
page.set_viewport_size(original_viewport)
if stitched_image is not None:
stitched_image.close()

return screenshot

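For context, a hedged sketch of how the new `capture_full_page()` helper could be exercised on its own with Playwright's sync API. Standalone usage is assumed here; inside changedetection.io it is called by the fetchers shown in the following hunks.

```python
# Hedged standalone usage sketch; assumes a local Chromium driven by Playwright's sync API.
from playwright.sync_api import sync_playwright
from changedetectionio.content_fetchers.helpers import capture_full_page

with sync_playwright() as p:
    browser = p.chromium.launch()
    page = browser.new_page(viewport={"width": 1280, "height": 720})
    page.goto("https://example.com")
    jpeg_bytes = capture_full_page(page)  # JPEG bytes; stitching kicks in automatically for very tall pages
    with open("screenshot.jpg", "wb") as f:
        f.write(jpeg_bytes)
    browser.close()
```
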
@@ -4,7 +4,7 @@ from urllib.parse import urlparse

from loguru import logger

from changedetectionio.content_fetchers.helpers import capture_stitched_together_full_page, SCREENSHOT_SIZE_STITCH_THRESHOLD
from changedetectionio.content_fetchers.helpers import capture_full_page
from changedetectionio.content_fetchers.base import Fetcher, manage_user_agent
from changedetectionio.content_fetchers.exceptions import PageUnloadable, Non200ErrorCodeReceived, EmptyReply, ScreenshotUnavailable

@@ -60,7 +60,7 @@ class fetcher(Fetcher):

def screenshot_step(self, step_n=''):
super().screenshot_step(step_n=step_n)
screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))
screenshot = capture_full_page(self.page)

if self.browser_steps_screenshot_path is not None:
destination = os.path.join(self.browser_steps_screenshot_path, 'step_{}.jpeg'.format(step_n))

@@ -164,9 +164,7 @@ class fetcher(Fetcher):
raise PageUnloadable(url=url, status_code=None, message=str(e))

if self.status_code != 200 and not ignore_status_codes:
screenshot = self.page.screenshot(type='jpeg', full_page=True,
quality=int(os.getenv("SCREENSHOT_QUALITY", 72)))

screenshot = capture_full_page(self.page)
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)

if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:

@@ -204,14 +202,7 @@ class fetcher(Fetcher):
# acceptable screenshot quality here
try:
# The actual screenshot - this always base64 and needs decoding! horrible! huge CPU usage
full_height = self.page.evaluate("document.documentElement.scrollHeight")

if full_height >= SCREENSHOT_SIZE_STITCH_THRESHOLD:
logger.warning(
f"Page full Height: {full_height}px longer than {SCREENSHOT_SIZE_STITCH_THRESHOLD}px, using 'stitched screenshot method'.")
self.screenshot = capture_stitched_together_full_page(self.page)
else:
self.screenshot = self.page.screenshot(type='jpeg', full_page=True, quality=int(os.getenv("SCREENSHOT_QUALITY", 30)))
self.screenshot = capture_full_page(self.page)

except Exception as e:
# It's likely the screenshot was too long/big and something crashed

@@ -96,3 +96,17 @@ class fetcher(Fetcher):

self.raw_content = r.content

def quit(self, watch=None):

# In case they switched to `requests` fetcher from something else
# Then the screenshot could be old, in any case, it's not used here.
# REMOVE_REQUESTS_OLD_SCREENSHOTS - Mainly used for testing
if strtobool(os.getenv("REMOVE_REQUESTS_OLD_SCREENSHOTS", 'true')):
screenshot = watch.get_screenshot()
if screenshot:
try:
os.unlink(screenshot)
except Exception as e:
logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}")

@@ -65,6 +65,7 @@ class fetcher(Fetcher):
# request_body, request_method unused for now, until some magic in the future happens.

options = ChromeOptions()
options.add_argument("--headless")
if self.proxy:
options.proxy = self.proxy

@@ -112,9 +113,9 @@ class fetcher(Fetcher):
self.quit()
return True

def quit(self):
def quit(self, watch=None):
if self.driver:
try:
self.driver.quit()
except Exception as e:
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")
logger.debug(f"Content Fetcher > Exception in chrome shutdown/quit {str(e)}")

@@ -586,7 +586,7 @@ class processor_text_json_diff_form(commonSettingsForm):
filter_text_replaced = BooleanField('Replaced/changed lines', default=True)
filter_text_removed = BooleanField('Removed lines', default=True)

trigger_text = StringListField('Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
trigger_text = StringListField('Keyword triggers - Trigger/wait for text', [validators.Optional(), ValidateListRegex()])
if os.getenv("PLAYWRIGHT_DRIVER_URL"):
browser_steps = FieldList(FormField(SingleBrowserStep), min_entries=10)
text_should_not_be_present = StringListField('Block change-detection while text matches', [validators.Optional(), ValidateListRegex()])

@@ -721,6 +721,8 @@ class globalSettingsRequestForm(Form):
self.extra_proxies.errors.append('Both a name, and a Proxy URL is required.')
return False

class globalSettingsApplicationUIForm(Form):
open_diff_in_new_tab = BooleanField('Open diff page in a new tab', default=True, validators=[validators.Optional()])

# datastore.data['settings']['application']..
class globalSettingsApplicationForm(commonSettingsForm):

@@ -752,6 +754,7 @@ class globalSettingsApplicationForm(commonSettingsForm):
render_kw={"style": "width: 5em;"},
validators=[validators.NumberRange(min=0,
message="Should contain zero or more attempts")])
ui = FormField(globalSettingsApplicationUIForm)

class globalSettingsForm(Form):

@@ -366,22 +366,41 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
# wordlist - list of regex's (str) or words (str)
# Preserves all linefeeds and other whitespacing, its not the job of this to remove that
def strip_ignore_text(content, wordlist, mode="content"):
i = 0
output = []
ignore_text = []
ignore_regex = []
ignored_line_numbers = []
ignore_regex_multiline = []
ignored_lines = []

for k in wordlist:
# Is it a regex?
res = re.search(PERL_STYLE_REGEX, k, re.IGNORECASE)
if res:
ignore_regex.append(re.compile(perl_style_slash_enclosed_regex_to_options(k)))
res = re.compile(perl_style_slash_enclosed_regex_to_options(k))
if res.flags & re.DOTALL or res.flags & re.MULTILINE:
ignore_regex_multiline.append(res)
else:
ignore_regex.append(res)
else:
ignore_text.append(k.strip())

for line in content.splitlines(keepends=True):
i += 1
for r in ignore_regex_multiline:
for match in r.finditer(content):
content_lines = content[:match.end()].splitlines(keepends=True)
match_lines = content[match.start():match.end()].splitlines(keepends=True)

end_line = len(content_lines)
start_line = end_line - len(match_lines)

if end_line - start_line <= 1:
# Match is empty or in the middle of the line
ignored_lines.append(start_line)
else:
for i in range(start_line, end_line):
ignored_lines.append(i)

line_index = 0
lines = content.splitlines(keepends=True)
for line in lines:
# Always ignore blank lines in this mode. (when this function gets called)
got_match = False
for l in ignore_text:

@@ -393,17 +412,19 @@ def strip_ignore_text(content, wordlist, mode="content"):
if r.search(line):
got_match = True

if not got_match:
# Not ignored, and should preserve "keepends"
output.append(line)
else:
ignored_line_numbers.append(i)
if got_match:
ignored_lines.append(line_index)

line_index += 1

ignored_lines = set([i for i in ignored_lines if i >= 0 and i < len(lines)])

# Used for finding out what to highlight
if mode == "line numbers":
return ignored_line_numbers
return [i + 1 for i in ignored_lines]

return ''.join(output)
output_lines = set(range(len(lines))) - ignored_lines
return ''.join([lines[i] for i in output_lines])

def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False) -> str:
from xml.sax.saxutils import escape as xml_escape

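The branch on `res.flags` above is what routes `/…/s` and `/…/m` patterns to the whole-content `finditer()` pass instead of the per-line search. A small illustration of that flag check using plain `re` and made-up patterns:

```python
import re

# Illustrative only: a DOTALL ("/.../s") pattern is handled by the multiline
# branch, while a plain pattern stays on the per-line search branch.
for pattern in (re.compile(r"but 1.+5 lines", re.DOTALL), re.compile(r"ignore-me")):
    if pattern.flags & (re.DOTALL | re.MULTILINE):
        print(pattern.pattern, "-> whole-content finditer() branch")
    else:
        print(pattern.pattern, "-> per-line search() branch")
```
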
@@ -60,6 +60,9 @@ class model(dict):
'webdriver_delay': None , # Extra delay in seconds before extracting text
'tags': {}, #@todo use Tag.model initialisers
'timezone': None, # Default IANA timezone name
'ui': {
'open_diff_in_new_tab': True,
},
}
}
}

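A hedged sketch of how this nested default is consumed: the settings form mounts it as `ui = FormField(globalSettingsApplicationUIForm)` and the watch-overview template reads it back to decide whether the History/Preview links get a `target` attribute. The dict below is a simplified stand-in for the datastore.

```python
# Simplified stand-in for datastore.data['settings'], for illustration only.
settings = {
    'application': {
        'ui': {
            'open_diff_in_new_tab': True,
        },
    },
}

open_diff_in_new_tab = settings['application']['ui'].get('open_diff_in_new_tab')
watch_uuid = "example-uuid"  # hypothetical value
# Mirrors the Jinja expression in the watch-overview template above.
target_attr = f' target="{watch_uuid}"' if open_diff_in_new_tab else ''
print(target_attr or "(opens in current tab)")
```
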
@@ -159,7 +159,7 @@ class difference_detection_processor():
)

#@todo .quit here could go on close object, so we can run JS if change-detected
self.fetcher.quit()
self.fetcher.quit(watch=self.watch)

# After init, call run_changedetection() which will do the actual change-detection

@@ -14,7 +14,8 @@ SCRIPT_DIR=$( cd -- "$( dirname -- "${BASH_SOURCE[0]}" )" &> /dev/null && pwd )
find tests/test_*py -type f|while read test_name
do
echo "TEST RUNNING $test_name"
pytest $test_name
# REMOVE_REQUESTS_OLD_SCREENSHOTS disabled so that we can write a screenshot and send it in test_notifications.py without a real browser
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest $test_name
done

echo "RUNNING WITH BASE_URL SET"

@@ -22,7 +23,7 @@ echo "RUNNING WITH BASE_URL SET"
# Now re-run some tests with BASE_URL enabled
# Re #65 - Ability to include a link back to the installation, in the notification.
export BASE_URL="https://really-unique-domain.io"
pytest tests/test_notification.py
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py

# Re-run with HIDE_REFERER set - could affect login

@@ -32,7 +33,7 @@ pytest tests/test_access_control.py
# Re-run a few tests that will trigger brotli based storage
export SNAPSHOT_BROTLI_COMPRESSION_THRESHOLD=5
pytest tests/test_access_control.py
pytest tests/test_notification.py
REMOVE_REQUESTS_OLD_SCREENSHOTS=false pytest tests/test_notification.py
pytest tests/test_backend.py
pytest tests/test_rss.py
pytest tests/test_unique_lines.py

@@ -32,7 +32,6 @@ def test_strip_regex_text_func():
]

stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines)

assert "but 1 lines" in stripped_content
assert "igNORe-cAse text" not in stripped_content
assert "but 1234 lines" not in stripped_content

@@ -42,6 +41,46 @@ def test_strip_regex_text_func():
# Check line number reporting
stripped_content = html_tools.strip_ignore_text(test_content, ignore_lines, mode="line numbers")
assert stripped_content == [2, 5, 6, 7, 8, 10]

stripped_content = html_tools.strip_ignore_text(test_content, ['/but 1.+5 lines/s'])
assert "but 1 lines" not in stripped_content
assert "skip 5 lines" not in stripped_content

stripped_content = html_tools.strip_ignore_text(test_content, ['/but 1.+5 lines/s'], mode="line numbers")
assert stripped_content == [4, 5]

stripped_content = html_tools.strip_ignore_text(test_content, ['/.+/s'])
assert stripped_content == ""

stripped_content = html_tools.strip_ignore_text(test_content, ['/.+/s'], mode="line numbers")
assert stripped_content == [1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11]

stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+but.+\\n.+lines$/m'])
assert "but 1 lines" not in stripped_content
assert "skip 5 lines" not in stripped_content

stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+but.+\\n.+lines$/m'], mode="line numbers")
assert stripped_content == [4, 5]

stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+?\.$/m'])
assert "but sometimes we want to remove the lines." not in stripped_content
assert "but not always." not in stripped_content

stripped_content = html_tools.strip_ignore_text(test_content, ['/^.+?\.$/m'], mode="line numbers")
assert stripped_content == [2, 11]

stripped_content = html_tools.strip_ignore_text(test_content, ['/but.+?but/ms'])
assert "but sometimes we want to remove the lines." not in stripped_content
assert "but 1 lines" not in stripped_content
assert "but 1234 lines" not in stripped_content
assert "igNORe-cAse text we dont want to keep" not in stripped_content
assert "but not always." not in stripped_content

stripped_content = html_tools.strip_ignore_text(test_content, ['/but.+?but/ms'], mode="line numbers")
assert stripped_content == [2, 3, 4, 9, 10, 11]

stripped_content = html_tools.strip_ignore_text("\n\ntext\n\ntext\n\n", ['/^$/ms'], mode="line numbers")
assert stripped_content == [1, 2, 4, 6]

# Check that linefeeds are preserved when there are is no matching ignores
content = "some text\n\nand other text\n"

changedetectionio/tests/test_ui.py (new file, 80 lines)
@@ -0,0 +1,80 @@
#!/usr/bin/env python3

from flask import url_for
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks

def test_checkbox_open_diff_in_new_tab(client, live_server):

set_original_response()
live_server_setup(live_server)

# Add our URL to the import page
res = client.post(
url_for("imports.import_page"),
data={"urls": url_for('test_endpoint', _external=True)},
follow_redirects=True
)

assert b"1 Imported" in res.data
wait_for_all_checks(client)

# Make a change
set_modified_response()

# Test case 1 - checkbox is enabled in settings
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-open_diff_in_new_tab": "1"},
follow_redirects=True
)
assert b'Settings updated' in res.data

# Force recheck
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data

wait_for_all_checks(client)

res = client.get(url_for("watchlist.index"))
lines = res.data.decode().split("\n")

# Find link to diff page
target_line = None
for line in lines:
if '/diff' in line:
target_line = line.strip()
break

assert target_line != None
assert 'target=' in target_line

# Test case 2 - checkbox is disabled in settings
res = client.post(
url_for("settings.settings_page"),
data={"application-ui-open_diff_in_new_tab": ""},
follow_redirects=True
)
assert b'Settings updated' in res.data

# Force recheck
res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True)
assert b'Queued 1 watch for rechecking.' in res.data

wait_for_all_checks(client)

res = client.get(url_for("watchlist.index"))
lines = res.data.decode().split("\n")

# Find link to diff page
target_line = None
for line in lines:
if '/diff' in line:
target_line = line.strip()
break

assert target_line != None
assert 'target=' not in target_line

# Cleanup everything
res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True)
assert b'Deleted' in res.data

@@ -63,6 +63,10 @@ services:
#
# A valid timezone name to run as (for scheduling watch checking) see https://en.wikipedia.org/wiki/List_of_tz_database_time_zones
# - TZ=America/Los_Angeles
#
# Maximum height of screenshots, default is 16000 px, screenshots will be clipped to this if exceeded.
# RAM usage will be higher if you increase this.
# - SCREENSHOT_MAX_HEIGHT=16000

# Comment out ports: when using behind a reverse proxy , enable networks: etc.
ports:

docs/web-page-change-conditions.png (new binary file, 104 KiB; binary content not shown)