Compare commits

...

16 Commits

Author SHA1 Message Date
dgtlmoon
21c63db01f Update requirements.txt 2024-08-22 14:17:02 +02:00
dgtlmoon
e3387a00e3 Re #1708 unpin jsonschema 2024-08-22 14:13:04 +02:00
dgtlmoon
6dd1fa2b88 0.46.03 2024-08-19 17:22:13 +02:00
dgtlmoon
371f85d544 Watch 'Download last snapshot' link/button should give last, not first snapshot (#2576) 2024-08-19 17:20:30 +02:00
dgtlmoon
932cf15e1e Price and restock scraping - small price fix scraper (#2575) 2024-08-19 15:47:19 +02:00
Mike Splain
bf0d410d32 Browser Steps UI - Interactive UI wasn't sending headers but was when the check ran (#2551) 2024-08-19 10:21:05 +02:00
dgtlmoon
730f37c7ba Set encoding type for scraper script reader (#2574 #2568) 2024-08-19 09:17:18 +02:00
dgtlmoon
8a35d62e02 Handle zero-byte/empty content responses with "[ ] Empty pages are a change" option, the same as when the HTML doesnt render any useful text (#2530) 2024-07-29 13:27:59 +02:00
dgtlmoon
f527744024 0.46.02 2024-07-27 20:28:04 +02:00
dgtlmoon
71c9b1273c Adding test for #1995 UTF-8 encoding in POST request body and post:// notifications (#2525) 2024-07-27 19:47:03 +02:00
dgtlmoon
ec68450df1 Updating Apprise notification library , Splunk/VictorOps, Africas Talking, Microsoft Power Automate / Workflows, Société Française du Radiotéléphone (SFR) Support (#2524) 2024-07-27 14:28:57 +02:00
dgtlmoon
2fd762a783 Encode POST style requests and notifications as UTF-8 if it has no encoding/basic string (#2523) 2024-07-27 14:27:15 +02:00
Kenny Root
d7e85ffe8f VisualSelector+BrowserSteps - When scraping elements, check for null results (#2517) 2024-07-25 14:44:10 +02:00
Kenny Root
d23a301826 Use #!/usr/bin/env to support virtualenv (#2518) 2024-07-25 14:39:01 +02:00
dgtlmoon
3ce6096fdb Update README.md 2024-07-21 18:18:24 +02:00
dgtlmoon
8acdcdd861 UI - Adding "Download latest HTML snapshot" from Edit Watch > Stats page for easier debugging (#2513) 2024-07-21 18:17:34 +02:00
79 changed files with 213 additions and 146 deletions

View File

@@ -43,7 +43,7 @@ Requires Playwright to be enabled.
### Awesome restock and price change notifications
Enable the _"Re-stock & Price detection for single product pages"_ option to activate the best way to monitor product pricing.
Enable the _"Re-stock & Price detection for single product pages"_ option to activate the best way to monitor product pricing, this will extract any meta-data in the HTML page and give you many options to follow the pricing of the product.
Easily organise and monitor prices for products from the dashboard, get alerts and notifications when the price of a product changes or comes back in stock again!

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
# Only exists for direct CLI usage

View File

@@ -1,8 +1,8 @@
#!/usr/bin/python3
#!/usr/bin/env python3
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
__version__ = '0.46.01'
__version__ = '0.46.03'
from changedetectionio.strtobool import strtobool
from json.decoder import JSONDecodeError

View File

@@ -85,7 +85,8 @@ def construct_blueprint(datastore: ChangeDetectionStore):
browsersteps_start_session['browserstepper'] = browser_steps.browsersteps_live_ui(
playwright_browser=browsersteps_start_session['browser'],
proxy=proxy,
start_url=datastore.data['watching'][watch_uuid].get('url')
start_url=datastore.data['watching'][watch_uuid].get('url'),
headers=datastore.data['watching'][watch_uuid].get('headers')
)
# For test

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import os
import time

View File

@@ -65,8 +65,8 @@ class Fetcher():
def __init__(self):
import importlib.resources
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text()
self.xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text(encoding='utf-8')
self.instock_data_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('stock-not-in-stock.js').read_text(encoding='utf-8')
@abstractmethod
def get_error(self):
@@ -81,7 +81,8 @@ class Fetcher():
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False):
is_binary=False,
empty_pages_are_a_change=False):
# Should set self.error, self.status_code and self.content
pass

View File

@@ -83,7 +83,8 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False):
is_binary=False,
empty_pages_are_a_change=False):
from playwright.sync_api import sync_playwright
import playwright._impl._errors
@@ -130,7 +131,7 @@ class fetcher(Fetcher):
if response is None:
context.close()
browser.close()
logger.debug("Content Fetcher > Response object was none")
logger.debug("Content Fetcher > Response object from the browser communication was none")
raise EmptyReply(url=url, status_code=None)
try:
@@ -166,10 +167,10 @@ class fetcher(Fetcher):
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
if len(self.page.content().strip()) == 0:
if not empty_pages_are_a_change and len(self.page.content().strip()) == 0:
logger.debug("Content Fetcher > Content was empty, empty_pages_are_a_change = False")
context.close()
browser.close()
logger.debug("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=response.status)
# Run Browser Steps here

View File

@@ -75,7 +75,8 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes,
current_include_filters,
is_binary
is_binary,
empty_pages_are_a_change
):
from changedetectionio.content_fetchers import visualselector_xpath_selectors
@@ -153,7 +154,7 @@ class fetcher(Fetcher):
if response is None:
await self.page.close()
await browser.close()
logger.warning("Content Fetcher > Response object was none")
logger.warning("Content Fetcher > Response object was none (as in, the response from the browser was empty, not just the content)")
raise EmptyReply(url=url, status_code=None)
self.headers = response.headers
@@ -186,10 +187,11 @@ class fetcher(Fetcher):
raise Non200ErrorCodeReceived(url=url, status_code=self.status_code, screenshot=screenshot)
content = await self.page.content
if len(content.strip()) == 0:
if not empty_pages_are_a_change and len(content.strip()) == 0:
logger.error("Content Fetcher > Content was empty (empty_pages_are_a_change is False), closing browsers")
await self.page.close()
await browser.close()
logger.error("Content Fetcher > Content was empty")
raise EmptyReply(url=url, status_code=response.status)
# Run Browser Steps here
@@ -247,7 +249,7 @@ class fetcher(Fetcher):
await self.fetch_page(**kwargs)
def run(self, url, timeout, request_headers, request_body, request_method, ignore_status_codes=False,
current_include_filters=None, is_binary=False):
current_include_filters=None, is_binary=False, empty_pages_are_a_change=False):
#@todo make update_worker async which could run any of these content_fetchers within memory and time constraints
max_time = os.getenv('PUPPETEER_MAX_PROCESSING_TIMEOUT_SECONDS', 180)
@@ -262,7 +264,8 @@ class fetcher(Fetcher):
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=current_include_filters,
is_binary=is_binary
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
), timeout=max_time))
except asyncio.TimeoutError:
raise(BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))

View File

@@ -1,9 +1,8 @@
from loguru import logger
import chardet
import hashlib
import os
import chardet
import requests
from changedetectionio import strtobool
from changedetectionio.content_fetchers.exceptions import BrowserStepsInUnsupportedFetcher, EmptyReply, Non200ErrorCodeReceived
from changedetectionio.content_fetchers.base import Fetcher
@@ -26,7 +25,8 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False):
is_binary=False,
empty_pages_are_a_change=False):
if self.browser_steps_get_valid_steps():
raise BrowserStepsInUnsupportedFetcher(url=url)
@@ -53,7 +53,7 @@ class fetcher(Fetcher):
session.mount('file://', FileAdapter())
r = session.request(method=request_method,
data=request_body,
data=request_body.encode('utf-8') if type(request_body) is str else request_body,
url=url,
headers=request_headers,
timeout=timeout,
@@ -74,7 +74,10 @@ class fetcher(Fetcher):
self.headers = r.headers
if not r.content or not len(r.content):
raise EmptyReply(url=url, status_code=r.status_code)
if not empty_pages_are_a_change:
raise EmptyReply(url=url, status_code=r.status_code)
else:
logger.debug(f"URL {url} gave zero byte content reply with Status Code {r.status_code}, but empty_pages_are_a_change = True")
# @todo test this
# @todo maybe you really want to test zero-byte return pages?

View File

@@ -214,7 +214,7 @@ if (include_filters.length) {
console.log(e);
}
if (results.length) {
if (results != null && results.length) {
// Iterate over the results
results.forEach(node => {

View File

@@ -56,7 +56,8 @@ class fetcher(Fetcher):
request_method,
ignore_status_codes=False,
current_include_filters=None,
is_binary=False):
is_binary=False,
empty_pages_are_a_change=False):
from selenium import webdriver
from selenium.webdriver.chrome.options import Options as ChromeOptions

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import datetime
import flask_login
@@ -1369,6 +1369,32 @@ def changedetection_app(config=None, datastore_o=None):
except FileNotFoundError:
abort(404)
@app.route("/edit/<string:uuid>/get-html", methods=['GET'])
@login_optionally_required
def watch_get_latest_html(uuid):
from io import BytesIO
from flask import send_file
import brotli
watch = datastore.data['watching'].get(uuid)
if watch and watch.history.keys() and os.path.isdir(watch.watch_data_dir):
latest_filename = list(watch.history.keys())[-1]
html_fname = os.path.join(watch.watch_data_dir, f"{latest_filename}.html.br")
with open(html_fname, 'rb') as f:
if html_fname.endswith('.br'):
# Read and decompress the Brotli file
decompressed_data = brotli.decompress(f.read())
else:
decompressed_data = f.read()
buffer = BytesIO(decompressed_data)
return send_file(buffer, as_attachment=True, download_name=f"{latest_filename}.html", mimetype='text/html')
# Return a 500 error
abort(500)
@app.route("/form/add/quickwatch", methods=['POST'])
@login_optionally_required
def form_quick_watch_add():

View File

@@ -107,7 +107,7 @@ def apprise_custom_api_call_wrapper(body, title, notify_type, *args, **kwargs):
r(results.get('url'),
auth=auth,
data=body,
data=body.encode('utf-8') if type(body) is str else body,
headers=headers,
params=params
)

View File

@@ -26,6 +26,8 @@ class difference_detection_processor():
def call_browser(self):
from requests.structures import CaseInsensitiveDict
from changedetectionio.content_fetchers.exceptions import EmptyReply
# Protect against file:// access
if re.search(r'^file://', self.watch.get('url', '').strip(), re.IGNORECASE):
if not strtobool(os.getenv('ALLOW_FILE_URI', 'false')):
@@ -133,8 +135,18 @@ class difference_detection_processor():
is_binary = self.watch.is_pdf
# And here we go! call the right browser with browser-specific settings
self.fetcher.run(url, timeout, request_headers, request_body, request_method, ignore_status_codes, self.watch.get('include_filters'),
is_binary=is_binary)
empty_pages_are_a_change = self.datastore.data['settings']['application'].get('empty_pages_are_a_change', False)
self.fetcher.run(url=url,
timeout=timeout,
request_headers=request_headers,
request_body=request_body,
request_method=request_method,
ignore_status_codes=ignore_status_codes,
current_include_filters=self.watch.get('include_filters'),
is_binary=is_binary,
empty_pages_are_a_change=empty_pages_are_a_change
)
#@todo .quit here could go on close object, so we can run JS if change-detected
self.fetcher.quit()

View File

@@ -1,11 +1,12 @@
from changedetectionio.model.Watch import model as BaseWatch
import re
from babel.numbers import parse_decimal
from changedetectionio.model.Watch import model as BaseWatch
from typing import Union
import re
class Restock(dict):
def parse_currency(self, raw_value: str) -> float:
def parse_currency(self, raw_value: str) -> Union[float, None]:
# Clean and standardize the value (ie 1,400.00 should be 1400.00), even better would be store the whole thing as an integer.
standardized_value = raw_value
@@ -21,8 +22,11 @@ class Restock(dict):
# Remove any non-numeric characters except for the decimal point
standardized_value = re.sub(r'[^\d.-]', '', standardized_value)
# Convert to float
return float(parse_decimal(standardized_value, locale='en'))
if standardized_value:
# Convert to float
return float(parse_decimal(standardized_value, locale='en'))
return None
def __init__(self, *args, **kwargs):
# Define default values

View File

@@ -479,6 +479,12 @@ Unavailable") }}
</tr>
</tbody>
</table>
{% if watch.history_n %}
<p>
<a href="{{url_for('watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a>
</p>
{% endif %}
</div>
</div>
<div id="actions">

View File

@@ -76,7 +76,7 @@
</div>
<div class="pure-control-group">
{{ render_checkbox_field(form.application.form.empty_pages_are_a_change) }}
<span class="pure-form-message-inline">When a page contains HTML, but no renderable text appears (empty page), is this considered a change?</span>
<span class="pure-form-message-inline">When a request returns no content, or the HTML does not contain any text, is this considered a change?</span>
</div>
{% if form.requests.proxy %}
<div class="pure-control-group inline-radio">

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import resource
import time
from threading import Thread

View File

@@ -1,4 +1,4 @@
# !/usr/bin/python3
#!/usr/bin/env python3
import os
from flask import url_for

View File

@@ -1,3 +1,3 @@
#!/usr/bin/python3
#!/usr/bin/env python3
from .. import conftest

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
from .. import conftest

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import os
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import os
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import os
import time
from flask import url_for

View File

@@ -1,3 +1,3 @@
#!/usr/bin/python3
#!/usr/bin/env python3
from .. import conftest

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import os
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import asyncio
from aiosmtpd.controller import Controller
from aiosmtpd.smtp import SMTP

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import os.path
import time
from flask import url_for
@@ -112,7 +112,7 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
res = client.post(
url_for("settings_page"),
data={"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
"application-notification_body": 'triggered text was -{{triggered_text}}-',
"application-notification_body": 'triggered text was -{{triggered_text}}- 网站监测 内容更新了',
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
"application-notification_urls": test_notification_url,
"application-minutes_between_check": 180,
@@ -167,9 +167,10 @@ def test_check_add_line_contains_trigger(client, live_server, measure_memory_usa
# Takes a moment for apprise to fire
time.sleep(3)
assert os.path.isfile("test-datastore/notification.txt"), "Notification fired because I can see the output file"
with open("test-datastore/notification.txt", 'r') as f:
response= f.read()
assert '-Oh yes please-' in response
with open("test-datastore/notification.txt", 'rb') as f:
response = f.read()
assert b'-Oh yes please-' in response
assert '网站监测 内容更新了'.encode('utf-8') in response
res = client.get(url_for("form_delete", uuid="all"), follow_redirects=True)

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for
@@ -69,6 +69,12 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
wait_for_all_checks(client)
uuid = extract_UUID_from_client(client)
# Check the 'get latest snapshot works'
res = client.get(url_for("watch_get_latest_html", uuid=uuid))
assert b'which has this one new line' in res.data
# Now something should be ready, indicated by having a 'unviewed' class
res = client.get(url_for("index"))
assert b'unviewed' in res.data
@@ -86,7 +92,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert expected_url.encode('utf-8') in res.data
# Following the 'diff' link, it should no longer display as 'unviewed' even after we recheck it a few times
res = client.get(url_for("diff_history_page", uuid="first"))
res = client.get(url_for("diff_history_page", uuid=uuid))
assert b'selected=""' in res.data, "Confirm diff history page loaded"
# Check the [preview] pulls the right one
@@ -143,7 +149,6 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert b'unviewed' not in res.data
# #2458 "clear history" should make the Watch object update its status correctly when the first snapshot lands again
uuid = extract_UUID_from_client(client)
client.get(url_for("clear_watch_history", uuid=uuid))
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
from .util import set_original_response, live_server_setup, wait_for_all_checks
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
# coding=utf-8
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
# https://www.reddit.com/r/selfhosted/comments/wa89kp/comment/ii3a4g7/?context=3
import os

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
import os

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
"""Test suite for the method to extract text from an html string"""
from ..html_tools import html_to_text

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
from . util import live_server_setup
from changedetectionio import html_tools

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
"""Test suite for the render/not render anchor tag content functionality"""
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import io
import os
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
# coding=utf-8
import time

View File

@@ -1,12 +1,7 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for
from urllib.request import urlopen
from .util import set_original_response, set_modified_response, live_server_setup
sleep_time_for_fetch_thread = 3
from .util import set_original_response, set_modified_response, live_server_setup, wait_for_all_checks
def set_nonrenderable_response():
test_return_data = """<html>
@@ -22,6 +17,13 @@ def set_nonrenderable_response():
return None
def set_zero_byte_response():
with open("test-datastore/endpoint-content.txt", "w") as f:
f.write("")
return None
def test_check_basic_change_detection_functionality(client, live_server, measure_memory_usage):
set_original_response()
live_server_setup(live_server)
@@ -35,18 +37,11 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
assert b"1 Imported" in res.data
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# Do this a few times.. ensures we dont accidently set the status
for n in range(3):
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' not in res.data
#####################
@@ -64,7 +59,7 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
@@ -86,14 +81,20 @@ def test_check_basic_change_detection_functionality(client, live_server, measure
client.get(url_for("form_watch_checknow"), follow_redirects=True)
# Give the thread time to pick it up
time.sleep(sleep_time_for_fetch_thread)
wait_for_all_checks(client)
# It should report nothing found (no new 'unviewed' class)
res = client.get(url_for("index"))
assert b'unviewed' in res.data
client.get(url_for("mark_all_viewed"), follow_redirects=True)
# A totally zero byte (#2528) response should also not trigger an error
set_zero_byte_response()
client.get(url_for("form_watch_checknow"), follow_redirects=True)
wait_for_all_checks(client)
res = client.get(url_for("index"))
assert b'unviewed' in res.data # A change should have registered because empty_pages_are_a_change is ON
assert b'fetch-error' not in res.data
#
# Cleanup everything

View File

@@ -291,11 +291,11 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
data={
"application-fetch_backend": "html_requests",
"application-minutes_between_check": 180,
"application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444 }',
"application-notification_body": '{ "url" : "{{ watch_url }}", "secret": 444, "somebug": "网站监测 内容更新了" }',
"application-notification_format": default_notification_format,
"application-notification_urls": test_notification_url,
# https://github.com/caronc/apprise/wiki/Notify_Custom_JSON#get-parameter-manipulation
"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }} ",
},
follow_redirects=True
)
@@ -324,6 +324,7 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
j = json.loads(x)
assert j['url'].startswith('http://localhost')
assert j['secret'] == 444
assert j['somebug'] == '网站监测 内容更新了'
# URL check, this will always be converted to lowercase
assert os.path.isfile("test-datastore/notification-url.txt")
@@ -354,9 +355,10 @@ def test_notification_custom_endpoint_and_jinja2(client, live_server, measure_me
#2510
def test_global_send_test_notification(client, live_server, measure_memory_usage):
#live_server_setup(live_server)
set_original_response()
if os.path.isfile("test-datastore/notification.txt"):
os.unlink("test-datastore/notification.txt")
# otherwise other settings would have already existed from previous tests in this file
res = client.post(
@@ -364,7 +366,8 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
data={
"application-fetch_backend": "html_requests",
"application-minutes_between_check": 180,
"application-notification_body": 'change detection is cool',
#1995 UTF-8 content should be encoded
"application-notification_body": 'change detection is cool 网站监测 内容更新了',
"application-notification_format": default_notification_format,
"application-notification_urls": "",
"application-notification_title": "New ChangeDetection.io Notification - {{ watch_url }}",
@@ -399,8 +402,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
with open("test-datastore/notification.txt", 'r') as f:
x = f.read()
assert 'change detection is coo' in x
assert 'change detection is cool 网站监测 内容更新了' in x
os.unlink("test-datastore/notification.txt")
@@ -420,7 +422,7 @@ def test_global_send_test_notification(client, live_server, measure_memory_usage
with open("test-datastore/notification.txt", 'r') as f:
x = f.read()
# Should come from notification.py default handler when there is no notification body to pull from
assert 'change detection is coo' in x
assert 'change detection is cool 网站监测 内容更新了' in x
client.get(
url_for("form_delete", uuid="all"),

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import os
import time

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import time
from flask import url_for

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
# run from dir above changedetectionio/ dir
# python3 -m unittest changedetectionio.tests.unit.test_jinja2_security

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
# run from dir above changedetectionio/ dir
# python3 -m unittest changedetectionio.tests.unit.test_notification_diff

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
# run from dir above changedetectionio/ dir
# python3 -m unittest changedetectionio.tests.unit.test_restock_logic

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
# run from dir above changedetectionio/ dir
# python3 -m unittest changedetectionio.tests.unit.test_notification_diff

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
from flask import make_response, request
from flask import url_for

View File

@@ -1,3 +1,3 @@
#!/usr/bin/python3
#!/usr/bin/env python3
from .. import conftest

View File

@@ -1,4 +1,4 @@
#!/usr/bin/python3
#!/usr/bin/env python3
import os
from flask import url_for

View File

@@ -1,6 +1,5 @@
from .processors.exceptions import ProcessorException
from . import content_fetchers
import changedetectionio.content_fetchers.exceptions as content_fetchers_exceptions
from changedetectionio.processors.text_json_diff.processor import FilterNotFoundInResponse
from changedetectionio import html_tools
@@ -301,7 +300,7 @@ class update_worker(threading.Thread):
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': e.message})
process_changedetection_results = False
except content_fetchers.exceptions.ReplyWithContentButNoText as e:
except content_fetchers_exceptions.ReplyWithContentButNoText as e:
# Totally fine, it's by choice - just continue on, nothing more to care about
# Page had elements/content but no renderable text
# Backend (not filters) gave zero output
@@ -327,7 +326,7 @@ class update_worker(threading.Thread):
process_changedetection_results = False
except content_fetchers.exceptions.Non200ErrorCodeReceived as e:
except content_fetchers_exceptions.Non200ErrorCodeReceived as e:
if e.status_code == 403:
err_text = "Error - 403 (Access denied) received"
elif e.status_code == 404:
@@ -380,23 +379,23 @@ class update_worker(threading.Thread):
process_changedetection_results = False
except content_fetchers.exceptions.checksumFromPreviousCheckWasTheSame as e:
except content_fetchers_exceptions.checksumFromPreviousCheckWasTheSame as e:
# Yes fine, so nothing todo, don't continue to process.
process_changedetection_results = False
changed_detected = False
except content_fetchers.exceptions.BrowserConnectError as e:
except content_fetchers_exceptions.BrowserConnectError as e:
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg
}
)
process_changedetection_results = False
except content_fetchers.exceptions.BrowserFetchTimedOut as e:
except content_fetchers_exceptions.BrowserFetchTimedOut as e:
self.datastore.update_watch(uuid=uuid,
update_obj={'last_error': e.msg
}
)
process_changedetection_results = False
except content_fetchers.exceptions.BrowserStepsStepException as e:
except content_fetchers_exceptions.BrowserStepsStepException as e:
if not self.datastore.data['watching'].get(uuid):
continue
@@ -438,25 +437,25 @@ class update_worker(threading.Thread):
process_changedetection_results = False
except content_fetchers.exceptions.EmptyReply as e:
except content_fetchers_exceptions.EmptyReply as e:
# Some kind of custom to-str handler in the exception handler that does this?
err_text = "EmptyReply - try increasing 'Wait seconds before extracting text', Status Code {}".format(e.status_code)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers.exceptions.ScreenshotUnavailable as e:
except content_fetchers_exceptions.ScreenshotUnavailable as e:
err_text = "Screenshot unavailable, page did not render fully in the expected time or page was too long - try increasing 'Wait seconds before extracting text'"
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers.exceptions.JSActionExceptions as e:
except content_fetchers_exceptions.JSActionExceptions as e:
err_text = "Error running JS Actions - Page request - "+e.message
if e.screenshot:
watch.save_screenshot(screenshot=e.screenshot, as_error=True)
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text,
'last_check_status': e.status_code})
process_changedetection_results = False
except content_fetchers.exceptions.PageUnloadable as e:
except content_fetchers_exceptions.PageUnloadable as e:
err_text = "Page request from server didnt respond correctly"
if e.message:
err_text = "{} - {}".format(err_text, e.message)
@@ -468,7 +467,7 @@ class update_worker(threading.Thread):
'last_check_status': e.status_code,
'has_ldjson_price_data': None})
process_changedetection_results = False
except content_fetchers.exceptions.BrowserStepsInUnsupportedFetcher as e:
except content_fetchers_exceptions.BrowserStepsInUnsupportedFetcher as e:
err_text = "This watch has Browser Steps configured and so it cannot run with the 'Basic fast Plaintext/HTTP Client', either remove the Browser Steps or select a Chrome fetcher."
self.datastore.update_watch(uuid=uuid, update_obj={'last_error': err_text})
process_changedetection_results = False

View File

@@ -35,7 +35,7 @@ dnspython==2.6.1 # related to eventlet fixes
# jq not available on Windows so must be installed manually
# Notification library
apprise~=1.8.0
apprise~=1.8.1
# apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
# and 2.0.0 https://github.com/dgtlmoon/changedetection.io/issues/2241 not yet compatible
@@ -79,8 +79,9 @@ pyppeteerstealth>=0.0.4
pytest ~=7.2
pytest-flask ~=1.2
# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
jsonschema==4.17.3
# Anything 4.0 and up but not 5.0
jsonschema ~= 4.0
loguru