Compare commits

..

4 Commits

Author     SHA1        Message                                                                        Date
dgtlmoon   24b9e4dc83  Revert multiprocess html to text and add test for high concurrency            2025-05-08 18:54:14 +02:00
dgtlmoon   d481a6b7b1  Improve wait check                                                             2025-05-08 18:38:44 +02:00
dgtlmoon   e38f264750  Avoid pickling issues                                                          2025-05-08 18:15:16 +02:00
dgtlmoon   1ec86bd38d  Revert multiprocess memory management, was unreliable under high concurrency  2025-05-08 18:09:47 +02:00
11 changed files with 16 additions and 16 deletions

View File

@@ -2,7 +2,7 @@
 # Read more https://github.com/dgtlmoon/changedetection.io/wiki
-__version__ = '0.49.17'
+__version__ = '0.49.16'
 from changedetectionio.strtobool import strtobool
 from json.decoder import JSONDecodeError

View File

@@ -186,7 +186,7 @@ class fetcher(Fetcher):
 self.page = context.new_page()
 # Listen for all console events and handle errors
-self.page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
+self.page.on("console", lambda msg: print(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text} {msg.args}"))
 # Re-use as much code from browser steps as possible so its the same
 from changedetectionio.blueprint.browser_steps.browser_steps import steppable_browser_interface

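Note: the only change in the fetcher hunk above is swapping the loguru logger.debug call for a plain print inside the page console listener. A minimal, self-contained sketch of that listener pattern (the dump_console helper and its URL handling are illustrative, not project code):

from loguru import logger
from playwright.sync_api import sync_playwright

def dump_console(url):
    # Open the page and forward every console event to loguru, mirroring the
    # page.on("console", ...) listener touched in the hunk above.
    with sync_playwright() as p:
        browser = p.chromium.launch()
        page = browser.new_page()
        # msg.type is "log", "error", etc.; msg.text is the rendered message
        page.on("console", lambda msg: logger.debug(f"Playwright console: Watch URL: {url} {msg.type}: {msg.text}"))
        page.goto(url)
        browser.close()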
View File

@@ -309,10 +309,10 @@ def extract_json_as_string(content, json_filter, ensure_is_ldjson_info_type=None
 soup = BeautifulSoup(content, 'html.parser')
 if ensure_is_ldjson_info_type:
-bs_result = soup.find_all('script', {"type": "application/ld+json"})
+bs_result = soup.findAll('script', {"type": "application/ld+json"})
 else:
-bs_result = soup.find_all('script')
-bs_result += soup.find_all('body')
+bs_result = soup.findAll('script')
+bs_result += soup.findAll('body')
 bs_jsons = []
 for result in bs_result:

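Note: find_all and findAll are the same BeautifulSoup method; findAll is the pre-4.x camelCase alias kept for backwards compatibility, so this hunk is behaviour-neutral. A small illustrative sketch (not project code) of the same ld+json extraction using the modern name:

import json
from bs4 import BeautifulSoup

def extract_ldjson(html):
    soup = BeautifulSoup(html, 'html.parser')
    results = []
    # Collect <script type="application/ld+json"> blocks, as in the hunk above, then decode each one
    for tag in soup.find_all('script', {"type": "application/ld+json"}):
        try:
            results.append(json.loads(tag.string or tag.get_text()))
        except json.JSONDecodeError:
            continue  # skip malformed blocks rather than failing the whole page
    return results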
View File

@@ -89,7 +89,7 @@ class difference_detection_processor():
 proxy_url = self.datastore.proxy_list.get(preferred_proxy_id).get('url')
 logger.debug(f"Selected proxy key '{preferred_proxy_id}' as proxy URL '{proxy_url}' for {url}")
 else:
-logger.debug("Skipping adding proxy data when custom Browser endpoint is specified. ")
+logger.debug(f"Skipping adding proxy data when custom Browser endpoint is specified. ")
 # Now call the fetcher (playwright/requests/etc) with arguments that only a fetcher would need.
 # When browser_connection_url is None, it method should default to working out whats the best defaults (os env vars etc)

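Note: the f prefix added here changes nothing at runtime because the string has no {} placeholders; linters such as pyflakes flag this as "f-string is missing placeholders". The same applies to the other f-string-only hunks further down in this compare. For contrast (values are hypothetical, for illustration only):

from loguru import logger

endpoint = "ws://example-browserless:3000"  # hypothetical value
logger.debug("Skipping adding proxy data when custom Browser endpoint is specified.")  # plain string, nothing to interpolate
logger.debug(f"Custom Browser endpoint in use: {endpoint}")  # the f-string only earns its prefix when something is interpolated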
View File

@@ -79,7 +79,7 @@ def get_itemprop_availability(html_content) -> Restock:
 # First phase, dead simple scanning of anything that looks useful
 value = Restock()
 if data:
-logger.debug("Using jsonpath to find price/availability/etc")
+logger.debug(f"Using jsonpath to find price/availability/etc")
 price_parse = parse('$..(price|Price)')
 pricecurrency_parse = parse('$..(pricecurrency|currency|priceCurrency )')
 availability_parse = parse('$..(availability|Availability)')
@@ -110,7 +110,7 @@ def get_itemprop_availability(html_content) -> Restock:
 # Second, go dig OpenGraph which is something that jsonpath_ng cant do because of the tuples and double-dots (:)
 if not value.get('price') or value.get('availability'):
-logger.debug("Alternatively digging through OpenGraph properties for restock/price info..")
+logger.debug(f"Alternatively digging through OpenGraph properties for restock/price info..")
 jsonpath_expr = parse('$..properties')
 for match in jsonpath_expr.find(data):

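Note: the surrounding code digs price/availability out of embedded structured data with jsonpath expressions such as parse('$..(price|Price)'). A minimal sketch of the underlying jsonpath_ng usage with a made-up payload; it uses only plain recursive descent, the union form used in the project is beyond this sketch:

from jsonpath_ng import parse

# Hypothetical ld+json payload, for illustration only
data = {
    "@type": "Product",
    "offers": {"price": "19.99", "priceCurrency": "EUR", "availability": "http://schema.org/InStock"},
}

# Find every 'price' key anywhere in the structure, similar to the price_parse expression above
price_matches = parse('$..price').find(data)
if price_matches:
    print(price_matches[0].value)  # -> "19.99"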
View File

@@ -15,7 +15,7 @@ def _task(watch, update_handler):
 except FilterNotFoundInResponse as e:
 text_after_filter = f"Filter not found in HTML: {str(e)}"
 except ReplyWithContentButNoText as e:
-text_after_filter = "Filter found but no text (empty result)"
+text_after_filter = f"Filter found but no text (empty result)"
 except Exception as e:
 text_after_filter = f"Error: {str(e)}"

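Note: this helper converts known filter exceptions into user-facing preview text instead of letting them bubble up. The general shape, sketched standalone (the exception classes are redefined here only to keep the example self-contained; in the project they are imported from its own modules):

class FilterNotFoundInResponse(Exception):
    pass

class ReplyWithContentButNoText(Exception):
    pass

def run_filter_preview(apply_filters):
    # Map known filter failures to readable messages instead of raising
    try:
        return apply_filters()
    except FilterNotFoundInResponse as e:
        return f"Filter not found in HTML: {str(e)}"
    except ReplyWithContentButNoText:
        return "Filter found but no text (empty result)"
    except Exception as e:
        return f"Error: {str(e)}"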
View File

@@ -7,7 +7,7 @@ from ..util import live_server_setup, wait_for_all_checks
 def do_test(client, live_server, make_test_use_extra_browser=False):
 # Grep for this string in the logs?
-test_url = "https://changedetection.io/ci-test.html?non-custom-default=true"
+test_url = f"https://changedetection.io/ci-test.html?non-custom-default=true"
 # "non-custom-default" should not appear in the custom browser connection
 custom_browser_name = 'custom browser URL'
@@ -51,7 +51,7 @@ def do_test(client, live_server, make_test_use_extra_browser=False):
 url_for("ui.ui_edit.edit_page", uuid="first"),
 data={
 # 'run_customer_browser_url_tests.sh' will search for this string to know if we hit the right browser container or not
-"url": "https://changedetection.io/ci-test.html?custom-browser-search-string=1",
+"url": f"https://changedetection.io/ci-test.html?custom-browser-search-string=1",
 "tags": "",
 "headers": "",
 'fetch_backend': f"extra_browser_{custom_browser_name}",

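Note: this test drives the UI through Flask's test client, posting the edit form for the first watch and pointing it at an "extra browser" endpoint. A sketch of that call in isolation (field names follow the hunk above; the status-code assertion and the app/request context normally supplied by the test harness are assumptions):

from flask import url_for

def update_watch_fetch_backend(client, custom_browser_name):
    res = client.post(
        url_for("ui.ui_edit.edit_page", uuid="first"),
        data={
            "url": "https://changedetection.io/ci-test.html?custom-browser-search-string=1",
            "tags": "",
            "headers": "",
            "fetch_backend": f"extra_browser_{custom_browser_name}",
        },
        follow_redirects=True,
    )
    assert res.status_code == 200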
View File

@@ -7,7 +7,7 @@ from changedetectionio.tests.util import live_server_setup, wait_for_all_checks,
 def set_response():
 import time
-data = """<html>
+data = f"""<html>
 <body>
 <h1>Awesome, you made it</h1>
 yeah the socks request worked

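Note: switching data = """...""" to data = f"""...""" only matters if something is interpolated into the test response (the import time just above suggests a timestamp, though that part is not shown here). One thing to watch with f-string HTML templates: any literal { or } (inline CSS/JS) has to be doubled. Illustrative sketch only:

import time

data = f"""<html>
<head><style>body {{ font-family: sans-serif; }}</style></head>
<body>
<h1>Awesome, you made it</h1>
yeah the socks request worked at {time.time()}
</body></html>"""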
View File

@@ -6,7 +6,7 @@ from changedetectionio.tests.util import live_server_setup, wait_for_all_checks
 def set_response():
 import time
-data = """<html>
+data = f"""<html>
 <body>
 <h1>Awesome, you made it</h1>
 yeah the socks request worked

View File

@@ -6,7 +6,7 @@ from changedetectionio.tests.util import live_server_setup, wait_for_all_checks,
 def set_response():
-data = """<html>
+data = f"""<html>
 <body>Awesome, you made it<br>
 yeah the socks request worked<br>
 something to ignore<br>

View File

@@ -32,7 +32,7 @@ dnspython==2.6.1 # related to eventlet fixes
 # jq not available on Windows so must be installed manually
 # Notification library
-apprise==1.9.3
+apprise==1.9.2
 # apprise mqtt https://github.com/dgtlmoon/changedetection.io/issues/315
 # use any version other than 2.0.x due to https://github.com/eclipse/paho.mqtt.python/issues/814
@@ -42,7 +42,7 @@ paho-mqtt!=2.0.*
 cryptography~=42.0.8
 # Used for CSS filtering
-beautifulsoup4>=4.0.0
+beautifulsoup4
 # XPath filtering, lxml is required by bs4 anyway, but put it here to be safe.
 # #2328 - 5.2.0 and 5.2.1 had extra CPU flag CFLAGS set which was not compatible on older hardware