mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-10-30 14:17:40 +00:00
Merge branch 'master' into conditions-levenshtein
This commit is contained in:
15
.github/workflows/test-only.yml
vendored
15
.github/workflows/test-only.yml
vendored
@@ -8,13 +8,13 @@ jobs:
|
||||
runs-on: ubuntu-latest
|
||||
steps:
|
||||
- uses: actions/checkout@v4
|
||||
- name: Lint with flake8
|
||||
- name: Lint with Ruff
|
||||
run: |
|
||||
pip3 install flake8
|
||||
# stop the build if there are Python syntax errors or undefined names
|
||||
flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics
|
||||
# exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide
|
||||
flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics
|
||||
pip install ruff
|
||||
# Check for syntax errors and undefined names
|
||||
ruff check . --select E9,F63,F7,F82
|
||||
# Complete check with errors treated as warnings
|
||||
ruff check . --exit-zero
|
||||
|
||||
test-application-3-10:
|
||||
needs: lint-code
|
||||
@@ -41,5 +41,4 @@ jobs:
|
||||
uses: ./.github/workflows/test-stack-reusable-workflow.yml
|
||||
with:
|
||||
python-version: '3.13'
|
||||
skip-pypuppeteer: true
|
||||
|
||||
skip-pypuppeteer: true
|
||||
@@ -172,8 +172,8 @@ jobs:
|
||||
curl --retry-connrefused --retry 6 -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid
|
||||
|
||||
# Check whether TRACE log is enabled.
|
||||
# Also, check whether TRACE is came from STDERR
|
||||
docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1
|
||||
# Also, check whether TRACE came from STDOUT
|
||||
docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1
|
||||
# Check whether DEBUG came from STDOUT
|
||||
docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1
|
||||
|
||||
|
||||
1
.gitignore
vendored
1
.gitignore
vendored
@@ -16,6 +16,7 @@ dist/
|
||||
.env
|
||||
.venv/
|
||||
venv/
|
||||
.python-version
|
||||
|
||||
# IDEs
|
||||
.idea
|
||||
|
||||
9
.pre-commit-config.yaml
Normal file
9
.pre-commit-config.yaml
Normal file
@@ -0,0 +1,9 @@
|
||||
repos:
|
||||
- repo: https://github.com/astral-sh/ruff-pre-commit
|
||||
rev: v0.11.2
|
||||
hooks:
|
||||
# Lint (and apply safe fixes)
|
||||
- id: ruff
|
||||
args: [--fix]
|
||||
# Format
|
||||
- id: ruff-format
|
||||
48
.ruff.toml
Normal file
48
.ruff.toml
Normal file
@@ -0,0 +1,48 @@
|
||||
# Minimum supported version
|
||||
target-version = "py310"
|
||||
|
||||
# Formatting options
|
||||
line-length = 100
|
||||
indent-width = 4
|
||||
|
||||
exclude = [
|
||||
"__pycache__",
|
||||
".eggs",
|
||||
".git",
|
||||
".tox",
|
||||
".venv",
|
||||
"*.egg-info",
|
||||
"*.pyc",
|
||||
]
|
||||
|
||||
[lint]
|
||||
# https://docs.astral.sh/ruff/rules/
|
||||
select = [
|
||||
"B", # flake8-bugbear
|
||||
"B9",
|
||||
"C",
|
||||
"E", # pycodestyle
|
||||
"F", # Pyflakes
|
||||
"I", # isort
|
||||
"N", # pep8-naming
|
||||
"UP", # pyupgrade
|
||||
"W", # pycodestyle
|
||||
]
|
||||
ignore = [
|
||||
"B007", # unused-loop-control-variable
|
||||
"B909", # loop-iterator-mutation
|
||||
"E203", # whitespace-before-punctuation
|
||||
"E266", # multiple-leading-hashes-for-block-comment
|
||||
"E501", # line-too-long
|
||||
"F403", # undefined-local-with-import-star
|
||||
"N802", # invalid-function-name
|
||||
"N806", # non-lowercase-variable-in-function
|
||||
"N815", # mixed-case-variable-in-class-scope
|
||||
]
|
||||
|
||||
[lint.mccabe]
|
||||
max-complexity = 12
|
||||
|
||||
[format]
|
||||
indent-style = "space"
|
||||
quote-style = "preserve"
|
||||
@@ -2,7 +2,7 @@
|
||||
|
||||
# Read more https://github.com/dgtlmoon/changedetection.io/wiki
|
||||
|
||||
__version__ = '0.49.14'
|
||||
__version__ = '0.49.15'
|
||||
|
||||
from changedetectionio.strtobool import strtobool
|
||||
from json.decoder import JSONDecodeError
|
||||
@@ -106,7 +106,7 @@ def main():
|
||||
# Without this, a logger will be duplicated
|
||||
logger.remove()
|
||||
try:
|
||||
log_level_for_stdout = { 'DEBUG', 'SUCCESS' }
|
||||
log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' }
|
||||
logger.configure(handlers=[
|
||||
{"sink": sys.stdout, "level": logger_level,
|
||||
"filter" : lambda record: record['level'].name in log_level_for_stdout},
|
||||
|
||||
@@ -53,14 +53,7 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
a = "?" if not '?' in base_url else '&'
|
||||
base_url += a + f"timeout={keepalive_ms}"
|
||||
|
||||
try:
|
||||
browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
|
||||
except Exception as e:
|
||||
if 'ECONNREFUSED' in str(e):
|
||||
return make_response('Unable to start the Playwright Browser session, is it running?', 401)
|
||||
else:
|
||||
# Other errors, bad URL syntax, bad reply etc
|
||||
return make_response(str(e), 401)
|
||||
browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url)
|
||||
|
||||
proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid)
|
||||
proxy = None
|
||||
@@ -109,7 +102,16 @@ def construct_blueprint(datastore: ChangeDetectionStore):
|
||||
|
||||
logger.debug("Starting connection with playwright")
|
||||
logger.debug("browser_steps.py connecting")
|
||||
browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
|
||||
|
||||
try:
|
||||
browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid)
|
||||
except Exception as e:
|
||||
if 'ECONNREFUSED' in str(e):
|
||||
return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401)
|
||||
else:
|
||||
# Other errors, bad URL syntax, bad reply etc
|
||||
return make_response(str(e), 401)
|
||||
|
||||
logger.debug("Starting connection with playwright - done")
|
||||
return {'browsersteps_session_id': browsersteps_session_id}
|
||||
|
||||
|
||||
@@ -1,6 +1,8 @@
|
||||
import os
|
||||
import time
|
||||
import re
|
||||
import sys
|
||||
import traceback
|
||||
from random import randint
|
||||
from loguru import logger
|
||||
|
||||
@@ -54,14 +56,34 @@ browser_step_ui_config = {'Choose one': '0 0',
|
||||
class steppable_browser_interface():
|
||||
page = None
|
||||
start_url = None
|
||||
|
||||
action_timeout = 10 * 1000
|
||||
|
||||
def __init__(self, start_url):
|
||||
self.start_url = start_url
|
||||
|
||||
def safe_page_operation(self, operation_fn, default_return=None):
    """Run a page operation, returning a fallback value instead of raising.

    Args:
        operation_fn: Zero-argument callable that performs the page operation.
        default_return: Value handed back when there is no page object or
            when ``operation_fn`` raises.

    Returns:
        The result of ``operation_fn()``, or ``default_return`` on failure.
    """
    if self.page is None:
        logger.warning("Attempted operation on None page object")
        return default_return

    try:
        return operation_fn()
    except Exception as e:
        logger.debug(f"Page operation failed: {str(e)}")
        # Try to reclaim memory if possible
        try:
            self.page.request_gc()
        except Exception:
            # Best-effort only: a failed GC request must not mask the fallback.
            # Catching Exception (not a bare except) lets KeyboardInterrupt and
            # SystemExit propagate instead of being silently swallowed.
            pass
        return default_return
|
||||
|
||||
# Convert and perform "Click Button" for example
|
||||
def call_action(self, action_name, selector=None, optional_value=None):
|
||||
if self.page is None:
|
||||
logger.warning("Cannot call action on None page object")
|
||||
return
|
||||
|
||||
now = time.time()
|
||||
call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower())
|
||||
if call_action_name == 'choose_one':
|
||||
@@ -72,28 +94,46 @@ class steppable_browser_interface():
|
||||
if selector and selector.startswith('/') and not selector.startswith('//'):
|
||||
selector = "xpath=" + selector
|
||||
|
||||
# Check if action handler exists
|
||||
if not hasattr(self, "action_" + call_action_name):
|
||||
logger.warning(f"Action handler for '{call_action_name}' not found")
|
||||
return
|
||||
|
||||
action_handler = getattr(self, "action_" + call_action_name)
|
||||
|
||||
# Support for Jinja2 variables in the value and selector
|
||||
|
||||
if selector and ('{%' in selector or '{{' in selector):
|
||||
selector = jinja_render(template_str=selector)
|
||||
|
||||
if optional_value and ('{%' in optional_value or '{{' in optional_value):
|
||||
optional_value = jinja_render(template_str=optional_value)
|
||||
|
||||
action_handler(selector, optional_value)
|
||||
self.page.wait_for_timeout(1.5 * 1000)
|
||||
logger.debug(f"Call action done in {time.time()-now:.2f}s")
|
||||
try:
|
||||
action_handler(selector, optional_value)
|
||||
# Safely wait for timeout
|
||||
def wait_timeout():
|
||||
self.page.wait_for_timeout(1.5 * 1000)
|
||||
self.safe_page_operation(wait_timeout)
|
||||
logger.debug(f"Call action done in {time.time()-now:.2f}s")
|
||||
except Exception as e:
|
||||
logger.error(f"Error executing action '{call_action_name}': {str(e)}")
|
||||
# Request garbage collection to free up resources after error
|
||||
try:
|
||||
self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
def action_goto_url(self, selector=None, value=None):
|
||||
# self.page.set_viewport_size({"width": 1280, "height": 5000})
|
||||
if not value:
|
||||
logger.warning("No URL provided for goto_url action")
|
||||
return None
|
||||
|
||||
now = time.time()
|
||||
response = self.page.goto(value, timeout=0, wait_until='load')
|
||||
# Should be the same as the puppeteer_fetch.js methods, means, load with no timeout set (skip timeout)
|
||||
#and also wait for seconds ?
|
||||
#await page.waitForTimeout(1000);
|
||||
#await page.waitForTimeout(extra_wait_ms);
|
||||
|
||||
def goto_operation():
|
||||
return self.page.goto(value, timeout=0, wait_until='load')
|
||||
|
||||
response = self.safe_page_operation(goto_operation)
|
||||
logger.debug(f"Time to goto URL {time.time()-now:.2f}s")
|
||||
return response
|
||||
|
||||
@@ -103,116 +143,209 @@ class steppable_browser_interface():
|
||||
|
||||
def action_click_element_containing_text(self, selector=None, value=''):
|
||||
logger.debug("Clicking element containing text")
|
||||
if not len(value.strip()):
|
||||
if not value or not len(value.strip()):
|
||||
return
|
||||
elem = self.page.get_by_text(value)
|
||||
if elem.count():
|
||||
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||
|
||||
def click_operation():
|
||||
elem = self.page.get_by_text(value)
|
||||
if elem.count():
|
||||
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(click_operation)
|
||||
|
||||
def action_click_element_containing_text_if_exists(self, selector=None, value=''):
|
||||
logger.debug("Clicking element containing text if exists")
|
||||
if not len(value.strip()):
|
||||
return
|
||||
elem = self.page.get_by_text(value)
|
||||
logger.debug(f"Clicking element containing text - {elem.count()} elements found")
|
||||
if elem.count():
|
||||
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||
else:
|
||||
if not value or not len(value.strip()):
|
||||
return
|
||||
|
||||
def click_if_exists_operation():
|
||||
elem = self.page.get_by_text(value)
|
||||
logger.debug(f"Clicking element containing text - {elem.count()} elements found")
|
||||
if elem.count():
|
||||
elem.first.click(delay=randint(200, 500), timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(click_if_exists_operation)
|
||||
|
||||
def action_enter_text_in_field(self, selector, value):
|
||||
if not len(selector.strip()):
|
||||
if not selector or not len(selector.strip()):
|
||||
return
|
||||
|
||||
self.page.fill(selector, value, timeout=self.action_timeout)
|
||||
def fill_operation():
|
||||
self.page.fill(selector, value, timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(fill_operation)
|
||||
|
||||
def action_execute_js(self, selector, value):
|
||||
response = self.page.evaluate(value)
|
||||
return response
|
||||
if not value:
|
||||
return None
|
||||
|
||||
def evaluate_operation():
|
||||
return self.page.evaluate(value)
|
||||
|
||||
return self.safe_page_operation(evaluate_operation)
|
||||
|
||||
def action_click_element(self, selector, value):
|
||||
logger.debug("Clicking element")
|
||||
if not len(selector.strip()):
|
||||
if not selector or not len(selector.strip()):
|
||||
return
|
||||
|
||||
self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
|
||||
def click_operation():
|
||||
self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500))
|
||||
|
||||
self.safe_page_operation(click_operation)
|
||||
|
||||
def action_click_element_if_exists(self, selector, value):
|
||||
import playwright._impl._errors as _api_types
|
||||
logger.debug("Clicking element if exists")
|
||||
if not len(selector.strip()):
|
||||
return
|
||||
try:
|
||||
self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
|
||||
except _api_types.TimeoutError as e:
|
||||
return
|
||||
except _api_types.Error as e:
|
||||
# Element was there, but page redrew and now its long long gone
|
||||
if not selector or not len(selector.strip()):
|
||||
return
|
||||
|
||||
def click_if_exists_operation():
|
||||
try:
|
||||
self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500))
|
||||
except _api_types.TimeoutError:
|
||||
return
|
||||
except _api_types.Error:
|
||||
# Element was there, but page redrew and now its long long gone
|
||||
return
|
||||
|
||||
self.safe_page_operation(click_if_exists_operation)
|
||||
|
||||
def action_click_x_y(self, selector, value):
|
||||
if not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
|
||||
raise Exception("'Click X,Y' step should be in the format of '100 , 90'")
|
||||
if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value):
|
||||
logger.warning("'Click X,Y' step should be in the format of '100 , 90'")
|
||||
return
|
||||
|
||||
x, y = value.strip().split(',')
|
||||
x = int(float(x.strip()))
|
||||
y = int(float(y.strip()))
|
||||
self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
|
||||
try:
|
||||
x, y = value.strip().split(',')
|
||||
x = int(float(x.strip()))
|
||||
y = int(float(y.strip()))
|
||||
|
||||
def click_xy_operation():
|
||||
self.page.mouse.click(x=x, y=y, delay=randint(200, 500))
|
||||
|
||||
self.safe_page_operation(click_xy_operation)
|
||||
except Exception as e:
|
||||
logger.error(f"Error parsing x,y coordinates: {str(e)}")
|
||||
|
||||
def action_scroll_down(self, selector, value):
|
||||
# Some sites this doesnt work on for some reason
|
||||
self.page.mouse.wheel(0, 600)
|
||||
self.page.wait_for_timeout(1000)
|
||||
def scroll_operation():
|
||||
# Some sites this doesnt work on for some reason
|
||||
self.page.mouse.wheel(0, 600)
|
||||
self.page.wait_for_timeout(1000)
|
||||
|
||||
self.safe_page_operation(scroll_operation)
|
||||
|
||||
def action_wait_for_seconds(self, selector, value):
|
||||
self.page.wait_for_timeout(float(value.strip()) * 1000)
|
||||
try:
|
||||
seconds = float(value.strip()) if value else 1.0
|
||||
|
||||
def wait_operation():
|
||||
self.page.wait_for_timeout(seconds * 1000)
|
||||
|
||||
self.safe_page_operation(wait_operation)
|
||||
except (ValueError, TypeError) as e:
|
||||
logger.error(f"Invalid value for wait_for_seconds: {str(e)}")
|
||||
|
||||
def action_wait_for_text(self, selector, value):
|
||||
if not value:
|
||||
return
|
||||
|
||||
import json
|
||||
v = json.dumps(value)
|
||||
self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000)
|
||||
|
||||
def wait_for_text_operation():
|
||||
self.page.wait_for_function(
|
||||
f'document.querySelector("body").innerText.includes({v});',
|
||||
timeout=30000
|
||||
)
|
||||
|
||||
self.safe_page_operation(wait_for_text_operation)
|
||||
|
||||
def action_wait_for_text_in_element(self, selector, value):
|
||||
if not selector or not value:
|
||||
return
|
||||
|
||||
import json
|
||||
s = json.dumps(selector)
|
||||
v = json.dumps(value)
|
||||
self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000)
|
||||
|
||||
def wait_for_text_in_element_operation():
|
||||
self.page.wait_for_function(
|
||||
f'document.querySelector({s}).innerText.includes({v});',
|
||||
timeout=30000
|
||||
)
|
||||
|
||||
self.safe_page_operation(wait_for_text_in_element_operation)
|
||||
|
||||
# @todo - in the future make some popout interface to capture what needs to be set
|
||||
# https://playwright.dev/python/docs/api/class-keyboard
|
||||
def action_press_enter(self, selector, value):
|
||||
self.page.keyboard.press("Enter", delay=randint(200, 500))
|
||||
def press_operation():
|
||||
self.page.keyboard.press("Enter", delay=randint(200, 500))
|
||||
|
||||
self.safe_page_operation(press_operation)
|
||||
|
||||
def action_press_page_up(self, selector, value):
|
||||
self.page.keyboard.press("PageUp", delay=randint(200, 500))
|
||||
def press_operation():
|
||||
self.page.keyboard.press("PageUp", delay=randint(200, 500))
|
||||
|
||||
self.safe_page_operation(press_operation)
|
||||
|
||||
def action_press_page_down(self, selector, value):
|
||||
self.page.keyboard.press("PageDown", delay=randint(200, 500))
|
||||
def press_operation():
|
||||
self.page.keyboard.press("PageDown", delay=randint(200, 500))
|
||||
|
||||
self.safe_page_operation(press_operation)
|
||||
|
||||
def action_check_checkbox(self, selector, value):
|
||||
self.page.locator(selector).check(timeout=self.action_timeout)
|
||||
if not selector:
|
||||
return
|
||||
|
||||
def check_operation():
|
||||
self.page.locator(selector).check(timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(check_operation)
|
||||
|
||||
def action_uncheck_checkbox(self, selector, value):
|
||||
self.page.locator(selector).uncheck(timeout=self.action_timeout)
|
||||
if not selector:
|
||||
return
|
||||
|
||||
def uncheck_operation():
|
||||
self.page.locator(selector).uncheck(timeout=self.action_timeout)
|
||||
|
||||
self.safe_page_operation(uncheck_operation)
|
||||
|
||||
def action_remove_elements(self, selector, value):
|
||||
"""Removes all elements matching the given selector from the DOM."""
|
||||
self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
|
||||
if not selector:
|
||||
return
|
||||
|
||||
def remove_operation():
|
||||
self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())")
|
||||
|
||||
self.safe_page_operation(remove_operation)
|
||||
|
||||
def action_make_all_child_elements_visible(self, selector, value):
|
||||
"""Recursively makes all child elements inside the given selector fully visible."""
|
||||
self.page.locator(selector).locator("*").evaluate_all("""
|
||||
els => els.forEach(el => {
|
||||
el.style.display = 'block'; // Forces it to be displayed
|
||||
el.style.visibility = 'visible'; // Ensures it's not hidden
|
||||
el.style.opacity = '1'; // Fully opaque
|
||||
el.style.position = 'relative'; // Avoids 'absolute' hiding
|
||||
el.style.height = 'auto'; // Expands collapsed elements
|
||||
el.style.width = 'auto'; // Ensures full visibility
|
||||
el.removeAttribute('hidden'); // Removes hidden attribute
|
||||
el.classList.remove('hidden', 'd-none'); // Removes common CSS hidden classes
|
||||
})
|
||||
""")
|
||||
if not selector:
|
||||
return
|
||||
|
||||
def make_visible_operation():
|
||||
self.page.locator(selector).locator("*").evaluate_all("""
|
||||
els => els.forEach(el => {
|
||||
el.style.display = 'block'; // Forces it to be displayed
|
||||
el.style.visibility = 'visible'; // Ensures it's not hidden
|
||||
el.style.opacity = '1'; // Fully opaque
|
||||
el.style.position = 'relative'; // Avoids 'absolute' hiding
|
||||
el.style.height = 'auto'; // Expands collapsed elements
|
||||
el.style.width = 'auto'; // Ensures full visibility
|
||||
el.removeAttribute('hidden'); // Removes hidden attribute
|
||||
el.classList.remove('hidden', 'd-none'); // Removes common CSS hidden classes
|
||||
})
|
||||
""")
|
||||
|
||||
self.safe_page_operation(make_visible_operation)
|
||||
|
||||
# Responsible for maintaining a live 'context' with the chrome CDP
|
||||
# @todo - how long do contexts live for anyway?
|
||||
@@ -224,7 +357,9 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
# bump and kill this if idle after X sec
|
||||
age_start = 0
|
||||
headers = {}
|
||||
|
||||
# Track if resources are properly cleaned up
|
||||
_is_cleaned_up = False
|
||||
|
||||
# use a special driver, maybe locally etc
|
||||
command_executor = os.getenv(
|
||||
"PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL"
|
||||
@@ -243,9 +378,14 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
self.age_start = time.time()
|
||||
self.playwright_browser = playwright_browser
|
||||
self.start_url = start_url
|
||||
self._is_cleaned_up = False
|
||||
if self.context is None:
|
||||
self.connect(proxy=proxy)
|
||||
|
||||
def __del__(self):
|
||||
# Ensure cleanup happens if object is garbage collected
|
||||
self.cleanup()
|
||||
|
||||
# Connect and setup a new context
|
||||
def connect(self, proxy=None):
|
||||
# Should only get called once - test that
|
||||
@@ -264,31 +404,74 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
user_agent=manage_user_agent(headers=self.headers),
|
||||
)
|
||||
|
||||
|
||||
self.page = self.context.new_page()
|
||||
|
||||
# self.page.set_default_navigation_timeout(keep_open)
|
||||
self.page.set_default_timeout(keep_open)
|
||||
# @todo probably this doesnt work
|
||||
self.page.on(
|
||||
"close",
|
||||
self.mark_as_closed,
|
||||
)
|
||||
# Set event handlers
|
||||
self.page.on("close", self.mark_as_closed)
|
||||
# Listen for all console events and handle errors
|
||||
self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}"))
|
||||
|
||||
logger.debug(f"Time to browser setup {time.time()-now:.2f}s")
|
||||
self.page.wait_for_timeout(1 * 1000)
|
||||
|
||||
|
||||
def mark_as_closed(self):
|
||||
logger.debug("Page closed, cleaning up..")
|
||||
self.cleanup()
|
||||
|
||||
def cleanup(self):
    """Release the page and context held by this session, at most once.

    Every teardown step is attempted independently: a step that raises is
    logged at debug level and the remaining steps still run. Once finished,
    the session is flagged as cleaned up so later calls return immediately.
    """
    if self._is_cleaned_up:
        return

    logger.debug("Cleaning up browser steps resources")

    # Page teardown, in order: request GC, detach our close listener, close.
    if getattr(self, 'page', None) is not None:
        page_steps = (
            ("Error during page garbage collection",
             lambda: self.page.request_gc()),
            ("Error removing event listeners",
             lambda: self.page.remove_listener("close", self.mark_as_closed)),
            ("Error closing page",
             lambda: self.page.close()),
        )
        for failure_label, step in page_steps:
            try:
                step()
            except Exception as e:
                logger.debug(f"{failure_label}: {str(e)}")

        self.page = None

    # Context teardown
    if getattr(self, 'context', None) is not None:
        try:
            self.context.close()
        except Exception as e:
            logger.debug(f"Error closing context: {str(e)}")

        self.context = None

    self._is_cleaned_up = True
    logger.debug("Browser steps resources cleanup complete")
|
||||
|
||||
@property
|
||||
def has_expired(self):
|
||||
if not self.page:
|
||||
if not self.page or self._is_cleaned_up:
|
||||
return True
|
||||
|
||||
|
||||
# Check if session has expired based on age
|
||||
max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10)) # Default 10 minutes
|
||||
if (time.time() - self.age_start) > max_age_seconds:
|
||||
logger.debug(f"Browser steps session expired after {max_age_seconds} seconds")
|
||||
return True
|
||||
|
||||
return False
|
||||
|
||||
def get_current_state(self):
|
||||
"""Return the screenshot and interactive elements mapping, generally always called after action_()"""
|
||||
@@ -297,36 +480,55 @@ class browsersteps_live_ui(steppable_browser_interface):
|
||||
# because we for now only run browser steps in playwright mode (not puppeteer mode)
|
||||
from changedetectionio.content_fetchers.playwright import capture_full_page
|
||||
|
||||
# Safety check - don't proceed if resources are cleaned up
|
||||
if self._is_cleaned_up or self.page is None:
|
||||
logger.warning("Attempted to get current state after cleanup")
|
||||
return (None, None)
|
||||
|
||||
xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text()
|
||||
|
||||
now = time.time()
|
||||
self.page.wait_for_timeout(1 * 1000)
|
||||
|
||||
screenshot = capture_full_page(page=self.page)
|
||||
screenshot = None
|
||||
xpath_data = None
|
||||
|
||||
try:
|
||||
# Get screenshot first
|
||||
screenshot = capture_full_page(page=self.page)
|
||||
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
|
||||
|
||||
logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s")
|
||||
# Then get interactive elements
|
||||
now = time.time()
|
||||
self.page.evaluate("var include_filters=''")
|
||||
self.page.request_gc()
|
||||
|
||||
now = time.time()
|
||||
self.page.evaluate("var include_filters=''")
|
||||
# Go find the interactive elements
|
||||
# @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers?
|
||||
scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
|
||||
|
||||
self.page.request_gc()
|
||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||
xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
|
||||
"visualselector_xpath_selectors": scan_elements,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
}))
|
||||
self.page.request_gc()
|
||||
|
||||
scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span'
|
||||
|
||||
MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT))
|
||||
xpath_data = json.loads(self.page.evaluate(xpath_element_js, {
|
||||
"visualselector_xpath_selectors": scan_elements,
|
||||
"max_height": MAX_TOTAL_HEIGHT
|
||||
}))
|
||||
self.page.request_gc()
|
||||
|
||||
# So the JS will find the smallest one first
|
||||
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
|
||||
logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
|
||||
|
||||
# playwright._impl._api_types.Error: Browser closed.
|
||||
# @todo show some countdown timer?
|
||||
# Sort elements by size
|
||||
xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True)
|
||||
logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s")
|
||||
|
||||
except Exception as e:
|
||||
logger.error(f"Error getting current state: {str(e)}")
|
||||
# Attempt recovery - force garbage collection
|
||||
try:
|
||||
self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
# Request garbage collection one final time
|
||||
try:
|
||||
self.page.request_gc()
|
||||
except:
|
||||
pass
|
||||
|
||||
return (screenshot, xpath_data)
|
||||
|
||||
|
||||
@@ -10,6 +10,7 @@ async () => {
|
||||
'article épuisé',
|
||||
'artikel zurzeit vergriffen',
|
||||
'as soon as stock is available',
|
||||
'aucune offre n\'est disponible',
|
||||
'ausverkauft', // sold out
|
||||
'available for back order',
|
||||
'awaiting stock',
|
||||
@@ -25,9 +26,8 @@ async () => {
|
||||
'dieser artikel ist bald wieder verfügbar',
|
||||
'dostępne wkrótce',
|
||||
'en rupture',
|
||||
'en rupture de stock',
|
||||
'épuisé',
|
||||
'esgotado',
|
||||
'in kürze lieferbar',
|
||||
'indisponible',
|
||||
'indisponível',
|
||||
'isn\'t in stock right now',
|
||||
@@ -50,10 +50,11 @@ async () => {
|
||||
'niet leverbaar',
|
||||
'niet op voorraad',
|
||||
'no disponible',
|
||||
'non disponibile',
|
||||
'non disponible',
|
||||
'no featured offers available',
|
||||
'no longer in stock',
|
||||
'no tickets available',
|
||||
'non disponibile',
|
||||
'non disponible',
|
||||
'not available',
|
||||
'not currently available',
|
||||
'not in stock',
|
||||
@@ -89,13 +90,15 @@ async () => {
|
||||
'vergriffen',
|
||||
'vorbestellen',
|
||||
'vorbestellung ist bald möglich',
|
||||
'we don\'t currently have any',
|
||||
'we couldn\'t find any products that match',
|
||||
'we do not currently have an estimate of when this product will be back in stock.',
|
||||
'we don\'t currently have any',
|
||||
'we don\'t know when or if this item will be back in stock.',
|
||||
'we were not able to find a match',
|
||||
'when this arrives in stock',
|
||||
'when this item is available to order',
|
||||
'zur zeit nicht an lager',
|
||||
'épuisé',
|
||||
'品切れ',
|
||||
'已售',
|
||||
'已售完',
|
||||
|
||||
@@ -31,33 +31,33 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_
|
||||
# Draw caption on top (overlaid, not extending canvas)
|
||||
draw = ImageDraw.Draw(stitched)
|
||||
|
||||
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font_size = 35
|
||||
font_color = (255, 0, 0)
|
||||
background_color = (255, 255, 255)
|
||||
if original_page_height > capture_height:
|
||||
caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long"
|
||||
padding = 10
|
||||
font_size = 35
|
||||
font_color = (255, 0, 0)
|
||||
background_color = (255, 255, 255)
|
||||
|
||||
|
||||
# Try to load a proper font
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", font_size)
|
||||
except IOError:
|
||||
font = ImageFont.load_default()
|
||||
# Try to load a proper font
|
||||
try:
|
||||
font = ImageFont.truetype("arial.ttf", font_size)
|
||||
except IOError:
|
||||
font = ImageFont.load_default()
|
||||
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
bbox = draw.textbbox((0, 0), caption_text, font=font)
|
||||
text_width = bbox[2] - bbox[0]
|
||||
text_height = bbox[3] - bbox[1]
|
||||
|
||||
# Draw white rectangle background behind text
|
||||
rect_top = 0
|
||||
rect_bottom = text_height + 2 * padding
|
||||
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
|
||||
# Draw white rectangle background behind text
|
||||
rect_top = 0
|
||||
rect_bottom = text_height + 2 * padding
|
||||
draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color)
|
||||
|
||||
# Draw text centered horizontally, 10px padding from top of the rectangle
|
||||
text_x = (max_width - text_width) // 2
|
||||
text_y = padding
|
||||
draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
|
||||
# Draw text centered horizontally, 10px padding from top of the rectangle
|
||||
text_x = (max_width - text_width) // 2
|
||||
text_y = padding
|
||||
draw.text((text_x, text_y), caption_text, font=font, fill=font_color)
|
||||
|
||||
# Encode and send image
|
||||
output = io.BytesIO()
|
||||
|
||||
@@ -251,6 +251,10 @@ $(document).ready(function () {
|
||||
400: function () {
|
||||
// More than likely the CSRF token was lost when the server restarted
|
||||
alert("There was a problem processing the request, please reload the page.");
|
||||
},
|
||||
401: function (err) {
|
||||
// This will be a custom error
|
||||
alert(err.responseText);
|
||||
}
|
||||
}
|
||||
}).done(function (data) {
|
||||
|
||||
@@ -383,13 +383,13 @@ Math: {{ 1 + 1 }}") }}
|
||||
<div class="pure-control-group">
|
||||
{% if watch_needs_selenium_or_playwright %}
|
||||
{% if system_has_playwright_configured %}
|
||||
{% if visual_selector_data_ready %}
|
||||
<span class="pure-form-message-inline" id="visual-selector-heading">
|
||||
The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items.
|
||||
</span>
|
||||
|
||||
<div id="selector-header">
|
||||
<a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a>
|
||||
<!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text -->
|
||||
<i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i>
|
||||
</div>
|
||||
<div id="selector-wrapper" style="display: none">
|
||||
@@ -400,9 +400,6 @@ Math: {{ 1 + 1 }}") }}
|
||||
<canvas id="selector-canvas"></canvas>
|
||||
</div>
|
||||
<div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong> <span class="text">Loading...</span></div>
|
||||
{% else %}
|
||||
<strong>Error, The Visual selector data is not ready, it needs to complete atleast one fetch, please queue the item and reload.</strong>
|
||||
{% endif %}
|
||||
{% else %}
|
||||
{# The watch needed chrome but system says that playwright is not ready #}
|
||||
{{ playwright_warning() }}
|
||||
|
||||
@@ -110,3 +110,6 @@ pluggy ~= 1.5
|
||||
|
||||
# Needed for testing, cross-platform for process and system monitoring
|
||||
psutil==7.0.0
|
||||
|
||||
ruff >= 0.11.2
|
||||
pre_commit >= 4.2.0
|
||||
|
||||
Reference in New Issue
Block a user