mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-31 14:47:21 +00:00 
			
		
		
		
	Compare commits
	
		
			26 Commits
		
	
	
		
			0.49.13
			...
			update-sel
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 6143fb3c30 | ||
|   | d2e8f822d6 | ||
|   | 5fd8200fd9 | ||
|   | d0da8c9825 | ||
|   | fd7574d21b | ||
|   | c70706a27b | ||
|   | 968c364999 | ||
|   | 031cb76b7d | ||
|   | af568d064c | ||
|   | a75f57de43 | ||
|   | 72a1c3dda1 | ||
|   | ffde79ecac | ||
|   | 66ad43b2df | ||
|   | 6b0e56ca80 | ||
|   | 5a2d84d8b4 | ||
|   | a941156f26 | ||
|   | a1fdeeaa29 | ||
|   | 40ea2604a7 | ||
|   | ceda526093 | ||
|   | 4197254c53 | ||
|   | a0b7efb436 | ||
|   | 5f5e8ede6c | ||
|   | 52ca855a29 | ||
|   | 079efd0a85 | ||
|   | 3a583a4e5d | ||
|   | cfb4decf67 | 
							
								
								
									
										15
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										15
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							| @@ -8,13 +8,13 @@ jobs: | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - name: Lint with flake8 | ||||
|       - name: Lint with Ruff | ||||
|         run: | | ||||
|           pip3 install flake8 | ||||
|           # stop the build if there are Python syntax errors or undefined names | ||||
|           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics | ||||
|           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide | ||||
|           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics | ||||
|           pip install ruff | ||||
|           # Check for syntax errors and undefined names | ||||
|           ruff check . --select E9,F63,F7,F82 | ||||
|           # Complete check with errors treated as warnings | ||||
|           ruff check . --exit-zero | ||||
|  | ||||
|   test-application-3-10: | ||||
|     needs: lint-code | ||||
| @@ -41,5 +41,4 @@ jobs: | ||||
|     uses: ./.github/workflows/test-stack-reusable-workflow.yml | ||||
|     with: | ||||
|       python-version: '3.13' | ||||
|       skip-pypuppeteer: true | ||||
|        | ||||
|       skip-pypuppeteer: true | ||||
| @@ -172,8 +172,8 @@ jobs: | ||||
|           curl --retry-connrefused --retry 6  -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|  | ||||
|           # Check whether TRACE log is enabled. | ||||
|           # Also, check whether TRACE is came from STDERR | ||||
|           docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1 | ||||
|           # Also, check whether TRACE came from STDOUT | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1 | ||||
|           # Check whether DEBUG is came from STDOUT | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 | ||||
|  | ||||
|   | ||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -16,6 +16,7 @@ dist/ | ||||
| .env | ||||
| .venv/ | ||||
| venv/ | ||||
| .python-version | ||||
|  | ||||
| # IDEs | ||||
| .idea | ||||
|   | ||||
							
								
								
									
										9
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| repos: | ||||
|   - repo: https://github.com/astral-sh/ruff-pre-commit | ||||
|     rev: v0.11.2 | ||||
|     hooks: | ||||
|       # Lint (and apply safe fixes) | ||||
|       - id: ruff | ||||
|         args: [--fix] | ||||
|       # Fomrat | ||||
|       - id: ruff-format | ||||
							
								
								
									
										48
									
								
								.ruff.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								.ruff.toml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| # Minimum supported version | ||||
| target-version = "py310" | ||||
|  | ||||
| # Formatting options | ||||
| line-length = 100 | ||||
| indent-width = 4 | ||||
|  | ||||
| exclude = [ | ||||
|     "__pycache__", | ||||
|     ".eggs", | ||||
|     ".git", | ||||
|     ".tox", | ||||
|     ".venv", | ||||
|     "*.egg-info", | ||||
|     "*.pyc", | ||||
| ] | ||||
|  | ||||
| [lint] | ||||
| # https://docs.astral.sh/ruff/rules/ | ||||
| select = [ | ||||
|     "B", # flake8-bugbear | ||||
|     "B9", | ||||
|     "C",  | ||||
|     "E", # pycodestyle | ||||
|     "F", # Pyflakes | ||||
|     "I", # isort | ||||
|     "N", # pep8-naming | ||||
|     "UP", # pyupgrade | ||||
|     "W", # pycodestyle | ||||
| ] | ||||
| ignore = [ | ||||
|     "B007", # unused-loop-control-variable | ||||
|     "B909", # loop-iterator-mutation | ||||
|     "E203", # whitespace-before-punctuation | ||||
|     "E266", # multiple-leading-hashes-for-block-comment | ||||
|     "E501", # redundant-backslash | ||||
|     "F403", # undefined-local-with-import-star | ||||
|     "N802", # invalid-function-name | ||||
|     "N806", # non-lowercase-variable-in-function | ||||
|     "N815", # mixed-case-variable-in-class-scope | ||||
| ] | ||||
|  | ||||
| [lint.mccabe] | ||||
| max-complexity = 12 | ||||
|  | ||||
| [format] | ||||
| indent-style = "space" | ||||
| quote-style = "preserve" | ||||
| @@ -68,7 +68,7 @@ COPY changedetection.py /app/changedetection.py | ||||
| # Github Action test purpose(test-only.yml). | ||||
| # On production, it is effectively LOGGER_LEVEL=''. | ||||
| ARG LOGGER_LEVEL='' | ||||
| ENV LOGGER_LEVEL "$LOGGER_LEVEL" | ||||
| ENV LOGGER_LEVEL="$LOGGER_LEVEL" | ||||
|  | ||||
| WORKDIR /app | ||||
| CMD ["python", "./changedetection.py", "-d", "/datastore"] | ||||
|   | ||||
| @@ -3,4 +3,6 @@ | ||||
| # Only exists for direct CLI usage | ||||
|  | ||||
| import changedetectionio | ||||
| changedetectionio.main() | ||||
|  | ||||
| if __name__ == '__main__': | ||||
|     changedetectionio.main() | ||||
|   | ||||
							
								
								
									
										98
									
								
								changedetectionio/PLUGIN_README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								changedetectionio/PLUGIN_README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| # Creating Plugins for changedetection.io | ||||
|  | ||||
| This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways. | ||||
|  | ||||
| ## Plugin Types | ||||
|  | ||||
| ### UI Stats Tab Plugins | ||||
|  | ||||
| These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch. | ||||
|  | ||||
| #### Creating a UI Stats Tab Plugin | ||||
|  | ||||
| 1. Create a Python file in a directory that will be loaded by the plugin system. | ||||
|  | ||||
| 2. Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook: | ||||
|  | ||||
| ```python | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add custom content to the stats tab""" | ||||
|     # Calculate or retrieve your stats | ||||
|     my_stat = calculate_something(watch) | ||||
|      | ||||
|     # Return HTML content as a string | ||||
|     html = f""" | ||||
|     <div class="my-plugin-stats"> | ||||
|         <h4>My Plugin Statistics</h4> | ||||
|         <p>My statistic: {my_stat}</p> | ||||
|     </div> | ||||
|     """ | ||||
|     return html | ||||
| ``` | ||||
|  | ||||
| 3. The HTML you return will be included in the Stats tab. | ||||
|  | ||||
| ## Plugin Loading | ||||
|  | ||||
| Plugins can be loaded from: | ||||
|  | ||||
| 1. Built-in plugin directories in the codebase | ||||
| 2. External packages using setuptools entry points | ||||
|  | ||||
| To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`. | ||||
|  | ||||
| ## Example Plugin | ||||
|  | ||||
| Here's a simple example of a plugin that adds a word count statistic to the Stats tab: | ||||
|  | ||||
| ```python | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| def count_words_in_history(watch): | ||||
|     """Count words in the latest snapshot""" | ||||
|     try: | ||||
|         if not watch.history.keys(): | ||||
|             return 0 | ||||
|              | ||||
|         latest_key = list(watch.history.keys())[-1] | ||||
|         latest_content = watch.get_history_snapshot(latest_key) | ||||
|         return len(latest_content.split()) | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error counting words: {str(e)}") | ||||
|         return 0 | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add word count to the Stats tab""" | ||||
|     word_count = count_words_in_history(watch) | ||||
|      | ||||
|     html = f""" | ||||
|     <div class="word-count-stats"> | ||||
|         <h4>Content Analysis</h4> | ||||
|         <table class="pure-table"> | ||||
|             <tbody> | ||||
|                 <tr> | ||||
|                     <td>Word count (latest snapshot)</td> | ||||
|                     <td>{word_count}</td> | ||||
|                 </tr> | ||||
|             </tbody> | ||||
|         </table> | ||||
|     </div> | ||||
|     """ | ||||
|     return html | ||||
| ``` | ||||
|  | ||||
| ## Testing Your Plugin | ||||
|  | ||||
| 1. Place your plugin in one of the directories scanned by the plugin system | ||||
| 2. Restart changedetection.io | ||||
| 3. Go to the Edit page of a watch and check the Stats tab to see your content | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| # Read more https://github.com/dgtlmoon/changedetection.io/wiki | ||||
|  | ||||
| __version__ = '0.49.13' | ||||
| __version__ = '0.49.15' | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from json.decoder import JSONDecodeError | ||||
| @@ -106,7 +106,7 @@ def main(): | ||||
|     # Without this, a logger will be duplicated | ||||
|     logger.remove() | ||||
|     try: | ||||
|         log_level_for_stdout = { 'DEBUG', 'SUCCESS' } | ||||
|         log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' } | ||||
|         logger.configure(handlers=[ | ||||
|             {"sink": sys.stdout, "level": logger_level, | ||||
|              "filter" : lambda record: record['level'].name in log_level_for_stdout}, | ||||
|   | ||||
| @@ -53,14 +53,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|         a = "?" if not '?' in base_url else '&' | ||||
|         base_url += a + f"timeout={keepalive_ms}" | ||||
|  | ||||
|         try: | ||||
|             browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url) | ||||
|         except Exception as e: | ||||
|             if 'ECONNREFUSED' in str(e): | ||||
|                 return make_response('Unable to start the Playwright Browser session, is it running?', 401) | ||||
|             else: | ||||
|                 # Other errors, bad URL syntax, bad reply etc | ||||
|                 return make_response(str(e), 401) | ||||
|         browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url) | ||||
|  | ||||
|         proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid) | ||||
|         proxy = None | ||||
| @@ -109,7 +102,16 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|  | ||||
|         logger.debug("Starting connection with playwright") | ||||
|         logger.debug("browser_steps.py connecting") | ||||
|         browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid) | ||||
|  | ||||
|         try: | ||||
|             browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid) | ||||
|         except Exception as e: | ||||
|             if 'ECONNREFUSED' in str(e): | ||||
|                 return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401) | ||||
|             else: | ||||
|                 # Other errors, bad URL syntax, bad reply etc | ||||
|                 return make_response(str(e), 401) | ||||
|  | ||||
|         logger.debug("Starting connection with playwright - done") | ||||
|         return {'browsersteps_session_id': browsersteps_session_id} | ||||
|  | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| import os | ||||
| import time | ||||
| import re | ||||
| import sys | ||||
| import traceback | ||||
| from random import randint | ||||
| from loguru import logger | ||||
|  | ||||
| @@ -35,6 +37,7 @@ browser_step_ui_config = {'Choose one': '0 0', | ||||
|                           'Make all child elements visible': '1 0', | ||||
|                           'Press Enter': '0 0', | ||||
|                           'Select by label': '1 1', | ||||
|                           '<select> by option text': '1 1', | ||||
|                           'Scroll down': '0 0', | ||||
|                           'Uncheck checkbox': '1 0', | ||||
|                           'Wait for seconds': '0 1', | ||||
| @@ -54,14 +57,34 @@ browser_step_ui_config = {'Choose one': '0 0', | ||||
| class steppable_browser_interface(): | ||||
|     page = None | ||||
|     start_url = None | ||||
|  | ||||
|     action_timeout = 10 * 1000 | ||||
|  | ||||
|     def __init__(self, start_url): | ||||
|         self.start_url = start_url | ||||
|          | ||||
|     def safe_page_operation(self, operation_fn, default_return=None): | ||||
|         """Safely execute a page operation with error handling""" | ||||
|         if self.page is None: | ||||
|             logger.warning("Attempted operation on None page object") | ||||
|             return default_return | ||||
|              | ||||
|         try: | ||||
|             return operation_fn() | ||||
|         except Exception as e: | ||||
|             logger.debug(f"Page operation failed: {str(e)}") | ||||
|             # Try to reclaim memory if possible | ||||
|             try: | ||||
|                 self.page.request_gc() | ||||
|             except: | ||||
|                 pass | ||||
|             return default_return | ||||
|  | ||||
|     # Convert and perform "Click Button" for example | ||||
|     def call_action(self, action_name, selector=None, optional_value=None): | ||||
|         if self.page is None: | ||||
|             logger.warning("Cannot call action on None page object") | ||||
|             return | ||||
|              | ||||
|         now = time.time() | ||||
|         call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower()) | ||||
|         if call_action_name == 'choose_one': | ||||
| @@ -72,28 +95,46 @@ class steppable_browser_interface(): | ||||
|         if selector and selector.startswith('/') and not selector.startswith('//'): | ||||
|             selector = "xpath=" + selector | ||||
|  | ||||
|         # Check if action handler exists | ||||
|         if not hasattr(self, "action_" + call_action_name): | ||||
|             logger.warning(f"Action handler for '{call_action_name}' not found") | ||||
|             return | ||||
|              | ||||
|         action_handler = getattr(self, "action_" + call_action_name) | ||||
|  | ||||
|         # Support for Jinja2 variables in the value and selector | ||||
|  | ||||
|         if selector and ('{%' in selector or '{{' in selector): | ||||
|             selector = jinja_render(template_str=selector) | ||||
|  | ||||
|         if optional_value and ('{%' in optional_value or '{{' in optional_value): | ||||
|             optional_value = jinja_render(template_str=optional_value) | ||||
|  | ||||
|         action_handler(selector, optional_value) | ||||
|         self.page.wait_for_timeout(1.5 * 1000) | ||||
|         logger.debug(f"Call action done in {time.time()-now:.2f}s") | ||||
|         try: | ||||
|             action_handler(selector, optional_value) | ||||
|             # Safely wait for timeout | ||||
|             def wait_timeout(): | ||||
|                 self.page.wait_for_timeout(1.5 * 1000) | ||||
|             self.safe_page_operation(wait_timeout) | ||||
|             logger.debug(f"Call action done in {time.time()-now:.2f}s") | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error executing action '{call_action_name}': {str(e)}") | ||||
|             # Request garbage collection to free up resources after error | ||||
|             try: | ||||
|                 self.page.request_gc() | ||||
|             except: | ||||
|                 pass | ||||
|  | ||||
|     def action_goto_url(self, selector=None, value=None): | ||||
|         # self.page.set_viewport_size({"width": 1280, "height": 5000}) | ||||
|         if not value: | ||||
|             logger.warning("No URL provided for goto_url action") | ||||
|             return None | ||||
|              | ||||
|         now = time.time() | ||||
|         response = self.page.goto(value, timeout=0, wait_until='load') | ||||
|         # Should be the same as the puppeteer_fetch.js methods, means, load with no timeout set (skip timeout) | ||||
|         #and also wait for seconds ? | ||||
|         #await page.waitForTimeout(1000); | ||||
|         #await page.waitForTimeout(extra_wait_ms); | ||||
|          | ||||
|         def goto_operation(): | ||||
|             return self.page.goto(value, timeout=0, wait_until='load') | ||||
|              | ||||
|         response = self.safe_page_operation(goto_operation) | ||||
|         logger.debug(f"Time to goto URL {time.time()-now:.2f}s") | ||||
|         return response | ||||
|  | ||||
| @@ -103,116 +144,218 @@ class steppable_browser_interface(): | ||||
|  | ||||
|     def action_click_element_containing_text(self, selector=None, value=''): | ||||
|         logger.debug("Clicking element containing text") | ||||
|         if not len(value.strip()): | ||||
|         if not value or not len(value.strip()): | ||||
|             return | ||||
|         elem = self.page.get_by_text(value) | ||||
|         if elem.count(): | ||||
|             elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|              | ||||
|         def click_operation(): | ||||
|             elem = self.page.get_by_text(value) | ||||
|             if elem.count(): | ||||
|                 elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|                  | ||||
|         self.safe_page_operation(click_operation) | ||||
|  | ||||
|     def action_click_element_containing_text_if_exists(self, selector=None, value=''): | ||||
|         logger.debug("Clicking element containing text if exists") | ||||
|         if not len(value.strip()): | ||||
|             return | ||||
|         elem = self.page.get_by_text(value) | ||||
|         logger.debug(f"Clicking element containing text - {elem.count()} elements found") | ||||
|         if elem.count(): | ||||
|             elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|         else: | ||||
|         if not value or not len(value.strip()): | ||||
|             return | ||||
|              | ||||
|         def click_if_exists_operation(): | ||||
|             elem = self.page.get_by_text(value) | ||||
|             logger.debug(f"Clicking element containing text - {elem.count()} elements found") | ||||
|             if elem.count(): | ||||
|                 elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|                  | ||||
|         self.safe_page_operation(click_if_exists_operation) | ||||
|  | ||||
|     def action_enter_text_in_field(self, selector, value): | ||||
|         if not len(selector.strip()): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|         self.page.fill(selector, value, timeout=self.action_timeout) | ||||
|         def fill_operation(): | ||||
|             self.page.fill(selector, value, timeout=self.action_timeout) | ||||
|              | ||||
|         self.safe_page_operation(fill_operation) | ||||
|  | ||||
|     def action_execute_js(self, selector, value): | ||||
|         response = self.page.evaluate(value) | ||||
|         return response | ||||
|         if not value: | ||||
|             return None | ||||
|              | ||||
|         def evaluate_operation(): | ||||
|             return self.page.evaluate(value) | ||||
|              | ||||
|         return self.safe_page_operation(evaluate_operation) | ||||
|  | ||||
|     def action_click_element(self, selector, value): | ||||
|         logger.debug("Clicking element") | ||||
|         if not len(selector.strip()): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|         self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500)) | ||||
|         def click_operation(): | ||||
|             self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500)) | ||||
|              | ||||
|         self.safe_page_operation(click_operation) | ||||
|  | ||||
|     def action_click_element_if_exists(self, selector, value): | ||||
|         import playwright._impl._errors as _api_types | ||||
|         logger.debug("Clicking element if exists") | ||||
|         if not len(selector.strip()): | ||||
|             return | ||||
|         try: | ||||
|             self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500)) | ||||
|         except _api_types.TimeoutError as e: | ||||
|             return | ||||
|         except _api_types.Error as e: | ||||
|             # Element was there, but page redrew and now its long long gone | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|              | ||||
|         def click_if_exists_operation(): | ||||
|             try: | ||||
|                 self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500)) | ||||
|             except _api_types.TimeoutError: | ||||
|                 return | ||||
|             except _api_types.Error: | ||||
|                 # Element was there, but page redrew and now its long long gone | ||||
|                 return | ||||
|                  | ||||
|         self.safe_page_operation(click_if_exists_operation) | ||||
|  | ||||
|     def action_click_x_y(self, selector, value): | ||||
|         if not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value): | ||||
|             raise Exception("'Click X,Y' step should be in the format of '100 , 90'") | ||||
|         if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value): | ||||
|             logger.warning("'Click X,Y' step should be in the format of '100 , 90'") | ||||
|             return | ||||
|  | ||||
|         x, y = value.strip().split(',') | ||||
|         x = int(float(x.strip())) | ||||
|         y = int(float(y.strip())) | ||||
|         self.page.mouse.click(x=x, y=y, delay=randint(200, 500)) | ||||
|         try: | ||||
|             x, y = value.strip().split(',') | ||||
|             x = int(float(x.strip())) | ||||
|             y = int(float(y.strip())) | ||||
|              | ||||
|             def click_xy_operation(): | ||||
|                 self.page.mouse.click(x=x, y=y, delay=randint(200, 500)) | ||||
|                  | ||||
|             self.safe_page_operation(click_xy_operation) | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error parsing x,y coordinates: {str(e)}") | ||||
|  | ||||
|     def action__select_by_option_text(self, selector, value): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|         def select_operation(): | ||||
|             self.page.select_option(selector, label=value, timeout=self.action_timeout) | ||||
|  | ||||
|         self.safe_page_operation(select_operation) | ||||
|  | ||||
|     def action_scroll_down(self, selector, value): | ||||
|         # Some sites this doesnt work on for some reason | ||||
|         self.page.mouse.wheel(0, 600) | ||||
|         self.page.wait_for_timeout(1000) | ||||
|         def scroll_operation(): | ||||
|             # Some sites this doesnt work on for some reason | ||||
|             self.page.mouse.wheel(0, 600) | ||||
|             self.page.wait_for_timeout(1000) | ||||
|              | ||||
|         self.safe_page_operation(scroll_operation) | ||||
|  | ||||
|     def action_wait_for_seconds(self, selector, value): | ||||
|         self.page.wait_for_timeout(float(value.strip()) * 1000) | ||||
|         try: | ||||
|             seconds = float(value.strip()) if value else 1.0 | ||||
|              | ||||
|             def wait_operation(): | ||||
|                 self.page.wait_for_timeout(seconds * 1000) | ||||
|                  | ||||
|             self.safe_page_operation(wait_operation) | ||||
|         except (ValueError, TypeError) as e: | ||||
|             logger.error(f"Invalid value for wait_for_seconds: {str(e)}") | ||||
|  | ||||
|     def action_wait_for_text(self, selector, value): | ||||
|         if not value: | ||||
|             return | ||||
|              | ||||
|         import json | ||||
|         v = json.dumps(value) | ||||
|         self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000) | ||||
|          | ||||
|         def wait_for_text_operation(): | ||||
|             self.page.wait_for_function( | ||||
|                 f'document.querySelector("body").innerText.includes({v});',  | ||||
|                 timeout=30000 | ||||
|             ) | ||||
|              | ||||
|         self.safe_page_operation(wait_for_text_operation) | ||||
|  | ||||
|     def action_wait_for_text_in_element(self, selector, value): | ||||
|         if not selector or not value: | ||||
|             return | ||||
|              | ||||
|         import json | ||||
|         s = json.dumps(selector) | ||||
|         v = json.dumps(value) | ||||
|         self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000) | ||||
|          | ||||
|         def wait_for_text_in_element_operation(): | ||||
|             self.page.wait_for_function( | ||||
|                 f'document.querySelector({s}).innerText.includes({v});',  | ||||
|                 timeout=30000 | ||||
|             ) | ||||
|              | ||||
|         self.safe_page_operation(wait_for_text_in_element_operation) | ||||
|  | ||||
|     # @todo - in the future make some popout interface to capture what needs to be set | ||||
|     # https://playwright.dev/python/docs/api/class-keyboard | ||||
|     def action_press_enter(self, selector, value): | ||||
|         self.page.keyboard.press("Enter", delay=randint(200, 500)) | ||||
|         def press_operation(): | ||||
|             self.page.keyboard.press("Enter", delay=randint(200, 500)) | ||||
|              | ||||
|         self.safe_page_operation(press_operation) | ||||
|  | ||||
|     def action_press_page_up(self, selector, value): | ||||
|         self.page.keyboard.press("PageUp", delay=randint(200, 500)) | ||||
|         def press_operation(): | ||||
|             self.page.keyboard.press("PageUp", delay=randint(200, 500)) | ||||
|              | ||||
|         self.safe_page_operation(press_operation) | ||||
|  | ||||
|     def action_press_page_down(self, selector, value): | ||||
|         self.page.keyboard.press("PageDown", delay=randint(200, 500)) | ||||
|         def press_operation(): | ||||
|             self.page.keyboard.press("PageDown", delay=randint(200, 500)) | ||||
|              | ||||
|         self.safe_page_operation(press_operation) | ||||
|  | ||||
|     def action_check_checkbox(self, selector, value): | ||||
|         self.page.locator(selector).check(timeout=self.action_timeout) | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         def check_operation(): | ||||
|             self.page.locator(selector).check(timeout=self.action_timeout) | ||||
|              | ||||
|         self.safe_page_operation(check_operation) | ||||
|  | ||||
|     def action_uncheck_checkbox(self, selector, value): | ||||
|         self.page.locator(selector).uncheck(timeout=self.action_timeout) | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         def uncheck_operation(): | ||||
|             self.page.locator(selector).uncheck(timeout=self.action_timeout) | ||||
|              | ||||
|         self.safe_page_operation(uncheck_operation) | ||||
|  | ||||
|     def action_remove_elements(self, selector, value): | ||||
|         """Removes all elements matching the given selector from the DOM.""" | ||||
|         self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())") | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         def remove_operation(): | ||||
|             self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())") | ||||
|              | ||||
|         self.safe_page_operation(remove_operation) | ||||
|  | ||||
|     def action_make_all_child_elements_visible(self, selector, value): | ||||
|         """Recursively makes all child elements inside the given selector fully visible.""" | ||||
|         self.page.locator(selector).locator("*").evaluate_all(""" | ||||
|             els => els.forEach(el => { | ||||
|                 el.style.display = 'block';   // Forces it to be displayed | ||||
|                 el.style.visibility = 'visible';   // Ensures it's not hidden | ||||
|                 el.style.opacity = '1';   // Fully opaque | ||||
|                 el.style.position = 'relative';   // Avoids 'absolute' hiding | ||||
|                 el.style.height = 'auto';   // Expands collapsed elements | ||||
|                 el.style.width = 'auto';   // Ensures full visibility | ||||
|                 el.removeAttribute('hidden');   // Removes hidden attribute | ||||
|                 el.classList.remove('hidden', 'd-none');  // Removes common CSS hidden classes | ||||
|             }) | ||||
|         """) | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         def make_visible_operation(): | ||||
|             self.page.locator(selector).locator("*").evaluate_all(""" | ||||
|                 els => els.forEach(el => { | ||||
|                     el.style.display = 'block';   // Forces it to be displayed | ||||
|                     el.style.visibility = 'visible';   // Ensures it's not hidden | ||||
|                     el.style.opacity = '1';   // Fully opaque | ||||
|                     el.style.position = 'relative';   // Avoids 'absolute' hiding | ||||
|                     el.style.height = 'auto';   // Expands collapsed elements | ||||
|                     el.style.width = 'auto';   // Ensures full visibility | ||||
|                     el.removeAttribute('hidden');   // Removes hidden attribute | ||||
|                     el.classList.remove('hidden', 'd-none');  // Removes common CSS hidden classes | ||||
|                 }) | ||||
|             """) | ||||
|              | ||||
|         self.safe_page_operation(make_visible_operation) | ||||
|  | ||||
| # Responsible for maintaining a live 'context' with the chrome CDP | ||||
| # @todo - how long do contexts live for anyway? | ||||
| @@ -224,7 +367,9 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|     # bump and kill this if idle after X sec | ||||
|     age_start = 0 | ||||
|     headers = {} | ||||
|  | ||||
|     # Track if resources are properly cleaned up | ||||
|     _is_cleaned_up = False | ||||
|      | ||||
|     # use a special driver, maybe locally etc | ||||
|     command_executor = os.getenv( | ||||
|         "PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL" | ||||
| @@ -243,9 +388,14 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|         self.age_start = time.time() | ||||
|         self.playwright_browser = playwright_browser | ||||
|         self.start_url = start_url | ||||
|         self._is_cleaned_up = False | ||||
|         if self.context is None: | ||||
|             self.connect(proxy=proxy) | ||||
|  | ||||
|     def __del__(self): | ||||
|         # Ensure cleanup happens if object is garbage collected | ||||
|         self.cleanup() | ||||
|  | ||||
|     # Connect and setup a new context | ||||
|     def connect(self, proxy=None): | ||||
|         # Should only get called once - test that | ||||
| @@ -264,31 +414,74 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|             user_agent=manage_user_agent(headers=self.headers), | ||||
|         ) | ||||
|  | ||||
|  | ||||
|         self.page = self.context.new_page() | ||||
|  | ||||
|         # self.page.set_default_navigation_timeout(keep_open) | ||||
|         self.page.set_default_timeout(keep_open) | ||||
|         # @todo probably this doesnt work | ||||
|         self.page.on( | ||||
|             "close", | ||||
|             self.mark_as_closed, | ||||
|         ) | ||||
|         # Set event handlers | ||||
|         self.page.on("close", self.mark_as_closed) | ||||
|         # Listen for all console events and handle errors | ||||
|         self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|         logger.debug(f"Time to browser setup {time.time()-now:.2f}s") | ||||
|         self.page.wait_for_timeout(1 * 1000) | ||||
|  | ||||
|  | ||||
|     def mark_as_closed(self): | ||||
|         logger.debug("Page closed, cleaning up..") | ||||
|         self.cleanup() | ||||
|  | ||||
|     def cleanup(self): | ||||
|         """Properly clean up all resources to prevent memory leaks""" | ||||
|         if self._is_cleaned_up: | ||||
|             return | ||||
|              | ||||
|         logger.debug("Cleaning up browser steps resources") | ||||
|          | ||||
|         # Clean up page | ||||
|         if hasattr(self, 'page') and self.page is not None: | ||||
|             try: | ||||
|                 # Force garbage collection before closing | ||||
|                 self.page.request_gc() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error during page garbage collection: {str(e)}") | ||||
|                  | ||||
|             try: | ||||
|                 # Remove event listeners before closing | ||||
|                 self.page.remove_listener("close", self.mark_as_closed) | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error removing event listeners: {str(e)}") | ||||
|                  | ||||
|             try: | ||||
|                 self.page.close() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error closing page: {str(e)}") | ||||
|              | ||||
|             self.page = None | ||||
|  | ||||
|         # Clean up context | ||||
|         if hasattr(self, 'context') and self.context is not None: | ||||
|             try: | ||||
|                 self.context.close() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error closing context: {str(e)}") | ||||
|              | ||||
|             self.context = None | ||||
|              | ||||
|         self._is_cleaned_up = True | ||||
|         logger.debug("Browser steps resources cleanup complete") | ||||
|  | ||||
|     @property | ||||
|     def has_expired(self): | ||||
|         if not self.page: | ||||
|         if not self.page or self._is_cleaned_up: | ||||
|             return True | ||||
|  | ||||
|          | ||||
|         # Check if session has expired based on age | ||||
|         max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10))  # Default 10 minutes | ||||
|         if (time.time() - self.age_start) > max_age_seconds: | ||||
|             logger.debug(f"Browser steps session expired after {max_age_seconds} seconds") | ||||
|             return True | ||||
|              | ||||
|         return False | ||||
|  | ||||
|     def get_current_state(self): | ||||
|         """Return the screenshot and interactive elements mapping, generally always called after action_()""" | ||||
| @@ -297,36 +490,55 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|         # because we for now only run browser steps in playwright mode (not puppeteer mode) | ||||
|         from changedetectionio.content_fetchers.playwright import capture_full_page | ||||
|  | ||||
|         # Safety check - don't proceed if resources are cleaned up | ||||
|         if self._is_cleaned_up or self.page is None: | ||||
|             logger.warning("Attempted to get current state after cleanup") | ||||
|             return (None, None) | ||||
|  | ||||
|         xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() | ||||
|  | ||||
|         now = time.time() | ||||
|         self.page.wait_for_timeout(1 * 1000) | ||||
|  | ||||
|         screenshot = capture_full_page(page=self.page) | ||||
|         screenshot = None | ||||
|         xpath_data = None | ||||
|          | ||||
|         try: | ||||
|             # Get screenshot first | ||||
|             screenshot = capture_full_page(page=self.page) | ||||
|             logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s") | ||||
|  | ||||
|         logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s") | ||||
|             # Then get interactive elements | ||||
|             now = time.time() | ||||
|             self.page.evaluate("var include_filters=''") | ||||
|             self.page.request_gc() | ||||
|  | ||||
|         now = time.time() | ||||
|         self.page.evaluate("var include_filters=''") | ||||
|         # Go find the interactive elements | ||||
|         # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers? | ||||
|             scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' | ||||
|  | ||||
|         self.page.request_gc() | ||||
|             MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|             xpath_data = json.loads(self.page.evaluate(xpath_element_js, { | ||||
|                 "visualselector_xpath_selectors": scan_elements, | ||||
|                 "max_height": MAX_TOTAL_HEIGHT | ||||
|             })) | ||||
|             self.page.request_gc() | ||||
|  | ||||
|         scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' | ||||
|  | ||||
|         MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|         xpath_data = json.loads(self.page.evaluate(xpath_element_js, { | ||||
|             "visualselector_xpath_selectors": scan_elements, | ||||
|             "max_height": MAX_TOTAL_HEIGHT | ||||
|         })) | ||||
|         self.page.request_gc() | ||||
|  | ||||
|         # So the JS will find the smallest one first | ||||
|         xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) | ||||
|         logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s") | ||||
|  | ||||
|         # playwright._impl._api_types.Error: Browser closed. | ||||
|         # @todo show some countdown timer? | ||||
|             # Sort elements by size | ||||
|             xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) | ||||
|             logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s") | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error getting current state: {str(e)}") | ||||
|             # Attempt recovery - force garbage collection | ||||
|             try: | ||||
|                 self.page.request_gc() | ||||
|             except: | ||||
|                 pass | ||||
|          | ||||
|         # Request garbage collection one final time | ||||
|         try: | ||||
|             self.page.request_gc() | ||||
|         except: | ||||
|             pass | ||||
|              | ||||
|         return (screenshot, xpath_data) | ||||
|  | ||||
|   | ||||
| @@ -104,6 +104,9 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|             uuid = list(datastore.data['settings']['application']['tags'].keys()).pop() | ||||
|  | ||||
|         default = datastore.data['settings']['application']['tags'].get(uuid) | ||||
|         if not default: | ||||
|             flash("Tag not found", "error") | ||||
|             return redirect(url_for('watchlist.index')) | ||||
|  | ||||
|         form = group_restock_settings_form( | ||||
|                                        formdata=request.form if request.method == 'POST' else None, | ||||
|   | ||||
| @@ -66,7 +66,7 @@ | ||||
|                     <div  class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_muted) }} | ||||
|                     </div> | ||||
|                     {% if is_html_webdriver %} | ||||
|                     {% if 1 %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_screenshot) }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
|   | ||||
| @@ -213,9 +213,6 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|             if request.method == 'POST' and not form.validate(): | ||||
|                 flash("An error occurred, please see below.", "error") | ||||
|  | ||||
|             visualselector_data_is_ready = datastore.visualselector_data_is_ready(uuid) | ||||
|  | ||||
|  | ||||
|             # JQ is difficult to install on windows and must be manually added (outside requirements.txt) | ||||
|             jq_support = True | ||||
|             try: | ||||
| @@ -225,16 +222,20 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|  | ||||
|             watch = datastore.data['watching'].get(uuid) | ||||
|  | ||||
|             # if system or watch is configured to need a chrome type browser | ||||
|             system_uses_webdriver = datastore.data['settings']['application']['fetch_backend'] == 'html_webdriver' | ||||
|  | ||||
|             watch_uses_webdriver = False | ||||
|             watch_needs_selenium_or_playwright = False | ||||
|             if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'): | ||||
|                 watch_uses_webdriver = True | ||||
|                 watch_needs_selenium_or_playwright = True | ||||
|  | ||||
|  | ||||
|             from zoneinfo import available_timezones | ||||
|  | ||||
|             # Only works reliably with Playwright | ||||
|  | ||||
|             # Import the global plugin system | ||||
|             from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras | ||||
|              | ||||
|             template_args = { | ||||
|                 'available_processors': processors.available_processors(), | ||||
|                 'available_timezones': sorted(available_timezones()), | ||||
| @@ -247,14 +248,18 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                 'has_default_notification_urls': True if len(datastore.data['settings']['application']['notification_urls']) else False, | ||||
|                 'has_extra_headers_file': len(datastore.get_all_headers_in_textfile_for_watch(uuid=uuid)) > 0, | ||||
|                 'has_special_tag_options': _watch_has_tag_options_set(watch=watch), | ||||
|                 'watch_uses_webdriver': watch_uses_webdriver, | ||||
|                 'jq_support': jq_support, | ||||
|                 'playwright_enabled': os.getenv('PLAYWRIGHT_DRIVER_URL', False), | ||||
|                 'settings_application': datastore.data['settings']['application'], | ||||
|                 'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'), | ||||
|                 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'), | ||||
|                 'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch), | ||||
|                 'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid), | ||||
|                 'timezone_default_config': datastore.data['settings']['application'].get('timezone'), | ||||
|                 'using_global_webdriver_wait': not default['webdriver_delay'], | ||||
|                 'uuid': uuid, | ||||
|                 'watch': watch | ||||
|                 'watch': watch, | ||||
|                 'watch_needs_selenium_or_playwright': watch_needs_selenium_or_playwright, | ||||
|             } | ||||
|  | ||||
|             included_content = None | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from json_logic.builtins import BUILTINS | ||||
| from .exceptions import EmptyConditionRuleRowNotUsable | ||||
| from .pluggy_interface import plugin_manager  # Import the pluggy plugin manager | ||||
| from . import default_plugin | ||||
|  | ||||
| from loguru import logger | ||||
| # List of all supported JSON Logic operators | ||||
| operator_choices = [ | ||||
|     (None, "Choose one - Operator"), | ||||
| @@ -94,20 +94,41 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat | ||||
|     EXECUTE_DATA = {} | ||||
|     result = True | ||||
|      | ||||
|     ruleset_settings = application_datastruct['watching'].get(current_watch_uuid) | ||||
|     watch = application_datastruct['watching'].get(current_watch_uuid) | ||||
|  | ||||
|     if ruleset_settings.get("conditions"): | ||||
|         logic_operator = "and" if ruleset_settings.get("conditions_match_logic", "ALL") == "ALL" else "or" | ||||
|         complete_rules = filter_complete_rules(ruleset_settings['conditions']) | ||||
|     if watch and watch.get("conditions"): | ||||
|         logic_operator = "and" if watch.get("conditions_match_logic", "ALL") == "ALL" else "or" | ||||
|         complete_rules = filter_complete_rules(watch['conditions']) | ||||
|         if complete_rules: | ||||
|             # Give all plugins a chance to update the data dict again (that we will test the conditions against) | ||||
|             for plugin in plugin_manager.get_plugins(): | ||||
|                 new_execute_data = plugin.add_data(current_watch_uuid=current_watch_uuid, | ||||
|                                                    application_datastruct=application_datastruct, | ||||
|                                                    ephemeral_data=ephemeral_data) | ||||
|                 try: | ||||
|                     import concurrent.futures | ||||
|                     import time | ||||
|                      | ||||
|                     with concurrent.futures.ThreadPoolExecutor() as executor: | ||||
|                         future = executor.submit( | ||||
|                             plugin.add_data, | ||||
|                             current_watch_uuid=current_watch_uuid, | ||||
|                             application_datastruct=application_datastruct, | ||||
|                             ephemeral_data=ephemeral_data | ||||
|                         ) | ||||
|                         logger.debug(f"Trying plugin {plugin}....") | ||||
|  | ||||
|                 if new_execute_data and isinstance(new_execute_data, dict): | ||||
|                     EXECUTE_DATA.update(new_execute_data) | ||||
|                         # Set a timeout of 10 seconds | ||||
|                         try: | ||||
|                             new_execute_data = future.result(timeout=10) | ||||
|                             if new_execute_data and isinstance(new_execute_data, dict): | ||||
|                                 EXECUTE_DATA.update(new_execute_data) | ||||
|  | ||||
|                         except concurrent.futures.TimeoutError: | ||||
|                             # The plugin took too long, abort processing for this watch | ||||
|                             raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.") | ||||
|                 except Exception as e: | ||||
|                     # Log the error but continue with the next plugin | ||||
|                     import logging | ||||
|                     logging.error(f"Error executing plugin {plugin.__class__.__name__}: {str(e)}") | ||||
|                     continue | ||||
|  | ||||
|             # Create the ruleset | ||||
|             ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules) | ||||
| @@ -132,3 +153,18 @@ for plugin in plugin_manager.get_plugins(): | ||||
|     if isinstance(new_field_choices, list): | ||||
|         field_choices.extend(new_field_choices) | ||||
|  | ||||
| def collect_ui_edit_stats_extras(watch): | ||||
|     """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras""" | ||||
|     extras_content = [] | ||||
|      | ||||
|     for plugin in plugin_manager.get_plugins(): | ||||
|         try: | ||||
|             content = plugin.ui_edit_stats_extras(watch=watch) | ||||
|             if content: | ||||
|                 extras_content.append(content) | ||||
|         except Exception as e: | ||||
|             # Skip plugins that don't implement the hook or have errors | ||||
|             pass | ||||
|              | ||||
|     return "\n".join(extras_content) if extras_content else "" | ||||
|  | ||||
|   | ||||
| @@ -1,5 +1,8 @@ | ||||
| import pluggy | ||||
| from . import default_plugin  # Import the default plugin | ||||
| import os | ||||
| import importlib | ||||
| import sys | ||||
| from . import default_plugin | ||||
|  | ||||
| # ✅ Ensure that the namespace in HookspecMarker matches PluginManager | ||||
| PLUGIN_NAMESPACE = "changedetectionio_conditions" | ||||
| @@ -30,6 +33,11 @@ class ConditionsSpec: | ||||
|     def add_data(current_watch_uuid, application_datastruct, ephemeral_data): | ||||
|         """Add to the datadict""" | ||||
|         pass | ||||
|          | ||||
|     @hookspec | ||||
|     def ui_edit_stats_extras(watch): | ||||
|         """Return HTML content to add to the stats tab in the edit view""" | ||||
|         pass | ||||
|  | ||||
| # ✅ Set up Pluggy Plugin Manager | ||||
| plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE) | ||||
| @@ -40,5 +48,27 @@ plugin_manager.add_hookspecs(ConditionsSpec) | ||||
| # ✅ Register built-in plugins manually | ||||
| plugin_manager.register(default_plugin, "default_plugin") | ||||
|  | ||||
| # ✅ Load plugins from the plugins directory | ||||
| def load_plugins_from_directory(): | ||||
|     plugins_dir = os.path.join(os.path.dirname(__file__), 'plugins') | ||||
|     if not os.path.exists(plugins_dir): | ||||
|         return | ||||
|          | ||||
|     # Get all Python files (excluding __init__.py) | ||||
|     for filename in os.listdir(plugins_dir): | ||||
|         if filename.endswith(".py") and filename != "__init__.py": | ||||
|             module_name = filename[:-3]  # Remove .py extension | ||||
|             module_path = f"changedetectionio.conditions.plugins.{module_name}" | ||||
|              | ||||
|             try: | ||||
|                 module = importlib.import_module(module_path) | ||||
|                 # Register the plugin with pluggy | ||||
|                 plugin_manager.register(module, module_name) | ||||
|             except (ImportError, AttributeError) as e: | ||||
|                 print(f"Error loading plugin {module_name}: {e}") | ||||
|  | ||||
| # Load plugins from the plugins directory | ||||
| load_plugins_from_directory() | ||||
|  | ||||
| # ✅ Discover installed plugins from external packages (if any) | ||||
| plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) | ||||
|   | ||||
							
								
								
									
										1
									
								
								changedetectionio/conditions/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								changedetectionio/conditions/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | ||||
| # Import plugins package to make them discoverable | ||||
							
								
								
									
										107
									
								
								changedetectionio/conditions/plugins/levenshtein_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								changedetectionio/conditions/plugins/levenshtein_plugin.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| # Support both plugin systems | ||||
| conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions") | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| def levenshtein_ratio_recent_history(watch, incoming_text=None): | ||||
|     try: | ||||
|         from Levenshtein import ratio, distance | ||||
|         k = list(watch.history.keys()) | ||||
|         a = None | ||||
|         b = None | ||||
|  | ||||
|         # When called from ui_edit_stats_extras, we don't have incoming_text | ||||
|         if incoming_text is None: | ||||
|             a = watch.get_history_snapshot(timestamp=k[-1])  # Latest snapshot | ||||
|             b = watch.get_history_snapshot(timestamp=k[-2])  # Previous snapshot | ||||
|  | ||||
|         # Needs atleast one snapshot | ||||
|         elif len(k) >= 1: # Should be atleast one snapshot to compare against | ||||
|             a = watch.get_history_snapshot(timestamp=k[-1]) # Latest saved snapshot | ||||
|             b = incoming_text if incoming_text else k[-2] | ||||
|  | ||||
|         if a and b: | ||||
|             distance_value = distance(a, b) | ||||
|             ratio_value = ratio(a, b) | ||||
|             return { | ||||
|                 'distance': distance_value, | ||||
|                 'ratio': ratio_value, | ||||
|                 'percent_similar': round(ratio_value * 100, 2) | ||||
|             } | ||||
|     except Exception as e: | ||||
|         logger.warning(f"Unable to calc similarity: {str(e)}") | ||||
|  | ||||
|     return '' | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def register_operators(): | ||||
|     pass | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def register_operator_choices(): | ||||
|     pass | ||||
|  | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def register_field_choices(): | ||||
|     return [ | ||||
|         ("levenshtein_ratio", "Levenshtein - Text similarity ratio"), | ||||
|         ("levenshtein_distance", "Levenshtein - Text change distance"), | ||||
|     ] | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def add_data(current_watch_uuid, application_datastruct, ephemeral_data): | ||||
|     res = {} | ||||
|     watch = application_datastruct['watching'].get(current_watch_uuid) | ||||
|     # ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc | ||||
|  | ||||
|     if watch and 'text' in ephemeral_data: | ||||
|         lev_data = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text','')) | ||||
|         if isinstance(lev_data, dict): | ||||
|             res['levenshtein_ratio'] = lev_data.get('ratio', 0) | ||||
|             res['levenshtein_similarity'] = lev_data.get('percent_similar', 0) | ||||
|             res['levenshtein_distance'] = lev_data.get('distance', 0) | ||||
|  | ||||
|     return res | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add Levenshtein stats to the UI using the global plugin system""" | ||||
|     """Generate the HTML for Levenshtein stats - shared by both plugin systems""" | ||||
|     if len(watch.history.keys()) < 2: | ||||
|         return "<p>Not enough history to calculate Levenshtein metrics</p>" | ||||
|      | ||||
|     try: | ||||
|         lev_data = levenshtein_ratio_recent_history(watch) | ||||
|         if not lev_data or not isinstance(lev_data, dict): | ||||
|             return "<p>Unable to calculate Levenshtein metrics</p>" | ||||
|              | ||||
|         html = f""" | ||||
|         <div class="levenshtein-stats"> | ||||
|             <h4>Levenshtein Text Similarity Details</h4> | ||||
|             <table class="pure-table"> | ||||
|                 <tbody> | ||||
|                     <tr> | ||||
|                         <td>Raw distance (edits needed)</td> | ||||
|                         <td>{lev_data['distance']}</td> | ||||
|                     </tr> | ||||
|                     <tr> | ||||
|                         <td>Similarity ratio</td> | ||||
|                         <td>{lev_data['ratio']:.4f}</td> | ||||
|                     </tr> | ||||
|                     <tr> | ||||
|                         <td>Percent similar</td> | ||||
|                         <td>{lev_data['percent_similar']}%</td> | ||||
|                     </tr> | ||||
|                 </tbody> | ||||
|             </table> | ||||
|             <p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p> | ||||
|         </div> | ||||
|         """ | ||||
|         return html | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error generating Levenshtein UI extras: {str(e)}") | ||||
|         return "<p>Error calculating Levenshtein metrics</p>" | ||||
|          | ||||
							
								
								
									
										82
									
								
								changedetectionio/conditions/plugins/wordcount_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								changedetectionio/conditions/plugins/wordcount_plugin.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,82 @@ | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| # Support both plugin systems | ||||
| conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions") | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| def count_words_in_history(watch, incoming_text=None): | ||||
|     """Count words in snapshot text""" | ||||
|     try: | ||||
|         if incoming_text is not None: | ||||
|             # When called from add_data with incoming text | ||||
|             return len(incoming_text.split()) | ||||
|         elif watch.history.keys(): | ||||
|             # When called from UI extras to count latest snapshot | ||||
|             latest_key = list(watch.history.keys())[-1] | ||||
|             latest_content = watch.get_history_snapshot(latest_key) | ||||
|             return len(latest_content.split()) | ||||
|         return 0 | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error counting words: {str(e)}") | ||||
|         return 0 | ||||
|  | ||||
| # Implement condition plugin hooks | ||||
| @conditions_hookimpl | ||||
| def register_operators(): | ||||
|     # No custom operators needed | ||||
|     return {} | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def register_operator_choices(): | ||||
|     # No custom operator choices needed | ||||
|     return [] | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def register_field_choices(): | ||||
|     # Add a field that will be available in conditions | ||||
|     return [ | ||||
|         ("word_count", "Word count of content"), | ||||
|     ] | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def add_data(current_watch_uuid, application_datastruct, ephemeral_data): | ||||
|     """Add word count data for conditions""" | ||||
|     result = {} | ||||
|     watch = application_datastruct['watching'].get(current_watch_uuid) | ||||
|      | ||||
|     if watch and 'text' in ephemeral_data: | ||||
|         word_count = count_words_in_history(watch, ephemeral_data['text']) | ||||
|         result['word_count'] = word_count | ||||
|      | ||||
|     return result | ||||
|  | ||||
| def _generate_stats_html(watch): | ||||
|     """Generate the HTML content for the stats tab""" | ||||
|     word_count = count_words_in_history(watch) | ||||
|      | ||||
|     html = f""" | ||||
|     <div class="word-count-stats"> | ||||
|         <h4>Content Analysis</h4> | ||||
|         <table class="pure-table"> | ||||
|             <tbody> | ||||
|                 <tr> | ||||
|                     <td>Word count (latest snapshot)</td> | ||||
|                     <td>{word_count}</td> | ||||
|                 </tr> | ||||
|             </tbody> | ||||
|         </table> | ||||
|         <p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p> | ||||
|     </div> | ||||
|     """ | ||||
|     return html | ||||
|  | ||||
| @conditions_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add word count stats to the UI through conditions plugin system""" | ||||
|     return _generate_stats_html(watch) | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add word count stats to the UI using the global plugin system""" | ||||
|     return _generate_stats_html(watch) | ||||
| @@ -26,9 +26,11 @@ def capture_full_page(page): | ||||
|     step_size = SCREENSHOT_SIZE_STITCH_THRESHOLD # Size that won't cause GPU to overflow | ||||
|     screenshot_chunks = [] | ||||
|     y = 0 | ||||
|      | ||||
|     # If page height is larger than current viewport, use a larger viewport for better capturing | ||||
|  | ||||
|     if page_height > page.viewport_size['height']: | ||||
|         if page_height < step_size: | ||||
|             step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size | ||||
|         logger.debug(f"Setting bigger viewport to step through large page width W{page.viewport_size['width']}xH{step_size} because page_height > viewport_size") | ||||
|         # Set viewport to a larger size to capture more content at once | ||||
|         page.set_viewport_size({'width': page.viewport_size['width'], 'height': step_size}) | ||||
|  | ||||
| @@ -59,7 +61,10 @@ def capture_full_page(page): | ||||
|         p.join() | ||||
|         logger.debug( | ||||
|             f"Screenshot (chunked/stitched) - Page height: {page_height} Capture height: {SCREENSHOT_MAX_TOTAL_HEIGHT} - Stitched together in {time.time() - start:.2f}s") | ||||
|  | ||||
|         # Explicit cleanup | ||||
|         del screenshot_chunks | ||||
|         del p | ||||
|         del parent_conn, child_conn | ||||
|         screenshot_chunks = None | ||||
|         return screenshot | ||||
|  | ||||
| @@ -286,12 +291,28 @@ class fetcher(Fetcher): | ||||
|                     pass | ||||
|                  | ||||
|                 # Clean up resources properly | ||||
|                 context.close() | ||||
|                 context = None | ||||
|                 try: | ||||
|                     self.page.request_gc() | ||||
|                 except: | ||||
|                     pass | ||||
|  | ||||
|                 self.page.close() | ||||
|                 try: | ||||
|                     self.page.close() | ||||
|                 except: | ||||
|                     pass | ||||
|                 self.page = None | ||||
|  | ||||
|                 browser.close() | ||||
|                 borwser = None | ||||
|                 try: | ||||
|                     context.close() | ||||
|                 except: | ||||
|                     pass | ||||
|                 context = None | ||||
|  | ||||
|                 try: | ||||
|                     browser.close() | ||||
|                 except: | ||||
|                     pass | ||||
|                 browser = None | ||||
|  | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -46,9 +46,10 @@ async def capture_full_page(page): | ||||
|     screenshot_chunks = [] | ||||
|     y = 0 | ||||
|     if page_height > page.viewport['height']: | ||||
|         if page_height < step_size: | ||||
|             step_size = page_height # Incase page is bigger than default viewport but smaller than proposed step size | ||||
|         await page.setViewport({'width': page.viewport['width'], 'height': step_size}) | ||||
|  | ||||
|  | ||||
|     while y < min(page_height, SCREENSHOT_MAX_TOTAL_HEIGHT): | ||||
|         await page.evaluate(f"window.scrollTo(0, {y})") | ||||
|         screenshot_chunks.append(await page.screenshot(type_='jpeg', | ||||
| @@ -146,7 +147,7 @@ class fetcher(Fetcher): | ||||
|                          is_binary, | ||||
|                          empty_pages_are_a_change | ||||
|                          ): | ||||
|  | ||||
|         import re | ||||
|         self.delete_browser_steps_screenshots() | ||||
|         extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|  | ||||
| @@ -171,6 +172,17 @@ class fetcher(Fetcher): | ||||
|         # headless - ask a new page | ||||
|         self.page = (pages := await browser.pages) and len(pages) or await browser.newPage() | ||||
|  | ||||
|         if '--window-size' in self.browser_connection_url: | ||||
|             # Be sure the viewport is always the window-size, this is often not the same thing | ||||
|             match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url) | ||||
|             if match: | ||||
|                 logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}") | ||||
|                 await self.page.setViewport({ | ||||
|                     "width": int(match.group(1)), | ||||
|                     "height": int(match.group(2)) | ||||
|                 }) | ||||
|                 logger.debug(f"Puppeteer viewport size {self.page.viewport}") | ||||
|  | ||||
|         try: | ||||
|             from pyppeteerstealth import inject_evasions_into_page | ||||
|         except ImportError: | ||||
| @@ -217,7 +229,6 @@ class fetcher(Fetcher): | ||||
|  | ||||
|         response = await self.page.goto(url, waitUntil="load") | ||||
|  | ||||
|  | ||||
|         if response is None: | ||||
|             await self.page.close() | ||||
|             await browser.close() | ||||
|   | ||||
| @@ -10,6 +10,7 @@ async () => { | ||||
|             'article épuisé', | ||||
|             'artikel zurzeit vergriffen', | ||||
|             'as soon as stock is available', | ||||
|             'aucune offre n\'est disponible', | ||||
|             'ausverkauft', // sold out | ||||
|             'available for back order', | ||||
|             'awaiting stock', | ||||
| @@ -25,9 +26,8 @@ async () => { | ||||
|             'dieser artikel ist bald wieder verfügbar', | ||||
|             'dostępne wkrótce', | ||||
|             'en rupture', | ||||
|             'en rupture de stock', | ||||
|             'épuisé', | ||||
|             'esgotado', | ||||
|             'in kürze lieferbar', | ||||
|             'indisponible', | ||||
|             'indisponível', | ||||
|             'isn\'t in stock right now', | ||||
| @@ -50,10 +50,12 @@ async () => { | ||||
|             'niet leverbaar', | ||||
|             'niet op voorraad', | ||||
|             'no disponible', | ||||
|             'non disponibile', | ||||
|             'non disponible', | ||||
|             'no featured offers available', | ||||
|             'no longer available', | ||||
|             'no longer in stock', | ||||
|             'no tickets available', | ||||
|             'non disponibile', | ||||
|             'non disponible', | ||||
|             'not available', | ||||
|             'not currently available', | ||||
|             'not in stock', | ||||
| @@ -89,13 +91,15 @@ async () => { | ||||
|             'vergriffen', | ||||
|             'vorbestellen', | ||||
|             'vorbestellung ist bald möglich', | ||||
|             'we don\'t currently have any', | ||||
|             'we couldn\'t find any products that match', | ||||
|             'we do not currently have an estimate of when this product will be back in stock.', | ||||
|             'we don\'t currently have any', | ||||
|             'we don\'t know when or if this item will be back in stock.', | ||||
|             'we were not able to find a match', | ||||
|             'when this arrives in stock', | ||||
|             'when this item is available to order', | ||||
|             'zur zeit nicht an lager', | ||||
|             'épuisé', | ||||
|             '品切れ', | ||||
|             '已售', | ||||
|             '已售完', | ||||
| @@ -122,6 +126,20 @@ async () => { | ||||
|         // so it's good to filter to just the 'above the fold' elements | ||||
|         // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist | ||||
|  | ||||
|         function elementIsInEyeBallRange(element) { | ||||
|             // outside the 'fold' or some weird text in the heading area | ||||
|             // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|             // Note: theres also an automated test that places the 'out of stock' text fairly low down | ||||
|             // Skip text that could be in the header area | ||||
|             if (element.getBoundingClientRect().bottom + window.scrollY <= 300 ) { | ||||
|                 return false; | ||||
|             } | ||||
|             // Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in there | ||||
|             if (element.getBoundingClientRect().bottom + window.scrollY >= 1300 ) { | ||||
|                 return false; | ||||
|             } | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
| // @todo - if it's SVG or IMG, go into image diff mode | ||||
|  | ||||
| @@ -158,9 +176,7 @@ async () => { | ||||
|         for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|             const element = elementsToScan[i]; | ||||
|  | ||||
|             // outside the 'fold' or some weird text in the heading area | ||||
|             // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|             if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|             if (!elementIsInEyeBallRange(element)) { | ||||
|                 continue | ||||
|             } | ||||
|  | ||||
| @@ -174,11 +190,11 @@ async () => { | ||||
|             } catch (e) { | ||||
|                 console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e); | ||||
|             } | ||||
|  | ||||
|             if (elementText.length) { | ||||
|                 // try which ones could mean its in stock | ||||
|                 if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) { | ||||
|                     console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`) | ||||
|                     element.style.border = "2px solid green"; // highlight the element that was detected as in stock | ||||
|                     return 'Possibly in stock'; | ||||
|                 } | ||||
|             } | ||||
| @@ -187,10 +203,8 @@ async () => { | ||||
|         // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK | ||||
|         for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|             const element = elementsToScan[i]; | ||||
|             // outside the 'fold' or some weird text in the heading area | ||||
|             // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|             // Note: theres also an automated test that places the 'out of stock' text fairly low down | ||||
|             if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|  | ||||
|             if (!elementIsInEyeBallRange(element)) { | ||||
|                 continue | ||||
|             } | ||||
|             elementText = ""; | ||||
| @@ -205,6 +219,7 @@ async () => { | ||||
|                 for (const outOfStockText of outOfStockTexts) { | ||||
|                     if (elementText.includes(outOfStockText)) { | ||||
|                         console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`) | ||||
|                         element.style.border = "2px solid red"; // highlight the element that was detected as out of stock | ||||
|                         return outOfStockText; // item is out of stock | ||||
|                     } | ||||
|                 } | ||||
|   | ||||
| @@ -202,7 +202,6 @@ async (options) => { | ||||
|         // Foreach filter, go and find it on the page and add it to the results so we can visualise it again | ||||
|         for (const f of include_filters) { | ||||
|             bbox = false; | ||||
|             q = false; | ||||
|  | ||||
|             if (!f.length) { | ||||
|                 console.log("xpath_element_scraper: Empty filter, skipping"); | ||||
| @@ -255,7 +254,7 @@ async (options) => { | ||||
|                             console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) | ||||
|                         } catch (e) { | ||||
|                             console.log(e) | ||||
|                             console.log("xpath_element_scraper: error looking up q.ownerElement") | ||||
|                             console.log("xpath_element_scraper: error looking up node.ownerElement") | ||||
|                         } | ||||
|                     } | ||||
|  | ||||
|   | ||||
| @@ -31,33 +31,33 @@ def stitch_images_worker(pipe_conn, chunks_bytes, original_page_height, capture_ | ||||
|         # Draw caption on top (overlaid, not extending canvas) | ||||
|         draw = ImageDraw.Draw(stitched) | ||||
|  | ||||
|  | ||||
|         caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long" | ||||
|         padding = 10 | ||||
|         font_size = 35 | ||||
|         font_color = (255, 0, 0) | ||||
|         background_color = (255, 255, 255) | ||||
|         if original_page_height > capture_height: | ||||
|             caption_text = f"WARNING: Screenshot was {original_page_height}px but trimmed to {capture_height}px because it was too long" | ||||
|             padding = 10 | ||||
|             font_size = 35 | ||||
|             font_color = (255, 0, 0) | ||||
|             background_color = (255, 255, 255) | ||||
|  | ||||
|  | ||||
|         # Try to load a proper font | ||||
|         try: | ||||
|             font = ImageFont.truetype("arial.ttf", font_size) | ||||
|         except IOError: | ||||
|             font = ImageFont.load_default() | ||||
|             # Try to load a proper font | ||||
|             try: | ||||
|                 font = ImageFont.truetype("arial.ttf", font_size) | ||||
|             except IOError: | ||||
|                 font = ImageFont.load_default() | ||||
|  | ||||
|         bbox = draw.textbbox((0, 0), caption_text, font=font) | ||||
|         text_width = bbox[2] - bbox[0] | ||||
|         text_height = bbox[3] - bbox[1] | ||||
|             bbox = draw.textbbox((0, 0), caption_text, font=font) | ||||
|             text_width = bbox[2] - bbox[0] | ||||
|             text_height = bbox[3] - bbox[1] | ||||
|  | ||||
|         # Draw white rectangle background behind text | ||||
|         rect_top = 0 | ||||
|         rect_bottom = text_height + 2 * padding | ||||
|         draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color) | ||||
|             # Draw white rectangle background behind text | ||||
|             rect_top = 0 | ||||
|             rect_bottom = text_height + 2 * padding | ||||
|             draw.rectangle([(0, rect_top), (max_width, rect_bottom)], fill=background_color) | ||||
|  | ||||
|         # Draw text centered horizontally, 10px padding from top of the rectangle | ||||
|         text_x = (max_width - text_width) // 2 | ||||
|         text_y = padding | ||||
|         draw.text((text_x, text_y), caption_text, font=font, fill=font_color) | ||||
|             # Draw text centered horizontally, 10px padding from top of the rectangle | ||||
|             text_x = (max_width - text_width) // 2 | ||||
|             text_y = padding | ||||
|             draw.text((text_x, text_y), caption_text, font=font, fill=font_color) | ||||
|  | ||||
|         # Encode and send image | ||||
|         output = io.BytesIO() | ||||
|   | ||||
| @@ -65,7 +65,17 @@ class fetcher(Fetcher): | ||||
|         # request_body, request_method unused for now, until some magic in the future happens. | ||||
|  | ||||
|         options = ChromeOptions() | ||||
|         options.add_argument("--headless") | ||||
|  | ||||
|         # Load Chrome options from env | ||||
|         CHROME_OPTIONS = [ | ||||
|             line.strip() | ||||
|             for line in os.getenv("CHROME_OPTIONS", "").strip().splitlines() | ||||
|             if line.strip() | ||||
|         ] | ||||
|  | ||||
|         for opt in CHROME_OPTIONS: | ||||
|             options.add_argument(opt) | ||||
|  | ||||
|         if self.proxy: | ||||
|             options.proxy = self.proxy | ||||
|  | ||||
| @@ -80,7 +90,9 @@ class fetcher(Fetcher): | ||||
|             self.quit() | ||||
|             raise | ||||
|  | ||||
|         self.driver.set_window_size(1280, 1024) | ||||
|         if not "--window-size" in os.getenv("CHROME_OPTIONS", ""): | ||||
|             self.driver.set_window_size(1280, 1024) | ||||
|  | ||||
|         self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|         if self.webdriver_js_execute_code is not None: | ||||
| @@ -88,6 +100,7 @@ class fetcher(Fetcher): | ||||
|             # Selenium doesn't automatically wait for actions as good as Playwright, so wait again | ||||
|             self.driver.implicitly_wait(int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5))) | ||||
|  | ||||
|  | ||||
|         # @todo - how to check this? is it possible? | ||||
|         self.status_code = 200 | ||||
|         # @todo somehow we should try to get this working for WebDriver | ||||
|   | ||||
| @@ -435,7 +435,9 @@ def cdata_in_document_to_text(html_content: str, render_anchor_tag_content=False | ||||
|  | ||||
|     return re.sub(pattern, repl, html_content) | ||||
|  | ||||
| def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False) -> str: | ||||
|  | ||||
| def html_to_text_sub_worker(conn, html_content: str, render_anchor_tag_content=False, is_rss=False): | ||||
|  | ||||
|     from inscriptis import get_text | ||||
|     from inscriptis.model.config import ParserConfig | ||||
|  | ||||
| @@ -470,9 +472,19 @@ def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=Fals | ||||
|         html_content = re.sub(r'</title>', r'</h1>', html_content) | ||||
|  | ||||
|     text_content = get_text(html_content, config=parser_config) | ||||
|     conn.send(text_content) | ||||
|     conn.close() | ||||
|  | ||||
|     return text_content | ||||
| # NOTE!! ANYTHING LIBXML, HTML5LIB ETC WILL CAUSE SOME SMALL MEMORY LEAK IN THE LOCAL "LIB" IMPLEMENTATION OUTSIDE PYTHON | ||||
| def html_to_text(html_content: str, render_anchor_tag_content=False, is_rss=False): | ||||
|     from multiprocessing import Process, Pipe | ||||
|  | ||||
|     parent_conn, child_conn = Pipe() | ||||
|     p = Process(target=html_to_text_sub_worker, args=(child_conn, html_content, render_anchor_tag_content, is_rss)) | ||||
|     p.start() | ||||
|     text = parent_conn.recv() | ||||
|     p.join() | ||||
|     return text | ||||
|  | ||||
| # Does LD+JSON exist with a @type=='product' and a .price set anywhere? | ||||
| def has_ldjson_product_info(content): | ||||
|   | ||||
							
								
								
									
										82
									
								
								changedetectionio/pluggy_interface.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								changedetectionio/pluggy_interface.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,82 @@ | ||||
| import pluggy | ||||
| import os | ||||
| import importlib | ||||
| import sys | ||||
|  | ||||
| # Global plugin namespace for changedetection.io | ||||
| PLUGIN_NAMESPACE = "changedetectionio" | ||||
|  | ||||
| hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE) | ||||
| hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE) | ||||
|  | ||||
|  | ||||
| class ChangeDetectionSpec: | ||||
|     """Hook specifications for extending changedetection.io functionality.""" | ||||
|  | ||||
|     @hookspec | ||||
|     def ui_edit_stats_extras(watch): | ||||
|         """Return HTML content to add to the stats tab in the edit view. | ||||
|          | ||||
|         Args: | ||||
|             watch: The watch object being edited | ||||
|              | ||||
|         Returns: | ||||
|             str: HTML content to be inserted in the stats tab | ||||
|         """ | ||||
|         pass | ||||
|  | ||||
|  | ||||
| # Set up Plugin Manager | ||||
| plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE) | ||||
|  | ||||
| # Register hookspecs | ||||
| plugin_manager.add_hookspecs(ChangeDetectionSpec) | ||||
|  | ||||
| # Load plugins from subdirectories | ||||
| def load_plugins_from_directories(): | ||||
|     # Dictionary of directories to scan for plugins | ||||
|     plugin_dirs = { | ||||
|         'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'), | ||||
|         # Add more plugin directories here as needed | ||||
|     } | ||||
|      | ||||
|     # Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory | ||||
|      | ||||
|     for dir_name, dir_path in plugin_dirs.items(): | ||||
|         if not os.path.exists(dir_path): | ||||
|             continue | ||||
|              | ||||
|         # Get all Python files (excluding __init__.py) | ||||
|         for filename in os.listdir(dir_path): | ||||
|             if filename.endswith(".py") and filename != "__init__.py": | ||||
|                 module_name = filename[:-3]  # Remove .py extension | ||||
|                 module_path = f"changedetectionio.{dir_name}.plugins.{module_name}" | ||||
|                  | ||||
|                 try: | ||||
|                     module = importlib.import_module(module_path) | ||||
|                     # Register the plugin with pluggy | ||||
|                     plugin_manager.register(module, module_name) | ||||
|                 except (ImportError, AttributeError) as e: | ||||
|                     print(f"Error loading plugin {module_name}: {e}") | ||||
|  | ||||
| # Load plugins | ||||
| load_plugins_from_directories() | ||||
|  | ||||
| # Discover installed plugins from external packages (if any) | ||||
| plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) | ||||
|  | ||||
| # Helper function to collect UI stats extras from all plugins | ||||
| def collect_ui_edit_stats_extras(watch): | ||||
|     """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras""" | ||||
|     extras_content = [] | ||||
|      | ||||
|     # Get all plugins that implement the ui_edit_stats_extras hook | ||||
|     results = plugin_manager.hook.ui_edit_stats_extras(watch=watch) | ||||
|      | ||||
|     # If we have results, add them to our content | ||||
|     if results: | ||||
|         for result in results: | ||||
|             if result:  # Skip empty results | ||||
|                 extras_content.append(result) | ||||
|              | ||||
|     return "\n".join(extras_content) if extras_content else "" | ||||
| @@ -211,7 +211,14 @@ $(document).ready(function () { | ||||
|                     $('input[type=text]', first_available).first().val(x['xpath']); | ||||
|                     $('input[placeholder="Value"]', first_available).addClass('ok').click().focus(); | ||||
|                     found_something = true; | ||||
|                 } else { | ||||
|                 } | ||||
|                 else if (x['tagName'] === 'select') { | ||||
|                     $('select', first_available).val('<select> by option text').change(); | ||||
|                     $('input[type=text]', first_available).first().val(x['xpath']); | ||||
|                     $('input[placeholder="Value"]', first_available).addClass('ok').click().focus(); | ||||
|                     found_something = true; | ||||
|                 } | ||||
|                 else { | ||||
|                     // There's no good way (that I know) to find if this | ||||
|                     // see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging | ||||
|                     // https://codepen.io/azaslavsky/pen/DEJVWv | ||||
| @@ -251,6 +258,10 @@ $(document).ready(function () { | ||||
|                 400: function () { | ||||
|                     // More than likely the CSRF token was lost when the server restarted | ||||
|                     alert("There was a problem processing the request, please reload the page."); | ||||
|                 }, | ||||
|                 401: function (err) { | ||||
|                     // This will be a custom error | ||||
|                     alert(err.responseText); | ||||
|                 } | ||||
|             } | ||||
|         }).done(function (data) { | ||||
|   | ||||
| @@ -98,15 +98,13 @@ | ||||
|  | ||||
|  | ||||
| {% macro playwright_warning() %} | ||||
|     <p><strong>Error - Playwright support for Chrome based fetching is not enabled.</strong> Alternatively try our <a href="https://changedetection.io">very affordable subscription based service which has all this setup for you</a>.</p> | ||||
|     <p><strong>Error - This watch needs Chrome (with playwright/sockpuppetbrowser), but Chrome based fetching is not enabled.</strong> Alternatively try our <a href="https://changedetection.io">very affordable subscription based service which has all this setup for you</a>.</p> | ||||
|     <p>You may need to <a href="https://github.com/dgtlmoon/changedetection.io/blob/09ebc6ec6338545bdd694dc6eee57f2e9d2b8075/docker-compose.yml#L31">Enable playwright environment variable</a> and uncomment the <strong>sockpuppetbrowser</strong> in the <a href="https://github.com/dgtlmoon/changedetection.io/blob/master/docker-compose.yml">docker-compose.yml</a> file.</p> | ||||
|     <br> | ||||
|     <p>(Also Selenium/WebDriver can not extract full page screenshots reliably so Playwright is recommended here)</p> | ||||
|  | ||||
| {% endmacro %} | ||||
|  | ||||
| {% macro only_webdriver_type_watches_warning() %} | ||||
|     <p><strong>Sorry, this functionality only works with Playwright/Chrome enabled watches.<br>You need to <a href="#request">Set the fetch method to Playwright/Chrome mode and resave</a> and have the Playwright connection enabled.</strong></p><br> | ||||
| {% macro only_playwright_type_watches_warning() %} | ||||
|     <p><strong>Sorry, this functionality only works with Playwright/Chrome enabled watches.<br>You need to <a href="#request">Set the fetch method to Playwright/Chrome mode and resave</a> and have the SockpuppetBrowser/Playwright or Selenium enabled.</strong></p><br> | ||||
| {% endmacro %} | ||||
|  | ||||
| {% macro render_time_schedule_form(form, available_timezones, timezone_default_config) %} | ||||
|   | ||||
| @@ -1,6 +1,6 @@ | ||||
| {% extends 'base.html' %} | ||||
| {% block content %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_webdriver_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %} | ||||
| {% from '_helpers.html' import render_field, render_checkbox_field, render_button, render_time_schedule_form, playwright_warning, only_playwright_type_watches_warning, render_conditions_fieldlist_of_formfields_as_table %} | ||||
| {% from '_common_fields.html' import render_common_settings_form %} | ||||
| <script src="{{url_for('static_content', group='js', filename='tabs.js')}}" defer></script> | ||||
| <script src="{{url_for('static_content', group='js', filename='vis.js')}}" defer></script> | ||||
| @@ -204,7 +204,9 @@ Math: {{ 1 + 1 }}") }} | ||||
|             </div> | ||||
|  | ||||
|             <div class="tab-pane-inner" id="browser-steps"> | ||||
|             {% if playwright_enabled and watch_uses_webdriver %} | ||||
|             {% if watch_needs_selenium_or_playwright %} | ||||
|                 {# Only works with playwright #} | ||||
|                 {% if system_has_playwright_configured %} | ||||
|                 <img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality"> | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
| @@ -223,7 +225,6 @@ Math: {{ 1 + 1 }}") }} | ||||
|                         <div class="flex-wrapper" > | ||||
|  | ||||
|                             <div id="browser-steps-ui" class="noselect"> | ||||
|  | ||||
|                                 <div class="noselect"  id="browsersteps-selector-wrapper" style="width: 100%"> | ||||
|                                     <span class="loader" > | ||||
|                                         <span id="browsersteps-click-start"> | ||||
| @@ -245,15 +246,16 @@ Math: {{ 1 + 1 }}") }} | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|                 {% else %} | ||||
|                     <span class="pure-form-message-inline"> | ||||
|                         {% if not watch_uses_webdriver %} | ||||
|                             {{ only_webdriver_type_watches_warning() }} | ||||
|                         {% endif %} | ||||
|                         {%  if not playwright_enabled %} | ||||
|                             {{ playwright_warning() }} | ||||
|                         {% endif %} | ||||
|                     </span> | ||||
|                     {# it's configured to use selenium or chrome but system says its not configured #} | ||||
|                     {{ playwright_warning() }} | ||||
|                     {% if system_has_webdriver_configured %} | ||||
|                         <strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong> | ||||
|                     {% endif %} | ||||
|                 {% endif %} | ||||
|             {% else %} | ||||
|                 {# "This functionality needs chrome.." #} | ||||
|                 {{ only_playwright_type_watches_warning() }} | ||||
|             {% endif %} | ||||
|             </div> | ||||
|  | ||||
|  | ||||
| @@ -262,7 +264,7 @@ Math: {{ 1 + 1 }}") }} | ||||
|                     <div  class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_muted) }} | ||||
|                     </div> | ||||
|                     {% if watch_uses_webdriver %} | ||||
|                     {% if watch_needs_selenium_or_playwright %} | ||||
|                     <div class="pure-control-group inline-radio"> | ||||
|                       {{ render_checkbox_field(form.notification_screenshot) }} | ||||
|                         <span class="pure-form-message-inline"> | ||||
| @@ -379,13 +381,15 @@ Math: {{ 1 + 1 }}") }} | ||||
|  | ||||
|                 <fieldset> | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% if playwright_enabled and watch_uses_webdriver %} | ||||
|                         {% if watch_needs_selenium_or_playwright %} | ||||
|                             {% if system_has_playwright_configured %} | ||||
|                             <span class="pure-form-message-inline" id="visual-selector-heading"> | ||||
|                                 The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items. | ||||
|                             </span> | ||||
|  | ||||
|                             <div id="selector-header"> | ||||
|                                 <a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a> | ||||
|                                 <!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text --> | ||||
|                                 <i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i> | ||||
|                             </div> | ||||
|                             <div id="selector-wrapper" style="display: none"> | ||||
| @@ -397,13 +401,16 @@ Math: {{ 1 + 1 }}") }} | ||||
|                             </div> | ||||
|                             <div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong> <span class="text">Loading...</span></div> | ||||
|                         {% else %} | ||||
|                             {% if not watch_uses_webdriver %} | ||||
|                                 {{ only_webdriver_type_watches_warning() }} | ||||
|                             {% endif %} | ||||
|                             {% if not playwright_enabled %} | ||||
|                                 {{ playwright_warning() }} | ||||
|                             {% endif %} | ||||
|                             {# The watch needed chrome but system says that playwright is not ready #} | ||||
|                             {{ playwright_warning() }} | ||||
|                         {% endif %} | ||||
|                             {% if system_has_webdriver_configured %} | ||||
|                                 <strong>Selenium/Webdriver cant be used here because it wont fetch screenshots reliably.</strong> | ||||
|                             {% endif %} | ||||
|                     {% else %} | ||||
|                         {# "This functionality needs chrome.." #} | ||||
|                         {{ only_playwright_type_watches_warning() }} | ||||
|                     {% endif %} | ||||
|                     </div> | ||||
|                 </fieldset> | ||||
|             </div> | ||||
| @@ -443,6 +450,13 @@ Math: {{ 1 + 1 }}") }} | ||||
|                         </tr> | ||||
|                         </tbody> | ||||
|                     </table> | ||||
|  | ||||
|                     {% if ui_edit_stats_extras %} | ||||
|                     <div class="plugin-stats-extras"> <!-- from pluggy plugin --> | ||||
|                         {{ ui_edit_stats_extras|safe }} | ||||
|                     </div> | ||||
|                     {% endif %} | ||||
|  | ||||
|                     {% if watch.history_n %} | ||||
|                         <p> | ||||
|                              <a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a> | ||||
|   | ||||
| @@ -14,6 +14,8 @@ from changedetectionio.notification import ( | ||||
| def set_original_response(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|        <section id=header style="padding: 50px; height: 350px">This is the header which should be ignored always - <span>add to cart</span></section> | ||||
|        <!-- stock-not-in-stock.js will ignore text in the first 300px, see elementIsInEyeBallRange(), sometimes "add to cart" and other junk is here --> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
| @@ -52,8 +54,6 @@ def test_restock_detection(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     set_original_response() | ||||
|     #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|  | ||||
|     time.sleep(1) | ||||
|     live_server_setup(live_server) | ||||
|     ##################### | ||||
|     notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json') | ||||
| @@ -84,7 +84,8 @@ def test_restock_detection(client, live_server, measure_memory_usage): | ||||
|     # Is it correctly show as NOT in stock? | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'not-in-stock' in res.data | ||||
|     assert b'processor-restock_diff' in res.data # Should have saved in restock mode | ||||
|     assert b'not-in-stock' in res.data # should be out of stock | ||||
|  | ||||
|     # Is it correctly shown as in stock | ||||
|     set_back_in_stock_response() | ||||
|   | ||||
| @@ -45,11 +45,15 @@ def set_number_out_of_range_response(number="150"): | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def test_setup(client, live_server): | ||||
|     """Test that both text and number conditions work together with AND logic.""" | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
| def test_conditions_with_text_and_number(client, live_server): | ||||
|     """Test that both text and number conditions work together with AND logic.""" | ||||
|      | ||||
|     set_original_response("50") | ||||
|     live_server_setup(live_server) | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
| @@ -192,6 +196,150 @@ def test_condition_validate_rule_row(client, live_server): | ||||
|     ) | ||||
|     assert res.status_code == 200 | ||||
|     assert b'false' in res.data | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("ui.form_delete", uuid="all"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|  | ||||
|  | ||||
| # If there was only a change in the whitespacing, then we shouldnt have a change detected | ||||
| def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
|      So let's see what happens.  <br> | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Check it saved | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|     ) | ||||
|  | ||||
|     # Assert the word count is counted correctly | ||||
|     assert b'<td>13</td>' in res.data | ||||
|  | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("ui.form_delete", uuid="all"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
| # If there was only a change in the whitespacing, then we shouldnt have a change detected | ||||
| def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("""<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
|      So let's see what happens.  <br> | ||||
|      </body> | ||||
|      </html> | ||||
|     """) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_views.form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Watch added in Paused state, saving will unpause" in res.data | ||||
|  | ||||
|     uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             "fetch_backend": "html_requests", | ||||
|             "conditions_match_logic": "ALL",  # ALL = AND logic | ||||
|             "conditions-0-field": "levenshtein_ratio", | ||||
|             "conditions-0-operator": "<", | ||||
|             "conditions-0-value": "0.8" # needs to be more of a diff to trigger a change | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"unpaused" in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|  | ||||
|     # Check the content saved initially, even tho a condition was set - this is the first snapshot so shouldnt be affected by conditions | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_views.preview_page", uuid=uuid), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b'Which is across multiple lines' in res.data | ||||
|  | ||||
|  | ||||
|     ############### Now change it a LITTLE bit... | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("""<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
|      So let's see what happenxxxxxxxxx.  <br> | ||||
|      </body> | ||||
|      </html> | ||||
|     """) | ||||
|  | ||||
|     res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     assert b'Queued 1 watch for rechecking.' in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data #because this will be like 0.90 not 0.8 threshold | ||||
|  | ||||
|     ############### Now change it a MORE THAN 50% | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some sxxxx<br> | ||||
|      <p>Which is across a lines</p> | ||||
|      <br> | ||||
|      ok.  <br> | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     assert b'Queued 1 watch for rechecking.' in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("ui.form_delete", uuid="all"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -32,13 +32,14 @@ def test_strip_text_func(): | ||||
|     stripped_content = html_tools.strip_ignore_text(test_content, ignore) | ||||
|     assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens." | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
| def set_original_ignore_response(ver_stamp="123"): | ||||
|     test_return_data = f"""<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
|      So let's see what happens.  <br> | ||||
|      <link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/> | ||||
|      </body> | ||||
|      </html> | ||||
|  | ||||
| @@ -48,13 +49,14 @@ def set_original_ignore_response(): | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def set_modified_original_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
| def set_modified_original_ignore_response(ver_stamp="123"): | ||||
|     test_return_data = f"""<html> | ||||
|        <body> | ||||
|      Some NEW nice initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
|      So let's see what happens.  <br> | ||||
|      <link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/> | ||||
|      <p>new ignore stuff</p> | ||||
|      <p>blah</p> | ||||
|      </body> | ||||
| @@ -67,14 +69,15 @@ def set_modified_original_ignore_response(): | ||||
|  | ||||
|  | ||||
| # Is the same but includes ZZZZZ, 'ZZZZZ' is the last line in ignore_text | ||||
| def set_modified_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
| def set_modified_ignore_response(ver_stamp="123"): | ||||
|     test_return_data = f"""<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <P>ZZZZz</P> | ||||
|      <br> | ||||
|      So let's see what happens.  <br> | ||||
|      <link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/> | ||||
|      </body> | ||||
|      </html> | ||||
|  | ||||
| @@ -165,9 +168,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| # When adding some ignore text, it should not trigger a change, even if something else on that line changes | ||||
| def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage): | ||||
|     #live_server_setup(live_server) | ||||
|     ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ" | ||||
| def _run_test_global_ignore(client, as_source=False, extra_ignore=""): | ||||
|     ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ\r\n"+extra_ignore | ||||
|  | ||||
|     set_original_ignore_response() | ||||
|  | ||||
|     # Goto the settings page, add our ignore text | ||||
| @@ -186,6 +189,10 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     if as_source: | ||||
|         # Switch to source mode so we can test that too! | ||||
|         test_url = "source:"+test_url | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
| @@ -203,12 +210,15 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     # Check it saved | ||||
|     res = client.get( | ||||
|         url_for("settings.settings_page"), | ||||
|     ) | ||||
|     assert bytes(ignore_text.encode('utf-8')) in res.data | ||||
|  | ||||
|     for i in ignore_text.splitlines(): | ||||
|         assert bytes(i.encode('utf-8')) in res.data | ||||
|  | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
| @@ -221,7 +231,8 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|  | ||||
|     # Make a change which includes the ignore text, it should be ignored and no 'change' triggered | ||||
|     # It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list | ||||
|     set_modified_ignore_response() | ||||
|     # And tweaks the ver_stamp which should be picked up by global regex ignore | ||||
|     set_modified_ignore_response(ver_stamp=time.time()) | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
| @@ -243,3 +254,11 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| def test_check_global_ignore_text_functionality(client, live_server): | ||||
|     #live_server_setup(live_server) | ||||
|     _run_test_global_ignore(client, as_source=False) | ||||
|  | ||||
| def test_check_global_ignore_text_functionality_as_source(client, live_server): | ||||
|     #live_server_setup(live_server) | ||||
|     _run_test_global_ignore(client, as_source=True, extra_ignore='/\?v=\d/') | ||||
|   | ||||
| @@ -9,15 +9,20 @@ services: | ||||
| #        - ./proxies.json:/datastore/proxies.json | ||||
|  | ||||
|   #    environment: | ||||
|   #        Default listening port, can also be changed with the -p option | ||||
|   #        Default listening port, can also be changed with the -p option (not to be confused with ports: below) | ||||
|   #      - PORT=5000 | ||||
|   # | ||||
|   #        Log levels are in descending order. (TRACE is the most detailed one) | ||||
|   #        Log output levels: TRACE, DEBUG(default), INFO, SUCCESS, WARNING, ERROR, CRITICAL | ||||
|   #      - LOGGER_LEVEL=TRACE | ||||
|   # | ||||
|   #       Alternative WebDriver/selenium URL, do not use "'s or 's! | ||||
|   #      - WEBDRIVER_URL=http://browser-chrome:4444/wd/hub | ||||
|   # | ||||
|   #       Uncomment below and the "sockpuppetbrowser" to use a real Chrome browser (It uses the "playwright" protocol) | ||||
|   #      - PLAYWRIGHT_DRIVER_URL=ws://browser-sockpuppet-chrome:3000 | ||||
|   # | ||||
|   # | ||||
|   #       Alternative WebDriver/selenium URL, do not use "'s or 's! (old, deprecated, does not support screenshots very well) | ||||
|   #      - WEBDRIVER_URL=http://browser-selenium-chrome:4444/wd/hub | ||||
|   # | ||||
|   #       WebDriver proxy settings webdriver_proxyType, webdriver_ftpProxy, webdriver_noProxy, | ||||
|   #                                webdriver_proxyAutoconfigUrl, webdriver_autodetect, | ||||
| @@ -25,9 +30,6 @@ services: | ||||
|   # | ||||
|   #             https://selenium-python.readthedocs.io/api.html#module-selenium.webdriver.common.proxy | ||||
|   # | ||||
|   #       Alternative target "Chrome" Playwright URL, do not use "'s or 's! | ||||
|   #       "Playwright" is a driver/librarythat allows changedetection to talk to a Chrome or similar browser. | ||||
|   #      - PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 | ||||
|   # | ||||
|   #       Playwright proxy settings playwright_proxy_server, playwright_proxy_bypass, playwright_proxy_username, playwright_proxy_password | ||||
|   # | ||||
| @@ -43,7 +45,7 @@ services: | ||||
|   #        Base URL of your changedetection.io install (Added to the notification alert) | ||||
|   #      - BASE_URL=https://mysite.com | ||||
|   #        Respect proxy_pass type settings, `proxy_set_header Host "localhost";` and `proxy_set_header X-Forwarded-Prefix /app;` | ||||
|   #        More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy-sub-directory | ||||
|   #        More here https://github.com/dgtlmoon/changedetection.io/wiki/Running-changedetection.io-behind-a-reverse-proxy | ||||
|   #      - USE_X_SETTINGS=1 | ||||
|   # | ||||
|   #        Hides the `Referer` header so that monitored websites can't see the changedetection.io hostname. | ||||
| @@ -70,7 +72,7 @@ services: | ||||
|    | ||||
|       # Comment out ports: when using behind a reverse proxy , enable networks: etc. | ||||
|       ports: | ||||
|         - 5000:5000 | ||||
|         - 127.0.0.1:5000:5000 | ||||
|       restart: unless-stopped | ||||
|  | ||||
|      # Used for fetching pages via WebDriver+Chrome where you need Javascript support. | ||||
| @@ -80,14 +82,14 @@ services: | ||||
|      # If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that | ||||
|      # and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used) | ||||
| #      depends_on: | ||||
| #          sockpuppetbrowser: | ||||
| #          browser-sockpuppet-chrome: | ||||
| #              condition: service_started | ||||
|  | ||||
|  | ||||
|      # Sockpuppetbrowser is basically chrome wrapped in an API for allowing fast fetching of web-pages. | ||||
|      # RECOMMENDED FOR FETCHING PAGES WITH CHROME, be sure to enable the "PLAYWRIGHT_DRIVER_URL" env variable in the main changedetection container | ||||
| #    sockpuppetbrowser: | ||||
| #        hostname: sockpuppetbrowser | ||||
| #    browser-sockpuppet-chrome: | ||||
| #        hostname: browser-sockpuppet-chrome | ||||
| #        image: dgtlmoon/sockpuppetbrowser:latest | ||||
| #        cap_add: | ||||
| #            - SYS_ADMIN | ||||
| @@ -102,14 +104,18 @@ services: | ||||
|      # Used for fetching pages via Playwright+Chrome where you need Javascript support. | ||||
|      # Note: Works well but is deprecated, does not fetch full page screenshots (doesnt work with Visual Selector) | ||||
|      #       Does not report status codes (200, 404, 403) and other issues | ||||
| #    browser-chrome: | ||||
| #        hostname: browser-chrome | ||||
| #    browser-selenium-chrome: | ||||
| #        hostname: browser-selenium-chrome | ||||
| #        image: selenium/standalone-chrome:4 | ||||
| #        environment: | ||||
| #            - VNC_NO_PASSWORD=1 | ||||
| #            - SCREEN_WIDTH=1920 | ||||
| #            - SCREEN_HEIGHT=1080 | ||||
| #            - SCREEN_DEPTH=24 | ||||
| #          CHROME_OPTIONS: | | ||||
| #            --window-size=1280,1024 | ||||
| #            --headless | ||||
| #            --disable-gpu | ||||
| #        volumes: | ||||
| #            # Workaround to avoid the browser crashing inside a docker container | ||||
| #            # See https://github.com/SeleniumHQ/docker-selenium#quick-start | ||||
|   | ||||
										
											
												File diff suppressed because one or more lines are too long
											
										
									
								
							| @@ -5,13 +5,13 @@ | ||||
|   <meta name="description" content="Manage your changedetection.io watches via API, requires the `x-api-key` header which is found in the settings UI."> | ||||
|   <meta name="viewport" content="width=device-width, initial-scale=1.0"> | ||||
|   <meta http-equiv="Content-Type" content="text/html; charset=UTF-8"> | ||||
|   <link href="assets/bootstrap.min.css?v=1701595483622" rel="stylesheet" media="screen"> | ||||
|   <link href="assets/prism.css?v=1701595483622" rel="stylesheet" /> | ||||
|   <link href="assets/main.css?v=1701595483622" rel="stylesheet" media="screen, print"> | ||||
|   <link href="assets/favicon.ico?v=1701595483622" rel="icon" type="image/x-icon"> | ||||
|   <link href="assets/apple-touch-icon.png?v=1701595483622" rel="apple-touch-icon" sizes="180x180"> | ||||
|   <link href="assets/favicon-32x32.png?v=1701595483622" rel="icon" type="image/png" sizes="32x32"> | ||||
|   <link href="assets/favicon-16x16.png?v=1701595483622" rel="icon" type="image/png" sizes="16x16"> | ||||
|   <link href="assets/bootstrap.min.css?v=1744573753999" rel="stylesheet" media="screen"> | ||||
|   <link href="assets/prism.css?v=1744573753999" rel="stylesheet" /> | ||||
|   <link href="assets/main.css?v=1744573753999" rel="stylesheet" media="screen, print"> | ||||
|   <link href="assets/favicon.ico?v=1744573753999" rel="icon" type="image/x-icon"> | ||||
|   <link href="assets/apple-touch-icon.png?v=1744573753999" rel="apple-touch-icon" sizes="180x180"> | ||||
|   <link href="assets/favicon-32x32.png?v=1744573753999" rel="icon" type="image/png" sizes="32x32"> | ||||
|   <link href="assets/favicon-16x16.png?v=1744573753999" rel="icon" type="image/png" sizes="16x16"> | ||||
| </head> | ||||
|  | ||||
| <body class="container-fluid"> | ||||
| @@ -928,6 +928,6 @@ | ||||
|   </div> | ||||
| </div> | ||||
|  | ||||
| <script src="assets/main.bundle.js?v=1701595483622"></script> | ||||
| <script src="assets/main.bundle.js?v=1744573753999"></script> | ||||
| </body> | ||||
| </html> | ||||
|   | ||||
| @@ -53,7 +53,7 @@ lxml >=4.8.0,<6,!=5.2.0,!=5.2.1 | ||||
| # XPath 2.0-3.1 support - 4.2.0 broke something? | ||||
| elementpath==4.1.5 | ||||
|  | ||||
| selenium~=4.14.0 | ||||
| selenium~=4.31.0 | ||||
|  | ||||
| # https://github.com/pallets/werkzeug/issues/2985 | ||||
| # Maybe related to pytest? | ||||
| @@ -90,6 +90,8 @@ extruct | ||||
| # For cleaning up unknown currency formats | ||||
| babel | ||||
|  | ||||
| levenshtein | ||||
|  | ||||
| # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096 | ||||
| greenlet >= 3.0.3 | ||||
|  | ||||
| @@ -110,3 +112,6 @@ pluggy ~= 1.5 | ||||
|  | ||||
| # Needed for testing, cross-platform for process and system monitoring | ||||
| psutil==7.0.0 | ||||
|  | ||||
| ruff >= 0.11.2 | ||||
| pre_commit >= 4.2.0 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user