mirror of
				https://github.com/dgtlmoon/changedetection.io.git
				synced 2025-10-30 22:27:52 +00:00 
			
		
		
		
	Compare commits
	
		
			19 Commits
		
	
	
		
			3126-visua
			...
			selenium-p
		
	
	| Author | SHA1 | Date | |
|---|---|---|---|
|   | 78f3f2b26a | ||
|   | f57bc10973 | ||
|   | 535ee97ef7 | ||
|   | b2923b8c3a | ||
|   | d2e8f822d6 | ||
|   | 5fd8200fd9 | ||
|   | d0da8c9825 | ||
|   | fd7574d21b | ||
|   | c70706a27b | ||
|   | 968c364999 | ||
|   | 031cb76b7d | ||
|   | af568d064c | ||
|   | a75f57de43 | ||
|   | 72a1c3dda1 | ||
|   | ffde79ecac | ||
|   | 66ad43b2df | ||
|   | 6b0e56ca80 | ||
|   | 5a2d84d8b4 | ||
|   | a941156f26 | 
							
								
								
									
										15
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										15
									
								
								.github/workflows/test-only.yml
									
									
									
									
										vendored
									
									
								
							| @@ -8,13 +8,13 @@ jobs: | ||||
|     runs-on: ubuntu-latest | ||||
|     steps: | ||||
|       - uses: actions/checkout@v4 | ||||
|       - name: Lint with flake8 | ||||
|       - name: Lint with Ruff | ||||
|         run: | | ||||
|           pip3 install flake8 | ||||
|           # stop the build if there are Python syntax errors or undefined names | ||||
|           flake8 . --count --select=E9,F63,F7,F82 --show-source --statistics | ||||
|           # exit-zero treats all errors as warnings. The GitHub editor is 127 chars wide | ||||
|           flake8 . --count --exit-zero --max-complexity=10 --max-line-length=127 --statistics | ||||
|           pip install ruff | ||||
|           # Check for syntax errors and undefined names | ||||
|           ruff check . --select E9,F63,F7,F82 | ||||
|           # Complete check with errors treated as warnings | ||||
|           ruff check . --exit-zero | ||||
|  | ||||
|   test-application-3-10: | ||||
|     needs: lint-code | ||||
| @@ -41,5 +41,4 @@ jobs: | ||||
|     uses: ./.github/workflows/test-stack-reusable-workflow.yml | ||||
|     with: | ||||
|       python-version: '3.13' | ||||
|       skip-pypuppeteer: true | ||||
|        | ||||
|       skip-pypuppeteer: true | ||||
| @@ -172,8 +172,8 @@ jobs: | ||||
|           curl --retry-connrefused --retry 6  -s -g -6 "http://[::1]:5556"|grep -q checkbox-uuid | ||||
|  | ||||
|           # Check whether TRACE log is enabled. | ||||
|           # Also, check whether TRACE is came from STDERR | ||||
|           docker logs test-changedetectionio 2>&1 1>/dev/null | grep 'TRACE log is enabled' || exit 1 | ||||
|           # Also, check whether TRACE came from STDOUT | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'TRACE log is enabled' || exit 1 | ||||
|           # Check whether DEBUG came from STDOUT | ||||
|           docker logs test-changedetectionio 2>/dev/null | grep 'DEBUG' || exit 1 | ||||
|  | ||||
|   | ||||
							
								
								
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							
							
						
						
									
										1
									
								
								.gitignore
									
									
									
									
										vendored
									
									
								
							| @@ -16,6 +16,7 @@ dist/ | ||||
| .env | ||||
| .venv/ | ||||
| venv/ | ||||
| .python-version | ||||
|  | ||||
| # IDEs | ||||
| .idea | ||||
|   | ||||
							
								
								
									
										9
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										9
									
								
								.pre-commit-config.yaml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,9 @@ | ||||
| repos: | ||||
|   - repo: https://github.com/astral-sh/ruff-pre-commit | ||||
|     rev: v0.11.2 | ||||
|     hooks: | ||||
|       # Lint (and apply safe fixes) | ||||
|       - id: ruff | ||||
|         args: [--fix] | ||||
|       # Format | ||||
|       - id: ruff-format | ||||
							
								
								
									
										48
									
								
								.ruff.toml
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										48
									
								
								.ruff.toml
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,48 @@ | ||||
| # Minimum supported version | ||||
| target-version = "py310" | ||||
|  | ||||
| # Formatting options | ||||
| line-length = 100 | ||||
| indent-width = 4 | ||||
|  | ||||
| exclude = [ | ||||
|     "__pycache__", | ||||
|     ".eggs", | ||||
|     ".git", | ||||
|     ".tox", | ||||
|     ".venv", | ||||
|     "*.egg-info", | ||||
|     "*.pyc", | ||||
| ] | ||||
|  | ||||
| [lint] | ||||
| # https://docs.astral.sh/ruff/rules/ | ||||
| select = [ | ||||
|     "B", # flake8-bugbear | ||||
|     "B9", | ||||
|     "C",  | ||||
|     "E", # pycodestyle | ||||
|     "F", # Pyflakes | ||||
|     "I", # isort | ||||
|     "N", # pep8-naming | ||||
|     "UP", # pyupgrade | ||||
|     "W", # pycodestyle | ||||
| ] | ||||
| ignore = [ | ||||
|     "B007", # unused-loop-control-variable | ||||
|     "B909", # loop-iterator-mutation | ||||
|     "E203", # whitespace-before-punctuation | ||||
|     "E266", # multiple-leading-hashes-for-block-comment | ||||
|     "E501", # line-too-long | ||||
|     "F403", # undefined-local-with-import-star | ||||
|     "N802", # invalid-function-name | ||||
|     "N806", # non-lowercase-variable-in-function | ||||
|     "N815", # mixed-case-variable-in-class-scope | ||||
| ] | ||||
|  | ||||
| [lint.mccabe] | ||||
| max-complexity = 12 | ||||
|  | ||||
| [format] | ||||
| indent-style = "space" | ||||
| quote-style = "preserve" | ||||
							
								
								
									
										98
									
								
								changedetectionio/PLUGIN_README.md
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										98
									
								
								changedetectionio/PLUGIN_README.md
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,98 @@ | ||||
| # Creating Plugins for changedetection.io | ||||
|  | ||||
| This document describes how to create plugins for changedetection.io. Plugins can be used to extend the functionality of the application in various ways. | ||||
|  | ||||
| ## Plugin Types | ||||
|  | ||||
| ### UI Stats Tab Plugins | ||||
|  | ||||
| These plugins can add content to the Stats tab in the Edit page. This is useful for adding custom statistics or visualizations about a watch. | ||||
|  | ||||
| #### Creating a UI Stats Tab Plugin | ||||
|  | ||||
| 1. Create a Python file in a directory that will be loaded by the plugin system. | ||||
|  | ||||
| 2. Use the `global_hookimpl` decorator to implement the `ui_edit_stats_extras` hook: | ||||
|  | ||||
| ```python | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add custom content to the stats tab""" | ||||
|     # Calculate or retrieve your stats | ||||
|     my_stat = calculate_something(watch) | ||||
|      | ||||
|     # Return HTML content as a string | ||||
|     html = f""" | ||||
|     <div class="my-plugin-stats"> | ||||
|         <h4>My Plugin Statistics</h4> | ||||
|         <p>My statistic: {my_stat}</p> | ||||
|     </div> | ||||
|     """ | ||||
|     return html | ||||
| ``` | ||||
|  | ||||
| 3. The HTML you return will be included in the Stats tab. | ||||
|  | ||||
| ## Plugin Loading | ||||
|  | ||||
| Plugins can be loaded from: | ||||
|  | ||||
| 1. Built-in plugin directories in the codebase | ||||
| 2. External packages using setuptools entry points | ||||
|  | ||||
| To add a new plugin directory, modify the `plugin_dirs` dictionary in `pluggy_interface.py`. | ||||
|  | ||||
| ## Example Plugin | ||||
|  | ||||
| Here's a simple example of a plugin that adds a word count statistic to the Stats tab: | ||||
|  | ||||
| ```python | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
| def count_words_in_history(watch): | ||||
|     """Count words in the latest snapshot""" | ||||
|     try: | ||||
|         if not watch.history.keys(): | ||||
|             return 0 | ||||
|              | ||||
|         latest_key = list(watch.history.keys())[-1] | ||||
|         latest_content = watch.get_history_snapshot(latest_key) | ||||
|         return len(latest_content.split()) | ||||
|     except Exception as e: | ||||
|         logger.error(f"Error counting words: {str(e)}") | ||||
|         return 0 | ||||
|  | ||||
| @global_hookimpl | ||||
| def ui_edit_stats_extras(watch): | ||||
|     """Add word count to the Stats tab""" | ||||
|     word_count = count_words_in_history(watch) | ||||
|      | ||||
|     html = f""" | ||||
|     <div class="word-count-stats"> | ||||
|         <h4>Content Analysis</h4> | ||||
|         <table class="pure-table"> | ||||
|             <tbody> | ||||
|                 <tr> | ||||
|                     <td>Word count (latest snapshot)</td> | ||||
|                     <td>{word_count}</td> | ||||
|                 </tr> | ||||
|             </tbody> | ||||
|         </table> | ||||
|     </div> | ||||
|     """ | ||||
|     return html | ||||
| ``` | ||||
|  | ||||
| ## Testing Your Plugin | ||||
|  | ||||
| 1. Place your plugin in one of the directories scanned by the plugin system | ||||
| 2. Restart changedetection.io | ||||
| 3. Go to the Edit page of a watch and check the Stats tab to see your content | ||||
| @@ -2,7 +2,7 @@ | ||||
|  | ||||
| # Read more https://github.com/dgtlmoon/changedetection.io/wiki | ||||
|  | ||||
| __version__ = '0.49.14' | ||||
| __version__ = '0.49.15' | ||||
|  | ||||
| from changedetectionio.strtobool import strtobool | ||||
| from json.decoder import JSONDecodeError | ||||
| @@ -106,7 +106,7 @@ def main(): | ||||
|     # Without this, a logger will be duplicated | ||||
|     logger.remove() | ||||
|     try: | ||||
|         log_level_for_stdout = { 'DEBUG', 'SUCCESS' } | ||||
|         log_level_for_stdout = { 'TRACE', 'DEBUG', 'INFO', 'SUCCESS' } | ||||
|         logger.configure(handlers=[ | ||||
|             {"sink": sys.stdout, "level": logger_level, | ||||
|              "filter" : lambda record: record['level'].name in log_level_for_stdout}, | ||||
|   | ||||
| @@ -53,14 +53,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|         a = "?" if not '?' in base_url else '&' | ||||
|         base_url += a + f"timeout={keepalive_ms}" | ||||
|  | ||||
|         try: | ||||
|             browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url) | ||||
|         except Exception as e: | ||||
|             if 'ECONNREFUSED' in str(e): | ||||
|                 return make_response('Unable to start the Playwright Browser session, is it running?', 401) | ||||
|             else: | ||||
|                 # Other errors, bad URL syntax, bad reply etc | ||||
|                 return make_response(str(e), 401) | ||||
|         browsersteps_start_session['browser'] = io_interface_context.chromium.connect_over_cdp(base_url) | ||||
|  | ||||
|         proxy_id = datastore.get_preferred_proxy_for_watch(uuid=watch_uuid) | ||||
|         proxy = None | ||||
| @@ -109,7 +102,16 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|  | ||||
|         logger.debug("Starting connection with playwright") | ||||
|         logger.debug("browser_steps.py connecting") | ||||
|         browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid) | ||||
|  | ||||
|         try: | ||||
|             browsersteps_sessions[browsersteps_session_id] = start_browsersteps_session(watch_uuid) | ||||
|         except Exception as e: | ||||
|             if 'ECONNREFUSED' in str(e): | ||||
|                 return make_response('Unable to start the Playwright Browser session, is sockpuppetbrowser running? Network configuration is OK?', 401) | ||||
|             else: | ||||
|                 # Other errors, bad URL syntax, bad reply etc | ||||
|                 return make_response(str(e), 401) | ||||
|  | ||||
|         logger.debug("Starting connection with playwright - done") | ||||
|         return {'browsersteps_session_id': browsersteps_session_id} | ||||
|  | ||||
| @@ -166,9 +168,7 @@ def construct_blueprint(datastore: ChangeDetectionStore): | ||||
|             step_optional_value = request.form.get('optional_value') | ||||
|             is_last_step = strtobool(request.form.get('is_last_step')) | ||||
|  | ||||
|             # @todo try.. accept.. nice errors not popups.. | ||||
|             try: | ||||
|  | ||||
|                 browsersteps_sessions[browsersteps_session_id]['browserstepper'].call_action(action_name=step_operation, | ||||
|                                          selector=step_selector, | ||||
|                                          optional_value=step_optional_value) | ||||
|   | ||||
| @@ -1,6 +1,8 @@ | ||||
| import os | ||||
| import time | ||||
| import re | ||||
| import sys | ||||
| import traceback | ||||
| from random import randint | ||||
| from loguru import logger | ||||
|  | ||||
| @@ -35,6 +37,7 @@ browser_step_ui_config = {'Choose one': '0 0', | ||||
|                           'Make all child elements visible': '1 0', | ||||
|                           'Press Enter': '0 0', | ||||
|                           'Select by label': '1 1', | ||||
|                           '<select> by option text': '1 1', | ||||
|                           'Scroll down': '0 0', | ||||
|                           'Uncheck checkbox': '1 0', | ||||
|                           'Wait for seconds': '0 1', | ||||
| @@ -54,7 +57,6 @@ browser_step_ui_config = {'Choose one': '0 0', | ||||
| class steppable_browser_interface(): | ||||
|     page = None | ||||
|     start_url = None | ||||
|  | ||||
|     action_timeout = 10 * 1000 | ||||
|  | ||||
|     def __init__(self, start_url): | ||||
| @@ -62,6 +64,10 @@ class steppable_browser_interface(): | ||||
|  | ||||
|     # Convert and perform "Click Button" for example | ||||
|     def call_action(self, action_name, selector=None, optional_value=None): | ||||
|         if self.page is None: | ||||
|             logger.warning("Cannot call action on None page object") | ||||
|             return | ||||
|              | ||||
|         now = time.time() | ||||
|         call_action_name = re.sub('[^0-9a-zA-Z]+', '_', action_name.lower()) | ||||
|         if call_action_name == 'choose_one': | ||||
| @@ -72,28 +78,33 @@ class steppable_browser_interface(): | ||||
|         if selector and selector.startswith('/') and not selector.startswith('//'): | ||||
|             selector = "xpath=" + selector | ||||
|  | ||||
|         # Check if action handler exists | ||||
|         if not hasattr(self, "action_" + call_action_name): | ||||
|             logger.warning(f"Action handler for '{call_action_name}' not found") | ||||
|             return | ||||
|              | ||||
|         action_handler = getattr(self, "action_" + call_action_name) | ||||
|  | ||||
|         # Support for Jinja2 variables in the value and selector | ||||
|  | ||||
|         if selector and ('{%' in selector or '{{' in selector): | ||||
|             selector = jinja_render(template_str=selector) | ||||
|  | ||||
|         if optional_value and ('{%' in optional_value or '{{' in optional_value): | ||||
|             optional_value = jinja_render(template_str=optional_value) | ||||
|  | ||||
|  | ||||
|         action_handler(selector, optional_value) | ||||
|         # Safely wait for timeout | ||||
|         self.page.wait_for_timeout(1.5 * 1000) | ||||
|         logger.debug(f"Call action done in {time.time()-now:.2f}s") | ||||
|  | ||||
|     def action_goto_url(self, selector=None, value=None): | ||||
|         # self.page.set_viewport_size({"width": 1280, "height": 5000}) | ||||
|         if not value: | ||||
|             logger.warning("No URL provided for goto_url action") | ||||
|             return None | ||||
|              | ||||
|         now = time.time() | ||||
|         response = self.page.goto(value, timeout=0, wait_until='load') | ||||
|         # Should be the same as the puppeteer_fetch.js methods, means, load with no timeout set (skip timeout) | ||||
|         #and also wait for seconds ? | ||||
|         #await page.waitForTimeout(1000); | ||||
|         #await page.waitForTimeout(extra_wait_ms); | ||||
|         logger.debug(f"Time to goto URL {time.time()-now:.2f}s") | ||||
|         return response | ||||
|  | ||||
| @@ -103,36 +114,40 @@ class steppable_browser_interface(): | ||||
|  | ||||
|     def action_click_element_containing_text(self, selector=None, value=''): | ||||
|         logger.debug("Clicking element containing text") | ||||
|         if not len(value.strip()): | ||||
|         if not value or not len(value.strip()): | ||||
|             return | ||||
|              | ||||
|         elem = self.page.get_by_text(value) | ||||
|         if elem.count(): | ||||
|             elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|  | ||||
|  | ||||
|     def action_click_element_containing_text_if_exists(self, selector=None, value=''): | ||||
|         logger.debug("Clicking element containing text if exists") | ||||
|         if not len(value.strip()): | ||||
|         if not value or not len(value.strip()): | ||||
|             return | ||||
|              | ||||
|         elem = self.page.get_by_text(value) | ||||
|         logger.debug(f"Clicking element containing text - {elem.count()} elements found") | ||||
|         if elem.count(): | ||||
|             elem.first.click(delay=randint(200, 500), timeout=self.action_timeout) | ||||
|         else: | ||||
|             return | ||||
|                  | ||||
|  | ||||
|     def action_enter_text_in_field(self, selector, value): | ||||
|         if not len(selector.strip()): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|         self.page.fill(selector, value, timeout=self.action_timeout) | ||||
|  | ||||
|     def action_execute_js(self, selector, value): | ||||
|         response = self.page.evaluate(value) | ||||
|         return response | ||||
|         if not value: | ||||
|             return None | ||||
|              | ||||
|         return self.page.evaluate(value) | ||||
|  | ||||
|     def action_click_element(self, selector, value): | ||||
|         logger.debug("Clicking element") | ||||
|         if not len(selector.strip()): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|         self.page.click(selector=selector, timeout=self.action_timeout + 20 * 1000, delay=randint(200, 500)) | ||||
| @@ -140,24 +155,38 @@ class steppable_browser_interface(): | ||||
|     def action_click_element_if_exists(self, selector, value): | ||||
|         import playwright._impl._errors as _api_types | ||||
|         logger.debug("Clicking element if exists") | ||||
|         if not len(selector.strip()): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|              | ||||
|         try: | ||||
|             self.page.click(selector, timeout=self.action_timeout, delay=randint(200, 500)) | ||||
|         except _api_types.TimeoutError as e: | ||||
|         except _api_types.TimeoutError: | ||||
|             return | ||||
|         except _api_types.Error as e: | ||||
|         except _api_types.Error: | ||||
|             # Element was there, but the page redrew and now it's long gone | ||||
|             return | ||||
|                  | ||||
|  | ||||
|     def action_click_x_y(self, selector, value): | ||||
|         if not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value): | ||||
|             raise Exception("'Click X,Y' step should be in the format of '100 , 90'") | ||||
|         if not value or not re.match(r'^\s?\d+\s?,\s?\d+\s?$', value): | ||||
|             logger.warning("'Click X,Y' step should be in the format of '100 , 90'") | ||||
|             return | ||||
|  | ||||
|         x, y = value.strip().split(',') | ||||
|         x = int(float(x.strip())) | ||||
|         y = int(float(y.strip())) | ||||
|         self.page.mouse.click(x=x, y=y, delay=randint(200, 500)) | ||||
|         try: | ||||
|             x, y = value.strip().split(',') | ||||
|             x = int(float(x.strip())) | ||||
|             y = int(float(y.strip())) | ||||
|              | ||||
|             self.page.mouse.click(x=x, y=y, delay=randint(200, 500)) | ||||
|                  | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error parsing x,y coordinates: {str(e)}") | ||||
|  | ||||
|     def action__select_by_option_text(self, selector, value): | ||||
|         if not selector or not len(selector.strip()): | ||||
|             return | ||||
|  | ||||
|         self.page.select_option(selector, label=value, timeout=self.action_timeout) | ||||
|  | ||||
|     def action_scroll_down(self, selector, value): | ||||
|         # This doesn't work on some sites for some reason | ||||
| @@ -165,23 +194,42 @@ class steppable_browser_interface(): | ||||
|         self.page.wait_for_timeout(1000) | ||||
|  | ||||
|     def action_wait_for_seconds(self, selector, value): | ||||
|         self.page.wait_for_timeout(float(value.strip()) * 1000) | ||||
|         try: | ||||
|             seconds = float(value.strip()) if value else 1.0 | ||||
|             self.page.wait_for_timeout(seconds * 1000) | ||||
|         except (ValueError, TypeError) as e: | ||||
|             logger.error(f"Invalid value for wait_for_seconds: {str(e)}") | ||||
|  | ||||
|     def action_wait_for_text(self, selector, value): | ||||
|         if not value: | ||||
|             return | ||||
|              | ||||
|         import json | ||||
|         v = json.dumps(value) | ||||
|         self.page.wait_for_function(f'document.querySelector("body").innerText.includes({v});', timeout=30000) | ||||
|         self.page.wait_for_function( | ||||
|             f'document.querySelector("body").innerText.includes({v});', | ||||
|             timeout=30000 | ||||
|         ) | ||||
|              | ||||
|  | ||||
|     def action_wait_for_text_in_element(self, selector, value): | ||||
|         if not selector or not value: | ||||
|             return | ||||
|              | ||||
|         import json | ||||
|         s = json.dumps(selector) | ||||
|         v = json.dumps(value) | ||||
|         self.page.wait_for_function(f'document.querySelector({s}).innerText.includes({v});', timeout=30000) | ||||
|          | ||||
|         self.page.wait_for_function( | ||||
|             f'document.querySelector({s}).innerText.includes({v});', | ||||
|             timeout=30000 | ||||
|         ) | ||||
|  | ||||
|     # @todo - in the future make some popout interface to capture what needs to be set | ||||
|     # https://playwright.dev/python/docs/api/class-keyboard | ||||
|     def action_press_enter(self, selector, value): | ||||
|         self.page.keyboard.press("Enter", delay=randint(200, 500)) | ||||
|              | ||||
|  | ||||
|     def action_press_page_up(self, selector, value): | ||||
|         self.page.keyboard.press("PageUp", delay=randint(200, 500)) | ||||
| @@ -190,17 +238,30 @@ class steppable_browser_interface(): | ||||
|         self.page.keyboard.press("PageDown", delay=randint(200, 500)) | ||||
|  | ||||
|     def action_check_checkbox(self, selector, value): | ||||
|         if not selector: | ||||
|             return | ||||
|  | ||||
|         self.page.locator(selector).check(timeout=self.action_timeout) | ||||
|  | ||||
|     def action_uncheck_checkbox(self, selector, value): | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         self.page.locator(selector).uncheck(timeout=self.action_timeout) | ||||
|              | ||||
|  | ||||
|     def action_remove_elements(self, selector, value): | ||||
|         """Removes all elements matching the given selector from the DOM.""" | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         self.page.locator(selector).evaluate_all("els => els.forEach(el => el.remove())") | ||||
|  | ||||
|     def action_make_all_child_elements_visible(self, selector, value): | ||||
|         """Recursively makes all child elements inside the given selector fully visible.""" | ||||
|         if not selector: | ||||
|             return | ||||
|              | ||||
|         self.page.locator(selector).locator("*").evaluate_all(""" | ||||
|             els => els.forEach(el => { | ||||
|                 el.style.display = 'block';   // Forces it to be displayed | ||||
| @@ -224,7 +285,9 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|     # bump and kill this if idle after X sec | ||||
|     age_start = 0 | ||||
|     headers = {} | ||||
|  | ||||
|     # Track if resources are properly cleaned up | ||||
|     _is_cleaned_up = False | ||||
|      | ||||
|     # use a special driver, maybe locally etc | ||||
|     command_executor = os.getenv( | ||||
|         "PLAYWRIGHT_BROWSERSTEPS_DRIVER_URL" | ||||
| @@ -243,9 +306,14 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|         self.age_start = time.time() | ||||
|         self.playwright_browser = playwright_browser | ||||
|         self.start_url = start_url | ||||
|         self._is_cleaned_up = False | ||||
|         if self.context is None: | ||||
|             self.connect(proxy=proxy) | ||||
|  | ||||
|     def __del__(self): | ||||
|         # Ensure cleanup happens if object is garbage collected | ||||
|         self.cleanup() | ||||
|  | ||||
|     # Connect and setup a new context | ||||
|     def connect(self, proxy=None): | ||||
|         # Should only get called once - test that | ||||
| @@ -264,31 +332,74 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|             user_agent=manage_user_agent(headers=self.headers), | ||||
|         ) | ||||
|  | ||||
|  | ||||
|         self.page = self.context.new_page() | ||||
|  | ||||
|         # self.page.set_default_navigation_timeout(keep_open) | ||||
|         self.page.set_default_timeout(keep_open) | ||||
|         # @todo probably this doesnt work | ||||
|         self.page.on( | ||||
|             "close", | ||||
|             self.mark_as_closed, | ||||
|         ) | ||||
|         # Set event handlers | ||||
|         self.page.on("close", self.mark_as_closed) | ||||
|         # Listen for all console events and handle errors | ||||
|         self.page.on("console", lambda msg: print(f"Browser steps console - {msg.type}: {msg.text} {msg.args}")) | ||||
|  | ||||
|         logger.debug(f"Time to browser setup {time.time()-now:.2f}s") | ||||
|         self.page.wait_for_timeout(1 * 1000) | ||||
|  | ||||
|  | ||||
|     def mark_as_closed(self): | ||||
|         logger.debug("Page closed, cleaning up..") | ||||
|         self.cleanup() | ||||
|  | ||||
|     def cleanup(self): | ||||
|         """Properly clean up all resources to prevent memory leaks""" | ||||
|         if self._is_cleaned_up: | ||||
|             return | ||||
|              | ||||
|         logger.debug("Cleaning up browser steps resources") | ||||
|          | ||||
|         # Clean up page | ||||
|         if hasattr(self, 'page') and self.page is not None: | ||||
|             try: | ||||
|                 # Force garbage collection before closing | ||||
|                 self.page.request_gc() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error during page garbage collection: {str(e)}") | ||||
|                  | ||||
|             try: | ||||
|                 # Remove event listeners before closing | ||||
|                 self.page.remove_listener("close", self.mark_as_closed) | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error removing event listeners: {str(e)}") | ||||
|                  | ||||
|             try: | ||||
|                 self.page.close() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error closing page: {str(e)}") | ||||
|              | ||||
|             self.page = None | ||||
|  | ||||
|         # Clean up context | ||||
|         if hasattr(self, 'context') and self.context is not None: | ||||
|             try: | ||||
|                 self.context.close() | ||||
|             except Exception as e: | ||||
|                 logger.debug(f"Error closing context: {str(e)}") | ||||
|              | ||||
|             self.context = None | ||||
|              | ||||
|         self._is_cleaned_up = True | ||||
|         logger.debug("Browser steps resources cleanup complete") | ||||
|  | ||||
|     @property | ||||
|     def has_expired(self): | ||||
|         if not self.page: | ||||
|         if not self.page or self._is_cleaned_up: | ||||
|             return True | ||||
|  | ||||
|          | ||||
|         # Check if session has expired based on age | ||||
|         max_age_seconds = int(os.getenv("BROWSER_STEPS_MAX_AGE_SECONDS", 60 * 10))  # Default 10 minutes | ||||
|         if (time.time() - self.age_start) > max_age_seconds: | ||||
|             logger.debug(f"Browser steps session expired after {max_age_seconds} seconds") | ||||
|             return True | ||||
|              | ||||
|         return False | ||||
|  | ||||
|     def get_current_state(self): | ||||
|         """Return the screenshot and interactive elements mapping, generally always called after action_()""" | ||||
| @@ -297,36 +408,55 @@ class browsersteps_live_ui(steppable_browser_interface): | ||||
|         # because we for now only run browser steps in playwright mode (not puppeteer mode) | ||||
|         from changedetectionio.content_fetchers.playwright import capture_full_page | ||||
|  | ||||
|         # Safety check - don't proceed if resources are cleaned up | ||||
|         if self._is_cleaned_up or self.page is None: | ||||
|             logger.warning("Attempted to get current state after cleanup") | ||||
|             return (None, None) | ||||
|  | ||||
|         xpath_element_js = importlib.resources.files("changedetectionio.content_fetchers.res").joinpath('xpath_element_scraper.js').read_text() | ||||
|  | ||||
|         now = time.time() | ||||
|         self.page.wait_for_timeout(1 * 1000) | ||||
|  | ||||
|         screenshot = capture_full_page(page=self.page) | ||||
|         screenshot = None | ||||
|         xpath_data = None | ||||
|          | ||||
|         try: | ||||
|             # Get screenshot first | ||||
|             screenshot = capture_full_page(page=self.page) | ||||
|             logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s") | ||||
|  | ||||
|         logger.debug(f"Time to get screenshot from browser {time.time() - now:.2f}s") | ||||
|             # Then get interactive elements | ||||
|             now = time.time() | ||||
|             self.page.evaluate("var include_filters=''") | ||||
|             self.page.request_gc() | ||||
|  | ||||
|         now = time.time() | ||||
|         self.page.evaluate("var include_filters=''") | ||||
|         # Go find the interactive elements | ||||
|         # @todo in the future, something smarter that can scan for elements with .click/focus etc event handlers? | ||||
|             scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' | ||||
|  | ||||
|         self.page.request_gc() | ||||
|             MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|             xpath_data = json.loads(self.page.evaluate(xpath_element_js, { | ||||
|                 "visualselector_xpath_selectors": scan_elements, | ||||
|                 "max_height": MAX_TOTAL_HEIGHT | ||||
|             })) | ||||
|             self.page.request_gc() | ||||
|  | ||||
|         scan_elements = 'a,button,input,select,textarea,i,th,td,p,li,h1,h2,h3,h4,div,span' | ||||
|  | ||||
|         MAX_TOTAL_HEIGHT = int(os.getenv("SCREENSHOT_MAX_HEIGHT", SCREENSHOT_MAX_HEIGHT_DEFAULT)) | ||||
|         xpath_data = json.loads(self.page.evaluate(xpath_element_js, { | ||||
|             "visualselector_xpath_selectors": scan_elements, | ||||
|             "max_height": MAX_TOTAL_HEIGHT | ||||
|         })) | ||||
|         self.page.request_gc() | ||||
|  | ||||
|         # So the JS will find the smallest one first | ||||
|         xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) | ||||
|         logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s") | ||||
|  | ||||
|         # playwright._impl._api_types.Error: Browser closed. | ||||
|         # @todo show some countdown timer? | ||||
|             # Sort elements by size | ||||
|             xpath_data['size_pos'] = sorted(xpath_data['size_pos'], key=lambda k: k['width'] * k['height'], reverse=True) | ||||
|             logger.debug(f"Time to scrape xPath element data in browser {time.time()-now:.2f}s") | ||||
|              | ||||
|         except Exception as e: | ||||
|             logger.error(f"Error getting current state: {str(e)}") | ||||
|             # Attempt recovery - force garbage collection | ||||
|             try: | ||||
|                 self.page.request_gc() | ||||
|             except: | ||||
|                 pass | ||||
|          | ||||
|         # Request garbage collection one final time | ||||
|         try: | ||||
|             self.page.request_gc() | ||||
|         except: | ||||
|             pass | ||||
|              | ||||
|         return (screenshot, xpath_data) | ||||
|  | ||||
|   | ||||
| @@ -233,6 +233,9 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|  | ||||
|             # Only works reliably with Playwright | ||||
|  | ||||
|             # Import the global plugin system | ||||
|             from changedetectionio.pluggy_interface import collect_ui_edit_stats_extras | ||||
|              | ||||
|             template_args = { | ||||
|                 'available_processors': processors.available_processors(), | ||||
|                 'available_timezones': sorted(available_timezones()), | ||||
| @@ -250,6 +253,7 @@ def construct_blueprint(datastore: ChangeDetectionStore, update_q, queuedWatchMe | ||||
|                 'settings_application': datastore.data['settings']['application'], | ||||
|                 'system_has_playwright_configured': os.getenv('PLAYWRIGHT_DRIVER_URL'), | ||||
|                 'system_has_webdriver_configured': os.getenv('WEBDRIVER_URL'), | ||||
|                 'ui_edit_stats_extras': collect_ui_edit_stats_extras(watch), | ||||
|                 'visual_selector_data_ready': datastore.visualselector_data_is_ready(watch_uuid=uuid), | ||||
|                 'timezone_default_config': datastore.data['settings']['application'].get('timezone'), | ||||
|                 'using_global_webdriver_wait': not default['webdriver_delay'], | ||||
|   | ||||
| @@ -5,7 +5,7 @@ from json_logic.builtins import BUILTINS | ||||
| from .exceptions import EmptyConditionRuleRowNotUsable | ||||
| from .pluggy_interface import plugin_manager  # Import the pluggy plugin manager | ||||
| from . import default_plugin | ||||
|  | ||||
| from loguru import logger | ||||
| # List of all supported JSON Logic operators | ||||
| operator_choices = [ | ||||
|     (None, "Choose one - Operator"), | ||||
| @@ -102,12 +102,33 @@ def execute_ruleset_against_all_plugins(current_watch_uuid: str, application_dat | ||||
|         if complete_rules: | ||||
|             # Give all plugins a chance to update the data dict again (that we will test the conditions against) | ||||
|             for plugin in plugin_manager.get_plugins(): | ||||
|                 new_execute_data = plugin.add_data(current_watch_uuid=current_watch_uuid, | ||||
|                                                    application_datastruct=application_datastruct, | ||||
|                                                    ephemeral_data=ephemeral_data) | ||||
|                 try: | ||||
|                     import concurrent.futures | ||||
|                     import time | ||||
|                      | ||||
|                     with concurrent.futures.ThreadPoolExecutor() as executor: | ||||
|                         future = executor.submit( | ||||
|                             plugin.add_data, | ||||
|                             current_watch_uuid=current_watch_uuid, | ||||
|                             application_datastruct=application_datastruct, | ||||
|                             ephemeral_data=ephemeral_data | ||||
|                         ) | ||||
|                         logger.debug(f"Trying plugin {plugin}....") | ||||
|  | ||||
|                 if new_execute_data and isinstance(new_execute_data, dict): | ||||
|                     EXECUTE_DATA.update(new_execute_data) | ||||
|                         # Set a timeout of 10 seconds | ||||
|                         try: | ||||
|                             new_execute_data = future.result(timeout=10) | ||||
|                             if new_execute_data and isinstance(new_execute_data, dict): | ||||
|                                 EXECUTE_DATA.update(new_execute_data) | ||||
|  | ||||
|                         except concurrent.futures.TimeoutError: | ||||
|                             # The plugin took too long, abort processing for this watch | ||||
|                             raise Exception(f"Plugin {plugin.__class__.__name__} took more than 10 seconds to run.") | ||||
|                 except Exception as e: | ||||
|                     # Log the error but continue with the next plugin | ||||
|                     import logging | ||||
|                     logging.error(f"Error executing plugin {plugin.__class__.__name__}: {str(e)}") | ||||
|                     continue | ||||
|  | ||||
|             # Create the ruleset | ||||
|             ruleset = convert_to_jsonlogic(logic_operator=logic_operator, rule_dict=complete_rules) | ||||
| @@ -132,3 +153,18 @@ for plugin in plugin_manager.get_plugins(): | ||||
|     if isinstance(new_field_choices, list): | ||||
|         field_choices.extend(new_field_choices) | ||||
|  | ||||
def collect_ui_edit_stats_extras(watch):
    """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras.

    Args:
        watch: The watch object, passed unchanged to each plugin as ``watch=``.

    Returns:
        The non-empty plugin results joined with newlines, or "" when no plugin
        produced any content.
    """
    extras_content = []

    for plugin in plugin_manager.get_plugins():
        # The hook is optional - a plugin that does not provide it is simply skipped
        hook = getattr(plugin, 'ui_edit_stats_extras', None)
        if not callable(hook):
            continue

        try:
            content = hook(watch=watch)
        except Exception as e:
            # One failing plugin should not stop the others, but the failure must be visible in the log
            logger.warning(f"Plugin {plugin} failed in ui_edit_stats_extras: {str(e)}")
            continue

        if content:
            extras_content.append(content)

    # Joining an empty list already gives ""
    return "\n".join(extras_content)
|  | ||||
|   | ||||
| @@ -1,5 +1,8 @@ | ||||
| import pluggy | ||||
| from . import default_plugin  # Import the default plugin | ||||
| import os | ||||
| import importlib | ||||
| import sys | ||||
| from . import default_plugin | ||||
|  | ||||
| # ✅ Ensure that the namespace in HookspecMarker matches PluginManager | ||||
| PLUGIN_NAMESPACE = "changedetectionio_conditions" | ||||
| @@ -30,6 +33,11 @@ class ConditionsSpec: | ||||
|     def add_data(current_watch_uuid, application_datastruct, ephemeral_data): | ||||
|         """Add to the datadict""" | ||||
|         pass | ||||
|          | ||||
|     @hookspec | ||||
|     def ui_edit_stats_extras(watch): | ||||
|         """Return HTML content to add to the stats tab in the edit view""" | ||||
|         pass | ||||
|  | ||||
| # ✅ Set up Pluggy Plugin Manager | ||||
| plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE) | ||||
| @@ -40,5 +48,27 @@ plugin_manager.add_hookspecs(ConditionsSpec) | ||||
| # ✅ Register built-in plugins manually | ||||
| plugin_manager.register(default_plugin, "default_plugin") | ||||
|  | ||||
| # ✅ Load plugins from the plugins directory | ||||
def load_plugins_from_directory():
    """Import every module in the sibling ``plugins`` directory and register it with pluggy.

    Files are visited in sorted name order so the registration order does not
    depend on the order the filesystem happens to list them in. A plugin that
    fails to import or register is reported and skipped; the remaining plugins
    are still loaded.
    """
    plugins_dir = os.path.join(os.path.dirname(__file__), 'plugins')
    # isdir rather than exists: a plain file named "plugins" cannot be listed
    if not os.path.isdir(plugins_dir):
        return

    # Get all Python files (excluding __init__.py)
    for filename in sorted(os.listdir(plugins_dir)):
        if not filename.endswith(".py") or filename == "__init__.py":
            continue

        module_name = filename[:-3]  # Remove .py extension
        module_path = f"changedetectionio.conditions.plugins.{module_name}"

        try:
            module = importlib.import_module(module_path)
            # Register the plugin with pluggy
            plugin_manager.register(module, module_name)
        except (ImportError, AttributeError, ValueError) as e:
            # ValueError: pluggy rejects a plugin or plugin name that is already registered
            print(f"Error loading plugin {module_name}: {e}")
|  | ||||
| # Load plugins from the plugins directory | ||||
| load_plugins_from_directory() | ||||
|  | ||||
| # ✅ Discover installed plugins from external packages (if any) | ||||
| plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) | ||||
|   | ||||
							
								
								
									
										1
									
								
								changedetectionio/conditions/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										1
									
								
								changedetectionio/conditions/plugins/__init__.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1 @@ | ||||
| # Import plugins package to make them discoverable | ||||
							
								
								
									
										107
									
								
								changedetectionio/conditions/plugins/levenshtein_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										107
									
								
								changedetectionio/conditions/plugins/levenshtein_plugin.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,107 @@ | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| # Support both plugin systems | ||||
| conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions") | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
def levenshtein_ratio_recent_history(watch, incoming_text=None):
    """Compute Levenshtein metrics between the latest snapshot and a comparison text.

    Args:
        watch: Object exposing ``history`` (keyed by snapshot timestamp) and
            ``get_history_snapshot(timestamp=...)``.
        incoming_text: Text to compare against the latest saved snapshot. When
            None (as when called from ui_edit_stats_extras), the two most recent
            snapshots are compared with each other instead.

    Returns:
        A dict with ``distance``, ``ratio`` and ``percent_similar`` keys, or ''
        when there is not enough data to compare or the calculation failed.
    """
    try:
        history_keys = list(watch.history.keys())

        if incoming_text is None:
            # Snapshot-vs-snapshot comparison needs at least two snapshots
            if len(history_keys) < 2:
                return ''
            a = watch.get_history_snapshot(timestamp=history_keys[-1])  # Latest snapshot
            b = watch.get_history_snapshot(timestamp=history_keys[-2])  # Previous snapshot
        else:
            # Should be at least one snapshot to compare the incoming text against
            if not history_keys:
                return ''
            a = watch.get_history_snapshot(timestamp=history_keys[-1])  # Latest saved snapshot
            # Always compare text with text - never with a history key
            b = incoming_text

        if a and b:
            # Imported only when needed; an import failure is logged below like any other error
            from Levenshtein import ratio, distance
            distance_value = distance(a, b)
            ratio_value = ratio(a, b)
            return {
                'distance': distance_value,
                'ratio': ratio_value,
                'percent_similar': round(ratio_value * 100, 2)
            }
    except Exception as e:
        logger.warning(f"Unable to calc similarity: {str(e)}")

    return ''
|  | ||||
@conditions_hookimpl
def register_operators():
    """Conditions hook: this plugin defines no custom operators, so nothing is returned."""
    pass
|  | ||||
@conditions_hookimpl
def register_operator_choices():
    """Conditions hook: this plugin adds no operator choices, so nothing is returned."""
    pass
|  | ||||
|  | ||||
@conditions_hookimpl
def register_field_choices():
    """Conditions hook: list the (field key, label) pairs this plugin offers."""
    levenshtein_fields = [
        ("levenshtein_ratio", "Levenshtein - Text similarity ratio"),
        ("levenshtein_distance", "Levenshtein - Text change distance"),
    ]
    return levenshtein_fields
|  | ||||
@conditions_hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    """Conditions hook: provide Levenshtein values for the watch's current text.

    Returns an empty dict when the watch is not found, when ``ephemeral_data``
    has no 'text' entry, or when no metrics could be calculated.
    """
    watch = application_datastruct['watching'].get(current_watch_uuid)

    # ephemeral_data['text'] will be the current text after filters, they may have edited filters but not saved them yet etc
    if not watch or 'text' not in ephemeral_data:
        return {}

    metrics = levenshtein_ratio_recent_history(watch, ephemeral_data.get('text',''))
    if not isinstance(metrics, dict):
        return {}

    return {
        'levenshtein_ratio': metrics.get('ratio', 0),
        'levenshtein_similarity': metrics.get('percent_similar', 0),
        'levenshtein_distance': metrics.get('distance', 0),
    }
|  | ||||
@global_hookimpl
def ui_edit_stats_extras(watch):
    """Generate the HTML for Levenshtein stats shown in the UI (global plugin system hook).

    Args:
        watch: The watch whose two most recent snapshots are compared.

    Returns:
        An HTML fragment: the metrics table, or a short ``<p>`` message when
        there is not enough history or the metrics could not be calculated.
    """
    if len(watch.history.keys()) < 2:
        return "<p>Not enough history to calculate Levenshtein metrics</p>"

    try:
        lev_data = levenshtein_ratio_recent_history(watch)
        # The helper returns '' instead of a dict when nothing could be calculated
        if not lev_data or not isinstance(lev_data, dict):
            return "<p>Unable to calculate Levenshtein metrics</p>"

        html = f"""
        <div class="levenshtein-stats">
            <h4>Levenshtein Text Similarity Details</h4>
            <table class="pure-table">
                <tbody>
                    <tr>
                        <td>Raw distance (edits needed)</td>
                        <td>{lev_data['distance']}</td>
                    </tr>
                    <tr>
                        <td>Similarity ratio</td>
                        <td>{lev_data['ratio']:.4f}</td>
                    </tr>
                    <tr>
                        <td>Percent similar</td>
                        <td>{lev_data['percent_similar']}%</td>
                    </tr>
                </tbody>
            </table>
            <p style="font-size: 80%;">Levenshtein metrics compare the last two snapshots, measuring how many character edits are needed to transform one into the other.</p>
        </div>
        """
        return html
    except Exception as e:
        logger.error(f"Error generating Levenshtein UI extras: {str(e)}")
        return "<p>Error calculating Levenshtein metrics</p>"
|          | ||||
							
								
								
									
										82
									
								
								changedetectionio/conditions/plugins/wordcount_plugin.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								changedetectionio/conditions/plugins/wordcount_plugin.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,82 @@ | ||||
| import pluggy | ||||
| from loguru import logger | ||||
|  | ||||
| # Support both plugin systems | ||||
| conditions_hookimpl = pluggy.HookimplMarker("changedetectionio_conditions") | ||||
| global_hookimpl = pluggy.HookimplMarker("changedetectionio") | ||||
|  | ||||
def count_words_in_history(watch, incoming_text=None):
    """Return the number of whitespace-separated words in a text.

    Counts ``incoming_text`` when it is given; otherwise counts the most recent
    snapshot in ``watch.history``. Returns 0 when there is no history or when
    anything goes wrong (the error is logged).
    """
    try:
        # add_data passes the text in directly
        if incoming_text is not None:
            words = incoming_text.split()
            return len(words)

        # UI extras: use the newest stored snapshot, if there is one
        timestamps = list(watch.history.keys())
        if not timestamps:
            return 0

        newest_snapshot = watch.get_history_snapshot(timestamps[-1])
        return len(newest_snapshot.split())
    except Exception as e:
        logger.error(f"Error counting words: {str(e)}")
        return 0
|  | ||||
| # Implement condition plugin hooks | ||||
@conditions_hookimpl
def register_operators():
    """Conditions hook: return this plugin's custom operators (an empty dict)."""
    # No custom operators needed
    return {}
|  | ||||
@conditions_hookimpl
def register_operator_choices():
    """Conditions hook: return this plugin's operator choices (an empty list)."""
    # No custom operator choices needed
    return []
|  | ||||
@conditions_hookimpl
def register_field_choices():
    """Conditions hook: list the (field key, label) pairs this plugin offers."""
    # The field that becomes available in conditions
    word_count_fields = [("word_count", "Word count of content")]
    return word_count_fields
|  | ||||
@conditions_hookimpl
def add_data(current_watch_uuid, application_datastruct, ephemeral_data):
    """Conditions hook: provide ``word_count`` for the watch's current text.

    Returns an empty dict when the watch is not found or ``ephemeral_data``
    has no 'text' entry.
    """
    watch = application_datastruct['watching'].get(current_watch_uuid)

    if not watch or 'text' not in ephemeral_data:
        return {}

    return {'word_count': count_words_in_history(watch, ephemeral_data['text'])}
|  | ||||
def _generate_stats_html(watch):
    """Build the stats-tab HTML fragment showing the word count of the latest snapshot."""
    latest_word_count = count_words_in_history(watch)

    return f"""
    <div class="word-count-stats">
        <h4>Content Analysis</h4>
        <table class="pure-table">
            <tbody>
                <tr>
                    <td>Word count (latest snapshot)</td>
                    <td>{latest_word_count}</td>
                </tr>
            </tbody>
        </table>
        <p style="font-size: 80%;">Word count is a simple measure of content length, calculated by splitting text on whitespace.</p>
    </div>
    """
|  | ||||
# A single function carries both markers. Defining the function twice under the
# same name would rebind it, leaving only the last definition (and its marker).
@global_hookimpl
@conditions_hookimpl
def ui_edit_stats_extras(watch):
    """Add word count stats to the UI for both the conditions and the global plugin system.

    Args:
        watch: The watch whose latest snapshot is counted.

    Returns:
        The HTML fragment produced by ``_generate_stats_html``.
    """
    return _generate_stats_html(watch)
| @@ -194,7 +194,6 @@ class fetcher(Fetcher): | ||||
|             browsersteps_interface.page = self.page | ||||
|  | ||||
|             response = browsersteps_interface.action_goto_url(value=url) | ||||
|             self.headers = response.all_headers() | ||||
|  | ||||
|             if response is None: | ||||
|                 context.close() | ||||
| @@ -202,6 +201,8 @@ class fetcher(Fetcher): | ||||
|                 logger.debug("Content Fetcher > Response object from the browser communication was none") | ||||
|                 raise EmptyReply(url=url, status_code=None) | ||||
|  | ||||
|             self.headers = response.all_headers() | ||||
|  | ||||
|             try: | ||||
|                 if self.webdriver_js_execute_code is not None and len(self.webdriver_js_execute_code): | ||||
|                     browsersteps_interface.action_execute_js(value=self.webdriver_js_execute_code, selector=None) | ||||
|   | ||||
| @@ -147,7 +147,7 @@ class fetcher(Fetcher): | ||||
|                          is_binary, | ||||
|                          empty_pages_are_a_change | ||||
|                          ): | ||||
|  | ||||
|         import re | ||||
|         self.delete_browser_steps_screenshots() | ||||
|         extra_wait = int(os.getenv("WEBDRIVER_DELAY_BEFORE_CONTENT_READY", 5)) + self.render_extract_delay | ||||
|  | ||||
| @@ -172,6 +172,17 @@ class fetcher(Fetcher): | ||||
|         # headless - ask a new page | ||||
|         self.page = (pages := await browser.pages) and len(pages) or await browser.newPage() | ||||
|  | ||||
|         if '--window-size' in self.browser_connection_url: | ||||
|             # Be sure the viewport is always the window-size, this is often not the same thing | ||||
|             match = re.search(r'--window-size=(\d+),(\d+)', self.browser_connection_url) | ||||
|             if match: | ||||
|                 logger.debug(f"Setting viewport to same as --window-size in browser connection URL {int(match.group(1))},{int(match.group(2))}") | ||||
|                 await self.page.setViewport({ | ||||
|                     "width": int(match.group(1)), | ||||
|                     "height": int(match.group(2)) | ||||
|                 }) | ||||
|                 logger.debug(f"Puppeteer viewport size {self.page.viewport}") | ||||
|  | ||||
|         try: | ||||
|             from pyppeteerstealth import inject_evasions_into_page | ||||
|         except ImportError: | ||||
| @@ -218,7 +229,6 @@ class fetcher(Fetcher): | ||||
|  | ||||
|         response = await self.page.goto(url, waitUntil="load") | ||||
|  | ||||
|  | ||||
|         if response is None: | ||||
|             await self.page.close() | ||||
|             await browser.close() | ||||
|   | ||||
| @@ -28,6 +28,7 @@ class fetcher(Fetcher): | ||||
|  | ||||
|         import chardet | ||||
|         import requests | ||||
|         from requests.exceptions import ProxyError, ConnectionError, RequestException | ||||
|  | ||||
|         if self.browser_steps_get_valid_steps(): | ||||
|             raise BrowserStepsInUnsupportedFetcher(url=url) | ||||
| @@ -52,14 +53,19 @@ class fetcher(Fetcher): | ||||
|         if strtobool(os.getenv('ALLOW_FILE_URI', 'false')) and url.startswith('file://'): | ||||
|             from requests_file import FileAdapter | ||||
|             session.mount('file://', FileAdapter()) | ||||
|  | ||||
|         r = session.request(method=request_method, | ||||
|                             data=request_body.encode('utf-8') if type(request_body) is str else request_body, | ||||
|                             url=url, | ||||
|                             headers=request_headers, | ||||
|                             timeout=timeout, | ||||
|                             proxies=proxies, | ||||
|                             verify=False) | ||||
|         try: | ||||
|             r = session.request(method=request_method, | ||||
|                                 data=request_body.encode('utf-8') if type(request_body) is str else request_body, | ||||
|                                 url=url, | ||||
|                                 headers=request_headers, | ||||
|                                 timeout=timeout, | ||||
|                                 proxies=proxies, | ||||
|                                 verify=False) | ||||
|         except Exception as e: | ||||
|             msg = str(e) | ||||
|             if proxies and 'SOCKSHTTPSConnectionPool' in msg: | ||||
|                 msg = f"Proxy connection failed? {msg}" | ||||
|             raise Exception(msg) from e | ||||
|  | ||||
|         # If the response did not tell us what encoding format to expect, Then use chardet to override what `requests` thinks. | ||||
|         # For example - some sites don't tell us it's utf-8, but return utf-8 content | ||||
|   | ||||
| @@ -10,6 +10,7 @@ async () => { | ||||
|             'article épuisé', | ||||
|             'artikel zurzeit vergriffen', | ||||
|             'as soon as stock is available', | ||||
|             'aucune offre n\'est disponible', | ||||
|             'ausverkauft', // sold out | ||||
|             'available for back order', | ||||
|             'awaiting stock', | ||||
| @@ -25,9 +26,8 @@ async () => { | ||||
|             'dieser artikel ist bald wieder verfügbar', | ||||
|             'dostępne wkrótce', | ||||
|             'en rupture', | ||||
|             'en rupture de stock', | ||||
|             'épuisé', | ||||
|             'esgotado', | ||||
|             'in kürze lieferbar', | ||||
|             'indisponible', | ||||
|             'indisponível', | ||||
|             'isn\'t in stock right now', | ||||
| @@ -50,10 +50,12 @@ async () => { | ||||
|             'niet leverbaar', | ||||
|             'niet op voorraad', | ||||
|             'no disponible', | ||||
|             'non disponibile', | ||||
|             'non disponible', | ||||
|             'no featured offers available', | ||||
|             'no longer available', | ||||
|             'no longer in stock', | ||||
|             'no tickets available', | ||||
|             'non disponibile', | ||||
|             'non disponible', | ||||
|             'not available', | ||||
|             'not currently available', | ||||
|             'not in stock', | ||||
| @@ -89,13 +91,15 @@ async () => { | ||||
|             'vergriffen', | ||||
|             'vorbestellen', | ||||
|             'vorbestellung ist bald möglich', | ||||
|             'we don\'t currently have any', | ||||
|             'we couldn\'t find any products that match', | ||||
|             'we do not currently have an estimate of when this product will be back in stock.', | ||||
|             'we don\'t currently have any', | ||||
|             'we don\'t know when or if this item will be back in stock.', | ||||
|             'we were not able to find a match', | ||||
|             'when this arrives in stock', | ||||
|             'when this item is available to order', | ||||
|             'zur zeit nicht an lager', | ||||
|             'épuisé', | ||||
|             '品切れ', | ||||
|             '已售', | ||||
|             '已售完', | ||||
| @@ -122,6 +126,20 @@ async () => { | ||||
|         // so it's good to filter to just the 'above the fold' elements | ||||
|         // and it should be atleast 100px from the top to ignore items in the toolbar, sometimes menu items like "Coming soon" exist | ||||
|  | ||||
|         function elementIsInEyeBallRange(element) { | ||||
|             // outside the 'fold' or some weird text in the heading area | ||||
|             // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|             // Note: theres also an automated test that places the 'out of stock' text fairly low down | ||||
|             // Skip text that could be in the header area | ||||
|             if (element.getBoundingClientRect().bottom + window.scrollY <= 300 ) { | ||||
|                 return false; | ||||
|             } | ||||
|             // Skip text that could be much further down (like a list of "you may like" products that have 'sold out' in there | ||||
|             if (element.getBoundingClientRect().bottom + window.scrollY >= 1300 ) { | ||||
|                 return false; | ||||
|             } | ||||
|             return true; | ||||
|         } | ||||
|  | ||||
| // @todo - if it's SVG or IMG, go into image diff mode | ||||
|  | ||||
| @@ -158,9 +176,7 @@ async () => { | ||||
|         for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|             const element = elementsToScan[i]; | ||||
|  | ||||
|             // outside the 'fold' or some weird text in the heading area | ||||
|             // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|             if (element.getBoundingClientRect().top + window.scrollY >= vh || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|             if (!elementIsInEyeBallRange(element)) { | ||||
|                 continue | ||||
|             } | ||||
|  | ||||
| @@ -174,11 +190,11 @@ async () => { | ||||
|             } catch (e) { | ||||
|                 console.warn('stock-not-in-stock.js scraper - handling element for gettext failed', e); | ||||
|             } | ||||
|  | ||||
|             if (elementText.length) { | ||||
|                 // try which ones could mean its in stock | ||||
|                 if (negateOutOfStockRegex.test(elementText) && !elementText.includes('(0 products)')) { | ||||
|                     console.log(`Negating/overriding 'Out of Stock' back to "Possibly in stock" found "${elementText}"`) | ||||
|                     element.style.border = "2px solid green"; // highlight the element that was detected as in stock | ||||
|                     return 'Possibly in stock'; | ||||
|                 } | ||||
|             } | ||||
| @@ -187,10 +203,8 @@ async () => { | ||||
|         // OTHER STUFF THAT COULD BE THAT IT'S OUT OF STOCK | ||||
|         for (let i = elementsToScan.length - 1; i >= 0; i--) { | ||||
|             const element = elementsToScan[i]; | ||||
|             // outside the 'fold' or some weird text in the heading area | ||||
|             // .getBoundingClientRect() was causing a crash in chrome 119, can only be run on contentVisibility != hidden | ||||
|             // Note: theres also an automated test that places the 'out of stock' text fairly low down | ||||
|             if (element.getBoundingClientRect().top + window.scrollY >= vh + 250 || element.getBoundingClientRect().top + window.scrollY <= 100) { | ||||
|  | ||||
|             if (!elementIsInEyeBallRange(element)) { | ||||
|                 continue | ||||
|             } | ||||
|             elementText = ""; | ||||
| @@ -205,6 +219,7 @@ async () => { | ||||
|                 for (const outOfStockText of outOfStockTexts) { | ||||
|                     if (elementText.includes(outOfStockText)) { | ||||
|                         console.log(`Selected 'Out of Stock' - found text "${outOfStockText}" - "${elementText}" - offset top ${element.getBoundingClientRect().top}, page height is ${vh}`) | ||||
|                         element.style.border = "2px solid red"; // highlight the element that was detected as out of stock | ||||
|                         return outOfStockText; // item is out of stock | ||||
|                     } | ||||
|                 } | ||||
|   | ||||
| @@ -202,7 +202,6 @@ async (options) => { | ||||
|         // Foreach filter, go and find it on the page and add it to the results so we can visualise it again | ||||
|         for (const f of include_filters) { | ||||
|             bbox = false; | ||||
|             q = false; | ||||
|  | ||||
|             if (!f.length) { | ||||
|                 console.log("xpath_element_scraper: Empty filter, skipping"); | ||||
| @@ -255,7 +254,7 @@ async (options) => { | ||||
|                             console.log("xpath_element_scraper: Got filter by ownerElement element, scroll from top was " + scroll_y) | ||||
|                         } catch (e) { | ||||
|                             console.log(e) | ||||
|                             console.log("xpath_element_scraper: error looking up q.ownerElement") | ||||
|                             console.log("xpath_element_scraper: error looking up node.ownerElement") | ||||
|                         } | ||||
|                     } | ||||
|  | ||||
|   | ||||
| @@ -76,8 +76,7 @@ class fetcher(Fetcher): | ||||
|         for opt in CHROME_OPTIONS: | ||||
|             options.add_argument(opt) | ||||
|  | ||||
|         if self.proxy: | ||||
|             options.proxy = self.proxy | ||||
|         options.add_argument(f"--proxy-server={self.proxy}") | ||||
|  | ||||
|         self.driver = webdriver.Remote( | ||||
|             command_executor=self.browser_connection_url, | ||||
|   | ||||
							
								
								
									
										82
									
								
								changedetectionio/pluggy_interface.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										82
									
								
								changedetectionio/pluggy_interface.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,82 @@ | ||||
| import pluggy | ||||
| import os | ||||
| import importlib | ||||
| import sys | ||||
|  | ||||
| # Global plugin namespace for changedetection.io | ||||
| PLUGIN_NAMESPACE = "changedetectionio" | ||||
|  | ||||
| hookspec = pluggy.HookspecMarker(PLUGIN_NAMESPACE) | ||||
| hookimpl = pluggy.HookimplMarker(PLUGIN_NAMESPACE) | ||||
|  | ||||
|  | ||||
| class ChangeDetectionSpec: | ||||
|     """Hook specifications for extending changedetection.io functionality.""" | ||||
|  | ||||
|     # No ``self`` parameter: the spec is registered from the class itself and the | ||||
|     # hook is invoked with the single keyword argument ``watch``. | ||||
|     @hookspec | ||||
|     def ui_edit_stats_extras(watch): | ||||
|         """Return HTML content to add to the stats tab in the edit view. | ||||
|  | ||||
|         Args: | ||||
|             watch: The watch object being edited | ||||
|  | ||||
|         Returns: | ||||
|             str: HTML content to be inserted in the stats tab | ||||
|         """ | ||||
|         # NOTE(review): the edit template appears to render the combined result with | ||||
|         # the ``safe`` filter, so implementations should return trusted HTML - confirm. | ||||
|         pass | ||||
|  | ||||
|  | ||||
| # Set up Plugin Manager | ||||
| plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE) | ||||
|  | ||||
| # Register hookspecs | ||||
| plugin_manager.add_hookspecs(ChangeDetectionSpec) | ||||
|  | ||||
| # Load plugins from subdirectories | ||||
| def load_plugins_from_directories(): | ||||
|     # Dictionary of directories to scan for plugins | ||||
|     plugin_dirs = { | ||||
|         'conditions': os.path.join(os.path.dirname(__file__), 'conditions', 'plugins'), | ||||
|         # Add more plugin directories here as needed | ||||
|     } | ||||
|      | ||||
|     # Note: Removed the direct import of example_word_count_plugin as it's now in the conditions/plugins directory | ||||
|      | ||||
|     for dir_name, dir_path in plugin_dirs.items(): | ||||
|         if not os.path.exists(dir_path): | ||||
|             continue | ||||
|              | ||||
|         # Get all Python files (excluding __init__.py) | ||||
|         for filename in os.listdir(dir_path): | ||||
|             if filename.endswith(".py") and filename != "__init__.py": | ||||
|                 module_name = filename[:-3]  # Remove .py extension | ||||
|                 module_path = f"changedetectionio.{dir_name}.plugins.{module_name}" | ||||
|                  | ||||
|                 try: | ||||
|                     module = importlib.import_module(module_path) | ||||
|                     # Register the plugin with pluggy | ||||
|                     plugin_manager.register(module, module_name) | ||||
|                 except (ImportError, AttributeError) as e: | ||||
|                     print(f"Error loading plugin {module_name}: {e}") | ||||
|  | ||||
| # Load plugins | ||||
| load_plugins_from_directories() | ||||
|  | ||||
| # Discover installed plugins from external packages (if any) | ||||
| plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE) | ||||
|  | ||||
| # Helper function to collect UI stats extras from all plugins | ||||
| def collect_ui_edit_stats_extras(watch): | ||||
|     """Collect and combine HTML content from all plugins that implement ui_edit_stats_extras""" | ||||
|     extras_content = [] | ||||
|      | ||||
|     # Get all plugins that implement the ui_edit_stats_extras hook | ||||
|     results = plugin_manager.hook.ui_edit_stats_extras(watch=watch) | ||||
|      | ||||
|     # If we have results, add them to our content | ||||
|     if results: | ||||
|         for result in results: | ||||
|             if result:  # Skip empty results | ||||
|                 extras_content.append(result) | ||||
|              | ||||
|     return "\n".join(extras_content) if extras_content else "" | ||||
| @@ -82,3 +82,26 @@ done | ||||
|  | ||||
|  | ||||
| docker kill squid-one squid-two squid-custom | ||||
|  | ||||
| # Test that the UI is returning the correct error message when a proxy is not available | ||||
|  | ||||
| # Requests | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|  | ||||
| # Playwright | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|  | ||||
| # Puppeteer fast | ||||
| docker run --network changedet-network \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && FAST_PUPPETEER_CHROME_FETCHER=1 PLAYWRIGHT_DRIVER_URL=ws://sockpuppetbrowser:3000 pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|  | ||||
| # Selenium - todo - fix proxies | ||||
| docker run --network changedet-network \ | ||||
|   -e "WEBDRIVER_URL=http://selenium:4444/wd/hub" \ | ||||
|   test-changedetectionio \ | ||||
|   bash -c 'cd changedetectionio && pytest tests/proxy_list/test_proxy_noconnect.py' | ||||
|   | ||||
| @@ -211,7 +211,14 @@ $(document).ready(function () { | ||||
|                     $('input[type=text]', first_available).first().val(x['xpath']); | ||||
|                     $('input[placeholder="Value"]', first_available).addClass('ok').click().focus(); | ||||
|                     found_something = true; | ||||
|                 } else { | ||||
|                 } | ||||
|                 else if (x['tagName'] === 'select') { | ||||
|                     $('select', first_available).val('<select> by option text').change(); | ||||
|                     $('input[type=text]', first_available).first().val(x['xpath']); | ||||
|                     $('input[placeholder="Value"]', first_available).addClass('ok').click().focus(); | ||||
|                     found_something = true; | ||||
|                 } | ||||
|                 else { | ||||
|                     // There's no good way (that I know) to find if this | ||||
|                     // see https://stackoverflow.com/questions/446892/how-to-find-event-listeners-on-a-dom-node-in-javascript-or-in-debugging | ||||
|                     // https://codepen.io/azaslavsky/pen/DEJVWv | ||||
| @@ -251,6 +258,10 @@ $(document).ready(function () { | ||||
|                 400: function () { | ||||
|                     // More than likely the CSRF token was lost when the server restarted | ||||
|                     alert("There was a problem processing the request, please reload the page."); | ||||
|                 }, | ||||
|                 401: function (err) { | ||||
|                     // This will be a custom error | ||||
|                     alert(err.responseText); | ||||
|                 } | ||||
|             } | ||||
|         }).done(function (data) { | ||||
|   | ||||
| @@ -383,13 +383,13 @@ Math: {{ 1 + 1 }}") }} | ||||
|                     <div class="pure-control-group"> | ||||
|                         {% if watch_needs_selenium_or_playwright %} | ||||
|                             {% if system_has_playwright_configured %} | ||||
|                                 {%  if visual_selector_data_ready %} | ||||
|                             <span class="pure-form-message-inline" id="visual-selector-heading"> | ||||
|                                 The Visual Selector tool lets you select the <i>text</i> elements that will be used for the change detection. It automatically fills-in the filters in the "CSS/JSONPath/JQ/XPath Filters" box of the <a href="#filters-and-triggers">Filters & Triggers</a> tab. Use <strong>Shift+Click</strong> to select multiple items. | ||||
|                             </span> | ||||
|  | ||||
|                             <div id="selector-header"> | ||||
|                                 <a id="clear-selector" class="pure-button button-secondary button-xsmall" style="font-size: 70%">Clear selection</a> | ||||
|                                 <!-- visual selector IMG will try to load, it will either replace this or on error replace it with some handy text --> | ||||
|                                 <i class="fetching-update-notice" style="font-size: 80%;">One moment, fetching screenshot and element information..</i> | ||||
|                             </div> | ||||
|                             <div id="selector-wrapper" style="display: none"> | ||||
| @@ -400,9 +400,6 @@ Math: {{ 1 + 1 }}") }} | ||||
|                                 <canvas id="selector-canvas"></canvas> | ||||
|                             </div> | ||||
|                             <div id="selector-current-xpath" style="overflow-x: hidden"><strong>Currently:</strong> <span class="text">Loading...</span></div> | ||||
|                             {% else %} | ||||
|                                 <strong>Error, The Visual selector data is not ready, it needs to complete atleast one fetch, please queue the item and reload.</strong> | ||||
|                             {% endif %} | ||||
|                         {% else %} | ||||
|                             {# The watch needed chrome but system says that playwright is not ready #} | ||||
|                             {{ playwright_warning() }} | ||||
| @@ -453,6 +450,13 @@ Math: {{ 1 + 1 }}") }} | ||||
|                         </tr> | ||||
|                         </tbody> | ||||
|                     </table> | ||||
|  | ||||
|                     {% if ui_edit_stats_extras %} | ||||
|                     <div class="plugin-stats-extras"> <!-- from pluggy plugin --> | ||||
|                         {{ ui_edit_stats_extras|safe }} | ||||
|                     </div> | ||||
|                     {% endif %} | ||||
|  | ||||
|                     {% if watch.history_n %} | ||||
|                         <p> | ||||
|                              <a href="{{url_for('ui.ui_edit.watch_get_latest_html', uuid=uuid)}}" class="pure-button button-small">Download latest HTML snapshot</a> | ||||
|   | ||||
							
								
								
									
										53
									
								
								changedetectionio/tests/proxy_list/test_proxy_noconnect.py
									
									
									
									
									
										Normal file
									
								
							
							
						
						
									
										53
									
								
								changedetectionio/tests/proxy_list/test_proxy_noconnect.py
									
									
									
									
									
										Normal file
									
								
							| @@ -0,0 +1,53 @@ | ||||
| #!/usr/bin/env python3 | ||||
|  | ||||
| from flask import url_for | ||||
| from ..util import live_server_setup, wait_for_all_checks | ||||
| import os | ||||
| from ... import strtobool | ||||
|  | ||||
|  | ||||
| # Just to be sure the UI outputs the right error message on proxy connection failed | ||||
| def test_proxy_noconnect_custom(client, live_server, measure_memory_usage): | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
|     # Goto settings, add our custom one | ||||
|     res = client.post( | ||||
|         url_for("settings.settings_page"), | ||||
|         data={ | ||||
|             "requests-time_between_check-minutes": 180, | ||||
|             "application-ignore_whitespace": "y", | ||||
|             "application-fetch_backend": 'html_webdriver' if os.getenv('PLAYWRIGHT_DRIVER_URL') else 'html_requests', | ||||
|             "requests-extra_proxies-0-proxy_name": "custom-test-proxy", | ||||
|             # test:awesome is set in tests/proxy_list/squid-passwords.txt | ||||
|             "requests-extra_proxies-0-proxy_url": "http://THISPROXYDOESNTEXIST:3128", | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"Settings updated." in res.data | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         # Because a URL wont show in squid/proxy logs due it being SSLed | ||||
|         # Use plain HTTP or a specific domain-name here | ||||
|         data={"urls": "https://changedetection.io/CHANGELOG.txt"}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"1 Imported" in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'Page.goto: net::ERR_PROXY_CONNECTION_FAILED' in res.data | ||||
|  | ||||
|     # Requests | ||||
|     check_string = b'Proxy connection failed?' | ||||
|  | ||||
|     if os.getenv('PLAYWRIGHT_DRIVER_URL') or strtobool(os.getenv('FAST_PUPPETEER_CHROME_FETCHER', 'False')): | ||||
|         check_string = b'ERR_PROXY_CONNECTION_FAILED' | ||||
|  | ||||
|     if os.getenv("WEBDRIVER_URL"): | ||||
|         check_string = b'ERR_PROXY_CONNECTION_FAILED' | ||||
|  | ||||
|     assert check_string in res.data | ||||
|  | ||||
| @@ -14,6 +14,8 @@ from changedetectionio.notification import ( | ||||
| def set_original_response(): | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|        <section id=header style="padding: 50px; height: 350px">This is the header which should be ignored always - <span>add to cart</span></section> | ||||
|        <!-- stock-not-in-stock.js will ignore text in the first 300px, see elementIsInEyeBallRange(), sometimes "add to cart" and other junk is here --> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
| @@ -52,8 +54,6 @@ def test_restock_detection(client, live_server, measure_memory_usage): | ||||
|  | ||||
|     set_original_response() | ||||
|     #assert os.getenv('PLAYWRIGHT_DRIVER_URL'), "Needs PLAYWRIGHT_DRIVER_URL set for this test" | ||||
|  | ||||
|     time.sleep(1) | ||||
|     live_server_setup(live_server) | ||||
|     ##################### | ||||
|     notification_url = url_for('test_notification_endpoint', _external=True).replace('http://localhost', 'http://changedet').replace('http', 'json') | ||||
| @@ -84,7 +84,8 @@ def test_restock_detection(client, live_server, measure_memory_usage): | ||||
|     # Is it correctly show as NOT in stock? | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'not-in-stock' in res.data | ||||
|     assert b'processor-restock_diff' in res.data # Should have saved in restock mode | ||||
|     assert b'not-in-stock' in res.data # should be out of stock | ||||
|  | ||||
|     # Is it correctly shown as in stock | ||||
|     set_back_in_stock_response() | ||||
|   | ||||
| @@ -45,11 +45,15 @@ def set_number_out_of_range_response(number="150"): | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def test_setup(client, live_server): | ||||
|     """Run the live server setup once, before the other tests in this module.""" | ||||
|     live_server_setup(live_server) | ||||
|  | ||||
| def test_conditions_with_text_and_number(client, live_server): | ||||
|     """Test that both text and number conditions work together with AND logic.""" | ||||
|      | ||||
|     set_original_response("50") | ||||
|     live_server_setup(live_server) | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|  | ||||
| @@ -192,6 +196,150 @@ def test_condition_validate_rule_row(client, live_server): | ||||
|     ) | ||||
|     assert res.status_code == 200 | ||||
|     assert b'false' in res.data | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("ui.form_delete", uuid="all"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|  | ||||
|  | ||||
| # The word count reported for the snapshot should be shown on the watch's edit page | ||||
| def test_wordcount_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
|      So let's see what happens.  <br> | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"1 Imported" in res.data | ||||
|  | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     # Check it saved | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_edit.edit_page", uuid="first"), | ||||
|     ) | ||||
|  | ||||
|     # Assert the word count is counted correctly | ||||
|     assert b'<td>13</td>' in res.data | ||||
|  | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("ui.form_delete", uuid="all"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
| # The levenshtein_ratio condition should only flag a change once the ratio drops below the 0.8 threshold | ||||
| def test_lev_conditions_plugin(client, live_server, measure_memory_usage): | ||||
|     #live_server_setup(live_server) | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("""<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
|      So let's see what happens.  <br> | ||||
|      </body> | ||||
|      </html> | ||||
|     """) | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_views.form_quick_watch_add"), | ||||
|         data={"url": test_url, "tags": '', 'edit_and_watch_submit_button': 'Edit > Watch'}, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Watch added in Paused state, saving will unpause" in res.data | ||||
|  | ||||
|     uuid = next(iter(live_server.app.config['DATASTORE'].data['watching'])) | ||||
|     # Give the thread time to pick it up | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.post( | ||||
|         url_for("ui.ui_edit.edit_page", uuid=uuid, unpause_on_save=1), | ||||
|         data={ | ||||
|             "url": test_url, | ||||
|             "fetch_backend": "html_requests", | ||||
|             "conditions_match_logic": "ALL",  # ALL = AND logic | ||||
|             "conditions-0-field": "levenshtein_ratio", | ||||
|             "conditions-0-operator": "<", | ||||
|             "conditions-0-value": "0.8" # needs to be more of a diff to trigger a change | ||||
|         }, | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|  | ||||
|     assert b"unpaused" in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data | ||||
|  | ||||
|     # Check the content saved initially, even tho a condition was set - this is the first snapshot so shouldnt be affected by conditions | ||||
|     res = client.get( | ||||
|         url_for("ui.ui_views.preview_page", uuid=uuid), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b'Which is across multiple lines' in res.data | ||||
|  | ||||
|  | ||||
|     ############### Now change it a LITTLE bit... | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write("""<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
|      So let's see what happenxxxxxxxxx.  <br> | ||||
|      </body> | ||||
|      </html> | ||||
|     """) | ||||
|  | ||||
|     res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     assert b'Queued 1 watch for rechecking.' in res.data | ||||
|     wait_for_all_checks(client) | ||||
|  | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' not in res.data #because this will be like 0.90 not 0.8 threshold | ||||
|  | ||||
|     ############### Now change it a MORE THAN 50% | ||||
|     test_return_data = """<html> | ||||
|        <body> | ||||
|      Some sxxxx<br> | ||||
|      <p>Which is across a lines</p> | ||||
|      <br> | ||||
|      ok.  <br> | ||||
|      </body> | ||||
|      </html> | ||||
|     """ | ||||
|  | ||||
|     with open("test-datastore/endpoint-content.txt", "w") as f: | ||||
|         f.write(test_return_data) | ||||
|     res = client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
|     assert b'Queued 1 watch for rechecking.' in res.data | ||||
|     wait_for_all_checks(client) | ||||
|     res = client.get(url_for("watchlist.index")) | ||||
|     assert b'unviewed' in res.data | ||||
|     # cleanup for the next | ||||
|     client.get( | ||||
|         url_for("ui.form_delete", uuid="all"), | ||||
|         follow_redirects=True | ||||
|     ) | ||||
| @@ -32,13 +32,14 @@ def test_strip_text_func(): | ||||
|     stripped_content = html_tools.strip_ignore_text(test_content, ignore) | ||||
|     assert stripped_content == "Some initial text\n\nWhich is across multiple lines\n\n\n\nSo let's see what happens." | ||||
|  | ||||
| def set_original_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
| def set_original_ignore_response(ver_stamp="123"): | ||||
|     test_return_data = f"""<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
|      So let's see what happens.  <br> | ||||
|      <link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/> | ||||
|      </body> | ||||
|      </html> | ||||
|  | ||||
| @@ -48,13 +49,14 @@ def set_original_ignore_response(): | ||||
|         f.write(test_return_data) | ||||
|  | ||||
|  | ||||
| def set_modified_original_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
| def set_modified_original_ignore_response(ver_stamp="123"): | ||||
|     test_return_data = f"""<html> | ||||
|        <body> | ||||
|      Some NEW nice initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <br> | ||||
|      So let's see what happens.  <br> | ||||
|      <link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/> | ||||
|      <p>new ignore stuff</p> | ||||
|      <p>blah</p> | ||||
|      </body> | ||||
| @@ -67,14 +69,15 @@ def set_modified_original_ignore_response(): | ||||
|  | ||||
|  | ||||
| # Is the same but includes ZZZZZ, 'ZZZZZ' is the last line in ignore_text | ||||
| def set_modified_ignore_response(): | ||||
|     test_return_data = """<html> | ||||
| def set_modified_ignore_response(ver_stamp="123"): | ||||
|     test_return_data = f"""<html> | ||||
|        <body> | ||||
|      Some initial text<br> | ||||
|      <p>Which is across multiple lines</p> | ||||
|      <P>ZZZZz</P> | ||||
|      <br> | ||||
|      So let's see what happens.  <br> | ||||
|      <link href="https://www.somesite/wp-content/themes/cooltheme/style2.css?v={ver_stamp}" rel="stylesheet"/> | ||||
|      </body> | ||||
|      </html> | ||||
|  | ||||
| @@ -165,9 +168,9 @@ def test_check_ignore_text_functionality(client, live_server, measure_memory_usa | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| # When adding some ignore text, it should not trigger a change, even if something else on that line changes | ||||
| def test_check_global_ignore_text_functionality(client, live_server, measure_memory_usage): | ||||
|     #live_server_setup(live_server) | ||||
|     ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ" | ||||
| def _run_test_global_ignore(client, as_source=False, extra_ignore=""): | ||||
|     ignore_text = "XXXXX\r\nYYYYY\r\nZZZZZ\r\n"+extra_ignore | ||||
|  | ||||
|     set_original_ignore_response() | ||||
|  | ||||
|     # Goto the settings page, add our ignore text | ||||
| @@ -186,6 +189,10 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|  | ||||
|     # Add our URL to the import page | ||||
|     test_url = url_for('test_endpoint', _external=True) | ||||
|     if as_source: | ||||
|         # Switch to source mode so we can test that too! | ||||
|         test_url = "source:"+test_url | ||||
|  | ||||
|     res = client.post( | ||||
|         url_for("imports.import_page"), | ||||
|         data={"urls": test_url}, | ||||
| @@ -203,12 +210,15 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|         follow_redirects=True | ||||
|     ) | ||||
|     assert b"Updated watch." in res.data | ||||
|  | ||||
|     wait_for_all_checks(client) | ||||
|     # Check it saved | ||||
|     res = client.get( | ||||
|         url_for("settings.settings_page"), | ||||
|     ) | ||||
|     assert bytes(ignore_text.encode('utf-8')) in res.data | ||||
|  | ||||
|     for i in ignore_text.splitlines(): | ||||
|         assert bytes(i.encode('utf-8')) in res.data | ||||
|  | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
| @@ -221,7 +231,8 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|  | ||||
|     # Make a change which includes the ignore text, it should be ignored and no 'change' triggered | ||||
|     # It adds text with "ZZZZzzzz" and "ZZZZ" is in the ignore list | ||||
|     set_modified_ignore_response() | ||||
|     # And tweaks the ver_stamp which should be picked up by global regex ignore | ||||
|     set_modified_ignore_response(ver_stamp=time.time()) | ||||
|  | ||||
|     # Trigger a check | ||||
|     client.get(url_for("ui.form_watch_checknow"), follow_redirects=True) | ||||
| @@ -243,3 +254,11 @@ def test_check_global_ignore_text_functionality(client, live_server, measure_mem | ||||
|  | ||||
|     res = client.get(url_for("ui.form_delete", uuid="all"), follow_redirects=True) | ||||
|     assert b'Deleted' in res.data | ||||
|  | ||||
| def test_check_global_ignore_text_functionality(client, live_server): | ||||
|     """Run the shared global ignore-text scenario against a regular (non ``source:``) watch.""" | ||||
|     #live_server_setup(live_server) | ||||
|     _run_test_global_ignore(client, as_source=False) | ||||
|  | ||||
| def test_check_global_ignore_text_functionality_as_source(client, live_server): | ||||
|     #live_server_setup(live_server) | ||||
|     _run_test_global_ignore(client, as_source=True, extra_ignore='/\?v=\d/') | ||||
|   | ||||
| @@ -72,7 +72,7 @@ services: | ||||
|    | ||||
|       # Comment out ports: when using behind a reverse proxy , enable networks: etc. | ||||
|       ports: | ||||
|         - 5000:5000 | ||||
|         - 127.0.0.1:5000:5000 | ||||
|       restart: unless-stopped | ||||
|  | ||||
|      # Used for fetching pages via WebDriver+Chrome where you need Javascript support. | ||||
| @@ -82,7 +82,7 @@ services: | ||||
|      # If WEBDRIVER or PLAYWRIGHT are enabled, changedetection container depends on that | ||||
|      # and must wait before starting (substitute "browser-chrome" with "playwright-chrome" if last one is used) | ||||
| #      depends_on: | ||||
| #          sockpuppetbrowser: | ||||
| #          browser-sockpuppet-chrome: | ||||
| #              condition: service_started | ||||
|  | ||||
|  | ||||
|   | ||||
| @@ -53,7 +53,8 @@ lxml >=4.8.0,<6,!=5.2.0,!=5.2.1 | ||||
| # XPath 2.0-3.1 support - 4.2.0 broke something? | ||||
| elementpath==4.1.5 | ||||
|  | ||||
| selenium~=4.14.0 | ||||
| selenium==4.31.0 | ||||
|  | ||||
|  | ||||
| # https://github.com/pallets/werkzeug/issues/2985 | ||||
| # Maybe related to pytest? | ||||
| @@ -90,6 +91,8 @@ extruct | ||||
| # For cleaning up unknown currency formats | ||||
| babel | ||||
|  | ||||
| levenshtein | ||||
|  | ||||
| # Needed for > 3.10, https://github.com/microsoft/playwright-python/issues/2096 | ||||
| greenlet >= 3.0.3 | ||||
|  | ||||
| @@ -110,3 +113,6 @@ pluggy ~= 1.5 | ||||
|  | ||||
| # Needed for testing, cross-platform for process and system monitoring | ||||
| psutil==7.0.0 | ||||
|  | ||||
| ruff >= 0.11.2 | ||||
| pre_commit >= 4.2.0 | ||||
|   | ||||
		Reference in New Issue
	
	Block a user