mirror of
https://github.com/dgtlmoon/changedetection.io.git
synced 2025-12-14 03:56:08 +00:00
Pluggable content fetchers
This commit is contained in:
@@ -182,11 +182,9 @@ document.addEventListener('DOMContentLoaded', function() {
|
||||
</div>
|
||||
<div class="status-icons">
|
||||
<a class="link-spread" href="{{url_for('ui.form_share_put_watch', uuid=watch.uuid)}}"><img src="{{url_for('static_content', group='images', filename='spread.svg')}}" class="status-icon icon icon-spread" title="Create a link to share watch config with others" ></a>
|
||||
{%- if watch.get_fetch_backend == "html_webdriver"
|
||||
or ( watch.get_fetch_backend == "system" and system_default_fetcher == 'html_webdriver' )
|
||||
or "extra_browser_" in watch.get_fetch_backend
|
||||
-%}
|
||||
<img class="status-icon" src="{{url_for('static_content', group='images', filename='google-chrome-icon.png')}}" alt="Using a Chrome browser" title="Using a Chrome browser" >
|
||||
{%- set effective_fetcher = watch.get_fetch_backend if watch.get_fetch_backend != "system" else system_default_fetcher -%}
|
||||
{%- if effective_fetcher and ("html_webdriver" in effective_fetcher or "html_" in effective_fetcher or "extra_browser_" in effective_fetcher) -%}
|
||||
{{ effective_fetcher|fetcher_status_icons }}
|
||||
{%- endif -%}
|
||||
{%- if watch.is_pdf -%}<img class="status-icon" src="{{url_for('static_content', group='images', filename='pdf-icon.svg')}}" alt="Converting PDF to text" >{%- endif -%}
|
||||
{%- if watch.has_browser_steps -%}<img class="status-icon status-browsersteps" src="{{url_for('static_content', group='images', filename='steps.svg')}}" alt="Browser Steps is enabled" >{%- endif -%}
|
||||
|
||||
@@ -7,6 +7,9 @@ import os
|
||||
# Visual Selector scraper - 'Button' is there because some sites have <button>OUT OF STOCK</button>.
|
||||
visualselector_xpath_selectors = 'div,span,form,table,tbody,tr,td,a,p,ul,li,h1,h2,h3,h4,header,footer,section,article,aside,details,main,nav,section,summary,button'
|
||||
|
||||
# Import hookimpl from centralized pluggy interface
|
||||
from changedetectionio.pluggy_interface import hookimpl
|
||||
|
||||
SCREENSHOT_MAX_HEIGHT_DEFAULT = 20000
|
||||
SCREENSHOT_DEFAULT_QUALITY = 40
|
||||
|
||||
@@ -35,17 +38,54 @@ def available_fetchers():
|
||||
# See the if statement at the bottom of this file for how we switch between playwright and webdriver
|
||||
import inspect
|
||||
p = []
|
||||
|
||||
# Get built-in fetchers (but skip plugin fetchers that were added via setattr)
|
||||
for name, obj in inspect.getmembers(sys.modules[__name__], inspect.isclass):
|
||||
if inspect.isclass(obj):
|
||||
# @todo html_ is maybe better as fetcher_ or something
|
||||
# In this case, make sure to edit the default one in store.py and fetch_site_status.py
|
||||
if name.startswith('html_'):
|
||||
t = tuple([name, obj.fetcher_description])
|
||||
p.append(t)
|
||||
# Skip plugin fetchers that were already registered
|
||||
if name not in _plugin_fetchers:
|
||||
t = tuple([name, obj.fetcher_description])
|
||||
p.append(t)
|
||||
|
||||
# Get plugin fetchers from cache (already loaded at module init)
|
||||
for name, fetcher_class in _plugin_fetchers.items():
|
||||
if hasattr(fetcher_class, 'fetcher_description'):
|
||||
t = tuple([name, fetcher_class.fetcher_description])
|
||||
p.append(t)
|
||||
else:
|
||||
logger.warning(f"Plugin fetcher '{name}' does not have fetcher_description attribute")
|
||||
|
||||
return p
|
||||
|
||||
|
||||
def get_plugin_fetchers():
    """Load and return all plugin fetchers from the centralized plugin manager.

    Calls the ``register_content_fetcher`` hook and, for every usable result,
    stores the fetcher class under its name and also attaches it to this
    module so that ``hasattr()``/``getattr()`` lookups on the module find it.

    A malformed result from one plugin is logged and skipped; it no longer
    prevents the remaining plugins from being registered.

    Returns:
        dict: Mapping of fetcher name -> fetcher class (possibly empty).
    """
    from changedetectionio.pluggy_interface import plugin_manager

    fetchers = {}

    # The hook call itself runs third-party plugin code, so a failure here is
    # logged and treated as "no plugin fetchers" rather than breaking import.
    try:
        results = plugin_manager.hook.register_content_fetcher()
    except Exception as e:
        logger.error(f"Error loading plugin fetchers: {e}")
        return fetchers

    this_module = sys.modules[__name__]
    for result in results or ():
        if not result:
            continue

        try:
            # Each result is expected to be a (name, fetcher_class) pair
            name, fetcher_class = result
            # Register in current module so hasattr() checks work
            setattr(this_module, name, fetcher_class)
            fetchers[name] = fetcher_class
        except (TypeError, ValueError) as e:
            logger.error(f"Ignoring malformed plugin fetcher registration {result!r}: {e}")
            continue

        logger.info(f"Registered plugin fetcher: {name} - {getattr(fetcher_class, 'fetcher_description', 'No description')}")

    return fetchers
|
||||
|
||||
|
||||
# Initialize plugins at module load time
|
||||
_plugin_fetchers = get_plugin_fetchers()
|
||||
|
||||
|
||||
# Decide which is the 'real' HTML webdriver, this is more a system wide config
|
||||
# rather than site-specific.
|
||||
use_playwright_as_chrome_fetcher = os.getenv('PLAYWRIGHT_DRIVER_URL', False)
|
||||
@@ -62,3 +102,8 @@ else:
|
||||
logger.debug("Falling back to selenium as fetcher")
|
||||
from .webdriver_selenium import fetcher as html_webdriver
|
||||
|
||||
|
||||
# Register built-in fetchers as plugins after all imports are complete
|
||||
from changedetectionio.pluggy_interface import register_builtin_fetchers
|
||||
register_builtin_fetchers()
|
||||
|
||||
|
||||
@@ -64,6 +64,24 @@ class Fetcher():
|
||||
# Time ON TOP of the system-defined env minimum time
|
||||
render_extract_delay = 0
|
||||
|
||||
@classmethod
def get_status_icon_data(cls):
    """Describe the status icon shown for this fetcher in the watch overview.

    The base implementation provides no icon. Subclasses may override this
    and return a dictionary of the following shape:

        {
            'filename': 'icon-name.svg',   # Icon filename
            'alt': 'Alt text',             # Alt attribute
            'title': 'Tooltip text',       # Title attribute
            'style': 'height: 1em;'        # Optional inline CSS
        }

    Returns:
        dict or None: Icon data as described above, or None for "no icon".
    """
    # No icon by default
    return None
|
||||
|
||||
def clear_content(self):
|
||||
"""
|
||||
Explicitly clear all content from memory to free up heap space.
|
||||
|
||||
@@ -89,6 +89,15 @@ class fetcher(Fetcher):
|
||||
|
||||
proxy = None
|
||||
|
||||
@classmethod
def get_status_icon_data(cls):
    """Status icon for the Playwright fetcher: the Chrome browser icon.

    Returns:
        dict: 'filename', 'alt' and 'title' entries for the icon.
    """
    chrome_label = 'Using a Chrome browser'
    return dict(
        filename='google-chrome-icon.png',
        alt=chrome_label,
        title=chrome_label,
    )
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
|
||||
@@ -330,4 +339,17 @@ class fetcher(Fetcher):
|
||||
browser = None
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class PlaywrightFetcherPlugin:
    """Built-in plugin wrapper exposing the Playwright fetcher to the plugin system."""

    # NOTE(review): no @hookimpl marker is visible on this method in this
    # view - confirm the plugin manager actually treats it as a hook
    # implementation.
    def register_content_fetcher(self):
        """Return the (fetcher_name, fetcher_class) pair for the Playwright fetcher."""
        registration = ('html_webdriver', fetcher)
        return registration
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
playwright_plugin = PlaywrightFetcherPlugin()
|
||||
|
||||
|
||||
|
||||
|
||||
@@ -98,6 +98,15 @@ class fetcher(Fetcher):
|
||||
|
||||
proxy = None
|
||||
|
||||
@classmethod
def get_status_icon_data(cls):
    """Status icon for the Puppeteer fetcher: the Chrome browser icon.

    Returns:
        dict: 'filename', 'alt' and 'title' entries for the icon.
    """
    chrome_label = 'Using a Chrome browser'
    return dict(
        filename='google-chrome-icon.png',
        alt=chrome_label,
        title=chrome_label,
    )
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
|
||||
@@ -384,3 +393,16 @@ class fetcher(Fetcher):
|
||||
)
|
||||
except asyncio.TimeoutError:
|
||||
raise (BrowserFetchTimedOut(msg=f"Browser connected but was unable to process the page in {max_time} seconds."))
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class PuppeteerFetcherPlugin:
    """Built-in plugin wrapper exposing the Puppeteer fetcher to the plugin system."""

    # NOTE(review): no @hookimpl marker is visible on this method in this
    # view - confirm the plugin manager actually treats it as a hook
    # implementation.
    def register_content_fetcher(self):
        """Return the (fetcher_name, fetcher_class) pair for the Puppeteer fetcher."""
        registration = ('html_webdriver', fetcher)
        return registration
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
puppeteer_plugin = PuppeteerFetcherPlugin()
|
||||
|
||||
@@ -163,3 +163,15 @@ class fetcher(Fetcher):
|
||||
except Exception as e:
|
||||
logger.warning(f"Failed to unlink screenshot: {screenshot} - {e}")
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class RequestsFetcherPlugin:
    """Built-in plugin wrapper exposing the requests fetcher to the plugin system."""

    # NOTE(review): no @hookimpl marker is visible on this method in this
    # view - confirm the plugin manager actually treats it as a hook
    # implementation.
    def register_content_fetcher(self):
        """Return the (fetcher_name, fetcher_class) pair for the requests fetcher."""
        registration = ('html_requests', fetcher)
        return registration
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
requests_plugin = RequestsFetcherPlugin()
|
||||
|
||||
@@ -14,6 +14,15 @@ class fetcher(Fetcher):
|
||||
proxy = None
|
||||
proxy_url = None
|
||||
|
||||
@classmethod
def get_status_icon_data(cls):
    """Status icon for the WebDriver fetcher: the Chrome browser icon.

    Returns:
        dict: 'filename', 'alt' and 'title' entries for the icon.
    """
    chrome_label = 'Using a Chrome browser'
    return dict(
        filename='google-chrome-icon.png',
        alt=chrome_label,
        title=chrome_label,
    )
|
||||
|
||||
def __init__(self, proxy_override=None, custom_browser_connection_url=None):
|
||||
super().__init__()
|
||||
from urllib.parse import urlparse
|
||||
@@ -141,3 +150,16 @@ class fetcher(Fetcher):
|
||||
# Run the selenium operations in a thread pool to avoid blocking the event loop
|
||||
loop = asyncio.get_event_loop()
|
||||
await loop.run_in_executor(None, _run_sync)
|
||||
|
||||
|
||||
# Plugin registration for built-in fetcher
|
||||
class WebDriverSeleniumFetcherPlugin:
    """Built-in plugin wrapper exposing the WebDriver Selenium fetcher to the plugin system."""

    # NOTE(review): no @hookimpl marker is visible on this method in this
    # view - confirm the plugin manager actually treats it as a hook
    # implementation.
    def register_content_fetcher(self):
        """Return the (fetcher_name, fetcher_class) pair for the WebDriver Selenium fetcher."""
        registration = ('html_webdriver', fetcher)
        return registration
|
||||
|
||||
|
||||
# Create module-level instance for plugin registration
|
||||
webdriver_selenium_plugin = WebDriverSeleniumFetcherPlugin()
|
||||
|
||||
@@ -210,6 +210,55 @@ def _jinja2_filter_seconds_precise(timestamp):
|
||||
|
||||
return format(int(time.time()-timestamp), ',d')
|
||||
|
||||
@app.template_filter('fetcher_status_icons')
def _jinja2_filter_fetcher_status_icons(fetcher_name):
    """Get status icon HTML for a given fetcher.

    Plugin-provided icon data (via ``collect_fetcher_status_icons``) takes
    priority; otherwise the fetcher class of that name on the
    ``content_fetchers`` module is asked through ``get_status_icon_data()``.

    Icon data is a dict with 'filename' (required), and optional 'alt',
    'title', 'style' and 'group' (static group, defaults to 'images').
    Because this data can come from plugins, every value is HTML-escaped
    before being placed into the returned markup.

    Args:
        fetcher_name: The fetcher name (e.g., 'html_webdriver', 'html_js_zyte')

    Returns:
        Markup or str: An ``<img>`` element for the icon, or '' when there is
        no usable icon data.
    """
    from changedetectionio import content_fetchers
    from changedetectionio.pluggy_interface import collect_fetcher_status_icons
    from markupsafe import Markup, escape
    from flask import url_for

    # First check if it's a plugin fetcher (plugins have priority)
    icon_data = collect_fetcher_status_icons(fetcher_name)

    # Otherwise fall back to the fetcher class registered on the module
    if not icon_data:
        fetcher_class = getattr(content_fetchers, fetcher_name, None)
        get_icon_data = getattr(fetcher_class, 'get_status_icon_data', None)
        if callable(get_icon_data):
            icon_data = get_icon_data()

    # Without a dict carrying at least a filename there is nothing to render
    if not icon_data or not isinstance(icon_data, dict) or not icon_data.get('filename'):
        return ''

    # Use 'group' from icon_data if specified, otherwise default to 'images'
    group = icon_data.get('group', 'images')
    filename = icon_data['filename']

    # Try to use url_for, but fall back to manual URL building if endpoint not registered yet
    try:
        icon_url = url_for('static_content', group=group, filename=filename)
    except Exception:
        # Fallback: build URL manually respecting APPLICATION_ROOT
        from flask import request
        app_root = request.script_root if hasattr(request, 'script_root') else ''
        icon_url = f"{app_root}/static/{group}/{filename}"

    # Escape every interpolated value: the result is marked safe via Markup,
    # so unescaped quotes/angle brackets would end up verbatim in the page.
    style = icon_data.get('style')
    style_attr = f' style="{escape(style)}"' if style else ''
    alt = escape(icon_data.get('alt', ''))
    title = escape(icon_data.get('title', ''))
    html = f'<img class="status-icon" src="{escape(icon_url)}" alt="{alt}" title="{title}"{style_attr}>'
    return Markup(html)
|
||||
|
||||
# Import login_optionally_required from auth_decorator
|
||||
from changedetectionio.auth_decorator import login_optionally_required
|
||||
|
||||
@@ -488,6 +537,31 @@ def changedetection_app(config=None, datastore_o=None):
|
||||
except FileNotFoundError:
|
||||
abort(404)
|
||||
|
||||
# Handle plugin group specially
|
||||
if group == 'plugin':
|
||||
# Serve files from plugin static directories
|
||||
from changedetectionio.pluggy_interface import plugin_manager
|
||||
import os as os_check
|
||||
|
||||
for plugin_name, plugin_obj in plugin_manager.list_name_plugin():
|
||||
if hasattr(plugin_obj, 'plugin_static_path'):
|
||||
try:
|
||||
static_path = plugin_obj.plugin_static_path()
|
||||
if static_path and os_check.path.isdir(static_path):
|
||||
# Check if file exists in plugin's static directory
|
||||
plugin_file_path = os_check.path.join(static_path, filename)
|
||||
if os_check.path.isfile(plugin_file_path):
|
||||
# Found the file in a plugin
|
||||
response = make_response(send_from_directory(static_path, filename))
|
||||
response.headers['Cache-Control'] = 'max-age=3600, public' # Cache for 1 hour
|
||||
return response
|
||||
except Exception as e:
|
||||
logger.debug(f"Error checking plugin {plugin_name} for static file: {e}")
|
||||
pass
|
||||
|
||||
# File not found in any plugin
|
||||
abort(404)
|
||||
|
||||
# These files should be in our subdirectory
|
||||
try:
|
||||
return send_from_directory(f"static/{group}", path=filename)
|
||||
|
||||
@@ -25,6 +25,40 @@ class ChangeDetectionSpec:
|
||||
"""
|
||||
pass
|
||||
|
||||
@hookspec
def register_content_fetcher(self):
    """Hook for plugins that contribute a content fetcher.

    Implementations return a ``(fetcher_name, fetcher_class)`` pair.
    ``fetcher_name`` should start with 'html_' and ``fetcher_class`` should
    inherit from changedetectionio.content_fetchers.base.Fetcher.

    Returns:
        tuple: (str: fetcher_name, class: fetcher_class)
    """
|
||||
|
||||
@hookspec
def fetcher_status_icon(self, fetcher_name):
    """Return status icon data for a content fetcher.

    The collector for this hook only accepts non-empty ``dict`` results, so
    an implementation must return icon data as a dictionary, not an HTML
    string. The template filter that renders the icon reads these keys:

        {
            'filename': 'icon-name.svg',   # Icon filename
            'alt': 'Alt text',             # Alt attribute
            'title': 'Tooltip text',       # Title attribute
            'style': 'height: 1em;',       # Optional inline CSS
            'group': 'images'              # Optional static group, defaults to 'images'
        }

    Args:
        fetcher_name: The name of the fetcher (e.g., 'html_webdriver', 'html_js_zyte')

    Returns:
        dict or None: Icon data for ``fetcher_name``, or None if this plugin
        provides no custom status icon for it.
    """
    pass
|
||||
|
||||
@hookspec
def plugin_static_path(self):
    """Hook for plugins that ship their own static files.

    Returns:
        str: Absolute path to the plugin's static directory, or None if no static files
    """
|
||||
|
||||
|
||||
# Set up Plugin Manager
|
||||
plugin_manager = pluggy.PluginManager(PLUGIN_NAMESPACE)
|
||||
@@ -65,6 +99,28 @@ load_plugins_from_directories()
|
||||
# Discover installed plugins from external packages (if any)
|
||||
plugin_manager.load_setuptools_entrypoints(PLUGIN_NAMESPACE)
|
||||
|
||||
# Function to register built-in fetchers - called later from content_fetchers/__init__.py
|
||||
def register_builtin_fetchers():
    """Register the built-in content fetchers with the plugin manager.

    Invoked from content_fetchers/__init__.py once all fetchers have been
    imported, which avoids circular import issues. Each fetcher module is
    only registered when it exposes its module-level plugin instance.
    """
    from changedetectionio.content_fetchers import requests, playwright, puppeteer, webdriver_selenium

    # (module, attribute holding the plugin instance, name to register under)
    builtin_plugins = (
        (requests, 'requests_plugin', 'builtin_requests'),
        (playwright, 'playwright_plugin', 'builtin_playwright'),
        (puppeteer, 'puppeteer_plugin', 'builtin_puppeteer'),
        (webdriver_selenium, 'webdriver_selenium_plugin', 'builtin_webdriver_selenium'),
    )

    for fetcher_module, plugin_attr, registered_name in builtin_plugins:
        if hasattr(fetcher_module, plugin_attr):
            plugin_manager.register(getattr(fetcher_module, plugin_attr), registered_name)
|
||||
|
||||
# Helper function to collect UI stats extras from all plugins
|
||||
def collect_ui_edit_stats_extras(watch):
|
||||
"""Collect and combine HTML content from all plugins that implement ui_edit_stats_extras"""
|
||||
@@ -80,3 +136,23 @@ def collect_ui_edit_stats_extras(watch):
|
||||
extras_content.append(result)
|
||||
|
||||
return "\n".join(extras_content) if extras_content else ""
|
||||
|
||||
def collect_fetcher_status_icons(fetcher_name):
    """Ask every plugin for status icon data and pick the first usable answer.

    Args:
        fetcher_name: The name of the fetcher (e.g., 'html_webdriver', 'html_js_zyte')

    Returns:
        dict or None: The first non-empty dict returned by a plugin's
        ``fetcher_status_icon`` hook, or None when no plugin supplied one.
    """
    hook_results = plugin_manager.hook.fetcher_status_icon(fetcher_name=fetcher_name)

    # Only non-empty dictionaries count as icon data; everything else is ignored
    usable = (
        candidate
        for candidate in (hook_results or ())
        if candidate and isinstance(candidate, dict)
    )
    return next(usable, None)
|
||||
Reference in New Issue
Block a user