Compare commits

...

16 Commits

Author SHA1 Message Date
dgtlmoon
65bc76f11b add comments 2024-01-23 17:03:02 +01:00
dgtlmoon
1aa0070ae2 remove example hooks 2024-01-23 17:00:02 +01:00
dgtlmoon
0ab3a83a11 Merge branch 'master' into pluggy-2 2024-01-23 16:50:28 +01:00
dgtlmoon
42c6f8fc37 some plugin config 2024-01-18 23:19:00 +01:00
dgtlmoon
06744dbd3a wrap form 2024-01-17 23:54:15 +01:00
dgtlmoon
c6433815e4 add filter 2024-01-17 23:47:29 +01:00
dgtlmoon
ce97d67ecf Merge branch 'pluggy-2' of github.com:dgtlmoon/changedetection.io into pluggy-2 2024-01-17 23:47:03 +01:00
dgtlmoon
25778a8102 Add plugins to pip 2024-01-17 23:13:46 +01:00
dgtlmoon
b88998feea Merge branch 'master' into pluggy-2 2024-01-17 23:12:22 +01:00
dgtlmoon
494740e3f8 Merge branch 'master' into pluggy-2 2024-01-17 11:44:44 +01:00
dgtlmoon
2769abf374 maybe 2024-01-16 00:02:21 +01:00
dgtlmoon
690b16b710 rename 2024-01-15 23:55:44 +01:00
dgtlmoon
8563126287 tweaks 2024-01-15 23:52:30 +01:00
dgtlmoon
f6c667b0a8 fix import 2024-01-15 23:46:43 +01:00
dgtlmoon
774923f67d add pluggy 2024-01-15 23:37:45 +01:00
dgtlmoon
432ee1236d WIP 2024-01-15 23:34:53 +01:00
14 changed files with 163 additions and 30 deletions

View File

@@ -1,6 +1,7 @@
recursive-include changedetectionio/api *
recursive-include changedetectionio/blueprint *
recursive-include changedetectionio/model *
recursive-include changedetectionio/plugins *
recursive-include changedetectionio/processors *
recursive-include changedetectionio/res *
recursive-include changedetectionio/static *

View File

@@ -101,6 +101,7 @@ class Fetcher():
error = None
fetcher_description = "No description"
headers = {}
is_plaintext = None
instock_data = None
instock_data_js = ""
status_code = None

View File

@@ -1,6 +1,6 @@
#!/usr/bin/python3
from changedetectionio import queuedWatchMetaData
from changedetectionio import queuedWatchMetaData, html_tools, __version__
from copy import deepcopy
from distutils.util import strtobool
from feedgen.feed import FeedGenerator
@@ -35,8 +35,6 @@ from flask import (
)
from flask_paginate import Pagination, get_page_parameter
from changedetectionio import html_tools, __version__
from changedetectionio.api import api_v1
datastore = None
@@ -50,6 +48,18 @@ extra_stylesheets = []
update_q = queue.PriorityQueue()
notification_q = queue.Queue()
def get_plugin_manager():
    """Build the pluggy plugin manager for the application.

    Registers the hook specifications from ``changedetectionio.plugins.hookspecs``,
    loads plugins advertised under the ``changedetectionio_plugin`` setuptools
    entry-point group, then registers the whois plugin module from
    ``changedetectionio.plugins``.

    :return: a ``pluggy.PluginManager``; a new one is constructed on every call
    """
    import pluggy
    from changedetectionio.plugins import hookspecs, whois as whois_plugin

    # The same name is used for the manager's project name and the entry-point group
    project_name = "changedetectionio_plugin"

    manager = pluggy.PluginManager(project_name)
    manager.add_hookspecs(hookspecs)
    manager.load_setuptools_entrypoints(project_name)
    manager.register(whois_plugin)
    return manager
app = Flask(__name__,
static_url_path="",
static_folder="static",
@@ -96,7 +106,6 @@ def init_app_secret(datastore_path):
return secret
@app.template_global()
def get_darkmode_state():
css_dark_mode = request.cookies.get('css_dark_mode', 'false')
@@ -629,7 +638,6 @@ def changedetection_app(config=None, datastore_o=None):
form.fetch_backend.choices.append(p)
form.fetch_backend.choices.append(("system", 'System settings default'))
# form.browser_steps[0] can be assumed that we 'goto url' first
if datastore.proxy_list is None:
@@ -730,6 +738,8 @@ def changedetection_app(config=None, datastore_o=None):
if (watch.get('fetch_backend') == 'system' and system_uses_webdriver) or watch.get('fetch_backend') == 'html_webdriver' or watch.get('fetch_backend', '').startswith('extra_browser_'):
is_html_webdriver = True
processor_config = next((p[2] for p in processors.available_processors() if p[0] == watch.get('processor')), None)
# Only works reliably with Playwright
visualselector_enabled = os.getenv('PLAYWRIGHT_DRIVER_URL', False) and is_html_webdriver
output = render_template("edit.html",
@@ -744,6 +754,7 @@ def changedetection_app(config=None, datastore_o=None):
is_html_webdriver=is_html_webdriver,
jq_support=jq_support,
playwright_enabled=os.getenv('PLAYWRIGHT_DRIVER_URL', False),
processor_config=processor_config,
settings_application=datastore.data['settings']['application'],
using_global_webdriver_wait=default['webdriver_delay'] is None,
uuid=uuid,
@@ -824,11 +835,14 @@ def changedetection_app(config=None, datastore_o=None):
flash("An error occurred, please see below.", "error")
output = render_template("settings.html",
form=form,
hide_remove_pass=os.getenv("SALTED_PASS", False),
api_key=datastore.data['settings']['application'].get('api_access_token'),
emailprefix=os.getenv('NOTIFICATION_MAIL_BUTTON_PREFIX', False),
settings_application=datastore.data['settings']['application'])
form=form,
hide_remove_pass=os.getenv("SALTED_PASS", False),
settings_application=datastore.data['settings']['application'],
plugins=[]
)
return output

View File

@@ -410,7 +410,7 @@ class quickWatchForm(Form):
url = fields.URLField('URL', validators=[validateURL()])
tags = StringTagUUID('Group tag', [validators.Optional()])
watch_submit_button = SubmitField('Watch', render_kw={"class": "pure-button pure-button-primary"})
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
processor = RadioField(u'Processor', choices=[t[:2] for t in processors.available_processors()], default="text_json_diff")
edit_and_watch_submit_button = SubmitField('Edit > Watch', render_kw={"class": "pure-button pure-button-primary"})
@@ -427,7 +427,7 @@ class commonSettingsForm(Form):
message="Should contain one or more seconds")])
class importForm(Form):
from . import processors
processor = RadioField(u'Processor', choices=processors.available_processors(), default="text_json_diff")
processor = RadioField(u'Processor', choices=[t[:2] for t in processors.available_processors()], default="text_json_diff")
urls = TextAreaField('URLs')
xlsx_file = FileField('Upload .xlsx file', validators=[FileAllowed(['xlsx'], 'Must be .xlsx file!')])
file_mapping = SelectField('File mapping', [validators.DataRequired()], choices={('wachete', 'Wachete mapping'), ('custom','Custom mapping')})

View File

@@ -38,6 +38,7 @@ class model(dict):
'notification_format': default_notification_format,
'notification_title': default_notification_title,
'notification_urls': [], # Apprise URL list
'plugins': [], # list of dict, keyed by plugin name, with dict of the config and enabled true/false
'pager_size': 50,
'password': False,
'render_anchor_tag_content': False,

View File

@@ -0,0 +1,6 @@
import pluggy
# Shared hook-implementation marker; the project name must match the one used
# by the hook specifications and the plugin manager ("changedetectionio_plugin").
hookimpl = pluggy.HookimplMarker("changedetectionio_plugin")
"""Marker to be imported and used in plugins (and for own implementations)"""

View File

@@ -0,0 +1,20 @@
import pluggy
from changedetectionio.store import ChangeDetectionStore
hookspec = pluggy.HookspecMarker("changedetectionio_plugin")
@hookspec
def extra_processor():
    """Advertise an additional processor that a watch can be switched to.

    NOTE(review): the previous wording was "Defines a new fetch method" with a
    2-tuple return; the bundled whois implementation and available_processors()
    both work with a 3-tuple, so the description below follows that - confirm.

    :return: a tuple of (machine_name, description, processor_config), where
             processor_config is a dict of the ``needs_*`` UI flags
    """
@hookspec(firstresult=True)
def processor_call(processor_name: str, datastore: ChangeDetectionStore, watch_uuid: str):
    """
    Ask the plugins for the processor object that should handle a watch.

    Declared with ``firstresult=True``: the hook call yields a single value
    (the first non-None result from an implementation) rather than a list.

    :param processor_name: machine name of the processor, as advertised by extra_processor
    :param datastore: the datastore handed on to the processor
    :param watch_uuid: UUID of the watch to be processed
    :return: a processor instance for processor_name, or None when the
             implementation does not provide that processor
    """

View File

@@ -0,0 +1,53 @@
"""
Whois information lookup
- Fetches using whois
- Extends the 'text_json_diff' so that text filters can still be used with whois information
@todo publish to pypi and github as a separate plugin
"""
from ..plugins import hookimpl
import changedetectionio.processors.text_json_diff as text_json_diff
from changedetectionio import content_fetcher
# would be changedetectionio.plugins in other apps
class text_json_filtering_whois(text_json_diff.perform_site_check):
    """text_json_diff processor whose content comes from a whois lookup.

    Only ``call_browser`` is replaced; everything else is inherited from
    ``text_json_diff.perform_site_check``.
    """

    def __init__(self, *args, datastore, watch_uuid, **kwargs):
        # Pure pass-through to the parent; datastore and watch_uuid stay keyword-only
        super().__init__(*args, datastore=datastore, watch_uuid=watch_uuid, **kwargs)

    def call_browser(self):
        """Set ``self.fetcher`` to a plaintext fetcher holding the whois text
        for the hostname of the watch's link."""
        import whois
        from urllib.parse import urlparse

        # A bare Fetcher acts as the result container; is_plaintext tells
        # text_json_diff not to run get_text or xpath/css filters on the content
        self.fetcher = content_fetcher.Fetcher()
        self.fetcher.is_plaintext = True

        hostname = urlparse(self.watch.link).hostname
        record = whois.whois(hostname)
        self.fetcher.content = record.text
@hookimpl
def extra_processor():
    """
    Advertise the whois processor.

    :return: tuple of (machine_name, description, processor_config)
    """
    from changedetectionio.processors import default_processor_config

    # Copy of the defaults, with the UI elements this processor does not use switched off
    ui_requirements = {
        **default_processor_config,
        'needs_request_fetch_method': False,
        'needs_browsersteps': False,
        'needs_visualselector': False,
    }
    return ('plugin_processor_whois', "Whois domain information fetch", ui_requirements)
# @todo When a watch chooses this extra_processor, the watch should ONLY use this one.
# (one watch can only have one extra_processor)
@hookimpl
def processor_call(processor_name, datastore, watch_uuid):
    """
    Return the whois processor for a watch when it is the one being requested.

    :param processor_name: machine name of the requested processor
    :param datastore: passed through to the processor
    :param watch_uuid: passed through to the processor
    :return: a text_json_filtering_whois instance, or None for any other processor name
    """
    # This name check could be removed, see the @todo note preceding this function
    if processor_name != 'plugin_processor_whois':
        return None

    return text_json_filtering_whois(datastore=datastore, watch_uuid=watch_uuid)

View File

@@ -7,6 +7,15 @@ from copy import deepcopy
from distutils.util import strtobool
from loguru import logger
# Which UI elements in settings the processor requires
# For example, the restock monitor isn't compatible with visualselector and filters
default_processor_config = {
'needs_request_fetch_method': True,
'needs_browsersteps': True,
'needs_visualselector': True,
'needs_filters': True,
}
class difference_detection_processor():
browser_steps = None
@@ -132,6 +141,15 @@ class difference_detection_processor():
def available_processors():
    """List the processors a watch can use: the built-in ones, then plugin ones.

    :return: list of (machine_name, description, processor_config) tuples.
             Built-in entries each carry their own copy of
             ``default_processor_config``; plugin entries are whatever the
             ``extra_processor`` hook implementations returned.
    """
    from . import restock_diff, text_json_diff
    from ..flask_app import get_plugin_manager

    # @todo Make this smarter with introspection of sorts.
    processors = [
        ('text_json_diff', text_json_diff.name, dict(default_processor_config)),
        ('restock_diff', restock_diff.name, dict(default_processor_config)),
    ]

    # One result per plugin implementing the extra_processor hook
    plugin_choices = get_plugin_manager().hook.extra_processor()
    if plugin_choices:
        processors.extend(plugin_choices)

    return processors

View File

@@ -155,7 +155,7 @@ class perform_site_check(difference_detection_processor):
html_content = self.fetcher.content
# If not JSON, and if it's not text/plain..
if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower():
if 'text/plain' in self.fetcher.get_all_headers().get('content-type', '').lower() or self.fetcher.is_plaintext:
# Don't run get_text or xpath/css filters on plaintext
stripped_text_from_html = html_content
else:

View File

@@ -39,12 +39,15 @@
<ul>
<li class="tab" id=""><a href="#general">General</a></li>
<li class="tab"><a href="#request">Request</a></li>
{% if playwright_enabled %}
{% if playwright_enabled and processor_config['needs_browsersteps'] %}
<li class="tab"><a id="browsersteps-tab" href="#browser-steps">Browser Steps</a></li>
{% endif %}
{% if watch['processor'] == 'text_json_diff' %}
{% if processor_config['needs_visualselector'] %}
<li class="tab"><a id="visualselector-tab" href="#visualselector">Visual Filter Selector</a></li>
{% endif %}
{% if processor_config['needs_filters'] %}
<li class="tab"><a href="#filters-and-triggers">Filters &amp; Triggers</a></li>
{% endif %}
@@ -67,16 +70,12 @@
{{ render_field(form.url, placeholder="https://...", required=true, class="m-d") }}
<span class="pure-form-message-inline">Some sites use JavaScript to create the content, for this you should <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Fetching-pages-with-WebDriver">use the Chrome/WebDriver Fetcher</a></span><br>
<span class="pure-form-message-inline">You can use variables in the URL, perfect for inserting the current date and other logic, <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Handling-variables-in-the-watched-URL">help and examples here</a></span><br>
<span class="pure-form-message-inline">
{% if watch['processor'] == 'text_json_diff' %}
Current mode: <strong>Webpage Text/HTML, JSON and PDF changes.</strong><br>
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=restock_diff" class="pure-button button-xsmall">Switch to re-stock detection mode.</a>
{% else %}
Current mode: <strong>Re-stock detection.</strong><br>
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor=text_json_diff" class="pure-button button-xsmall">Switch to Webpage Text/HTML, JSON and PDF changes mode.</a>
{% endif %}
</span>
</div>
<div class="pure-control-group">
<label for="title">Processing mode</label>
{% for a in available_processors %}
<a href="{{url_for('edit_page', uuid=uuid)}}?switch_processor={{ a[0] }}" class="pure-button button-xsmall {% if watch['processor'] == a[0] %}button-secondary{% endif %}">{{ a[1]}}.</a>
{% endfor %}
</div>
<div class="pure-control-group">
{{ render_field(form.title, class="m-d") }}
@@ -108,6 +107,7 @@
</div>
<div class="tab-pane-inner" id="request">
{% if processor_config['needs_request_fetch_method'] %}
<div class="pure-control-group inline-radio">
{{ render_field(form.fetch_backend, class="fetch-backend") }}
<span class="pure-form-message-inline">
@@ -116,6 +116,7 @@
Tip: <a href="https://github.com/dgtlmoon/changedetection.io/wiki/Proxy-configuration#brightdata-proxy-support">Connect using Bright Data and Oxylabs Proxies, find out more here.</a>
</span>
</div>
{% endif %}
{% if form.proxy %}
<div class="pure-control-group inline-radio">
<div>{{ form.proxy.label }} <a href="" id="check-all-proxies" class="pure-button button-secondary button-xsmall" >Check/Scan all</a></div>
@@ -193,7 +194,7 @@ User-Agent: wonderbra 1.0") }}
</div>
</fieldset>
</div>
{% if playwright_enabled %}
{% if playwright_enabled and processor_config['needs_browsersteps'] %}
<div class="tab-pane-inner" id="browser-steps">
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">
<fieldset>
@@ -264,8 +265,10 @@ User-Agent: wonderbra 1.0") }}
</fieldset>
</div>
{% if watch['processor'] == 'text_json_diff' %}
{% if processor_config['needs_filters'] %}
<div class="tab-pane-inner" id="filters-and-triggers">
<div class="text-filtering">
<h3>Filter by HTML element</h3>
<div class="pure-control-group">
<strong>Pro-tips:</strong><br>
<ul>
@@ -315,7 +318,7 @@ xpath://body/div/span[contains(@class, 'example-class')]",
href="https://github.com/dgtlmoon/changedetection.io/wiki/CSS-Selector-help">here for more CSS selector help</a>.<br>
</span>
</div>
<fieldset class="pure-control-group">
<div class="pure-control-group">
{{ render_field(form.subtractive_selectors, rows=5, placeholder="header
footer
nav
@@ -326,7 +329,8 @@ nav
<li> Add multiple elements or CSS selectors per line to ignore multiple parts of the HTML. </li>
</ul>
</span>
</fieldset>
</div>
</div>
<div class="text-filtering">
<fieldset class="pure-group" id="text-filtering-type-options">
<h3>Text filtering</h3>
@@ -423,7 +427,7 @@ Unavailable") }}
</div>
{% endif %}
{% if watch['processor'] == 'text_json_diff' %}
{% if processor_config['needs_visualselector'] %}
<div class="tab-pane-inner visual-selector-ui" id="visualselector">
<img class="beta-logo" src="{{url_for('static_content', group='images', filename='beta-logo.png')}}" alt="New beta functionality">

View File

@@ -22,6 +22,7 @@
<li class="tab"><a href="#filters">Global Filters</a></li>
<li class="tab"><a href="#api">API</a></li>
<li class="tab"><a href="#proxies">CAPTCHA &amp; Proxies</a></li>
<li class="tab"><a href="#plugins">Plugins</a></li>
</ul>
</div>
<div class="box-wrap inner">
@@ -243,6 +244,12 @@ nav
{{ render_field(form.requests.form.extra_browsers) }}
</div>
</div>
<div class="tab-pane-inner" id="plugins">
available plugin on/off stuff here
how to let each one expose config?
</div>
<div id="actions">
<div class="pure-control-group">
{{ render_button(form.save_button) }}

View File

@@ -259,6 +259,13 @@ class update_worker(threading.Thread):
update_handler = restock_diff.perform_site_check(datastore=self.datastore,
watch_uuid=uuid
)
elif processor.startswith('plugin_processor_'):
from .flask_app import get_plugin_manager
pm = get_plugin_manager()
x = pm.hook.processor_call(processor_name=processor, datastore=self.datastore, watch_uuid=uuid)
if x:
update_handler = x
else:
# Used as a default and also by some tests
update_handler = text_json_diff.perform_site_check(datastore=self.datastore,

View File

@@ -73,4 +73,5 @@ pytest-flask ~=1.2
# Pin jsonschema version to prevent build errors on armv6 while rpds-py wheels aren't available (1708)
jsonschema==4.17.3
pluggy
loguru